summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAssaf Gordon <assafgordon@gmail.com>2012-12-06 22:30:23 +0000
committerPádraig Brady <P@draigBrady.com>2013-02-04 23:40:32 +0000
commit8a303a8963dd255566f1625243ff19b029e0ecec (patch)
treea22eb5e5885b4044e244f2b0bc0387bf04b0849b
parentbe7932e863de07c4c7e4fc3c1db3eb6d04ba9af5 (diff)
downloadcoreutils-8a303a8963dd255566f1625243ff19b029e0ecec.tar.gz
numfmt: a new command to format numbers
* AUTHORS: Add my name. * NEWS: Mention the new program. * README: Reference the new program. * src/numfmt.c: New file. * src/.gitignore: Ignore the new binary. * build-aux/gen-lists-of-programs.sh: Update. * scripts/git-hooks/commit-msg: Allow numfmt: commit prefix. * po/POTFILES.in: Add new c file. * tests/misc/numfmt.pl: A new test file giving >93% coverage. * tests/local.mk: Reference the new test. * man/.gitignore: Ignore the new man page. * man/local.mk: Reference the new man page. * man/numfmt.x: A new template. * doc/coreutils.texi: Document the new command.
-rw-r--r--AUTHORS1
-rw-r--r--NEWS4
-rw-r--r--README6
-rwxr-xr-xbuild-aux/gen-lists-of-programs.sh1
-rw-r--r--doc/coreutils.texi328
-rw-r--r--man/.gitignore1
-rw-r--r--man/local.mk1
-rw-r--r--man/numfmt.x4
-rw-r--r--po/POTFILES.in1
-rwxr-xr-xscripts/git-hooks/commit-msg2
-rw-r--r--src/.gitignore1
-rw-r--r--src/numfmt.c1522
-rw-r--r--tests/local.mk1
-rw-r--r--tests/misc/numfmt.pl936
14 files changed, 2803 insertions, 6 deletions
diff --git a/AUTHORS b/AUTHORS
index 552e9d491..df21e90ac 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -52,6 +52,7 @@ nice: David MacKenzie
nl: Scott Bartram, David MacKenzie
nohup: Jim Meyering
nproc: Giuseppe Scrivano
+numfmt: Assaf Gordon
od: Jim Meyering
paste: David M. Ihnat, David MacKenzie
pathchk: Paul Eggert, David MacKenzie, Jim Meyering
diff --git a/NEWS b/NEWS
index 60db55533..f3af75c3e 100644
--- a/NEWS
+++ b/NEWS
@@ -2,6 +2,10 @@ GNU coreutils NEWS -*- outline -*-
* Noteworthy changes in release ?.? (????-??-??) [?]
+** New programs
+
+ numfmt: reformat numbers
+
** New features
df now accepts the --output[=FIELD_LIST] option to define the list of columns
diff --git a/README b/README
index 44da8e891..a7cbacffb 100644
--- a/README
+++ b/README
@@ -11,9 +11,9 @@ The programs that can be built with this package are:
csplit cut date dd df dir dircolors dirname du echo env expand expr
factor false fmt fold groups head hostid hostname id install join kill
link ln logname ls md5sum mkdir mkfifo mknod mktemp mv nice nl nohup
- nproc od paste pathchk pinky pr printenv printf ptx pwd readlink realpath
- rm rmdir runcon seq sha1sum sha224sum sha256sum sha384sum sha512sum shred
- shuf sleep sort split stat stdbuf stty sum sync tac tail tee test
+ nproc numfmt od paste pathchk pinky pr printenv printf ptx pwd readlink
+ realpath rm rmdir runcon seq sha1sum sha224sum sha256sum sha384sum sha512sum
+ shred shuf sleep sort split stat stdbuf stty sum sync tac tail tee test
timeout touch tr true truncate tsort tty uname unexpand uniq unlink
uptime users vdir wc who whoami yes
diff --git a/build-aux/gen-lists-of-programs.sh b/build-aux/gen-lists-of-programs.sh
index 212ce027b..bf63ee30a 100755
--- a/build-aux/gen-lists-of-programs.sh
+++ b/build-aux/gen-lists-of-programs.sh
@@ -85,6 +85,7 @@ normal_progs='
nl
nproc
nohup
+ numfmt
od
paste
pathchk
diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index e29af8bec..e248af0e2 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -85,6 +85,7 @@
* nl: (coreutils)nl invocation. Number lines and write files.
* nohup: (coreutils)nohup invocation. Immunize to hangups.
* nproc: (coreutils)nproc invocation. Print the number of processors.
+* numfmt: (coreutils)numfmt invocation. Reformat numbers.
* od: (coreutils)od invocation. Dump files in octal, etc.
* paste: (coreutils)paste invocation. Merge lines of files.
* pathchk: (coreutils)pathchk invocation. Check file name portability.
@@ -238,9 +239,12 @@ Output of entire files
Formatting file contents
* fmt invocation:: Reformat paragraph text
+* numfmt invocation:: Reformat numbers
* pr invocation:: Paginate or columnate files for printing
* fold invocation:: Wrap input lines to fit in specified width
+@command{numfmt}: General Options, Units
+
Output of parts of files
* head invocation:: Output the first part of files
@@ -764,8 +768,8 @@ other exit status values and a few associate different
meanings with the values @samp{0} and @samp{1}.
Here are some of the exceptions:
@command{chroot}, @command{env}, @command{expr}, @command{nice},
-@command{nohup}, @command{printenv}, @command{sort}, @command{stdbuf},
-@command{test}, @command{timeout}, @command{tty}.
+@command{nohup}, @command{numfmt}, @command{printenv}, @command{sort},
+@command{stdbuf}, @command{test}, @command{timeout}, @command{tty}.
@node Backup options
@@ -2122,6 +2126,7 @@ These commands reformat the contents of files.
@menu
* fmt invocation:: Reformat paragraph text.
+* numfmt invocation:: Reformat numbers.
* pr invocation:: Paginate or columnate files for printing.
* fold invocation:: Wrap input lines to fit in specified width.
@end menu
@@ -2231,6 +2236,325 @@ leaving the code unchanged.
@exitstatus
+@node numfmt invocation
+@section @command{numfmt}: Reformat numbers
+
+@pindex numfmt
+
+@command{numfmt} reads numbers in various representations and reformats them
+as requested. The most common usage is converting numbers to/from @emph{human}
+representation (e.g. @samp{4G} @expansion{} @samp{4,000,000,000}).
+
+@example
+numfmt [@var{option}]@dots{} [@var{number}]
+@end example
+
+@command{numfmt} converts each @var{number} on the command-line according to the
+specified options (see below). If no @var{number}s are given, it reads numbers
+from standard input. @command{numfmt} can optionally extract numbers from
+specific columns, maintaining proper line padding and alignment.
+
+@exitstatus
+
+See @option{--invalid} for additional information regarding exit status.
+
+@subsection General options
+
+The program accepts the following options. Also see @ref{Common options}.
+
+@table @samp
+
+@item --from=@var{unit}
+@opindex --from
+Auto-scales input numbers according to @var{unit}. See @emph{Units} below.
+The default is no scaling, meaning suffixes (e.g. @samp{M}, @samp{G}) will
+trigger an error.
+
+@item --from-unit=@var{n}
+@opindex --from-unit
+Specify the input unit size (instead of the default 1). Use this option when the
+input numbers represent other units (e.g. if the input number @samp{10}
+represents 10 units of 512 bytes, use @samp{--from-unit=512}).
+
+@item --to=@var{unit}
+@opindex --to
+Auto-scales output numbers according to @var{unit}. See @emph{Units} below.
+The default is no scaling, meaning all the digits of the number are printed.
+
+@item --to-unit=@var{n}
+@opindex --to-unit
+Specify the output unit size (instead of the default 1). Use this option when
+the output numbers represent other units (e.g. to represent @samp{4,000,000}
+bytes in blocks of 1KB, use @samp{--to=si --to-unit=1000}).
+
+@item --round=@var{method}
+@opindex --round
+@opindex --round=up
+@opindex --round=down
+@opindex --round=from-zero
+@opindex --round=towards-zero
+@opindex --round=nearest
+When converting number representations, round the number according to
+@var{method}, which can be @samp{up}, @samp{down},
+@samp{from-zero} (the default), @samp{towards-zero}, @samp{nearest}.
+
+@item --suffix=@var{suffix}
+@opindex --suffix
+Add @samp{SUFFIX} to the output numbers, and accept optional @samp{SUFFIX} in
+input numbers.
+
+@item --padding=@var{n}
+@opindex --padding
+Pad the output numbers to @var{n} characters, by adding spaces. If @var{n} is
+a positive number, numbers will be right-aligned. If @var{n} is a negative
+number, numbers will be left-aligned. By default, numbers are automatically
+aligned based on the input line's width (only with the default delimiter).
+
+@item --grouping
+@opindex --grouping
+Group digits in output numbers according to the current locale's grouping rules
+(e.g. @emph{Thousands Separator} character, commonly @samp{.} (dot) or @samp{,}
+(comma)). This option has no effect in @samp{POSIX/C} locale.
+
+@item --header[=@var{n}]
+@opindex --header
+@opindex --header=N
+Print the first @var{n} (default: 1) lines without any conversion.
+
+@item --field=@var{n}
+@opindex --field
+Convert the number in input field @var{n} (default: 1).
+
+@item --format=@var{format}
+@opindex --format
+Use printf-style floating FORMAT string. The @var{format} string must contain
+one @samp{%f} directive, optionally with @samp{'}, @samp{-}, or width
+modifiers. @samp{'} modifier will enable @option{--grouping}. @samp{-} modifier
+will enable left-aligned @option{--padding}. Width modifier will enable
+right-aligned @option{--padding}.
+
+@item --invalid=@var{mode}
+@opindex --invalid
+The default action on input errors is to exit immediately with status code 2.
+@option{--invalid=@samp{abort}} explicitly specifies this default mode.
+With a @var{mode} of @samp{fail}, print a warning for @emph{each} conversion
+error, and exit with status 2. With a @var{mode} of @samp{warn}, exit with
+status 0, even in the presence of conversion errors, and with a @var{mode} of
+@samp{ignore} do not even print diagnostics.
+
+@item -d @var{d}
+@itemx --delimiter=@var{d}
+@opindex -d
+@opindex --delimiter
+Use the character @var{d} as input field separator (default: whitespace).
+@emph{Note}: Using non-default delimiter turns off automatic padding.
+
+@item --debug
+@opindex --debug
+Print (to standard error) warning messages about possible erroneous usage.
+
+@end table
+
+@subsection Possible @var{unit}s:
+
+The following are the possible @var{unit} options with @option{--from=UNITS} and
+@option{--to=UNITS}:
+
+@table @var
+
+@item none
+No scaling is performed. For input numbers, no suffixes are accepted, and any
+trailing characters following the number will trigger an error. For output
+numbers, all digits of the numbers will be printed.
+
+@item si
+Auto-scale numbers according to the @emph{International System of Units (SI)}
+standard.
+For input numbers, accept one of the following suffixes.
+For output numbers, values larger than 1000 will be rounded, and printed with
+one of the following suffixes:
+
+@example
+@samp{K} => @math{1000^1 = 10^3} (Kilo)
+@samp{M} => @math{1000^2 = 10^6} (Mega)
+@samp{G} => @math{1000^3 = 10^9} (Giga)
+@samp{T} => @math{1000^4 = 10^{12}} (Tera)
+@samp{P} => @math{1000^5 = 10^{15}} (Peta)
+@samp{E} => @math{1000^6 = 10^{18}} (Exa)
+@samp{Z} => @math{1000^7 = 10^{21}} (Zetta)
+@samp{Y} => @math{1000^8 = 10^{24}} (Yotta)
+@end example
+
+@item iec
+Auto-scale numbers according to the @emph{International Electrotechnical
+Commission (IEC)} standard.
+For input numbers, accept one of the following suffixes.
+For output numbers, values larger than 1024 will be rounded, and printed with
+one of the following suffixes:
+
+@example
+@samp{K} => @math{1024^1 = 2^{10}} (Kibi)
+@samp{M} => @math{1024^2 = 2^{20}} (Mebi)
+@samp{G} => @math{1024^3 = 2^{30}} (Gibi)
+@samp{T} => @math{1024^4 = 2^{40}} (Tebi)
+@samp{P} => @math{1024^5 = 2^{50}} (Pebi)
+@samp{E} => @math{1024^6 = 2^{60}} (Exbi)
+@samp{Z} => @math{1024^7 = 2^{70}} (Zebi)
+@samp{Y} => @math{1024^8 = 2^{80}} (Yobi)
+@end example
+
+The @option{iec} option uses a single letter suffix (e.g. @samp{G}), which is
+not fully standard, as the @emph{iec} standard recommends a two-letter symbol
+(e.g. @samp{Gi}) - but in practice, this method is common. Compare with
+the @option{iec-i} option.
+
+@item iec-i
+Auto-scale numbers according to the @emph{International Electrotechnical
+Commission (IEC)} standard.
+For input numbers, accept one of the following suffixes.
+For output numbers, values larger than 1024 will be rounded, and printed with
+one of the following suffixes:
+
+@example
+@samp{Ki} => @math{1024^1 = 2^{10}} (Kibi)
+@samp{Mi} => @math{1024^2 = 2^{20}} (Mebi)
+@samp{Gi} => @math{1024^3 = 2^{30}} (Gibi)
+@samp{Ti} => @math{1024^4 = 2^{40}} (Tebi)
+@samp{Pi} => @math{1024^5 = 2^{50}} (Pebi)
+@samp{Ei} => @math{1024^6 = 2^{60}} (Exbi)
+@samp{Zi} => @math{1024^7 = 2^{70}} (Zebi)
+@samp{Yi} => @math{1024^8 = 2^{80}} (Yobi)
+@end example
+
+The @option{iec-i} option uses a two-letter suffix symbol (e.g. @samp{Gi}),
+as the @emph{iec} standard recommends, but this is not always common in
+practice. Compare with the @option{iec} option.
+
+@item auto
+@samp{auto} can only be used with @option{--from}. With this method, numbers
+with @samp{K},@samp{M},@samp{G},@samp{T},@samp{P},@samp{E},@samp{Z},@samp{Y}
+suffixes are interpreted as @emph{SI} values, and numbers with @samp{Ki},
+@samp{Mi},@samp{Gi},@samp{Ti},@samp{Pi},@samp{Ei},@samp{Zi},@samp{Yi} suffixes
+are interpreted as @emph{IEC} values.
+
+@end table
+
+@subsection Examples of using @command{numfmt}
+
+Converting a single number from/to @emph{human} representation:
+@example
+$ numfmt --to=si 500000
+500K
+
+$ numfmt --to=iec 500000
+489K
+
+$ numfmt --to=iec-i 500000
+489Ki
+
+$ numfmt --from=si 1M
+1000000
+
+$ numfmt --from=iec 1M
+1048576
+
+# with '--from=auto', M=Mega, Mi=Mebi
+$ numfmt --from=auto 1M
+1000000
+$ numfmt --from=auto 1Mi
+1048576
+@end example
+
+Converting from @samp{SI} to @samp{IEC} scales (e.g. when a harddisk capacity is
+advertised as @samp{1TB}, while checking the drive's capacity gives lower
+values):
+
+@example
+$ numfmt --from=si --to=iec 1T
+932G
+@end example
+
+
+Converting a single field from an input file / piped input (these contrived
+examples are for demonstration purposes only, as both @command{ls} and
+@command{df} support the @option{--human-readable} option to
+output sizes in human-readable format):
+
+@example
+# Third field (file size) will be shown in SI representation
+$ ls -log | numfmt --field 3 --header --to=si | head -n4
+-rw-r--r-- 1 94K Aug 23 2011 ABOUT-NLS
+-rw-r--r-- 1 3.7K Jan 7 16:15 AUTHORS
+-rw-r--r-- 1 36K Jun 1 2011 COPYING
+-rw-r--r-- 1 0 Jan 7 15:15 ChangeLog
+
+# Second field (size) will be shown in IEC representation
+$ df --block-size=1 | numfmt --field 2 --header --to=iec | head -n4
+File system 1B-blocks Used Available Use% Mounted on
+rootfs 132G 104741408 26554036 80% /
+tmpfs 794M 7580 804960 1% /run/shm
+/dev/sdb1 694G 651424756 46074696 94% /home
+@end example
+
+
+Output can be tweaked using @option{--padding} or @option{--format}:
+
+@example
+# Pad to 10 characters, right-aligned
+$ du -s * | numfmt --to=si --padding=10
+ 2.5K config.log
+ 108 config.status
+ 1.7K configure
+ 20 configure.ac
+
+# Pad to 10 characters, left-aligned
+$ du -s * | numfmt --to=si --padding=-10
+2.5K config.log
+108 config.status
+1.7K configure
+20 configure.ac
+
+# Pad to 10 characters, right-aligned, using 'format'
+$ du -s * | numfmt --to=si --format="%10f"
+ 2.5K config.log
+ 108 config.status
+ 1.7K configure
+ 20 configure.ac
+
+# Pad to 10 characters, left-aligned, using 'format'
+$ du -s * | numfmt --to=si --format="%-10f"
+2.5K config.log
+108 config.status
+1.7K configure
+20 configure.ac
+@end example
+
+With locales that support grouping digits, using @option{--grouping} or
+@option{--format} enables grouping. In @samp{POSIX} locale, grouping is silently
+ignored:
+
+@example
+$ LC_ALL=C numfmt --from=iec --grouping 2G
+2147483648
+
+$ LC_ALL=en_US.utf8 numfmt --from=iec --grouping 2G
+2,147,483,648
+
+$ LC_ALL=ta_IN numfmt --from=iec --grouping 2G
+2,14,74,83,648
+
+$ LC_ALL=C ./src/numfmt --from=iec --format="==%'15f==" 2G
+== 2147483648==
+
+$ LC_ALL=en_US.utf8 ./src/numfmt --from=iec --format="==%'15f==" 2G
+== 2,147,483,648==
+
+$ LC_ALL=en_US.utf8 ./src/numfmt --from=iec --format="==%'-15f==" 2G
+==2,147,483,648 ==
+
+$ LC_ALL=ta_IN ./src/numfmt --from=iec --format="==%'15f==" 2G
+== 2,14,74,83,648==
+@end example
@node pr invocation
@section @command{pr}: Paginate or columnate files for printing
diff --git a/man/.gitignore b/man/.gitignore
index aa6fa5133..aef400292 100644
--- a/man/.gitignore
+++ b/man/.gitignore
@@ -49,6 +49,7 @@ nice.1
nl.1
nohup.1
nproc.1
+numfmt.1
od.1
paste.1
pathchk.1
diff --git a/man/local.mk b/man/local.mk
index 7422acbf3..b5e6e21da 100644
--- a/man/local.mk
+++ b/man/local.mk
@@ -115,6 +115,7 @@ man/nice.1: src/nice
man/nl.1: src/nl
man/nohup.1: src/nohup
man/nproc.1: src/nproc
+man/numfmt.1: src/numfmt
man/od.1: src/od
man/paste.1: src/paste
man/pathchk.1: src/pathchk
diff --git a/man/numfmt.x b/man/numfmt.x
new file mode 100644
index 000000000..6deaff1c8
--- /dev/null
+++ b/man/numfmt.x
@@ -0,0 +1,4 @@
+[NAME]
+numfmt \- Convert numbers from/to human-readable strings
+[DESCRIPTION]
+.\" Add any additional description here
diff --git a/po/POTFILES.in b/po/POTFILES.in
index 5a11ef657..21617cc7c 100644
--- a/po/POTFILES.in
+++ b/po/POTFILES.in
@@ -91,6 +91,7 @@ src/nice.c
src/nl.c
src/nohup.c
src/nproc.c
+src/numfmt.c
src/od.c
src/operand2sig.c
src/paste.c
diff --git a/scripts/git-hooks/commit-msg b/scripts/git-hooks/commit-msg
index 56286d486..173210380 100755
--- a/scripts/git-hooks/commit-msg
+++ b/scripts/git-hooks/commit-msg
@@ -16,7 +16,7 @@ my @valid = qw(
cp csplit cut date dd df dir dircolors dirname du echo env expand
expr factor false fmt fold groups head hostid hostname id install
join kill link ln logname ls md5sum mkdir mkfifo mknod mktemp
- mv nice nl nohup nproc od paste pathchk pinky pr printenv printf
+ mv nice nl nohup nproc numfmt od paste pathchk pinky pr printenv printf
ptx pwd readlink realpath rm rmdir runcon seq sha1sum sha224sum sha256sum
sha384sum sha512sum shred shuf sleep sort split stat stdbuf stty
sum sync tac tail tee test timeout touch tr true truncate tsort
diff --git a/src/.gitignore b/src/.gitignore
index 18cccc1d7..25573dfa2 100644
--- a/src/.gitignore
+++ b/src/.gitignore
@@ -59,6 +59,7 @@ nice
nl
nohup
nproc
+numfmt
od
paste
pathchk
diff --git a/src/numfmt.c b/src/numfmt.c
new file mode 100644
index 000000000..5929dbab5
--- /dev/null
+++ b/src/numfmt.c
@@ -0,0 +1,1522 @@
+/* Reformat numbers like 11505426432 to the more human-readable 11G
+ Copyright (C) 2012 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+#include <float.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <langinfo.h>
+
+#include "mbsalign.h"
+#include "argmatch.h"
+#include "error.h"
+#include "quote.h"
+#include "system.h"
+#include "xstrtol.h"
+#include "xstrndup.h"
+
+/* The official name of this program (e.g., no 'g' prefix). */
+#define PROGRAM_NAME "numfmt"
+
+#define AUTHORS proper_name ("Assaf Gordon")
+
+/* Exit code when some numbers fail to convert. */
+enum { EXIT_CONVERSION_WARNINGS = 2 };
+
+/* Codes for the long-only options; these are the values stored in the
+ 'val' member of the 'longopts' entries. Numbering starts at
+ CHAR_MAX + 1, i.e. above every value a single 'char' option letter
+ (such as 'd') can take. */
+enum
+{
+ FROM_OPTION = CHAR_MAX + 1,
+ FROM_UNIT_OPTION,
+ TO_OPTION,
+ TO_UNIT_OPTION,
+ ROUND_OPTION,
+ SUFFIX_OPTION,
+ GROUPING_OPTION,
+ PADDING_OPTION,
+ FIELD_OPTION,
+ DEBUG_OPTION,
+ DEV_DEBUG_OPTION,
+ HEADER_OPTION,
+ FORMAT_OPTION,
+ INVALID_OPTION
+};
+
+/* Unit-scaling modes selectable for input (--from) and output (--to). */
+enum scale_type
+{
+ scale_none, /* the default: no scaling. */
+ scale_auto, /* --from only. */
+ scale_SI,
+ scale_IEC,
+ scale_IEC_I /* 'i' suffix is required. */
+};
+
+/* Spellings accepted for the input scale; NULL-terminated.
+ Entry N pairs with scale_from_types[N] (parallel arrays,
+ presumably looked up through argmatch - confirm at the call site). */
+static char const *const scale_from_args[] =
+{
+ "none", "auto", "si", "iec", "iec-i", NULL
+};
+
+/* Values matching scale_from_args, in the same order. */
+static enum scale_type const scale_from_types[] =
+{
+ scale_none, scale_auto, scale_SI, scale_IEC, scale_IEC_I
+};
+
+/* Spellings accepted for the output scale; NULL-terminated.
+ "auto" is absent here because scale_auto is input-only. */
+static char const *const scale_to_args[] =
+{
+ "none", "si", "iec", "iec-i", NULL
+};
+
+/* Values matching scale_to_args, in the same order. */
+static enum scale_type const scale_to_types[] =
+{
+ scale_none, scale_SI, scale_IEC, scale_IEC_I
+};
+
+
+/* Rounding methods; each one is dispatched by 'simple_round' to the
+ matching simple_round_* helper. */
+enum round_type
+{
+ round_ceiling,
+ round_floor,
+ round_from_zero,
+ round_to_zero,
+ round_nearest,
+};
+
+/* Spellings accepted for the rounding method; NULL-terminated.
+ Entry N pairs with round_types[N]. */
+static char const *const round_args[] =
+{
+ "up", "down", "from-zero", "towards-zero", "nearest", NULL
+};
+
+/* Values matching round_args, in the same order. */
+static enum round_type const round_types[] =
+{
+ round_ceiling, round_floor, round_from_zero, round_to_zero, round_nearest
+};
+
+
+/* Policies for handling input that fails to convert
+ (the modes of the --invalid option). */
+enum inval_type
+{
+ inval_abort,
+ inval_fail,
+ inval_warn,
+ inval_ignore
+};
+
+/* Spellings accepted for the policy; NULL-terminated.
+ Entry N pairs with inval_types[N]. */
+static char const *const inval_args[] =
+{
+ "abort", "fail", "warn", "ignore", NULL
+};
+
+/* Values matching inval_args, in the same order. */
+static enum inval_type const inval_types[] =
+{
+ inval_abort, inval_fail, inval_warn, inval_ignore
+};
+
+/* Long-option table. Each entry maps an option name to the code
+ reported when it is seen; only "delimiter" shares its code with a
+ short option letter ('d').
+ NOTE(review): the "-devdebug" name begins with a dash, so it would
+ have to be typed with three dashes (---devdebug) - presumably to keep
+ this developer-only switch out of the way; confirm. */
+static struct option const longopts[] =
+{
+ {"from", required_argument, NULL, FROM_OPTION},
+ {"from-unit", required_argument, NULL, FROM_UNIT_OPTION},
+ {"to", required_argument, NULL, TO_OPTION},
+ {"to-unit", required_argument, NULL, TO_UNIT_OPTION},
+ {"round", required_argument, NULL, ROUND_OPTION},
+ {"padding", required_argument, NULL, PADDING_OPTION},
+ {"suffix", required_argument, NULL, SUFFIX_OPTION},
+ {"grouping", no_argument, NULL, GROUPING_OPTION},
+ {"delimiter", required_argument, NULL, 'd'},
+ {"field", required_argument, NULL, FIELD_OPTION},
+ {"debug", no_argument, NULL, DEBUG_OPTION},
+ {"-devdebug", no_argument, NULL, DEV_DEBUG_OPTION},
+ {"header", optional_argument, NULL, HEADER_OPTION},
+ {"format", required_argument, NULL, FORMAT_OPTION},
+ {"invalid", required_argument, NULL, INVALID_OPTION},
+ {GETOPT_HELP_OPTION_DECL},
+ {GETOPT_VERSION_OPTION_DECL},
+ {NULL, 0, NULL, 0}
+};
+
+/* If delimiter has this value, blanks separate fields. */
+enum { DELIMITER_DEFAULT = CHAR_MAX + 1 };
+
+/* Maximum number of digits we can safely handle
+ without precision loss, if scaling is 'none'. */
+enum { MAX_UNSCALED_DIGITS = 18 };
+
+/* Maximum number of digits we can work with.
+ This is equivalent to 999Y.
+ NOTE: 'long double' can handle more than that, but there's
+ no official suffix assigned beyond Yotta (1000^8). */
+enum { MAX_ACCEPTABLE_DIGITS = 27 };
+
+/* Conversion settings, initialized to their defaults: no scaling in
+ either direction, round away from zero, abort on invalid input,
+ unit sizes of 1, no grouping and no explicit padding or format. */
+static enum scale_type scale_from = scale_none;
+static enum scale_type scale_to = scale_none;
+static enum round_type _round = round_from_zero;
+static enum inval_type _invalid = inval_abort;
+static const char *suffix = NULL;
+static uintmax_t from_unit_size = 1;
+static uintmax_t to_unit_size = 1;
+static int grouping = 0;
+static char *padding_buffer = NULL;
+static size_t padding_buffer_size = 0;
+static long int padding_width = 0;
+static const char *format_str = NULL;
+static char *format_str_prefix = NULL;
+static char *format_str_suffix = NULL;
+
+/* By default, any conversion error will terminate the program. */
+static int conv_exit_code = EXIT_CONVERSION_WARNINGS;
+
+
+/* auto-pad each line based on skipped whitespace. */
+static int auto_padding = 0;
+static mbs_align_t padding_alignment = MBS_ALIGN_RIGHT;
+/* Number of the input field to convert; starts at 1. */
+static long int field = 1;
+/* Field separator character, or DELIMITER_DEFAULT for blanks. */
+static int delimiter = DELIMITER_DEFAULT;
+
+/* if non-zero, the first 'header' lines from STDIN are skipped. */
+static uintmax_t header = 0;
+
+/* Debug for users: print warnings to STDERR about possible
+ error (similar to sort's debug). */
+static int debug = 0;
+
+/* debugging for developers - to be removed in final version? */
+static int dev_debug = 0;
+
+/* will be set according to the current locale. */
+static const char *decimal_point;
+static int decimal_point_length;
+
+
+/* Return the numeric base implied by SCALE: 1024 for the two IEC
+   variants, and 1000 for every other value (none, auto, SI, or
+   anything unexpected).  */
+static inline int
+default_scale_base (enum scale_type scale)
+{
+  if (scale == scale_IEC || scale == scale_IEC_I)
+    return 1024;
+
+  return 1000;
+}
+
+/* Return nonzero if SUF is one of the recognized magnitude letters
+   (K, M, G, T, P, E, Z, Y), and zero otherwise.
+
+   The NUL byte is rejected explicitly: strchr considers the string
+   terminator part of the string, so strchr (s, '\0') never returns
+   NULL and the end of the input would otherwise pass for a suffix.  */
+static inline int
+valid_suffix (const char suf)
+{
+  static const char *valid_suffixes = "KMGTPEZY";
+  return suf != '\0' && strchr (valid_suffixes, suf) != NULL;
+}
+
+/* Map a magnitude letter to its exponent: 'K' (kilo/kibi) is 1,
+   'M' is 2, and so on up to 'Y' (yotta, or 2**80) which is 8.
+   Any other character, including NUL, yields 0.  */
+static inline int
+suffix_power (const char suf)
+{
+  /* The position of a letter in this string, plus one, is its power.  */
+  static const char letters[] = "KMGTPEZY";
+  int i;
+
+  for (i = 0; letters[i] != '\0'; i++)
+    if (letters[i] == suf)
+      return i + 1;
+
+  return 0;
+}
+
+/* Inverse of the letter-to-exponent mapping: return the suffix string
+   for POWER.  Power 0 gives the empty string, 1 through 8 give "K"
+   through "Y", and anything larger gives the literal "(error)".  */
+static inline const char *
+suffix_power_character (unsigned int power)
+{
+  static const char *const names[] =
+    {
+      "", "K", "M", "G", "T", "P", "E", "Z", "Y"
+    };
+
+  if (power < sizeof names / sizeof names[0])
+    return names[power];
+
+  return "(error)";
+}
+
+/* Raise BASE to the non-negative integer power X by repeated
+   multiplication; a stand-in for 'powl(3)' that avoids 'libm'.
+   X == 0 yields 1.  Nothing here checks for overflow or infinity.  */
+static long double
+powerld (long double base, unsigned int x)
+{
+  long double product = 1;
+  unsigned int i;
+
+  for (i = 0; i < x; i++)
+    product *= base;
+
+  return product;
+}
+
+/* Absolute value of a 'long double'; a stand-in for 'fabs(3)' that
+   avoids 'libm'.  */
+static inline long double
+absld (long double val)
+{
+  if (val < 0)
+    return -val;
+
+  return val;
+}
+
+/* Divide VAL by BASE until its magnitude drops below BASE.  Return the
+   reduced value and, when X is not NULL, store the number of divisions
+   in *X, so that  returned-value * BASE^(*X) = original VAL.
+   A limited, integer-exponent relative of "frexpl(3)" that avoids
+   'libm'.  Values outside [-LDBL_MAX, LDBL_MAX] (and NaN, which fails
+   both comparisons) are returned unchanged with a power of 0.  */
+static long double
+expld (long double val, unsigned int base, unsigned int /*output */ *x)
+{
+  unsigned int divisions = 0;
+
+  if (-LDBL_MAX <= val && val <= LDBL_MAX)
+    {
+      /* Compare the magnitude of VAL against BASE.  */
+      for (; (val < 0 ? -val : val) >= base; val /= base)
+        ++divisions;
+    }
+
+  if (x != NULL)
+    *x = divisions;
+
+  return val;
+}
+
+/* Minimal 'ceil' that avoids 'libm': truncate VAL and add one if the
+   truncation landed below it.
+   Assumes values that fit in intmax_t.  */
+static inline intmax_t
+simple_round_ceiling (long double val)
+{
+  intmax_t truncated = val;
+
+  return truncated < val ? truncated + 1 : truncated;
+}
+
+/* Minimal 'floor' that avoids 'libm', expressed through the ceiling
+   helper: floor (v) == -ceiling (-v).
+   Assumes values that fit in intmax_t.  */
+static inline intmax_t
+simple_round_floor (long double val)
+{
+  intmax_t ceiling_of_negated = simple_round_ceiling (-val);
+
+  return -ceiling_of_negated;
+}
+
+/* Minimal 'round away from zero': negative values are floored, all
+   others are ceiled.
+   Assumes values that fit in intmax_t.  */
+static inline intmax_t
+simple_round_from_zero (long double val)
+{
+  if (val < 0)
+    return simple_round_floor (val);
+
+  return simple_round_ceiling (val);
+}
+
+/* Minimal 'round towards zero': the conversion from 'long double' to
+   an integer type discards the fractional part.
+   Assumes values that fit in intmax_t.  */
+static inline intmax_t
+simple_round_to_zero (long double val)
+{
+  intmax_t truncated = val;
+
+  return truncated;
+}
+
+/* Minimal 'round' that avoids 'libm': push VAL half a unit away from
+   zero and let the conversion to an integer truncate the result.
+   Assumes values that fit in intmax_t.  */
+static inline intmax_t
+simple_round_nearest (long double val)
+{
+  if (val < 0)
+    return val - 0.5;
+
+  return val + 0.5;
+}
+
+/* Round VAL using the method selected by T, delegating to the matching
+   simple_round_* helper.  An unrecognized T yields 0; that branch only
+   exists to silence the compiler and should never be taken.  */
+static inline intmax_t
+simple_round (long double val, enum round_type t)
+{
+  if (t == round_ceiling)
+    return simple_round_ceiling (val);
+
+  if (t == round_floor)
+    return simple_round_floor (val);
+
+  if (t == round_from_zero)
+    return simple_round_from_zero (val);
+
+  if (t == round_to_zero)
+    return simple_round_to_zero (val);
+
+  if (t == round_nearest)
+    return simple_round_nearest (val);
+
+  return 0;
+}
+
+/* Result codes of the simple_strtod_* parsers. The two SSE_OK* values
+ mean a number was parsed (callers in this file test for exactly
+ those two before continuing); every other value is a failure. */
+enum simple_strtod_error
+{
+ SSE_OK = 0,
+ SSE_OK_PRECISION_LOSS,
+ SSE_OVERFLOW,
+ SSE_INVALID_NUMBER,
+
+ /* the following are returned by 'simple_strtod_human'. */
+ SSE_VALID_BUT_FORBIDDEN_SUFFIX,
+ SSE_INVALID_SUFFIX,
+ SSE_MISSING_I_SUFFIX
+};
+
+/* Read an *integer* INPUT_STR,
+   but return the integer value in a 'long double' VALUE
+   hence, no UINTMAX_MAX limitation.
+   NEGATIVE is updated, and is stored separately from the VALUE
+   so that signbit() isn't required to determine the sign of -0..
+   ENDPTR is required (unlike strtod) and is used to store a pointer
+   to the character after the last character used in the conversion.
+   VALUE is optional; pass NULL to only validate and locate the end.
+
+   Note locale'd grouping is not supported,
+   nor is skipping of white-space supported.
+
+   Returns:
+      SSE_OK - valid number.
+      SSE_OK_PRECISION_LOSS - if more than 18 digits were used.
+      SSE_OVERFLOW - if more than 27 digits (999Y) were used.
+      SSE_INVALID_NUMBER - if no digits were found.  */
+static enum simple_strtod_error
+simple_strtod_int (const char *input_str,
+                   char **endptr, long double *value, bool *negative)
+{
+  enum simple_strtod_error e = SSE_OK;
+
+  long double val = 0;
+  unsigned int digits = 0;
+
+  if (*input_str == '-')
+    {
+      input_str++;
+      *negative = true;
+    }
+  else
+    *negative = false;
+
+  *endptr = (char *) input_str;
+
+  /* The cast matters: handing isdigit a negative 'char' value (any
+     byte above 0x7F where 'char' is signed) is undefined behavior.
+     The loop stops at the terminating NUL because it is not a digit.  */
+  while (isdigit ((unsigned char) **endptr))
+    {
+      int digit = (**endptr) - '0';
+
+      /* can this happen in some strange locale? */
+      if (digit < 0 || digit > 9)
+        return SSE_INVALID_NUMBER;
+
+      /* Count the current digit first, so that both limits are applied
+         to the number of digits actually consumed: the 19th digit is
+         the first to flag precision loss, the 28th is an overflow.  */
+      ++digits;
+      if (digits > MAX_ACCEPTABLE_DIGITS)
+        return SSE_OVERFLOW;
+      if (digits > MAX_UNSCALED_DIGITS)
+        e = SSE_OK_PRECISION_LOSS;
+
+      val *= 10;
+      val += digit;
+
+      ++(*endptr);
+    }
+  if (digits == 0)
+    return SSE_INVALID_NUMBER;
+  if (*negative)
+    val = -val;
+
+  if (value)
+    *value = val;
+
+  return e;
+}
+
+/* Parse INPUT_STR of the form "NNNN[.NNNNN]" (the separator being the
+   locale's decimal point) and store the result in the 'long double'
+   VALUE.
+   ENDPTR is mandatory (unlike strtod); on return it points just past
+   the last character consumed.
+   PRECISION is optional; when given it receives the number of digits
+   found after the decimal point (0 if there is no fraction).
+
+   Locale-specific digit grouping is not supported, and leading
+   white-space is not skipped.
+
+   Returns:
+      SSE_OK - valid number.
+      SSE_OK_PRECISION_LOSS - if more than 18 digits were used.
+      SSE_OVERFLOW - if more than 27 digits (999Y) were used.
+      SSE_INVALID_NUMBER - if no digits were found.  */
+static enum simple_strtod_error
+simple_strtod_float (const char *input_str,
+                     char **endptr,
+                     long double *value,
+                     size_t *precision)
+{
+  bool is_negative;
+  enum simple_strtod_error status;
+
+  if (precision)
+    *precision = 0;
+
+  /* TODO: accept locale'd grouped values for the integral part.  */
+  status = simple_strtod_int (input_str, endptr, value, &is_negative);
+  if (status != SSE_OK && status != SSE_OK_PRECISION_LOSS)
+    return status;
+
+  /* An optional decimal point followed by the fraction digits.  */
+  if (STREQ_LEN (*endptr, decimal_point, decimal_point_length))
+    {
+      char *frac_end;
+      long double fraction = 0;
+      bool frac_negative;
+      enum simple_strtod_error frac_status;
+
+      (*endptr) += decimal_point_length;
+      frac_status = simple_strtod_int (*endptr, &frac_end, &fraction,
+                                       &frac_negative);
+      if (frac_status != SSE_OK && frac_status != SSE_OK_PRECISION_LOSS)
+        return frac_status;
+      if (frac_status == SSE_OK_PRECISION_LOSS)
+        status = frac_status;   /* Pass the warning on to the caller.  */
+      if (frac_negative)
+        return SSE_INVALID_NUMBER;      /* A sign after the point.  */
+
+      /* How many digits make up the fraction.  */
+      size_t frac_digits = frac_end - *endptr;
+
+      /* Turn the digits, read as an integer, into a fraction.  */
+      fraction /= powerld (10, frac_digits);
+
+      if (value)
+        {
+          /* Move further from zero, whichever the sign.  */
+          if (is_negative)
+            *value -= fraction;
+          else
+            *value += fraction;
+        }
+
+      if (precision)
+        *precision = frac_digits;
+
+      *endptr = frac_end;
+    }
+
+  return status;
+}
+
+/* Read a 'human' INPUT_STR represented as "NNNN[.NNNNN] + suffix",
+   and return the value in a 'long double' VALUE,
+   with the precision of the input returned in PRECISION.
+   ENDPTR is required (unlike strtod) and is used to store a pointer
+   to the character after the last character used in the conversion.
+   ALLOWED_SCALING determines the scaling supported.
+
+   Blanks between the number and the suffix are skipped.  A number
+   followed only by blanks is treated as having no suffix, and ENDPTR
+   is then left on the terminating NUL.
+
+   With 'scale_IEC_I' the 'i' is required after a suffix letter, but a
+   plain number without any suffix is accepted unscaled.
+
+   TODO:
+     support locale'd grouping
+     accept scientific and hex floats (probably use strtold directly)
+
+   Returns:
+      SSE_OK - valid number.
+      SSE_OK_PRECISION_LOSS - if more than 18 digits were used.
+      SSE_OVERFLOW - if more than 27 digits (999Y) were used.
+      SSE_INVALID_NUMBER - if no digits were found.
+      SSE_VALID_BUT_FORBIDDEN_SUFFIX
+      SSE_INVALID_SUFFIX
+      SSE_MISSING_I_SUFFIX  */
+static enum simple_strtod_error
+simple_strtod_human (const char *input_str,
+                     char **endptr, long double *value, size_t *precision,
+                     enum scale_type allowed_scaling)
+{
+  int power = 0;
+  /* 'scale_auto' is checked below.  */
+  int scale_base = default_scale_base (allowed_scaling);
+
+  if (dev_debug)
+    error (0, 0, _("simple_strtod_human:\n input string: '%s'\n "
+                   "locale decimal-point: '%s'\n"), input_str, decimal_point);
+
+  enum simple_strtod_error e =
+    simple_strtod_float (input_str, endptr, value, precision);
+  if (e != SSE_OK && e != SSE_OK_PRECISION_LOSS)
+    return e;
+
+  if (dev_debug)
+    error (0, 0, _(" parsed numeric value: %Lf\n"
+                   " input precision = %d\n"), *value, (int)*precision);
+
+  /* Skip any blanks between the number and suffix.  The cast avoids
+     handing isblank a negative 'char' value, which is undefined.  */
+  while (isblank ((unsigned char) **endptr))
+    (*endptr)++;
+
+  /* Test for the end of the string only after the blanks are gone:
+     otherwise "5 " would reach the suffix code with a NUL byte and the
+     pointer would be advanced past the terminator.  */
+  if (**endptr != '\0')
+    {
+      /* process suffix.  */
+
+      if (!valid_suffix (**endptr))
+        return SSE_INVALID_SUFFIX;
+
+      if (allowed_scaling == scale_none)
+        return SSE_VALID_BUT_FORBIDDEN_SUFFIX;
+
+      power = suffix_power (**endptr);
+      (*endptr)++;              /* skip first suffix character.  */
+
+      if (allowed_scaling == scale_auto && **endptr == 'i')
+        {
+          /* auto-scaling enabled, and the first suffix character
+             is followed by an 'i' (e.g. Ki, Mi, Gi).  */
+          scale_base = 1024;
+          (*endptr)++;          /* skip second ('i') suffix character.  */
+          if (dev_debug)
+            error (0, 0, _(" Auto-scaling, found 'i', switching to base %d\n"),
+                   scale_base);
+        }
+      else if (allowed_scaling == scale_IEC_I)
+        {
+          /* The 'i' is mandatory only once a suffix letter was seen;
+             requiring it unconditionally would reject plain numbers
+             and accept a lone 'i' such as "5i".  */
+          if (**endptr == 'i')
+            (*endptr)++;
+          else
+            return SSE_MISSING_I_SUFFIX;
+        }
+
+      *precision = 0;  /* Reset, to select precision based on scale.  */
+    }
+
+  long double multiplier = powerld (scale_base, power);
+
+  if (dev_debug)
+    error (0, 0, _(" suffix power=%d^%d = %Lf\n"),
+           scale_base, power, multiplier);
+
+  /* TODO: detect loss of precision and overflows.  */
+  (*value) = (*value) * multiplier;
+
+  if (dev_debug)
+    error (0, 0, _(" returning value: %Lf (%LG)\n"), *value, *value);
+
+  return e;
+}
+
+
+/* Diagnose the failed conversion ERR of INPUT_STR.
+   The message is emitted through error() with status 'conv_exit_code',
+   unless invalid input is being ignored (inval_ignore), in which case
+   nothing is printed.  Must not be called with a successful ERR.  */
+static void
+simple_strtod_fatal (enum simple_strtod_error err, char const *input_str)
+{
+  char const *msgid = NULL;
+
+  switch (err)
+    {
+    case SSE_MISSING_I_SUFFIX:
+      msgid = N_("missing 'i' suffix in input: '%s' (e.g Ki/Mi/Gi)");
+      break;
+
+    case SSE_INVALID_SUFFIX:
+      msgid = N_("invalid suffix in input: '%s'");
+      break;
+
+    case SSE_VALID_BUT_FORBIDDEN_SUFFIX:
+      msgid = N_("rejecting suffix in input: '%s' (consider using --from)");
+      break;
+
+    case SSE_INVALID_NUMBER:
+      msgid = N_("invalid number: '%s'");
+      break;
+
+    case SSE_OVERFLOW:
+      msgid = N_("value too large to be converted: '%s'");
+      break;
+
+    case SSE_OK:
+    case SSE_OK_PRECISION_LOSS:
+      /* A successful conversion has nothing to report; reaching this
+         point is a programming error.  */
+      abort ();
+    }
+
+  if (_invalid == inval_ignore)
+    return;
+
+  error (conv_exit_code, 0, gettext (msgid), input_str);
+}
+
+/* Format VAL as text in BUF (of BUF_SIZE bytes).
+   With SCALE == scale_none the value is rounded (per ROUND) to PRECISION
+   fractional digits and printed in full, using locale grouping when GROUP
+   is nonzero.  Otherwise the value is reduced to a mantissa and a power of
+   the scale's base, rounded, and printed followed by the matching suffix
+   letter (plus "i" for scale_IEC_I).  */
+static void
+double_to_human (long double val, int precision,
+                 char *buf, size_t buf_size,
+                 enum scale_type scale, int group, enum round_type round)
+{
+  if (dev_debug)
+    error (0, 0, _("double_to_human:\n"));
+
+  if (scale == scale_none)
+    {
+      /* Round at the requested number of fractional digits.  */
+      val *= powerld (10, precision);
+      val = simple_round (val, round);
+      val /= powerld (10, precision);
+
+      if (dev_debug)
+        {
+          if (group)
+            error (0, 0,
+                   _(" no scaling, returning (grouped) value: %'.*Lf\n"),
+                   precision, val);
+          else
+            error (0, 0,
+                   _(" no scaling, returning value: %.*Lf\n"),
+                   precision, val);
+        }
+
+      char const *plain_fmt = group ? "%'.*Lf" : "%.*Lf";
+      int written = snprintf (buf, buf_size, plain_fmt, precision, val);
+      if (written < 0 || written >= (int) buf_size)
+        error (EXIT_FAILURE, 0,
+               _("failed to prepare value '%Lf' for printing"), val);
+      return;
+    }
+
+  /* Scaling requested by user.  */
+  double scale_base = default_scale_base (scale);
+
+  /* Reduce VAL to a mantissa; POWER receives the exponent.  */
+  unsigned int power = 0;
+  val = expld (val, scale_base, &power);
+  if (dev_debug)
+    error (0, 0, _(" scaled value to %Lf * %0.f ^ %d\n"),
+           val, scale_base, power);
+
+  /* Mantissas below 10 keep one fractional digit, so shift that digit
+     into the integral part before rounding.  */
+  bool below_ten = absld (val) < 10;
+  if (below_ten)
+    val *= 10;
+
+  val = simple_round (val, round);
+
+  /* Rounding may have pushed the mantissa up to the base
+     (e.g. "999.99" -> 1000): move on to the next suffix.  */
+  if (absld (val) >= scale_base)
+    {
+      val /= scale_base;
+      power++;
+    }
+
+  if (below_ten)
+    val /= 10;
+
+  /* Rounding may also turn "9.99" into 10, which is shown without a
+     fractional digit.  Should "7.0" be printed as "7"?  If removing the
+     ".0" is preferred, additionally require
+     (absld (val) > simple_round_floor (val)).  */
+  int show_decimal_point = 0;
+  if (val != 0 && absld (val) < 10 && power > 0)
+    show_decimal_point = 1;
+
+  if (dev_debug)
+    error (0, 0, _(" after rounding, value=%Lf * %0.f ^ %d\n"),
+           val, scale_base, power);
+
+  char const *scaled_fmt = show_decimal_point ? "%.1Lf%s" : "%.0Lf%s";
+  snprintf (buf, buf_size, scaled_fmt, val, suffix_power_character (power));
+
+  if (scale == scale_IEC_I && power > 0)
+    strncat (buf, "i", buf_size - strlen (buf) - 1);
+
+  if (dev_debug)
+    error (0, 0, _(" returning value: '%s'\n"), buf);
+}
+
+/* Parse N_STRING, a string of decimal digits with an optional size
+   suffix (one of "KMGTPEZY"), as a unit size and return it.
+   A malformed string, trailing garbage, or a value of zero is diagnosed
+   and the program exits with EXIT_FAILURE.  */
+static uintmax_t
+unit_to_umax (const char *n_string)
+{
+  uintmax_t size = 0;
+  char *tail = NULL;
+  strtol_error status = xstrtoumax (n_string, &tail, 10, &size, "KMGTPEZY");
+
+  /* TAIL is only inspected once the conversion itself succeeded.  */
+  bool usable = (status == LONGINT_OK && *tail == '\0' && size != 0);
+
+  if (!usable)
+    error (EXIT_FAILURE, 0, _("invalid unit size: '%s'"), n_string);
+
+  return size;
+}
+
+
+/* Make sure the global 'padding_buffer' holds more than MIN_SIZE bytes,
+   growing it to MIN_SIZE + 1 when it is currently too small.
+   Exits with EXIT_FAILURE if the memory cannot be obtained.  */
+static void
+setup_padding_buffer (size_t min_size)
+{
+  if (min_size >= padding_buffer_size)
+    {
+      padding_buffer_size = min_size + 1;
+      padding_buffer = realloc (padding_buffer, padding_buffer_size);
+      if (padding_buffer == NULL)
+        error (EXIT_FAILURE, 0, _("out of memory (requested %zu bytes)"),
+               padding_buffer_size);
+    }
+}
+
+/* Print the help text (STATUS == EXIT_SUCCESS) or a short
+   "try --help" hint (any other STATUS), then exit with STATUS.
+   The UNIT examples use the M/Mi suffixes for 1000000 and 1048576.  */
+void
+usage (int status)
+{
+  if (status != EXIT_SUCCESS)
+    emit_try_help ();
+  else
+    {
+      printf (_("\
+Usage: %s [OPTIONS] [NUMBER]\n\
+"), program_name);
+      fputs (_("\
+Reformat NUMBER(s) from stdin or command arguments.\n\
+"), stdout);
+      emit_mandatory_arg_note ();
+      fputs (_("\
+ --from=UNIT auto-scale input numbers to UNITs. Default is 'none'.\n\
+ See UNIT below.\n\
+ --from-unit=N specify the input unit size (instead of the default 1).\n\
+ --to=UNIT auto-scale output numbers to UNITs.\n\
+ See UNIT below.\n\
+ --to-unit=N the output unit size (instead of the default 1).\n\
+ --round=METHOD the rounding method to use when scaling. METHOD can be:\n\
+ up, down, from-zero (default), towards-zero, nearest\n\
+ --suffix=SUFFIX add SUFFIX to output numbers, and accept optional SUFFIX\n\
+ in input numbers.\n\
+ --padding=N pad the output to N characters.\n\
+ Positive N will right-align. Negative N will left-align.\n\
+ Note: if the output is wider than N, padding is ignored.\n\
+ Default is to automatically pad if whitespace is found.\n\
+ --grouping group digits together (e.g. 1,000,000).\n\
+ Uses the locale-defined grouping (i.e. have no effect\n\
+ in C/POSIX locales).\n\
+ --header[=N] print (without converting) the first N header lines.\n\
+ N defaults to 1 if not specified.\n\
+ --field N replace the number in input field N (default is 1)\n\
+ -d, --delimiter=X use X instead of whitespace for field delimiter\n\
+ --format=FORMAT use printf style floating-point FORMAT.\n\
+ See FORMAT below for details.\n\
+ --invalid=MODE failure mode for invalid numbers: MODE can be:\n\
+ abort (the default), fail, warn, ignore.\n\
+ --debug print warnings about invalid input.\n\
+ \n\
+"), stdout);
+      fputs (HELP_OPTION_DESCRIPTION, stdout);
+      fputs (VERSION_OPTION_DESCRIPTION, stdout);
+
+
+      fputs (_("\
+\n\
+UNIT options:\n\
+ none No auto-scaling is done. Suffixes will trigger an error.\n\
+ auto Accept optional single-letter/two-letter suffix:\n\
+ 1K = 1000\n\
+ 1Ki = 1024\n\
+ 1M = 1000000\n\
+ 1Mi = 1048576\n\
+ si Accept optional single letter suffix:\n\
+ 1K = 1000\n\
+ 1M = 1000000\n\
+ ...\n\
+ iec Accept optional single letter suffix:\n\
+ 1K = 1024\n\
+ 1M = 1048576\n\
+ ...\n\
+ iec-i Accept optional two-letter suffix:\n\
+ 1Ki = 1024\n\
+ 1Mi = 1048576\n\
+ ...\n\
+\n\
+"), stdout);
+
+      fputs (_("\
+\n\
+FORMAT must be suitable for printing one floating-point argument '%f'.\n\
+Optional quote (%'f) will enable --grouping (if supported by current locale).\n\
+Optional width value (%10f) will pad output. Optional negative width values\n\
+(%-10f) will left-align output.\n\
+\n\
+"), stdout);
+
+      printf (_("\
+\n\
+Exit status is 0 if all input numbers were successfully converted.\n\
+By default, %s will stop at the first conversion error with exit status 2.\n\
+With --invalid='fail' a warning is printed for each conversion error\n\
+and the exit status is 2. With --invalid='warn' each conversion error is\n\
+diagnosed, but the exit status is 0. With --invalid='ignore' conversion\n\
+errors are not diagnosed and the exit status is 0.\n\
+\n\
+"), program_name);
+
+
+
+      printf (_("\
+\n\
+Examples:\n\
+ $ %s --to=si 1000\n\
+ -> \"1.0K\"\n\
+ $ %s --to=iec 2048\n\
+ -> \"2.0K\"\n\
+ $ %s --to=iec-i 4096\n\
+ -> \"4.0Ki\"\n\
+ $ echo 1K | %s --from=si\n\
+ -> \"1000\"\n\
+ $ echo 1K | %s --from=iec\n\
+ -> \"1024\"\n\
+ $ df | %s --header --field 2 --to=si\n\
+ $ ls -l | %s --header --field 5 --to=iec\n\
+ $ ls -lh | %s --header --field 5 --from=iec --padding=10\n\
+ $ ls -lh | %s --header --field 5 --from=iec --format %%10f\n\
+"),
+              program_name, program_name, program_name,
+              program_name, program_name, program_name,
+              program_name, program_name, program_name);
+      emit_ancillary_info ();
+    }
+  exit (status);
+}
+
+/* Given 'fmt' (a printf(3) compatible format string), extracts the following:
+    1. padding (e.g. %20f)
+    2. alignment (e.g. %-20f)
+    3. grouping (e.g. %'f)
+
+   Only a limited subset of printf(3) syntax is supported.
+
+   The text before and after the single %f directive is saved verbatim
+   (any "%%" in it is kept as the two bytes it occupies in FMT).
+
+   TODO:
+     support .precision
+     support %e %g etc. rather than just %f
+
+   NOTES:
+     1. This function sets the global variables:
+         padding_width, padding_alignment, grouping,
+         format_str_prefix, format_str_suffix
+     2. The function aborts on any errors.  */
+static void
+parse_format_string (char const *fmt)
+{
+  size_t i;
+  size_t prefix_len;
+  size_t suffix_pos;
+  long int pad = 0;
+  char *endptr = NULL;
+
+  /* Find the first '%' that starts a directive, stepping over each
+     "%%" as a unit.  */
+  for (i = 0; !(fmt[i] == '%' && fmt[i + 1] != '%'); i += (fmt[i] == '%') + 1)
+    {
+      if (!fmt[i])
+        error (EXIT_FAILURE, 0,
+               _("format %s has no %% directive"), quote (fmt));
+    }
+
+  /* The prefix is every byte before the directive.  Counting loop
+     iterations instead would count each two-byte "%%" once and cut the
+     prefix short.  */
+  prefix_len = i;
+
+  i++;
+  i += strspn (fmt + i, " ");
+  if (fmt[i] == '\'')
+    {
+      grouping = 1;
+      i++;
+    }
+  i += strspn (fmt + i, " ");
+  errno = 0;
+  pad = strtol (fmt + i, &endptr, 10);
+  if (errno != 0)
+    error (EXIT_FAILURE, 0,
+           _("invalid format %s (width overflow)"), quote (fmt));
+
+  /* A width was given: a negative one requests left alignment.  */
+  if (endptr != (fmt + i) && pad != 0)
+    {
+      if (pad < 0)
+        {
+          padding_alignment = MBS_ALIGN_LEFT;
+          padding_width = -pad;
+        }
+      else
+        {
+          padding_width = pad;
+        }
+    }
+  i = endptr - fmt;
+
+  if (fmt[i] == '\0')
+    error (EXIT_FAILURE, 0, _("format %s ends in %%"), quote (fmt));
+
+  if (fmt[i] != 'f')
+    error (EXIT_FAILURE, 0, _("invalid format %s,"
+                              " directive must be %%['][-][N]f"),
+           quote (fmt));
+  i++;
+  suffix_pos = i;
+
+  /* Only "%%" may follow the directive; any other '%' is a second one.  */
+  for (; fmt[i] != '\0'; i += (fmt[i] == '%') + 1)
+    if (fmt[i] == '%' && fmt[i + 1] != '%')
+      error (EXIT_FAILURE, 0, _("format %s has too many %% directives"),
+             quote (fmt));
+
+  if (prefix_len)
+    {
+      format_str_prefix = xstrndup (fmt, prefix_len);
+      if (!format_str_prefix)
+        error (EXIT_FAILURE, 0, _("out of memory (requested %zu bytes)"),
+               prefix_len + 1);
+    }
+  if (fmt[suffix_pos] != '\0')
+    {
+      format_str_suffix = strdup (fmt + suffix_pos);
+      if (!format_str_suffix)
+        error (EXIT_FAILURE, 0, _("out of memory (requested %zu bytes)"),
+               strlen (fmt + suffix_pos) + 1);
+    }
+
+  /* padding_width is a 'long int' (it is filled by xstrtol and strtol),
+     and the prefix/suffix pointers may still be NULL here.  */
+  if (dev_debug)
+    error (0, 0, _("format String:\n input: %s\n grouping: %s\n"
+                   " padding width: %ld\n alignment: %s\n"
+                   " prefix: '%s'\n suffix: '%s'\n"),
+           quote (fmt), (grouping) ? "yes" : "no",
+           padding_width,
+           (padding_alignment == MBS_ALIGN_LEFT) ? "Left" : "Right",
+           format_str_prefix ? format_str_prefix : "(null)",
+           format_str_suffix ? format_str_suffix : "(null)");
+}
+
+/* Parse STR as a number with an optional scale suffix, according to the
+   global 'scale_from', storing the result in VALUE and the input
+   precision in PRECISION.
+
+   A conversion failure is reported through simple_strtod_fatal; text
+   left over after the number (and its suffix) is reported as an invalid
+   suffix unless invalid input is ignored.  Whether those reports end
+   the program depends on 'conv_exit_code'; when they do not, the error
+   code is returned to the caller.  */
+static enum simple_strtod_error
+parse_human_number (const char *str, long double /*output */ *value,
+                    size_t *precision)
+{
+  char *rest = NULL;
+  enum simple_strtod_error status =
+    simple_strtod_human (str, &rest, value, precision, scale_from);
+
+  if (status == SSE_OK || status == SSE_OK_PRECISION_LOSS)
+    {
+      /* The number itself is fine; anything after it is not.  */
+      if (rest != NULL && *rest != '\0')
+        {
+          if (_invalid != inval_ignore)
+            error (conv_exit_code, 0,
+                   _("invalid suffix in input '%s': '%s'"), str, rest);
+          status = SSE_INVALID_SUFFIX;
+        }
+    }
+  else
+    simple_strtod_fatal (status, str);
+
+  return status;
+}
+
+
+/* Render VAL in the requested representation (scale, rounding, grouping,
+   custom suffix) and store the text, padded and aligned when a padding
+   width is in effect, in the global 'padding_buffer'.
+   Nothing is written to stdout here.
+   Return 1 on success, 0 if VAL is too large to be printed (diagnosed
+   unless invalid input is ignored).  */
+static int
+prepare_padded_number (const long double val, size_t precision)
+{
+  char text[128];
+
+  /* Only the decimal exponent of VAL is of interest here.  */
+  unsigned int digits;
+  expld (val, 10, &digits);
+
+  /* Can't reliably print too-large values without auto-scaling.  */
+  if (scale_to == scale_none && digits > MAX_UNSCALED_DIGITS)
+    {
+      if (_invalid != inval_ignore)
+        error (conv_exit_code, 0, _("value too large to be printed: '%Lg'"
+                                    " (consider using --to)"), val);
+      return 0;
+    }
+
+  if (digits > MAX_ACCEPTABLE_DIGITS - 1)
+    {
+      if (_invalid != inval_ignore)
+        error (conv_exit_code, 0, _("value too large to be printed: '%Lg'"
+                                    " (cannot handle values > 999Y)"), val);
+      return 0;
+    }
+
+  double_to_human (val, precision, text, sizeof (text), scale_to, grouping,
+                   _round);
+  if (suffix)
+    strncat (text, suffix, sizeof (text) - strlen (text) -1);
+
+  if (dev_debug)
+    error (0, 0, _("formatting output:\n value: %Lf\n humanized: '%s'\n"),
+           val, text);
+
+  if (!padding_width || strlen (text) >= padding_width)
+    {
+      /* No padding, or the text already fills the width: copy as is.  */
+      setup_padding_buffer (strlen (text) + 1);
+      strcpy (padding_buffer, text);
+    }
+  else
+    {
+      size_t width = padding_width;
+      mbsalign (text, padding_buffer, padding_buffer_size, &width,
+                padding_alignment, MBA_UNIBYTE_ONLY);
+
+      if (dev_debug)
+        error (0, 0, _(" After padding: '%s'\n"), padding_buffer);
+    }
+
+  return 1;
+}
+
+/* Write the number currently held in the global 'padding_buffer' to
+   stdout, surrounded by the --format prefix and suffix when present.  */
+static void
+print_padded_number (void)
+{
+  if (format_str_prefix != NULL)
+    fputs (format_str_prefix, stdout);
+
+  fputs (padding_buffer, stdout);
+
+  if (format_str_suffix != NULL)
+    fputs (format_str_suffix, stdout);
+}
+
+/* Convert the number in TEXT to a value, stored in RESULT, with the
+   input precision stored in PRECISION.
+   A trailing custom suffix (--suffix) is trimmed from TEXT in place,
+   leading blanks are skipped, auto-padding is set up from the width of
+   TEXT, and the --from-unit/--to-unit sizes are applied to the value.
+   Return nonzero if the number was converted successfully.  */
+static int
+process_suffixed_number (char *text, long double *result, size_t *precision)
+{
+  if (suffix && strlen (text) > strlen (suffix))
+    {
+      char *possible_suffix = text + strlen (text) - strlen (suffix);
+
+      if (STREQ (suffix, possible_suffix))
+        {
+          /* trim suffix, ONLY if it's at the end of the text.  */
+          *possible_suffix = '\0';
+          if (dev_debug)
+            error (0, 0, _("trimming suffix '%s'\n"), suffix);
+        }
+      else
+        {
+          if (dev_debug)
+            error (0, 0, _("no valid suffix found\n"));
+        }
+    }
+
+  /* Skip white space - always.
+     The cast keeps isblank() defined for bytes >= 0x80 on platforms
+     where plain 'char' is signed.  */
+  char *p = text;
+  while (*p && isblank ((unsigned char) *p))
+    ++p;
+  /* Number of leading blanks: P never precedes TEXT, so the difference
+     must be taken in this order to be non-negative.  */
+  const size_t skip_count = p - text;
+
+  /* setup auto-padding.  */
+  if (auto_padding)
+    {
+      if (skip_count > 0 || field > 1)
+        {
+          padding_width = strlen (text);
+          setup_padding_buffer (padding_width);
+        }
+      else
+        {
+          padding_width = 0;
+        }
+      if (dev_debug)
+        error (0, 0, _("setting Auto-Padding to %ld characters\n"),
+               padding_width);
+    }
+
+  long double val = 0;
+  enum simple_strtod_error e = parse_human_number (p, &val, precision);
+  if (e == SSE_OK_PRECISION_LOSS && debug)
+    error (0, 0, _("large input value '%s': possible precision loss"), p);
+
+  if (from_unit_size != 1 || to_unit_size != 1)
+    val = (val * from_unit_size) / to_unit_size;
+
+  *result = val;
+
+  return (e == SSE_OK || e == SSE_OK_PRECISION_LOSS);
+}
+
+/* Skip the requested number of fields in the input string.
+   Returns a pointer to the *delimiter* of the requested field,
+   or a pointer to NUL (if reached the end of the string).
+   Fields are separated by the global 'delimiter', or by runs of blanks
+   when it is DELIMITER_DEFAULT.  */
+static inline char *
+__attribute ((pure))
+skip_fields (char *buf, int fields)
+{
+  char *ptr = buf;
+  if (delimiter != DELIMITER_DEFAULT)
+    {
+      /* A leading delimiter terminates an (empty) first field.  */
+      if (*ptr == delimiter)
+        fields--;
+      while (*ptr && fields--)
+        {
+          while (*ptr && *ptr == delimiter)
+            ++ptr;
+          while (*ptr && *ptr != delimiter)
+            ++ptr;
+        }
+    }
+  else
+    while (*ptr && fields--)
+      {
+        /* The casts keep isblank() defined for bytes >= 0x80 on
+           platforms where plain 'char' is signed.  */
+        while (*ptr && isblank ((unsigned char) *ptr))
+          ++ptr;
+        while (*ptr && !isblank ((unsigned char) *ptr))
+          ++ptr;
+      }
+  return ptr;
+}
+
+/* Parse a delimited string, and extracts the requested field _FIELD
+   (counted from 1).
+   NOTE: the input buffer is modified.
+
+   TODO:
+     Maybe support multiple fields, though can always pipe output
+     into another numfmt to process other fields.
+     Maybe default to processing all fields rather than just first?
+
+   Output:
+     _PREFIX, _DATA, _SUFFIX will point to the relevant positions
+     in the input string, or be NULL if such a part doesn't exist.  */
+static void
+extract_fields (char *line, int _field,
+                char ** _prefix, char ** _data, char ** _suffix)
+{
+  char *ptr = line;
+  *_prefix = NULL;
+  *_data = NULL;
+  *_suffix = NULL;
+
+  if (dev_debug)
+    error (0, 0, _("extracting Fields:\n input: '%s'\n field: %d\n"),
+           line, _field);
+
+  /* Honor the _FIELD argument rather than the global option variable,
+     so the function does what its caller asks for.  */
+  if (_field > 1)
+    {
+      /* skip the requested number of fields.  */
+      *_prefix = line;
+      ptr = skip_fields (line, _field - 1);
+      if (*ptr == '\0')
+        {
+          /* not enough fields in the input - print warning?  */
+          if (dev_debug)
+            error (0, 0, _(" TOO FEW FIELDS!\n prefix: '%s'\n"), *_prefix);
+          return;
+        }
+
+      *ptr = '\0';
+      ++ptr;
+    }
+
+  *_data = ptr;
+  *_suffix = skip_fields (*_data, 1);
+  if (**_suffix)
+    {
+      /* there is a suffix (i.e. the field is not the last on the line),
+         so null-terminate the _data before it.  */
+      **_suffix = '\0';
+      ++(*_suffix);
+    }
+  else
+    *_suffix = NULL;
+
+  /* The prefix and suffix may be NULL here; never hand NULL to '%s'.  */
+  if (dev_debug)
+    error (0, 0, _(" prefix: '%s'\n number: '%s'\n suffix: '%s'\n"),
+           *_prefix ? *_prefix : "(null)", *_data,
+           *_suffix ? *_suffix : "(null)");
+}
+
+
+/* Convert the number found in the selected field of LINE and write the
+   whole line to stdout: the text before the field, the converted number
+   (or the original field text if it could not be converted), and the
+   text after the field.  LINE is modified in the process.
+   NEWLINE specifies whether to output a '\n' for this "line".
+   Return nonzero if a number was found and converted.  */
+static int
+process_line (char *line, bool newline)
+{
+  char *pre, *num, *suf;
+  long double val = 0;
+  size_t precision = 0;
+  int valid_number = 0;
+
+  /* Character written between the parts of the line.  */
+  int const separator = (delimiter == DELIMITER_DEFAULT) ? ' ' : delimiter;
+
+  extract_fields (line, field, &pre, &num, &suf);
+
+  if (num == NULL)
+    {
+      if (_invalid != inval_ignore)
+        error (conv_exit_code, 0, _("input line is too short, "
+                                    "no numbers found to convert in field %ld"),
+               field);
+    }
+  else
+    {
+      valid_number = (process_suffixed_number (num, &val, &precision)
+                      && prepare_padded_number (val, precision));
+    }
+
+  if (pre)
+    {
+      fputs (pre, stdout);
+      if (num)
+        fputc (separator, stdout);
+    }
+
+  if (valid_number)
+    print_padded_number ();
+  else if (num)
+    fputs (num, stdout);
+
+  if (suf)
+    {
+      fputc (separator, stdout);
+      fputs (suf, stdout);
+    }
+
+  if (newline)
+    putchar ('\n');
+
+  return valid_number;
+}
+
+/* Program entry point: parse the options, then convert either the
+   command-line arguments or, when there are none, the lines of stdin
+   (after copying the requested number of header lines unchanged).
+   Exits with EXIT_CONVERSION_WARNINGS if some number failed to convert
+   and --invalid is neither 'warn' nor 'ignore', and with EXIT_FAILURE
+   on usage errors or when reading stdin fails.  */
+int
+main (int argc, char **argv)
+{
+  int valid_numbers = 1;
+
+  initialize_main (&argc, &argv);
+  set_program_name (argv[0]);
+  setlocale (LC_ALL, "");
+  bindtextdomain (PACKAGE, LOCALEDIR);
+  textdomain (PACKAGE);
+
+  /* Fall back to "." when the locale provides no radix character.  */
+  decimal_point = nl_langinfo (RADIXCHAR);
+  if (decimal_point == NULL || strlen (decimal_point) == 0)
+    decimal_point = ".";
+  decimal_point_length = strlen (decimal_point);
+
+  atexit (close_stdout);
+
+  while (true)
+    {
+      int c = getopt_long (argc, argv, "d:", longopts, NULL);
+
+      if (c == -1)
+        break;
+
+      switch (c)
+        {
+        case FROM_OPTION:
+          scale_from = XARGMATCH ("--from", optarg,
+                                  scale_from_args, scale_from_types);
+          break;
+
+        case FROM_UNIT_OPTION:
+          from_unit_size = unit_to_umax (optarg);
+          break;
+
+        case TO_OPTION:
+          scale_to =
+            XARGMATCH ("--to", optarg, scale_to_args, scale_to_types);
+          break;
+
+        case TO_UNIT_OPTION:
+          to_unit_size = unit_to_umax (optarg);
+          break;
+
+        case ROUND_OPTION:
+          _round = XARGMATCH ("--round", optarg, round_args, round_types);
+          break;
+
+        case GROUPING_OPTION:
+          grouping = 1;
+          break;
+
+        case PADDING_OPTION:
+          if (xstrtol (optarg, NULL, 10, &padding_width, "") != LONGINT_OK
+              || padding_width == 0)
+            error (EXIT_FAILURE, 0, _("invalid padding value '%s'"), optarg);
+          /* A negative width requests left alignment.  */
+          if (padding_width < 0)
+            {
+              padding_alignment = MBS_ALIGN_LEFT;
+              padding_width = -padding_width;
+            }
+          /* TODO: We probably want to apply a specific --padding
+             to --header lines too.  */
+          break;
+
+        case FIELD_OPTION:
+          if (xstrtol (optarg, NULL, 10, &field, "") != LONGINT_OK
+              || field <= 0)
+            error (EXIT_FAILURE, 0, _("invalid field value '%s'"), optarg);
+          break;
+
+        case 'd':
+          /* Interpret -d '' to mean 'use the NUL byte as the delimiter.'  */
+          if (optarg[0] != '\0' && optarg[1] != '\0')
+            error (EXIT_FAILURE, 0,
+                   _("the delimiter must be a single character"));
+          delimiter = optarg[0];
+          break;
+
+        case SUFFIX_OPTION:
+          suffix = optarg;
+          break;
+
+        case DEBUG_OPTION:
+          debug = 1;
+          break;
+
+        case DEV_DEBUG_OPTION:
+          dev_debug = 1;
+          debug = 1;
+          break;
+
+        case HEADER_OPTION:
+          if (optarg)
+            {
+              if (xstrtoumax (optarg, NULL, 10, &header, "") != LONGINT_OK
+                  || header == 0)
+                error (EXIT_FAILURE, 0, _("invalid header value '%s'"),
+                       optarg);
+            }
+          else
+            {
+              header = 1;
+            }
+          break;
+
+        case FORMAT_OPTION:
+          format_str = optarg;
+          break;
+
+        case INVALID_OPTION:
+          _invalid = XARGMATCH ("--invalid", optarg, inval_args, inval_types);
+          break;
+
+        case_GETOPT_HELP_CHAR;
+        case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
+
+        default:
+          usage (EXIT_FAILURE);
+        }
+    }
+
+  if (format_str != NULL && grouping)
+    error (EXIT_FAILURE, 0, _("--grouping cannot be combined with --format"));
+  if (format_str != NULL && padding_width > 0)
+    error (EXIT_FAILURE, 0, _("--padding cannot be combined with --format"));
+
+  /* Warn about no-op.  */
+  if (debug && scale_from == scale_none && scale_to == scale_none
+      && !grouping && (padding_width == 0) && (format_str == NULL))
+    error (0, 0, _("no conversion option specified"));
+
+  if (format_str)
+    parse_format_string (format_str);
+
+  if (grouping)
+    {
+      if (scale_to != scale_none)
+        error (EXIT_FAILURE, 0, _("grouping cannot be combined with --to"));
+      if (debug && (strlen (nl_langinfo (THOUSEP)) == 0))
+        error (0, 0, _("grouping has no effect in this locale"));
+    }
+
+
+  setup_padding_buffer (padding_width);
+  auto_padding = (padding_width == 0 && delimiter == DELIMITER_DEFAULT);
+
+  /* Only the default 'abort' mode stops at the first bad number.  */
+  if (_invalid != inval_abort)
+    conv_exit_code = 0;
+
+  if (argc > optind)
+    {
+      if (debug && header)
+        error (0, 0, _("--header ignored with command-line input"));
+
+      for (; optind < argc; optind++)
+        valid_numbers &= process_line (argv[optind], true);
+    }
+  else
+    {
+      char *line = NULL;
+      size_t line_allocated = 0;
+      ssize_t len;
+
+      /* Copy the header lines through unchanged.  */
+      while (header-- && getline (&line, &line_allocated, stdin) > 0)
+        fputs (line, stdout);
+
+      while ((len = getline (&line, &line_allocated, stdin)) > 0)
+        {
+          bool newline = line[len - 1] == '\n';
+          if (newline)
+            line[len - 1] = '\0';
+          valid_numbers &= process_line (line, newline);
+        }
+
+      /* A read error means the input was only partly processed, so it
+         must be reflected in the exit status.  Check before freeing, so
+         that errno still belongs to the failed read.  */
+      if (ferror (stdin))
+        error (EXIT_FAILURE, errno, _("error reading input"));
+
+      IF_LINT (free (line));
+    }
+
+  free (padding_buffer);
+  free (format_str_prefix);
+  free (format_str_suffix);
+
+
+  if (debug && !valid_numbers)
+    error (0, 0, _("failed to convert some of the input numbers"));
+
+  int exit_status = EXIT_SUCCESS;
+  if (!valid_numbers && _invalid != inval_warn && _invalid != inval_ignore)
+    exit_status = EXIT_CONVERSION_WARNINGS;
+
+  exit (exit_status);
+}
diff --git a/tests/local.mk b/tests/local.mk
index 6043bb6bd..83b4ab927 100644
--- a/tests/local.mk
+++ b/tests/local.mk
@@ -288,6 +288,7 @@ all_tests = \
tests/misc/nohup.sh \
tests/misc/nproc-avail.sh \
tests/misc/nproc-positive.sh \
+ tests/misc/numfmt.pl \
tests/misc/od-N.sh \
tests/misc/od-multiple-t.sh \
tests/misc/od-x8.sh \
diff --git a/tests/misc/numfmt.pl b/tests/misc/numfmt.pl
new file mode 100644
index 000000000..c542483f0
--- /dev/null
+++ b/tests/misc/numfmt.pl
@@ -0,0 +1,936 @@
+#!/usr/bin/perl
+# Basic tests for "numfmt".
+
+# Copyright (C) 2012 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+use strict;
+
+(my $program_name = $0) =~ s|.*/||;
+my $prog = 'numfmt';
+
+# TODO: add localization tests with "grouping"
+# Turn off localization of executable's output.
+@ENV{qw(LANGUAGE LANG LC_ALL)} = ('C') x 3;
+
+my $locale = $ENV{LOCALE_FR_UTF8};
+! defined $locale || $locale eq 'none'
+ and $locale = 'C';
+
+my @Tests =
+ (
+ ['1', '1234', {OUT => "1234"}],
+ ['2', '--from=si 1K', {OUT => "1000"}],
+ ['3', '--from=iec 1K', {OUT => "1024"}],
+ ['4', '--from=auto 1K', {OUT => "1000"}],
+ ['5', '--from=auto 1Ki', {OUT => "1024"}],
+ ['5.1', '--from=iec-i 1Ki', {OUT => "1024"}],
+
+ ['6', {IN_PIPE => "1234\n"}, {OUT => "1234"}],
+ ['7', '--from=si', {IN_PIPE => "2K\n"}, {OUT => "2000"}],
+ ['7a', '--invalid=fail', {IN_PIPE => "no_NL"}, {OUT => "no_NL"},
+ {ERR => "$prog: invalid number: 'no_NL'\n"},
+ {EXIT => '2'}],
+
+ ['8', '--to=si 2000', {OUT => "2.0K"}],
+ ['9', '--to=si 2001', {OUT => "2.1K"}],
+ ['10', '--to=si 1999', {OUT => "2.0K"}],
+ ['11', '--to=si --round=down 2001', {OUT => "2.0K"}],
+ ['12', '--to=si --round=down 1999', {OUT => "1.9K"}],
+ ['13', '--to=si --round=up 1901', {OUT => "2.0K"}],
+ ['14', '--to=si --round=down 1901', {OUT => "1.9K"}],
+ ['15', '--to=si --round=nearest 1901', {OUT => "1.9K"}],
+ ['16', '--to=si --round=nearest 1945', {OUT => "1.9K"}],
+ ['17', '--to=si --round=nearest 1955', {OUT => "2.0K"}],
+
+ ['18', '--to=iec 2048', {OUT => "2.0K"}],
+ ['19', '--to=iec 2049', {OUT => "2.1K"}],
+ ['20', '--to=iec 2047', {OUT => "2.0K"}],
+ ['21', '--to=iec --round=down 2049', {OUT => "2.0K"}],
+ ['22', '--to=iec --round=down 2047', {OUT => "1.9K"}],
+ ['23', '--to=iec --round=up 2040', {OUT => "2.0K"}],
+ ['24', '--to=iec --round=down 2040', {OUT => "1.9K"}],
+ ['25', '--to=iec --round=nearest 1996', {OUT => "1.9K"}],
+ ['26', '--to=iec --round=nearest 1997', {OUT => "2.0K"}],
+ ['27', '--to=iec-i 2048', {OUT => "2.0Ki"}],
+
+ ['neg-1', '-- -1234', {OUT => "-1234"}],
+ ['neg-2', '--padding=5 -- -1234', {OUT => "-1234"}],
+ ['neg-3', '--padding=6 -- -1234', {OUT => " -1234"}],
+ ['neg-4', '--to=iec -- 9100 -9100', {OUT => "8.9K\n-8.9K"}],
+ ['neg-5', '-- -0.1', {OUT => "-0.1"}],
+ ['neg-6', '-- -0', {OUT => "0"}],
+ ['neg-7', '-- -0.-1',
+ {ERR => "$prog: invalid number: '-0.-1'\n"},
+ {EXIT => '2'}],
+
+ ['float-1', '1.1', {OUT => "1.1"}],
+ ['float-2', '1.22', {OUT => "1.22"}],
+ ['float-3', '1.22.',
+ {ERR => "$prog: invalid suffix in input: '1.22.'\n"},
+ {EXIT => '2'}],
+
+ ['unit-1', '--from-unit=512 4', {OUT => "2048"}],
+ ['unit-2', '--to-unit=512 2048', {OUT => "4"}],
+ ['unit-3', '--from-unit=512 --from=si 4M', {OUT => "2048000000"}],
+ ['unit-4', '--from-unit=512 --from=iec --to=iec 4M', {OUT => "2.0G"}],
+ ['unit-5', '--from-unit=AA --from=iec --to=iec 4M',
+ {ERR => "$prog: invalid unit size: 'AA'\n"},
+ {EXIT => '1'}],
+ ['unit-6', '--from-unit=54W --from=iec --to=iec 4M',
+ {ERR => "$prog: invalid unit size: '54W'\n"},
+ {EXIT => '1'}],
+ # Not fully documented.. "--{from,to}-unit" can accept IEC suffixes
+ ['unit-7', '--from-unit=2K --to=iec 30', {OUT=>"60K"}],
+ ['unit-8', '--from-unit=1234567890123456789012345 --to=iec 30',
+ {ERR => "$prog: invalid unit size: '1234567890123456789012345'\n"},
+ {EXIT => '1'}],
+ ['unit-9', '--from-unit=0 1',
+ {ERR => "$prog: invalid unit size: '0'\n"},
+ {EXIT => '1'}],
+ ['unit-10', '--to-unit=0 1',
+ {ERR => "$prog: invalid unit size: '0'\n"},
+ {EXIT => '1'}],
+
+ # Test Suffix logic
+ ['suf-1', '4000', {OUT=>'4000'}],
+ ['suf-2', '4Q',
+ {ERR => "$prog: invalid suffix in input: '4Q'\n"},
+ {EXIT => '2'}],
+ ['suf-2.1', '4M',
+ {ERR => "$prog: rejecting suffix " .
+ "in input: '4M' (consider using --from)\n"},
+ {EXIT => '2'}],
+ ['suf-3', '--from=si 4M', {OUT=>'4000000'}],
+ ['suf-4', '--from=si 4Q',
+ {ERR => "$prog: invalid suffix in input: '4Q'\n"},
+ {EXIT => '2'}],
+ ['suf-5', '--from=si 4MQ',
+ {ERR => "$prog: invalid suffix in input '4MQ': 'Q'\n"},
+ {EXIT => '2'}],
+
+ ['suf-6', '--from=iec 4M', {OUT=>'4194304'}],
+ ['suf-7', '--from=auto 4M', {OUT=>'4000000'}],
+ ['suf-8', '--from=auto 4Mi', {OUT=>'4194304'}],
+ ['suf-9', '--from=auto 4MiQ',
+ {ERR => "$prog: invalid suffix in input '4MiQ': 'Q'\n"},
+ {EXIT => '2'}],
+ ['suf-10', '--from=auto 4QiQ',
+ {ERR => "$prog: invalid suffix in input: '4QiQ'\n"},
+ {EXIT => '2'}],
+
+ # characters after a white space are OK - printed as-is
+ ['suf-11', '"4 M"', {OUT=>'4 M'}],
+
+ # Custom suffix
+ ['suf-12', '--suffix=Foo 70Foo', {OUT=>'70Foo'}],
+ ['suf-13', '--suffix=Foo 70', {OUT=>'70Foo'}],
+ ['suf-14', '--suffix=Foo --from=si 70K', {OUT=>'70000Foo'}],
+ ['suf-15', '--suffix=Foo --from=si 70KFoo', {OUT=>'70000Foo'}],
+ ['suf-16', '--suffix=Foo --to=si 7000Foo', {OUT=>'7.0KFoo'}],
+ ['suf-17', '--suffix=Foo --to=si 7000Bar',
+ {ERR => "$prog: invalid suffix in input: '7000Bar'\n"},
+ {EXIT => '2'}],
+ ['suf-18', '--suffix=Foo --to=si 7000FooF',
+ {ERR => "$prog: invalid suffix in input: '7000FooF'\n"},
+ {EXIT => '2'}],
+ # space(s) between number and suffix. Note only field 1 is used
+ # by default so specify the NUL delimiter to consider the whole "line".
+ ['suf-19', "-d '' --from=si '4.0 K'", {OUT => "4000"}],
+
+ ## GROUPING
+
+ # "C" locale - no grouping (locale-specific tests, below)
+ ['grp-1', '--from=si --grouping 7M', {OUT=>'7000000'}],
+ ['grp-2', '--from=si --to=si --grouping 7M',
+ {ERR => "$prog: grouping cannot be combined with --to\n"},
+ {EXIT => '1'}],
+
+
+ ## Padding
+ ['pad-1', '--padding=10 5', {OUT=>' 5'}],
+ ['pad-2', '--padding=-10 5', {OUT=>'5 '}],
+ ['pad-3', '--padding=A 5',
+ {ERR => "$prog: invalid padding value 'A'\n"},
+ {EXIT => '1'}],
+ ['pad-3.1', '--padding=0 5',
+ {ERR => "$prog: invalid padding value '0'\n"},
+ {EXIT => '1'}],
+ ['pad-4', '--padding=10 --to=si 50000', {OUT=>' 50K'}],
+ ['pad-5', '--padding=-10 --to=si 50000', {OUT=>'50K '}],
+
+ # padding too narrow
+ ['pad-6', '--padding=2 --to=si 1000', {OUT=>'1.0K'}],
+
+
+ # Padding + suffix
+ ['pad-7', '--padding=10 --suffix=foo --to=si 50000',
+ {OUT=>' 50Kfoo'}],
+ ['pad-8', '--padding=-10 --suffix=foo --to=si 50000',
+ {OUT=>'50Kfoo '}],
+
+
+ # Delimiters
+ ['delim-1', '--delimiter=: --from=auto 40M:', {OUT=>'40000000:'}],
+ ['delim-2', '--delimiter="" --from=auto "40 M"',{OUT=>'40000000'}],
+ ['delim-3', '--delimiter=" " --from=auto "40M Foo"',{OUT=>'40000000 Foo'}],
+ ['delim-4', '--delimiter=: --from=auto 40M:60M', {OUT=>'40000000:60M'}],
+ ['delim-5', '-d: --field=2 --from=auto :40M:60M', {OUT=>':40000000:60M'}],
+ ['delim-6', '--delimiter=: --field 3 --from=auto 40M:60M',
+ {EXIT=>2},
+ {ERR=>"$prog: input line is too short, no numbers found " .
+ "to convert in field 3\n"}],
+
+ #Fields
+ ['field-1', '--field A',
+ {ERR => "$prog: invalid field value 'A'\n"},
+ {EXIT => '1'}],
+ ['field-1.1', '--field -5',
+ {ERR => "$prog: invalid field value '-5'\n"},
+ {EXIT => '1'}],
+ ['field-2', '--field 2 --from=auto "Hello 40M World 90G"',
+ {OUT=>'Hello 40000000 World 90G'}],
+ ['field-3', '--field 3 --from=auto "Hello 40M World 90G"',
+ {ERR=>"$prog: invalid number: 'World'\n"},
+ {EXIT => 2},],
+ # Last field - no text after number
+ ['field-4', '--field 4 --from=auto "Hello 40M World 90G"',
+ {OUT=>"Hello 40M World 90000000000"}],
+ # Last field - a delimiter after the number
+ ['field-5', '--field 4 --from=auto "Hello 40M World 90G "',
+ {OUT=>"Hello 40M World 90000000000 "}],
+
+ # Mix Fields + Delimiters
+ ['field-6', '--delimiter=: --field 2 --from=auto "Hello:40M:World:90G"',
+ {OUT=>"Hello:40000000:World:90G"}],
+
+ # not enough fields
+ ['field-8', '--field 3 --to=si "Hello World"',
+ {EXIT=>2},
+ {ERR=>"$prog: input line is too short, no numbers found " .
+ "to convert in field 3\n"}],
+
+ # Auto-consume white-space, setup auto-padding
+ ['whitespace-1', '--to=si --field 2 "A 500 B"', {OUT=>"A 500 B"}],
+ ['whitespace-2', '--to=si --field 2 "A 5000 B"', {OUT=>"A 5.0K B"}],
+ ['whitespace-3', '--to=si " 500"', {OUT=>" 500"}],
+ ['whitespace-4', '--to=si " 6500"', {OUT=>" 6.5K"}],
+ # NOTE: auto-padding is not enabled if the value is on the first
+ # field and there's no white-space before it.
+ ['whitespace-5', '--to=si "6000000"', {OUT=>"6.0M"}],
+ # but if there is whitespace, assume auto-padding is desired.
+ ['whitespace-6', '--to=si " 6000000"', {OUT=>" 6.0M"}],
+
+ # auto-padding - lines have same padding-width
+ # (padding_buffer will be alloc'd just once)
+ ['whitespace-7', '--to=si --field 2',
+ {IN_PIPE=>"rootfs 100000\n" .
+ "udevxx 2000000\n"},
+ {OUT =>"rootfs 100K\n" .
+ "udevxx 2.0M"}],
+ # auto-padding - second line requires a
+ # larger padding (padding-buffer needs to be realloc'd)
+ ['whitespace-8', '--to=si --field 2',
+ {IN_PIPE=>"rootfs 100000\n" .
+ "udev 20000000\n"},
+ {OUT =>"rootfs 100K\n" .
+ "udev 20M"}],
+
+
+ # Corner-cases:
+ # weird mix of identical suffix,delimiters
+ # The priority is:
+ # 1. delimiters (and fields) are parsed (in process_line())
+ # 2. optional custom suffix is removed (in process_suffixed_number())
+ # 3. Remaining suffixes must be valid SI/IEC (in human_xstrtol())
+
+ # custom suffix comes BEFORE SI/IEC suffix,
+ # so these are 40 of "M", not 40,000,000.
+ ['mix-1', '--suffix=M --from=si 40M', {OUT=>"40M"}],
+
+ # These are forty million Ms.
+ ['mix-2', '--suffix=M --from=si 40MM', {OUT=>"40000000M"}],
+
+ ['mix-3', '--suffix=M --from=auto 40MM', {OUT=>"40000000M"}],
+ ['mix-4', '--suffix=M --from=auto 40MiM', {OUT=>"41943040M"}],
+ ['mix-5', '--suffix=M --to=si --from=si 4MM', {OUT=>"4.0MM"}],
+
+ # This might be confusing to the user, but it's legit:
+ # The M in the output is the custom suffix, not Mega.
+ ['mix-6', '--suffix=M 40', {OUT=>"40M"}],
+ ['mix-7', '--suffix=M 4000000', {OUT=>"4000000M"}],
+ ['mix-8', '--suffix=M --to=si 4000000', {OUT=>"4.0MM"}],
+
+ # The output 'M' is the custom suffix.
+ ['mix-10', '--delimiter=M --suffix=M 40', {OUT=>"40M"}],
+
+ # The INPUT 'M' is a delimiter (delimiters are top priority)
+ # The output contains one M for custom suffix, and one 'M' delimiter.
+ ['mix-11', '--delimiter=M --suffix=M 40M', {OUT=>"40MM"}],
+
+ # Same as above, the "M" is NOT treated as a mega SI prefix,
+ ['mix-12', '--delimiter=M --from=si --suffix=M 40M', {OUT=>"40MM"}],
+
+ # The 'M' is treated as a delimiter, and so the input value is '4000'
+ ['mix-13', '--delimiter=M --to=si --from=auto 4000M5000M9000',
+ {OUT=>"4.0KM5000M9000"}],
+ # 'M' is the delimiter, so the second input field is '5000'
+ ['mix-14', '--delimiter=M --field 2 --from=auto --to=si 4000M5000M9000',
+ {OUT=>"4000M5.0KM9000"}],
+
+
+
+ ## Header testing
+
+ # header - silently ignored with command line parameters
+ ['header-1', '--header --to=iec 4096', {OUT=>"4.0K"}],
+
+ # header warning with --debug
+ ['header-2', '--debug --header --to=iec 4096', {OUT=>"4.0K"},
+ {ERR=>"$prog: --header ignored with command-line input\n"}],
+
+ ['header-3', '--header=A',
+ {ERR=>"$prog: invalid header value 'A'\n"},
+ {EXIT => 1},],
+ ['header-4', '--header=0',
+ {ERR=>"$prog: invalid header value '0'\n"},
+ {EXIT => 1},],
+ ['header-5', '--header=-6',
+ {ERR=>"$prog: invalid header value '-6'\n"},
+ {EXIT => 1},],
+ ['header-6', '--debug --header --to=iec',
+ {IN_PIPE=>"size\n5000\n90000\n"},
+ {OUT=>"size\n4.9K\n88K"}],
+ ['header-7', '--debug --header=3 --to=iec',
+ {IN_PIPE=>"hello\nworld\nsize\n5000\n90000\n"},
+ {OUT=>"hello\nworld\nsize\n4.9K\n88K"}],
+ # header, but no actual content
+ ['header-8', '--header=2 --to=iec',
+ {IN_PIPE=>"hello\nworld\n"},
+ {OUT=>"hello\nworld"}],
+ # not enough header lines
+ ['header-9', '--header=3 --to=iec',
+ {IN_PIPE=>"hello\nworld\n"},
+ {OUT=>"hello\nworld"}],
+
+
+ ## human_strtod testing
+
+ # NO_DIGITS_FOUND
+ ['strtod-1', '--from=si "foo"',
+ {ERR=>"$prog: invalid number: 'foo'\n"},
+ {EXIT=> 2}],
+ ['strtod-2', '--from=si ""',
+ {ERR=>"$prog: invalid number: ''\n"},
+ {EXIT=> 2}],
+
+ # INTEGRAL_OVERFLOW
+ ['strtod-3', '--from=si "1234567890123456789012345678901234567890'.
+ '1234567890123456789012345678901234567890"',
+ {ERR=>"$prog: value too large to be converted: '" .
+ "1234567890123456789012345678901234567890" .
+ "1234567890123456789012345678901234567890'\n",
+ },
+ {EXIT=> 2}],
+
+ # FRACTION_NO_DIGITS_FOUND
+ ['strtod-5', '--from=si 12.',
+ {ERR=>"$prog: invalid number: '12.'\n"},
+ {EXIT=>2}],
+ ['strtod-6', '--from=si 12.K',
+ {ERR=>"$prog: invalid number: '12.K'\n"},
+ {EXIT=>2}],
+
+ # whitespace is not allowed after decimal-point
+ ['strtod-6.1', '--from=si --delimiter=, "12. 2"',
+ {ERR=>"$prog: invalid number: '12. 2'\n"},
+ {EXIT=>2}],
+
+ # FRACTION_OVERFLOW
+ ['strtod-7', '--from=si "12.1234567890123456789012345678901234567890'.
+ '1234567890123456789012345678901234567890"',
+ {ERR=>"$prog: value too large to be converted: '" .
+ "12.1234567890123456789012345678901234567890" .
+ "1234567890123456789012345678901234567890'\n",
+ },
+ {EXIT=> 2}],
+
+ # INVALID_SUFFIX
+ ['strtod-9', '--from=si 12.2Q',
+ {ERR=>"$prog: invalid suffix in input: '12.2Q'\n"},
+ {EXIT=>2}],
+
+ # VALID_BUT_FORBIDDEN_SUFFIX
+ ['strtod-10', '12M',
+ {ERR => "$prog: rejecting suffix " .
+ "in input: '12M' (consider using --from)\n"},
+ {EXIT=>2}],
+
+ # MISSING_I_SUFFIX
+ ['strtod-11', '--from=iec-i 12M',
+ {ERR => "$prog: missing 'i' suffix in input: " .
+ "'12M' (e.g Ki/Mi/Gi)\n"},
+ {EXIT=>2}],
+
+ #
+ # Test double_to_human()
+ #
+
+ # 1K and smaller
+ ['dbl-to-human-1','--to=si 800', {OUT=>"800"}],
+ ['dbl-to-human-2','--to=si 0', {OUT=>"0"}],
+ ['dbl-to-human-2.1','--to=si 999', {OUT=>"999"}],
+ ['dbl-to-human-2.2','--to=si 1000', {OUT=>"1.0K"}],
+ #NOTE: the following are consistent with "ls -lh" output
+ ['dbl-to-human-2.3','--to=iec 999', {OUT=>"999"}],
+ ['dbl-to-human-2.4','--to=iec 1023', {OUT=>"1023"}],
+ ['dbl-to-human-2.5','--to=iec 1024', {OUT=>"1.0K"}],
+ ['dbl-to-human-2.6','--to=iec 1025', {OUT=>"1.1K"}],
+ ['dbl-to-human-2.7','--to=iec 0', {OUT=>"0"}],
+ # no "i" suffix if output has no suffix
+ ['dbl-to-human-2.8','--to=iec-i 0', {OUT=>"0"}],
+
+ # values resulting in "N.Nx" output
+ ['dbl-to-human-3','--to=si 8000', {OUT=>"8.0K"}],
+ ['dbl-to-human-3.1','--to=si 8001', {OUT=>"8.1K"}],
+ ['dbl-to-human-4','--to=si --round=down 8001', {OUT=>"8.0K"}],
+
+ ['dbl-to-human-5','--to=si --round=down 3500', {OUT=>"3.5K"}],
+ ['dbl-to-human-6','--to=si --round=nearest 3500', {OUT=>"3.5K"}],
+ ['dbl-to-human-7','--to=si --round=up 3500', {OUT=>"3.5K"}],
+
+ ['dbl-to-human-8','--to=si --round=down 3501', {OUT=>"3.5K"}],
+ ['dbl-to-human-9','--to=si --round=nearest 3501', {OUT=>"3.5K"}],
+ ['dbl-to-human-10','--to=si --round=up 3501', {OUT=>"3.6K"}],
+
+ ['dbl-to-human-11','--to=si --round=nearest 3550', {OUT=>"3.6K"}],
+ ['dbl-to-human-12','--to=si --from=si 999.89K', {OUT=>"1.0M"}],
+ ['dbl-to-human-13','--to=si --from=si 9.9K', {OUT=>"9.9K"}],
+ ['dbl-to-human-14','--to=si 9900', {OUT=>"9.9K"}],
+ ['dbl-to-human-15','--to=iec --from=si 3.3K', {OUT=>"3.3K"}],
+ ['dbl-to-human-16','--to=iec --round=down --from=si 3.3K', {OUT=>"3.2K"}],
+
+ # values resulting in 'NNx' output
+ ['dbl-to-human-17','--to=si 9999', {OUT=>"10K"}],
+ ['dbl-to-human-18','--to=si --round=down 35000', {OUT=>"35K"}],
+ ['dbl-to-human-19','--to=iec 35000', {OUT=>"35K"}],
+ ['dbl-to-human-20','--to=iec --round=down 35000', {OUT=>"34K"}],
+ ['dbl-to-human-21','--to=iec 35000000', {OUT=>"34M"}],
+ ['dbl-to-human-22','--to=iec --round=down 35000000', {OUT=>"33M"}],
+ ['dbl-to-human-23','--to=si 35000001', {OUT=>"36M"}],
+ ['dbl-to-human-24','--to=si --from=si 9.99M', {OUT=>"10M"}],
+ ['dbl-to-human-25','--to=si --from=iec 9.99M', {OUT=>"11M"}],
+ ['dbl-to-human-25.1','--to=iec 99999', {OUT=>"98K"}],
+
+ # values resulting in 'NNNx' output
+ ['dbl-to-human-26','--to=si 999000000000', {OUT=>"999G"}],
+ ['dbl-to-human-27','--to=iec 999000000000', {OUT=>"931G"}],
+ ['dbl-to-human-28','--to=si 123600000000000', {OUT=>"124T"}],
+ ['dbl-to-human-29','--to=si 998123', {OUT=>"999K"}],
+ ['dbl-to-human-30','--to=si --round=nearest 998123', {OUT=>"998K"}],
+ ['dbl-to-human-31','--to=si 99999', {OUT=>"100K"}],
+ ['dbl-to-human-32','--to=iec 102399', {OUT=>"100K"}],
+ ['dbl-to-human-33','--to=iec-i 102399', {OUT=>"100Ki"}],
+
+
+ # Default --round=from-zero
+ ['round-1','--to-unit=1024 -- 6000 -6000',
+ {OUT=>"6\n-6"}],
+ ['round-2','--to-unit=1024 -- 6000.0 -6000.0',
+ {OUT=>"5.9\n-5.9"}],
+ ['round-3','--to-unit=1024 -- 6000.00 -6000.00',
+ {OUT=>"5.86\n-5.86"}],
+ ['round-4','--to-unit=1024 -- 6000.000 -6000.000',
+ {OUT=>"5.860\n-5.860"}],
+ ['round-5','--to-unit=1024 -- 6000.0000 -6000.0000',
+ {OUT=>"5.8594\n-5.8594"}],
+ # --round=up
+ ['round-1-up','--round=up --to-unit=1024 -- 6000 -6000',
+ {OUT=>"6\n-5"}],
+ ['round-2-up','--round=up --to-unit=1024 -- 6000.0 -6000.0',
+ {OUT=>"5.9\n-5.8"}],
+ ['round-3-up','--round=up --to-unit=1024 -- 6000.00 -6000.00',
+ {OUT=>"5.86\n-5.85"}],
+ ['round-4-up','--round=up --to-unit=1024 -- 6000.000 -6000.000',
+ {OUT=>"5.860\n-5.859"}],
+ ['round-5-up','--round=up --to-unit=1024 -- 6000.0000 -6000.0000',
+ {OUT=>"5.8594\n-5.8593"}],
+ # --round=down
+ ['round-1-down','--round=down --to-unit=1024 -- 6000 -6000',
+ {OUT=>"5\n-6"}],
+ ['round-2-down','--round=down --to-unit=1024 -- 6000.0 -6000.0',
+ {OUT=>"5.8\n-5.9"}],
+ ['round-3-down','--round=down --to-unit=1024 -- 6000.00 -6000.00',
+ {OUT=>"5.85\n-5.86"}],
+ ['round-4-down','--round=down --to-unit=1024 -- 6000.000 -6000.000',
+ {OUT=>"5.859\n-5.860"}],
+ ['round-5-down','--round=down --to-unit=1024 -- 6000.0000 -6000.0000',
+ {OUT=>"5.8593\n-5.8594"}],
+ # --round=towards-zero
+ ['round-1-to-zero','--ro=towards-zero --to-u=1024 -- 6000 -6000',
+ {OUT=>"5\n-5"}],
+ ['round-2-to-zero','--ro=towards-zero --to-u=1024 -- 6000.0 -6000.0',
+ {OUT=>"5.8\n-5.8"}],
+ ['round-3-to-zero','--ro=towards-zero --to-u=1024 -- 6000.00 -6000.00',
+ {OUT=>"5.85\n-5.85"}],
+ ['round-4-to-zero','--ro=towards-zero --to-u=1024 -- 6000.000 -6000.000',
+ {OUT=>"5.859\n-5.859"}],
+ ['round-5-to-zero','--ro=towards-zero --to-u=1024 -- 6000.0000 -6000.0000',
+ {OUT=>"5.8593\n-5.8593"}],
+ # --round=nearest
+ ['round-1-near','--ro=nearest --to-u=1024 -- 6000 -6000',
+ {OUT=>"6\n-6"}],
+ ['round-2-near','--ro=nearest --to-u=1024 -- 6000.0 -6000.0',
+ {OUT=>"5.9\n-5.9"}],
+ ['round-3-near','--ro=nearest --to-u=1024 -- 6000.00 -6000.00',
+ {OUT=>"5.86\n-5.86"}],
+ ['round-4-near','--ro=nearest --to-u=1024 -- 6000.000 -6000.000',
+ {OUT=>"5.859\n-5.859"}],
+ ['round-5-near','--ro=nearest --to-u=1024 -- 6000.0000 -6000.0000',
+ {OUT=>"5.8594\n-5.8594"}],
+
+
+ # Large Values
+ ['large-1','1000000000000000', {OUT=>"1000000000000000"}],
+ # 18 digits is OK
+ ['large-2','1000000000000000000', {OUT=>"1000000000000000000"}],
+ # 19 digits is too much (without output scaling)
+ ['large-3','10000000000000000000',
+ {ERR => "$prog: value too large to be printed: '1e+19' " .
+ "(consider using --to)\n"},
+ {EXIT=>2}],
+
+ # Test input:
+ # Up to 27 digits is OK.
+ ['large-3.1', '--to=si 1', {OUT=> "1"}],
+ ['large-3.2', '--to=si 10', {OUT=> "10"}],
+ ['large-3.3', '--to=si 100', {OUT=> "100"}],
+ ['large-3.4', '--to=si 1000', {OUT=>"1.0K"}],
+ ['large-3.5', '--to=si 10000', {OUT=> "10K"}],
+ ['large-3.6', '--to=si 100000', {OUT=>"100K"}],
+ ['large-3.7', '--to=si 1000000', {OUT=>"1.0M"}],
+ ['large-3.8', '--to=si 10000000', {OUT=> "10M"}],
+ ['large-3.9', '--to=si 100000000', {OUT=>"100M"}],
+ ['large-3.10','--to=si 1000000000', {OUT=>"1.0G"}],
+ ['large-3.11','--to=si 10000000000', {OUT=> "10G"}],
+ ['large-3.12','--to=si 100000000000', {OUT=>"100G"}],
+ ['large-3.13','--to=si 1000000000000', {OUT=>"1.0T"}],
+ ['large-3.14','--to=si 10000000000000', {OUT=> "10T"}],
+ ['large-3.15','--to=si 100000000000000', {OUT=>"100T"}],
+ ['large-3.16','--to=si 1000000000000000', {OUT=>"1.0P"}],
+ ['large-3.17','--to=si 10000000000000000', {OUT=> "10P"}],
+ ['large-3.18','--to=si 100000000000000000', {OUT=>"100P"}],
+ ['large-3.19','--to=si 1000000000000000000', {OUT=>"1.0E"}],
+ ['large-3.20','--to=si 10000000000000000000', {OUT=> "10E"}],
+ ['large-3.21','--to=si 210000000000000000000', {OUT=>"210E"}],
+ ['large-3.22','--to=si 3210000000000000000000', {OUT=>"3.3Z"}],
+ ['large-3.23','--to=si 43210000000000000000000', {OUT=> "44Z"}],
+ ['large-3.24','--to=si 543210000000000000000000', {OUT=>"544Z"}],
+ ['large-3.25','--to=si 6543210000000000000000000', {OUT=>"6.6Y"}],
+ ['large-3.26','--to=si 76543210000000000000000000', {OUT=> "77Y"}],
+ ['large-3.27','--to=si 876543210000000000000000000', {OUT=>"877Y"}],
+
+ # More than 27 digits is not OK
+ ['large-3.28','--to=si 9876543210000000000000000000',
+ {ERR => "$prog: value too large to be converted: " .
+ "'9876543210000000000000000000'\n"},
+ {EXIT => 2}],
+
+ # Test Output
+ ['large-4.1', '--from=si 9.7M', {OUT=>"9700000"}],
+ ['large-4.2', '--from=si 10M', {OUT =>"10000000"}],
+ ['large-4.3', '--from=si 200M', {OUT =>"200000000"}],
+ ['large-4.4', '--from=si 3G', {OUT =>"3000000000"}],
+ ['large-4.5', '--from=si 40G', {OUT =>"40000000000"}],
+ ['large-4.6', '--from=si 500G', {OUT =>"500000000000"}],
+ ['large-4.7', '--from=si 6T', {OUT =>"6000000000000"}],
+ ['large-4.8', '--from=si 70T', {OUT =>"70000000000000"}],
+ ['large-4.9', '--from=si 800T', {OUT =>"800000000000000"}],
+ ['large-4.10','--from=si 9P', {OUT =>"9000000000000000"}],
+ ['large-4.11','--from=si 10P', {OUT =>"10000000000000000"}],
+ ['large-4.12','--from=si 200P', {OUT =>"200000000000000000"}],
+ ['large-4.13','--from=si 3E', {OUT =>"3000000000000000000"}],
+
+ # More than 18 digits of output without scaling - no good.
+ ['large-4.14','--from=si 40E',
+ {ERR => "$prog: value too large to be printed: '4e+19' " .
+ "(consider using --to)\n"},
+ {EXIT => 2}],
+ ['large-4.15','--from=si 500E',
+ {ERR => "$prog: value too large to be printed: '5e+20' " .
+ "(consider using --to)\n"},
+ {EXIT => 2}],
+ ['large-4.16','--from=si 6Z',
+ {ERR => "$prog: value too large to be printed: '6e+21' " .
+ "(consider using --to)\n"},
+ {EXIT => 2}],
+ ['large-4.17','--from=si 70Z',
+ {ERR => "$prog: value too large to be printed: '7e+22' " .
+ "(consider using --to)\n"},
+ {EXIT => 2}],
+ ['large-4.18','--from=si 800Z',
+ {ERR => "$prog: value too large to be printed: '8e+23' " .
+ "(consider using --to)\n"},
+ {EXIT => 2}],
+ ['large-4.19','--from=si 9Y',
+ {ERR => "$prog: value too large to be printed: '9e+24' " .
+ "(consider using --to)\n"},
+ {EXIT => 2}],
+ ['large-4.20','--from=si 10Y',
+ {ERR => "$prog: value too large to be printed: '1e+25' " .
+ "(consider using --to)\n"},
+ {EXIT => 2}],
+ ['large-4.21','--from=si 200Y',
+ {ERR => "$prog: value too large to be printed: '2e+26' " .
+ "(consider using --to)\n"},
+ {EXIT => 2}],
+
+ ['large-5.1','--to=si 1000000000000000000', {OUT=>"1.0E"}],
+ ['large-5','--from=si --to=si 2E', {OUT=>"2.0E"}],
+ ['large-6','--from=si --to=si 3.4Z', {OUT=>"3.4Z"}],
+ ['large-7','--from=si --to=si 80Y', {OUT=>"80Y"}],
+ ['large-8','--from=si --to=si 9000Z', {OUT=>"9.0Y"}],
+
+ ['large-10','--from=si --to=si 999Y', {OUT=>"999Y"}],
+ ['large-11','--from=si --to=iec 999Y', {OUT=>"827Y"}],
+ ['large-12','--from=si --round=down --to=iec 999Y', {OUT=>"826Y"}],
+
+ # units can also affect the output
+ ['large-13','--from=si --from-unit=1000000 9P',
+ {ERR => "$prog: value too large to be printed: '9e+21' " .
+ "(consider using --to)\n"},
+ {EXIT => 2}],
+ ['large-13.1','--from=si --from-unit=1000000 --to=si 9P', {OUT=>"9.0Z"}],
+
+ # Numbers>999Y are never acceptable, regardless of scaling
+ ['large-14','--from=si --to=si 999Y', {OUT=>"999Y"}],
+ ['large-14.1','--from=si --to=si 1000Y',
+ {ERR => "$prog: value too large to be printed: '1e+27' " .
+ "(cannot handle values > 999Y)\n"},
+ {EXIT => 2}],
+ ['large-14.2','--from=si --to=si --from-unit=10000 1Y',
+ {ERR => "$prog: value too large to be printed: '1e+28' " .
+ "(cannot handle values > 999Y)\n"},
+ {EXIT => 2}],
+
+ # debug warnings
+ ['debug-1', '--debug 4096', {OUT=>"4096"},
+ {ERR=>"$prog: no conversion option specified\n"}],
+ # '--padding' is a valid conversion option - no warning should be printed
+ ['debug-1.1', '--debug --padding 10 4096', {OUT=>" 4096"}],
+ ['debug-2', '--debug --grouping --from=si 4.0K', {OUT=>"4000"},
+ {ERR=>"$prog: grouping has no effect in this locale\n"}],
+ ['debug-4', '--to=si --debug 12345678901234567890',
+ {OUT=>"13E"},
+ {ERR=>"$prog: large input value '12345678901234567890':" .
+ " possible precision loss\n"}],
+ ['debug-5', '--to=si --from=si --debug 1.12345678901234567890Y',
+ {OUT=>"1.2Y"},
+ {ERR=>"$prog: large input value '1.12345678901234567890Y':" .
+ " possible precision loss\n"}],
+
+ # dev-debug messages - the actual messages don't matter
+ # just ensure the program works, and for code coverage testing.
+ ['devdebug-1', '---devdebug --from=si 4.9K', {OUT=>"4900"},
+ {ERR=>""},
+ {ERR_SUBST=>"s/.*//msg"}],
+ ['devdebug-2', '---devdebug 4900', {OUT=>"4900"},
+ {ERR=>""},
+ {ERR_SUBST=>"s/.*//msg"}],
+ ['devdebug-3', '---devdebug --from=auto 4Mi', {OUT=>"4194304"},
+ {ERR=>""},
+ {ERR_SUBST=>"s/.*//msg"}],
+ ['devdebug-4', '---devdebug --to=si 4000000', {OUT=>"4.0M"},
+ {ERR=>""},
+ {ERR_SUBST=>"s/.*//msg"}],
+ ['devdebug-5', '---devdebug --to=si --padding=5 4000000', {OUT=>" 4.0M"},
+ {ERR=>""},
+ {ERR_SUBST=>"s/.*//msg"}],
+ ['devdebug-6', '---devdebug --suffix=Foo 1234Foo', {OUT=>"1234Foo"},
+ {ERR=>""},
+ {ERR_SUBST=>"s/.*//msg"}],
+ ['devdebug-7', '---devdebug --suffix=Foo 1234', {OUT=>"1234Foo"},
+ {ERR=>""},
+ {ERR_SUBST=>"s/.*//msg"}],
+ ['devdebug-9', '---devdebug --grouping 10000', {OUT=>"10000"},
+ {ERR=>""},
+ {ERR_SUBST=>"s/.*//msg"}],
+ ['devdebug-10', '---devdebug --format %f 10000', {OUT=>"10000"},
+ {ERR=>""},
+ {ERR_SUBST=>"s/.*//msg"}],
+ ['devdebug-11', '---devdebug --format "%\'-10f" 10000',{OUT=>"10000 "},
+ {ERR=>""},
+ {ERR_SUBST=>"s/.*//msg"}],
+ ['devdebug-12', '---devdebug --field 2 A',{OUT=>""},
+ {ERR=>""}, {EXIT=>2},
+ {ERR_SUBST=>"s/.*//msg"}],
+
+ # Invalid parameters
+ ['help-1', '--foobar',
+ {ERR=>"$prog: unrecognized option '--foobar'\n" .
+ "Try '$prog --help' for more information.\n"},
+ {EXIT=>1}],
+
+ ## Format string - check error detection
+ ['fmt-err-1', '--format ""',
+ {ERR=>"$prog: format '' has no % directive\n"},
+ {EXIT=>1}],
+ ['fmt-err-2', '--format "hello"',
+ {ERR=>"$prog: format 'hello' has no % directive\n"},
+ {EXIT=>1}],
+ ['fmt-err-3', '--format "hello%"',
+ {ERR=>"$prog: format 'hello%' ends in %\n"},
+ {EXIT=>1}],
+ ['fmt-err-4', '--format "%d"',
+ {ERR=>"$prog: invalid format '%d', " .
+ "directive must be %['][-][N]f\n"},
+ {EXIT=>1}],
+ ['fmt-err-5', '--format "% -43 f"',
+ {ERR=>"$prog: invalid format '% -43 f', " .
+ "directive must be %['][-][N]f\n"},
+ {EXIT=>1}],
+ ['fmt-err-6', '--format "%f %f"',
+ {ERR=>"$prog: format '%f %f' has too many % directives\n"},
+ {EXIT=>1}],
+ ['fmt-err-7', '--format "%123456789012345678901234567890f"',
+ {ERR=>"$prog: invalid format '%123456789012345678901234567890f'".
+ " (width overflow)\n"},
+ {EXIT=>1}],
+ ['fmt-err-8', '--format "%f" --padding 20',
+ {ERR=>"$prog: --padding cannot be combined with --format\n"},
+ {EXIT=>1}],
+ ['fmt-err-9', '--format "%f" --grouping',
+ {ERR=>"$prog: --grouping cannot be combined with --format\n"},
+ {EXIT=>1}],
+ ['fmt-err-10', '--format "%\'f" --to=si',
+ {ERR=>"$prog: grouping cannot be combined with --to\n"},
+ {EXIT=>1}],
+ ['fmt-err-11', '--debug --format "%\'f" 5000', {OUT=>"5000"},
+ {ERR=>"$prog: grouping has no effect in this locale\n"}],
+
+ ## Format string - check some corner cases
+ ['fmt-1', '--format "%% %f" 5000', {OUT=>"%%5000"}],
+ ['fmt-2', '--format "%f %%" 5000', {OUT=>"5000 %%"}],
+
+ ['fmt-3', '--format "--%f--" 5000000', {OUT=>"--5000000--"}],
+ ['fmt-4', '--format "--%f--" --to=si 5000000', {OUT=>"--5.0M--"}],
+
+ ['fmt-5', '--format "--%10f--" --to=si 5000000',{OUT=>"-- 5.0M--"}],
+ ['fmt-6', '--format "--%-10f--" --to=si 5000000',{OUT=>"--5.0M --"}],
+ ['fmt-7', '--format "--%10f--" 5000000',{OUT=>"-- 5000000--"}],
+ ['fmt-8', '--format "--%-10f--" 5000000',{OUT=>"--5000000 --"}],
+
+ # too-short width
+ ['fmt-9', '--format "--%5f--" 5000000',{OUT=>"--5000000--"}],
+
+ # Format + Suffix
+ ['fmt-10', '--format "--%10f--" --suffix Foo 50', {OUT=>"-- 50Foo--"}],
+ ['fmt-11', '--format "--%-10f--" --suffix Foo 50',{OUT=>"--50Foo --"}],
+
+ # Grouping in C locale - no grouping effect
+ ['fmt-12', '--format "%\'f" 50000',{OUT=>"50000"}],
+ ['fmt-13', '--format "%\'10f" 50000', {OUT=>" 50000"}],
+ ['fmt-14', '--format "%\'-10f" 50000',{OUT=>"50000 "}],
+
+ # Very large format strings
+ ['fmt-15', '--format "--%100000f--" --to=si 4200',
+ {OUT=>"--" . " " x 99996 . "4.2K--" }],
+
+
+ ## Check all errors again, this time with --invalid=fail
+ ## Input will be printed without conversion,
+ ## and exit code will be 2
+ ['ign-err-1', '--invalid=fail 4Q',
+ {ERR => "$prog: invalid suffix in input: '4Q'\n"},
+ {OUT => "4Q\n"},
+ {EXIT => 2}],
+ ['ign-err-2', '--invalid=fail 4M',
+ {ERR => "$prog: rejecting suffix " .
+ "in input: '4M' (consider using --from)\n"},
+ {OUT => "4M\n"},
+ {EXIT => 2}],
+ ['ign-err-3', '--invalid=fail --from=si 4MQ',
+ {ERR => "$prog: invalid suffix in input '4MQ': 'Q'\n"},
+ {OUT => "4MQ\n"},
+ {EXIT => 2}],
+ ['ign-err-4', '--invalid=fail --suffix=Foo --to=si 7000FooF',
+ {ERR => "$prog: invalid suffix in input: '7000FooF'\n"},
+ {OUT => "7000FooF\n"},
+ {EXIT => 2}],
+ ['ign-err-5','--invalid=fail --field 3 --from=auto "Hello 40M World 90G"',
+ {ERR => "$prog: invalid number: 'World'\n"},
+ {OUT => "Hello 40M World 90G\n"},
+ {EXIT => 2}],
+ ['ign-err-6', '--invalid=fail --field 3 --to=si "Hello World"',
+ {ERR => "$prog: input line is too short, no numbers found " .
+ "to convert in field 3\n"},
+ {OUT => "Hello World\n"},
+ {EXIT => 2}],
+ ['ign-err-7', '--invalid=fail --from=si "foo"',
+ {ERR => "$prog: invalid number: 'foo'\n"},
+ {OUT => "foo\n"},
+ {EXIT=> 2}],
+ ['ign-err-8', '--invalid=fail 12M',
+ {ERR => "$prog: rejecting suffix " .
+ "in input: '12M' (consider using --from)\n"},
+ {OUT => "12M\n"},
+ {EXIT => 2}],
+ ['ign-err-9', '--invalid=fail --from=iec-i 12M',
+ {ERR => "$prog: missing 'i' suffix in input: " .
+ "'12M' (e.g Ki/Mi/Gi)\n"},
+ {OUT => "12M\n"},
+ {EXIT=>2}],
+ ['ign-err-10','--invalid=fail 10000000000000000000',
+ {ERR => "$prog: value too large to be printed: '1e+19' " .
+ "(consider using --to)\n"},
+ {OUT => "10000000000000000000\n"},
+ {EXIT=>2}],
+ ['ign-err-11','--invalid=fail --to=si 9876543210000000000000000000',
+ {ERR => "$prog: value too large to be converted: " .
+ "'9876543210000000000000000000'\n"},
+ {OUT => "9876543210000000000000000000\n"},
+ {EXIT => 2}],
+
+ ## Ignore Errors with multiple conversions
+ ['ign-err-m1', '--invalid=ignore --to=si 1000 2000 bad 3000',
+ {OUT => "1.0K\n2.0K\nbad\n3.0K"},
+ {EXIT => 0}],
+ ['ign-err-m1.1', '--invalid=ignore --to=si',
+ {IN_PIPE => "1000\n2000\nbad\n3000\n"},
+ {OUT => "1.0K\n2.0K\nbad\n3.0K"},
+ {EXIT => 0}],
+ ['ign-err-m1.3', '--invalid=fail --debug --to=si 1000 2000 3000',
+ {OUT => "1.0K\n2.0K\n3.0K"},
+ {EXIT => 0}],
+ ['ign-err-m2', '--invalid=fail --to=si 1000 Foo 3000',
+ {OUT => "1.0K\nFoo\n3.0K\n"},
+ {ERR => "$prog: invalid number: 'Foo'\n"},
+ {EXIT => 2}],
+ ['ign-err-m2.1', '--invalid=warn --to=si',
+ {IN_PIPE => "1000\nFoo\n3000\n"},
+ {OUT => "1.0K\nFoo\n3.0K"},
+ {ERR => "$prog: invalid number: 'Foo'\n"},
+ {EXIT => 0}],
+
+ # --debug will trigger a final warning at EOF
+ ['ign-err-m2.2', '--invalid=fail --debug --to=si 1000 Foo 3000',
+ {OUT => "1.0K\nFoo\n3.0K\n"},
+ {ERR => "$prog: invalid number: 'Foo'\n" .
+ "$prog: failed to convert some of the input numbers\n"},
+ {EXIT => 2}],
+
+ ['ign-err-m3', '--invalid=fail --field 2 --from=si --to=iec',
+ {IN_PIPE => "A 1K x\nB 2M y\nC 3G z\n"},
+ {OUT => "A 1000 x\nB 2.0M y\nC 2.8G z"},
+ {EXIT => 0}],
+ # invalid input on one of the fields
+ ['ign-err-m3.1', '--invalid=fail --field 2 --from=si --to=iec',
+ {IN_PIPE => "A 1K x\nB Foo y\nC 3G z\n"},
+ {OUT => "A 1000 x\nB Foo y\nC 2.8G z\n"},
+ {ERR => "$prog: invalid number: 'Foo'\n"},
+ {EXIT => 2}],
+ # one of the lines is too short
+ ['ign-err-m3.2', '--invalid=fail --field 2 --from=si --to=iec',
+ {IN_PIPE => "A 1K x\nB\nC 3G z\n"},
+ {OUT => "A 1000 x\nB\nC 2.8G z\n"},
+ {ERR => "$prog: input line is too short, no numbers found " .
+ "to convert in field 2\n"},
+ {EXIT => 2}],
+ );
+
+# Tests that are only meaningful in a locale with digit grouping.
+# The expected outputs assume a locale whose thousands separator is a
+# blank and whose decimal point is a comma (see the OUT strings below).
+# Every test runs with LC_ALL set to $locale; the whole table is skipped
+# when $locale is "C".
+my @Locale_Tests =
+  (
+     # Locale that supports grouping, but without '--grouping' parameter
+     ['lcl-grp-1', '--from=si 7M', {OUT=>"7000000"},
+             {ENV=>"LC_ALL=$locale"}],
+
+     # Locale with grouping
+     ['lcl-grp-2', '--from=si --grouping 7M', {OUT=>"7 000 000"},
+             {ENV=>"LC_ALL=$locale"}],
+
+     # Locale with grouping and debug - no debug warning message
+     ['lcl-grp-3', '--from=si --debug --grouping 7M', {OUT=>"7 000 000"},
+             {ENV=>"LC_ALL=$locale"}],
+
+     # Input with locale'd decimal-point
+     ['lcl-stdtod-1', '--from=si 12,2K', {OUT=>"12200"},
+             {ENV=>"LC_ALL=$locale"}],
+
+     ['lcl-dbl-to-human-1', '--to=si 1100', {OUT=>"1,1K"},
+             {ENV=>"LC_ALL=$locale"}],
+
+     # Format + Grouping
+     ['lcl-fmt-1', '--format "%\'f" 50000', {OUT=>"50 000"},
+             {ENV=>"LC_ALL=$locale"}],
+
+     # The padding below is written with the 'x' operator so that the
+     # field width (10 characters in each case) stays explicit and can
+     # not be silently altered by whitespace mangling.
+     # Right-aligned: 4 blanks + "50 000" (6 characters).
+     ['lcl-fmt-2', '--format "--%\'10f--" 50000',
+             {OUT=>"--" . " " x 4 . "50 000--"},
+             {ENV=>"LC_ALL=$locale"}],
+     # Left-aligned: "50 000" (6 characters) + 4 blanks.
+     ['lcl-fmt-3', '--format "--%\'-10f--" 50000',
+             {OUT=>"--50 000" . " " x 4 . "--"},
+             {ENV=>"LC_ALL=$locale"}],
+     # Left-aligned: "5,0M" (4 characters) + 6 blanks.
+     ['lcl-fmt-4', '--format "--%-10f--" --to=si 5000000',
+             {OUT=>"--5,0M" . " " x 6 . "--"},
+             {ENV=>"LC_ALL=$locale"}],
+
+  );
+push @Tests, @Locale_Tests if $locale ne "C";
+
+## Check all valid/invalid suffixes
+# Each ASCII letter is tried as a one-character suffix.  The letters
+# K M G T P E Z Y get five conversion tests each; every other letter
+# gets a single test that expects an "invalid suffix" error and exit 2.
+foreach my $letter ('A' .. 'Z', 'a' .. 'z')
+  {
+    # Guard clause: anything that is not one of the accepted upper-case
+    # prefixes must be rejected by numfmt.
+    if (index ('KMGTPEZY', $letter) < 0)
+      {
+        push @Tests, ["auto-suf-si-$letter", "--from=si --to=si 1$letter",
+                      {ERR=>"$prog: invalid suffix in input: '1${letter}'\n"},
+                      {EXIT=>2}];
+        next;
+      }
+
+    # Accepted prefix: round-trip it through each supported scale
+    # combination (the order of the entries is preserved).
+    push @Tests,
+      ["auto-suf-si-$letter", "--from=si --to=si 1$letter",
+       {OUT=>"1.0$letter"}],
+      ["auto-suf-iec-$letter", "--from=iec --to=iec 1$letter",
+       {OUT=>"1.0$letter"}],
+      ["auto-suf-auto-$letter", "--from=auto --to=iec 1${letter}i",
+       {OUT=>"1.0$letter"}],
+      ["auto-suf-iec-to-ieci-$letter", "--from=iec --to=iec-i 1${letter}",
+       {OUT=>"1.0${letter}i"}],
+      ["auto-suf-ieci-to-iec-$letter", "--from=iec-i --to=iec 1${letter}i",
+       {OUT=>"1.0${letter}"}];
+  }
+
+# Append a newline to the end of each expected 'OUT' string.
+# Tests that expect a nonzero exit status are left untouched, so their
+# 'OUT' strings must already be spelled out in full.
+my $t;
+
+Test:
+foreach $t (@Tests)
+  {
+    # Only the hash-ref elements of a test carry OUT/ERR/EXIT specs;
+    # the leading name and argument strings are skipped.
+    my @specs = grep { ref $_ eq 'HASH' } @$t;
+
+    # Don't fiddle with expected OUT string if there's a nonzero exit status.
+    next Test
+      if grep { exists $_->{EXIT} && $_->{EXIT} } @specs;
+
+    # The grep yields the very same hash refs stored in the test entry,
+    # so the expectation is modified in place.
+    $_->{OUT} .= "\n"
+      foreach grep { exists $_->{OUT} } @specs;
+  }
+
+# Run the whole table.  SAVE_TEMPS and VERBOSE are read from the
+# environment and passed through to run_tests; its return value becomes
+# the exit status of this script.
+my ($save_temps, $verbose) = @ENV{qw(SAVE_TEMPS VERBOSE)};
+
+my $fail = run_tests ($program_name, $prog, \@Tests,
+                      $save_temps, $verbose);
+exit $fail;