diff options
author | Lorry Tar Creator <lorry-tar-importer@lorry> | 2016-01-20 10:55:18 +0000 |
---|---|---|
committer | Lorry Tar Creator <lorry-tar-importer@lorry> | 2016-01-20 10:55:18 +0000 |
commit | 70e9163c9c18e995515598085cb824e554eb7ae7 (patch) | |
tree | a42dc8b2a6c031354bf31472de888bfc8a060132 /src/du.c | |
parent | cbf5993c43f49281173f185863577d86bfac6eae (diff) | |
download | coreutils-tarball-70e9163c9c18e995515598085cb824e554eb7ae7.tar.gz |
coreutils-8.25HEADcoreutils-8.25master
Diffstat (limited to 'src/du.c')
-rw-r--r-- | src/du.c | 1189 |
1 files changed, 652 insertions, 537 deletions
@@ -1,10 +1,10 @@ /* du -- summarize disk usage - Copyright (C) 1988-1991, 1995-2007 Free Software Foundation, Inc. + Copyright (C) 1988-2016 Free Software Foundation, Inc. - This program is free software; you can redistribute it and/or modify + This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -12,8 +12,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + along with this program. If not, see <http://www.gnu.org/licenses/>. */ /* Differences from the Unix du: * Doesn't simply ignore the names of regular files given as arguments @@ -25,64 +24,62 @@ Rewritten to use nftw, then to use fts by Jim Meyering. */ #include <config.h> -#include <stdio.h> #include <getopt.h> #include <sys/types.h> #include <assert.h> #include "system.h" #include "argmatch.h" +#include "argv-iter.h" +#include "di-set.h" #include "error.h" #include "exclude.h" #include "fprintftime.h" -#include "hash.h" #include "human.h" -#include "inttostr.h" +#include "mountlist.h" #include "quote.h" -#include "quotearg.h" -#include "readtokens0.h" -#include "same.h" +#include "stat-size.h" #include "stat-time.h" +#include "stdio--.h" #include "xfts.h" #include "xstrtol.h" extern bool fts_debug; -/* The official name of this program (e.g., no `g' prefix). */ +/* The official name of this program (e.g., no 'g' prefix). */ #define PROGRAM_NAME "du" #define AUTHORS \ - "Torbjorn Granlund", "David MacKenzie, Paul Eggert", "Jim Meyering" + proper_name_utf8 ("Torbjorn Granlund", "Torbj\303\266rn Granlund"), \ + proper_name ("David MacKenzie"), \ + proper_name ("Paul Eggert"), \ + proper_name ("Jim Meyering") #if DU_DEBUG # define FTS_CROSS_CHECK(Fts) fts_cross_check (Fts) -# define DEBUG_OPT "d" #else # define FTS_CROSS_CHECK(Fts) -# define DEBUG_OPT #endif -/* Initial size of the hash table. */ -#define INITIAL_TABLE_SIZE 103 +/* A set of dev/ino pairs to help identify files and directories + whose sizes have already been counted. */ +static struct di_set *di_files; -/* Hash structure for inode and device numbers. The separate entry - structure makes it easier to rehash "in place". */ +/* A set containing a dev/ino pair for each local mount point directory. */ +static struct di_set *di_mnt; -struct entry -{ - ino_t st_ino; - dev_t st_dev; -}; - -/* A set of dev/ino pairs. */ -static Hash_table *htab; +/* Keep track of the preceding "level" (depth in hierarchy) + from one call of process_file to the next. */ +static size_t prev_level; /* Define a class for collecting directory information. */ - struct duinfo { /* Size of files in directory. */ uintmax_t size; + /* Number of inodes in directory. */ + uintmax_t inodes; + /* Latest time stamp found. If tmax.tv_sec == TYPE_MINIMUM (time_t) && tmax.tv_nsec < 0, no time stamp has been found. */ struct timespec tmax; @@ -93,6 +90,7 @@ static inline void duinfo_init (struct duinfo *a) { a->size = 0; + a->inodes = 0; a->tmax.tv_sec = TYPE_MINIMUM (time_t); a->tmax.tv_nsec = -1; } @@ -102,6 +100,7 @@ static inline void duinfo_set (struct duinfo *a, uintmax_t size, struct timespec tmax) { a->size = size; + a->inodes = 1; a->tmax = tmax; } @@ -109,7 +108,9 @@ duinfo_set (struct duinfo *a, uintmax_t size, struct timespec tmax) static inline void duinfo_add (struct duinfo *a, struct duinfo const *b) { - a->size += b->size; + uintmax_t sum = a->size + b->size; + a->size = a->size <= sum ? sum : UINTMAX_MAX; + a->inodes = a->inodes + b->inodes; if (timespec_cmp (a->tmax, b->tmax) < 0) a->tmax = b->tmax; } @@ -124,9 +125,6 @@ struct dulevel struct duinfo subdir; }; -/* Name under which this program was invoked. */ -char *program_name; - /* If true, display counts for all files, not just directories. */ static bool opt_all = false; @@ -137,6 +135,9 @@ static bool apparent_size = false; /* If true, count each hard link of files with multiple links. */ static bool opt_count_all = false; +/* If true, hash all files to look for hard links. */ +static bool hash_all; + /* If true, output the NUL byte instead of a newline at the end of each line. */ static bool opt_nul_terminate_output = false; @@ -148,12 +149,19 @@ static bool opt_separate_dirs = false; /* Show the total for each directory (and file if --all) that is at most MAX_DEPTH levels down from the root of the hierarchy. The root - is at level 0, so `du --max-depth=0' is equivalent to `du -s'. */ + is at level 0, so 'du --max-depth=0' is equivalent to 'du -s'. */ static size_t max_depth = SIZE_MAX; +/* Only output entries with at least this SIZE if positive, + or at most if negative. See --threshold option. */ +static intmax_t opt_threshold = 0; + /* Human-readable options for output. */ static int human_output_opts; +/* Output inodes count instead of blocks used. */ +static bool opt_inodes = false; + /* If true, print most recently modified date, using the specified format. */ static bool opt_time = false; @@ -195,17 +203,10 @@ enum EXCLUDE_OPTION, FILES0_FROM_OPTION, HUMAN_SI_OPTION, - - /* FIXME: --kilobytes is deprecated (but not -k); remove in late 2006 */ - KILOBYTES_LONG_OPTION, - - MAX_DEPTH_OPTION, - - /* FIXME: --megabytes is deprecated (but not -m); remove in late 2006 */ - MEGABYTES_LONG_OPTION, - + FTS_DEBUG, TIME_OPTION, - TIME_STYLE_OPTION + TIME_STYLE_OPTION, + INODES_OPTION }; static struct option const long_options[] = @@ -215,22 +216,23 @@ static struct option const long_options[] = {"block-size", required_argument, NULL, 'B'}, {"bytes", no_argument, NULL, 'b'}, {"count-links", no_argument, NULL, 'l'}, + /* {"-debug", no_argument, NULL, FTS_DEBUG}, */ {"dereference", no_argument, NULL, 'L'}, {"dereference-args", no_argument, NULL, 'D'}, {"exclude", required_argument, NULL, EXCLUDE_OPTION}, {"exclude-from", required_argument, NULL, 'X'}, {"files0-from", required_argument, NULL, FILES0_FROM_OPTION}, {"human-readable", no_argument, NULL, 'h'}, + {"inodes", no_argument, NULL, INODES_OPTION}, {"si", no_argument, NULL, HUMAN_SI_OPTION}, - {"kilobytes", no_argument, NULL, KILOBYTES_LONG_OPTION}, - {"max-depth", required_argument, NULL, MAX_DEPTH_OPTION}, + {"max-depth", required_argument, NULL, 'd'}, {"null", no_argument, NULL, '0'}, - {"megabytes", no_argument, NULL, MEGABYTES_LONG_OPTION}, {"no-dereference", no_argument, NULL, 'P'}, {"one-file-system", no_argument, NULL, 'x'}, {"separate-dirs", no_argument, NULL, 'S'}, {"summarize", no_argument, NULL, 's'}, {"total", no_argument, NULL, 'c'}, + {"threshold", required_argument, NULL, 't'}, {"time", optional_argument, NULL, TIME_OPTION}, {"time-style", required_argument, NULL, TIME_STYLE_OPTION}, {GETOPT_HELP_OPTION_DECL}, @@ -248,8 +250,8 @@ static enum time_type const time_types[] = }; ARGMATCH_VERIFY (time_args, time_types); -/* `full-iso' uses full ISO-style dates and times. `long-iso' uses longer - ISO-style time stamps, though shorter than `full-iso'. `iso' uses shorter +/* 'full-iso' uses full ISO-style dates and times. 'long-iso' uses longer + ISO-style time stamps, though shorter than 'full-iso'. 'iso' uses shorter ISO-style time stamps. */ enum time_style { @@ -272,8 +274,7 @@ void usage (int status) { if (status != EXIT_SUCCESS) - fprintf (stderr, _("Try `%s --help' for more information.\n"), - program_name); + emit_try_help (); else { printf (_("\ @@ -281,134 +282,89 @@ Usage: %s [OPTION]... [FILE]...\n\ or: %s [OPTION]... --files0-from=F\n\ "), program_name, program_name); fputs (_("\ -Summarize disk usage of each FILE, recursively for directories.\n\ -\n\ -"), stdout); - fputs (_("\ -Mandatory arguments to long options are mandatory for short options too.\n\ +Summarize disk usage of the set of FILEs, recursively for directories.\n\ "), stdout); + + emit_mandatory_arg_note (); + fputs (_("\ + -0, --null end each output line with NUL, not newline\n\ -a, --all write counts for all files, not just directories\n\ - --apparent-size print apparent sizes, rather than disk usage; although\n\ + --apparent-size print apparent sizes, rather than disk usage; although\ +\n\ the apparent size is usually smaller, it may be\n\ - larger due to holes in (`sparse') files, internal\n\ + larger due to holes in ('sparse') files, internal\n\ fragmentation, indirect blocks, and the like\n\ "), stdout); fputs (_("\ - -B, --block-size=SIZE use SIZE-byte blocks\n\ - -b, --bytes equivalent to `--apparent-size --block-size=1'\n\ + -B, --block-size=SIZE scale sizes by SIZE before printing them; e.g.,\n\ + '-BM' prints sizes in units of 1,048,576 bytes;\n\ + see SIZE format below\n\ + -b, --bytes equivalent to '--apparent-size --block-size=1'\n\ -c, --total produce a grand total\n\ - -D, --dereference-args dereference FILEs that are symbolic links\n\ + -D, --dereference-args dereference only symlinks that are listed on the\n\ + command line\n\ + -d, --max-depth=N print the total for a directory (or file, with --all)\n\ + only if it is N or fewer levels below the command\n\ + line argument; --max-depth=0 is the same as\n\ + --summarize\n\ "), stdout); fputs (_("\ - --files0-from=F summarize disk usage of the NUL-terminated file\n\ - names specified in file F\n\ - -H like --si, but also evokes a warning; will soon\n\ - change to be equivalent to --dereference-args (-D)\n\ - -h, --human-readable print sizes in human readable format (e.g., 1K 234M 2G)\n\ - --si like -h, but use powers of 1000 not 1024\n\ + --files0-from=F summarize disk usage of the\n\ + NUL-terminated file names specified in file F;\n\ + if F is -, then read names from standard input\n\ + -H equivalent to --dereference-args (-D)\n\ + -h, --human-readable print sizes in human readable format (e.g., 1K 234M 2G)\ +\n\ + --inodes list inode usage information instead of block usage\n\ "), stdout); fputs (_("\ -k like --block-size=1K\n\ + -L, --dereference dereference all symbolic links\n\ -l, --count-links count sizes many times if hard linked\n\ -m like --block-size=1M\n\ "), stdout); fputs (_("\ - -L, --dereference dereference all symbolic links\n\ -P, --no-dereference don't follow any symbolic links (this is the default)\n\ - -0, --null end each output line with 0 byte rather than newline\n\ - -S, --separate-dirs do not include size of subdirectories\n\ + -S, --separate-dirs for directories do not include size of subdirectories\n\ + --si like -h, but use powers of 1000 not 1024\n\ -s, --summarize display only a total for each argument\n\ "), stdout); fputs (_("\ - -x, --one-file-system skip directories on different file systems\n\ - -X FILE, --exclude-from=FILE Exclude files that match any pattern in FILE.\n\ - --exclude=PATTERN Exclude files that match PATTERN.\n\ - --max-depth=N print the total for a directory (or file, with --all)\n\ - only if it is N or fewer levels below the command\n\ - line argument; --max-depth=0 is the same as\n\ - --summarize\n\ -"), stdout); - fputs (_("\ + -t, --threshold=SIZE exclude entries smaller than SIZE if positive,\n\ + or entries greater than SIZE if negative\n\ --time show time of the last modification of any file in the\n\ directory, or any of its subdirectories\n\ --time=WORD show time as WORD instead of modification time:\n\ atime, access, use, ctime or status\n\ - --time-style=STYLE show times using style STYLE:\n\ - full-iso, long-iso, iso, +FORMAT\n\ - FORMAT is interpreted like `date'\n\ + --time-style=STYLE show times using STYLE, which can be:\n\ + full-iso, long-iso, iso, or +FORMAT;\n\ + FORMAT is interpreted like in 'date'\n\ +"), stdout); + fputs (_("\ + -X, --exclude-from=FILE exclude files that match any pattern in FILE\n\ + --exclude=PATTERN exclude files that match PATTERN\n\ + -x, --one-file-system skip directories on different file systems\n\ "), stdout); fputs (HELP_OPTION_DESCRIPTION, stdout); fputs (VERSION_OPTION_DESCRIPTION, stdout); - fputs (_("\n\ -SIZE may be (or may be an integer optionally followed by) one of following:\n\ -kB 1000, K 1024, MB 1000*1000, M 1024*1024, and so on for G, T, P, E, Z, Y.\n\ -"), stdout); - printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT); + emit_blocksize_note ("DU"); + emit_size_note (); + emit_ancillary_info (PROGRAM_NAME); } exit (status); } -static size_t -entry_hash (void const *x, size_t table_size) -{ - struct entry const *p = x; - - /* Ignoring the device number here should be fine. */ - /* The cast to uintmax_t prevents negative remainders - if st_ino is negative. */ - return (uintmax_t) p->st_ino % table_size; -} - -/* Compare two dev/ino pairs. Return true if they are the same. */ -static bool -entry_compare (void const *x, void const *y) -{ - struct entry const *a = x; - struct entry const *b = y; - return SAME_INODE (*a, *b) ? true : false; -} - -/* Try to insert the INO/DEV pair into the global table, HTAB. +/* Try to insert the INO/DEV pair into DI_SET. Return true if the pair is successfully inserted, - false if the pair is already in the table. */ + false if the pair was already there. */ static bool -hash_ins (ino_t ino, dev_t dev) +hash_ins (struct di_set *di_set, ino_t ino, dev_t dev) { - struct entry *ent; - struct entry *ent_from_table; - - ent = xmalloc (sizeof *ent); - ent->st_ino = ino; - ent->st_dev = dev; - - ent_from_table = hash_insert (htab, ent); - if (ent_from_table == NULL) - { - /* Insertion failed due to lack of memory. */ - xalloc_die (); - } - - if (ent_from_table == ent) - { - /* Insertion succeeded. */ - return true; - } - - /* That pair is already in the table, so ENT was not inserted. Free it. */ - free (ent); - - return false; -} - -/* Initialize the hash table. */ -static void -hash_init (void) -{ - htab = hash_initialize (INITIAL_TABLE_SIZE, NULL, - entry_hash, entry_compare, free); - if (htab == NULL) + int inserted = di_set_insert (di_set, dev, ino); + if (inserted < 0) xalloc_die (); + return inserted; } /* FIXME: this code is nearly identical to code in date.c */ @@ -422,11 +378,9 @@ show_date (const char *format, struct timespec when) if (! tm) { char buf[INT_BUFSIZE_BOUND (intmax_t)]; - error (0, 0, _("time %s is out of range"), - (TYPE_SIGNED (time_t) - ? imaxtostr (when.tv_sec, buf) - : umaxtostr (when.tv_sec, buf))); - fputs (buf, stdout); + char *when_str = timetostr (when.tv_sec, buf); + error (0, 0, _("time %s is out of range"), quote (when_str)); + fputs (when_str, stdout); return; } @@ -439,8 +393,11 @@ static void print_only_size (uintmax_t n_bytes) { char buf[LONGEST_HUMAN_READABLE + 1]; - fputs (human_readable (n_bytes, buf, human_output_opts, - 1, output_block_size), stdout); + fputs ((n_bytes == UINTMAX_MAX + ? _("Infinity") + : human_readable (n_bytes, buf, human_output_opts, + 1, output_block_size)), + stdout); } /* Print size (and optionally time) indicated by *PDUI, followed by STRING. */ @@ -448,7 +405,10 @@ print_only_size (uintmax_t n_bytes) static void print_size (const struct duinfo *pdui, const char *string) { - print_only_size (pdui->size); + print_only_size (opt_inodes + ? pdui->inodes + : pdui->size); + if (opt_time) { putchar ('\t'); @@ -458,6 +418,64 @@ print_size (const struct duinfo *pdui, const char *string) fflush (stdout); } +/* Fill the di_mnt set with local mount point dev/ino pairs. */ + +static void +fill_mount_table (void) +{ + struct mount_entry *mnt_ent = read_file_system_list (false); + while (mnt_ent) + { + struct mount_entry *mnt_free; + if (!mnt_ent->me_remote && !mnt_ent->me_dummy) + { + struct stat buf; + if (!stat (mnt_ent->me_mountdir, &buf)) + hash_ins (di_mnt, buf.st_ino, buf.st_dev); + else + { + /* Ignore stat failure. False positives are too common. + E.g., "Permission denied" on /run/user/<name>/gvfs. */ + } + } + + mnt_free = mnt_ent; + mnt_ent = mnt_ent->me_next; + free_mount_entry (mnt_free); + } +} + +/* This function checks whether any of the directories in the cycle that + fts detected is a mount point. */ + +static bool +mount_point_in_fts_cycle (FTSENT const *ent) +{ + FTSENT const *cycle_ent = ent->fts_cycle; + + if (!di_mnt) + { + /* Initialize the set of dev,inode pairs. */ + di_mnt = di_set_alloc (); + if (!di_mnt) + xalloc_die (); + + fill_mount_table (); + } + + while (ent && ent != cycle_ent) + { + if (di_set_lookup (di_mnt, ent->fts_statp->st_dev, + ent->fts_statp->st_ino) > 0) + { + return true; + } + ent = ent->fts_parent; + } + + return false; +} + /* This function is called once for every file system object that fts encounters. fts does a depth-first traversal. This function knows that and accumulates per-directory totals based on changes in @@ -466,11 +484,10 @@ print_size (const struct duinfo *pdui, const char *string) static bool process_file (FTS *fts, FTSENT *ent) { - bool ok; + bool ok = true; struct duinfo dui; struct duinfo dui_to_print; size_t level; - static size_t prev_level; static size_t n_alloc; /* First element of the structure contains: The sum of the st_size values of all entries in the single directory @@ -481,71 +498,98 @@ process_file (FTS *fts, FTSENT *ent) The sum of the sizes of all entries in the hierarchy at or below the directory at the specified level. */ static struct dulevel *dulvl; - bool print = true; const char *file = ent->fts_path; const struct stat *sb = ent->fts_statp; - bool skip; + int info = ent->fts_info; - /* If necessary, set FTS_SKIP before returning. */ - skip = excluded_file_name (exclude, ent->fts_path); - if (skip) - fts_set (fts, ent, FTS_SKIP); - - switch (ent->fts_info) + if (info == FTS_DNR) { - case FTS_NS: - error (0, ent->fts_errno, _("cannot access %s"), quote (file)); - return false; - - case FTS_ERR: - /* if (S_ISDIR (ent->fts_statp->st_mode) && FIXME */ - error (0, ent->fts_errno, _("%s"), quote (file)); - return false; - - case FTS_DNR: - /* Don't return just yet, since although the directory is not readable, - we were able to stat it, so we do have a size. */ - error (0, ent->fts_errno, _("cannot read directory %s"), quote (file)); + /* An error occurred, but the size is known, so count it. */ + error (0, ent->fts_errno, _("cannot read directory %s"), quoteaf (file)); ok = false; - break; - - default: - ok = true; - break; - } - - /* If this is the first (pre-order) encounter with a directory, - or if it's the second encounter for a skipped directory, then - return right away. */ - if (ent->fts_info == FTS_D || skip) - return ok; - - /* If the file is being excluded or if it has already been counted - via a hard link, then don't let it contribute to the sums. */ - if (skip - || (!opt_count_all - && ! S_ISDIR (sb->st_mode) - && 1 < sb->st_nlink - && ! hash_ins (sb->st_ino, sb->st_dev))) - { - /* Note that we must not simply return here. - We still have to update prev_level and maybe propagate - some sums up the hierarchy. */ - duinfo_init (&dui); - print = false; } - else + else if (info != FTS_DP) { - duinfo_set (&dui, - (apparent_size - ? sb->st_size - : (uintmax_t) ST_NBLOCKS (*sb) * ST_NBLOCKSIZE), - (time_type == time_mtime ? get_stat_mtime (sb) - : time_type == time_atime ? get_stat_atime (sb) - : get_stat_ctime (sb))); + bool excluded = excluded_file_name (exclude, file); + if (! excluded) + { + /* Make the stat buffer *SB valid, or fail noisily. */ + + if (info == FTS_NSOK) + { + fts_set (fts, ent, FTS_AGAIN); + FTSENT const *e = fts_read (fts); + assert (e == ent); + info = ent->fts_info; + } + + if (info == FTS_NS || info == FTS_SLNONE) + { + error (0, ent->fts_errno, _("cannot access %s"), quoteaf (file)); + return false; + } + + /* The --one-file-system (-x) option cannot exclude anything + specified on the command-line. By definition, it can exclude + a file or directory only when its device number is different + from that of its just-processed parent directory, and du does + not process the parent of a command-line argument. */ + if (fts->fts_options & FTS_XDEV + && FTS_ROOTLEVEL < ent->fts_level + && fts->fts_dev != sb->st_dev) + excluded = true; + } + + if (excluded + || (! opt_count_all + && (hash_all || (! S_ISDIR (sb->st_mode) && 1 < sb->st_nlink)) + && ! hash_ins (di_files, sb->st_ino, sb->st_dev))) + { + /* If ignoring a directory in preorder, skip its children. + Ignore the next fts_read output too, as it's a postorder + visit to the same directory. */ + if (info == FTS_D) + { + fts_set (fts, ent, FTS_SKIP); + FTSENT const *e = fts_read (fts); + assert (e == ent); + } + + return true; + } + + switch (info) + { + case FTS_D: + return true; + + case FTS_ERR: + /* An error occurred, but the size is known, so count it. */ + error (0, ent->fts_errno, "%s", quotef (file)); + ok = false; + break; + + case FTS_DC: + /* If not following symlinks and not a (bind) mount point. */ + if (cycle_warning_required (fts, ent) + && ! mount_point_in_fts_cycle (ent)) + { + emit_cycle_warning (file); + return false; + } + return true; + } } + duinfo_set (&dui, + (apparent_size + ? MAX (0, sb->st_size) + : (uintmax_t) ST_NBLOCKS (*sb) * ST_NBLOCKSIZE), + (time_type == time_mtime ? get_stat_mtime (sb) + : time_type == time_atime ? get_stat_atime (sb) + : get_stat_ctime (sb))); + level = ent->fts_level; dui_to_print = dui; @@ -557,71 +601,68 @@ process_file (FTS *fts, FTSENT *ent) else { if (level == prev_level) - { - /* This is usually the most common case. Do nothing. */ - } + { + /* This is usually the most common case. Do nothing. */ + } else if (level > prev_level) - { - /* Descending the hierarchy. - Clear the accumulators for *all* levels between prev_level - and the current one. The depth may change dramatically, - e.g., from 1 to 10. */ - size_t i; - - if (n_alloc <= level) - { - dulvl = xnrealloc (dulvl, level, 2 * sizeof *dulvl); - n_alloc = level * 2; - } - - for (i = prev_level + 1; i <= level; i++) - { - duinfo_init (&dulvl[i].ent); - duinfo_init (&dulvl[i].subdir); - } - } + { + /* Descending the hierarchy. + Clear the accumulators for *all* levels between prev_level + and the current one. The depth may change dramatically, + e.g., from 1 to 10. */ + size_t i; + + if (n_alloc <= level) + { + dulvl = xnrealloc (dulvl, level, 2 * sizeof *dulvl); + n_alloc = level * 2; + } + + for (i = prev_level + 1; i <= level; i++) + { + duinfo_init (&dulvl[i].ent); + duinfo_init (&dulvl[i].subdir); + } + } else /* level < prev_level */ - { - /* Ascending the hierarchy. - Process a directory only after all entries in that - directory have been processed. When the depth decreases, - propagate sums from the children (prev_level) to the parent. - Here, the current level is always one smaller than the - previous one. */ - assert (level == prev_level - 1); - duinfo_add (&dui_to_print, &dulvl[prev_level].ent); - if (!opt_separate_dirs) - duinfo_add (&dui_to_print, &dulvl[prev_level].subdir); - duinfo_add (&dulvl[level].subdir, &dulvl[prev_level].ent); - duinfo_add (&dulvl[level].subdir, &dulvl[prev_level].subdir); - } + { + /* Ascending the hierarchy. + Process a directory only after all entries in that + directory have been processed. When the depth decreases, + propagate sums from the children (prev_level) to the parent. + Here, the current level is always one smaller than the + previous one. */ + assert (level == prev_level - 1); + duinfo_add (&dui_to_print, &dulvl[prev_level].ent); + if (!opt_separate_dirs) + duinfo_add (&dui_to_print, &dulvl[prev_level].subdir); + duinfo_add (&dulvl[level].subdir, &dulvl[prev_level].ent); + duinfo_add (&dulvl[level].subdir, &dulvl[prev_level].subdir); + } } prev_level = level; /* Let the size of a directory entry contribute to the total for the containing directory, unless --separate-dirs (-S) is specified. */ - if ( ! (opt_separate_dirs && IS_DIR_TYPE (ent->fts_info))) + if (! (opt_separate_dirs && IS_DIR_TYPE (info))) duinfo_add (&dulvl[level].ent, &dui); /* Even if this directory is unreadable or we can't chdir into it, - do let its size contribute to the total, ... */ + do let its size contribute to the total. */ duinfo_add (&tot_dui, &dui); - /* ... but don't print out a total for it, since without the size(s) - of any potential entries, it could be very misleading. */ - if (ent->fts_info == FTS_DNR) - return ok; - - /* If we're not counting an entry, e.g., because it's a hard link - to a file we've already counted (and --count-links), then don't - print a line for it. */ - if (!print) - return ok; - - if ((IS_DIR_TYPE (ent->fts_info) && level <= max_depth) - || ((opt_all && level <= max_depth) || level == 0)) - print_size (&dui_to_print, file); + if ((IS_DIR_TYPE (info) && level <= max_depth) + || (opt_all && level <= max_depth) + || level == 0) + { + /* Print or elide this entry according to the --threshold option. */ + uintmax_t v = opt_inodes ? dui_to_print.inodes : dui_to_print.size; + if (opt_threshold < 0 + ? v <= -opt_threshold + : v >= opt_threshold) + print_size (&dui_to_print, file); + } return ok; } @@ -641,33 +682,36 @@ du_files (char **files, int bit_flags) FTS *fts = xfts_open (files, bit_flags, NULL); while (1) - { - FTSENT *ent; - - ent = fts_read (fts); - if (ent == NULL) - { - if (errno != 0) - { - /* FIXME: try to give a better message */ - error (0, errno, _("fts_read failed")); - ok = false; - } - break; - } - FTS_CROSS_CHECK (fts); - - ok &= process_file (fts, ent); - } - - /* Ignore failure, since the only way it can do so is in failing to - return to the original directory, and since we're about to exit, - that doesn't matter. */ - fts_close (fts); - } + { + FTSENT *ent; - if (print_grand_total) - print_size (&tot_dui, _("total")); + ent = fts_read (fts); + if (ent == NULL) + { + if (errno != 0) + { + error (0, errno, _("fts_read failed: %s"), + quotef (fts->fts_path)); + ok = false; + } + + /* When exiting this loop early, be careful to reset the + global, prev_level, used in process_file. Otherwise, its + (level == prev_level - 1) assertion could fail. */ + prev_level = 0; + break; + } + FTS_CROSS_CHECK (fts); + + ok &= process_file (fts, ent); + } + + if (fts_close (fts) != 0) + { + error (0, errno, _("fts_close failed")); + ok = false; + } + } return ok; } @@ -675,16 +719,13 @@ du_files (char **files, int bit_flags) int main (int argc, char **argv) { - int c; char *cwd_only[2]; bool max_depth_specified = false; - char **files; bool ok = true; char *files_from = NULL; - struct Tokens tok; /* Bit flags that control how fts works. */ - int bit_flags = FTS_TIGHT_CYCLE_CHECK; + int bit_flags = FTS_NOSTAT; /* Select one of the three FTS_ options that control if/when to follow a symlink. */ @@ -693,11 +734,11 @@ main (int argc, char **argv) /* If true, display only a total for each argument. */ bool opt_summarize_only = false; - cwd_only[0] = "."; + cwd_only[0] = bad_cast ("."); cwd_only[1] = NULL; initialize_main (&argc, &argv); - program_name = argv[0]; + set_program_name (argv[0]); setlocale (LC_ALL, ""); bindtextdomain (PACKAGE, LOCALEDIR); textdomain (PACKAGE); @@ -706,167 +747,183 @@ main (int argc, char **argv) exclude = new_exclude (); - human_output_opts = human_options (getenv ("DU_BLOCK_SIZE"), false, - &output_block_size); + human_options (getenv ("DU_BLOCK_SIZE"), + &human_output_opts, &output_block_size); - while ((c = getopt_long (argc, argv, DEBUG_OPT "0abchHklmsxB:DLPSX:", - long_options, NULL)) != -1) + while (true) { + int oi = -1; + int c = getopt_long (argc, argv, "0abd:chHklmst:xB:DLPSX:", + long_options, &oi); + if (c == -1) + break; + switch (c) - { + { #if DU_DEBUG - case 'd': - fts_debug = true; - break; + case FTS_DEBUG: + fts_debug = true; + break; #endif - case '0': - opt_nul_terminate_output = true; - break; - - case 'a': - opt_all = true; - break; - - case APPARENT_SIZE_OPTION: - apparent_size = true; - break; - - case 'b': - apparent_size = true; - human_output_opts = 0; - output_block_size = 1; - break; - - case 'c': - print_grand_total = true; - break; - - case 'h': - human_output_opts = human_autoscale | human_SI | human_base_1024; - output_block_size = 1; - break; - - case 'H': /* FIXME: remove warning and move this "case 'H'" to - precede --dereference-args in late 2006. */ - error (0, 0, _("WARNING: use --si, not -H; the meaning of the -H\ - option will soon\nchange to be the same as that of --dereference-args (-D)")); - /* fall through */ - case HUMAN_SI_OPTION: - human_output_opts = human_autoscale | human_SI; - output_block_size = 1; - break; - - case KILOBYTES_LONG_OPTION: - error (0, 0, - _("the --kilobytes option is deprecated; use -k instead")); - /* fall through */ - case 'k': - human_output_opts = 0; - output_block_size = 1024; - break; - - case MAX_DEPTH_OPTION: /* --max-depth=N */ - { - unsigned long int tmp_ulong; - if (xstrtoul (optarg, NULL, 0, &tmp_ulong, NULL) == LONGINT_OK - && tmp_ulong <= SIZE_MAX) - { - max_depth_specified = true; - max_depth = tmp_ulong; - } - else - { - error (0, 0, _("invalid maximum depth %s"), - quote (optarg)); - ok = false; - } - } - break; - - case MEGABYTES_LONG_OPTION: - error (0, 0, - _("the --megabytes option is deprecated; use -m instead")); - /* fall through */ - case 'm': - human_output_opts = 0; - output_block_size = 1024 * 1024; - break; - - case 'l': - opt_count_all = true; - break; - - case 's': - opt_summarize_only = true; - break; - - case 'x': - bit_flags |= FTS_XDEV; - break; - - case 'B': - human_output_opts = human_options (optarg, true, &output_block_size); - break; - - case 'D': /* This will eventually be 'H' (-H), too. */ - symlink_deref_bits = FTS_COMFOLLOW | FTS_PHYSICAL; - break; - - case 'L': /* --dereference */ - symlink_deref_bits = FTS_LOGICAL; - break; - - case 'P': /* --no-dereference */ - symlink_deref_bits = FTS_PHYSICAL; - break; - - case 'S': - opt_separate_dirs = true; - break; - - case 'X': - if (add_exclude_file (add_exclude, exclude, optarg, - EXCLUDE_WILDCARDS, '\n')) - { - error (0, errno, "%s", quotearg_colon (optarg)); - ok = false; - } - break; - - case FILES0_FROM_OPTION: - files_from = optarg; - break; - - case EXCLUDE_OPTION: - add_exclude (exclude, optarg, EXCLUDE_WILDCARDS); - break; - - case TIME_OPTION: - opt_time = true; - time_type = - (optarg - ? XARGMATCH ("--time", optarg, time_args, time_types) - : time_mtime); - break; - - case TIME_STYLE_OPTION: - time_style = optarg; - break; - - case_GETOPT_HELP_CHAR; - - case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); - - default: - ok = false; - } + case '0': + opt_nul_terminate_output = true; + break; + + case 'a': + opt_all = true; + break; + + case APPARENT_SIZE_OPTION: + apparent_size = true; + break; + + case 'b': + apparent_size = true; + human_output_opts = 0; + output_block_size = 1; + break; + + case 'c': + print_grand_total = true; + break; + + case 'h': + human_output_opts = human_autoscale | human_SI | human_base_1024; + output_block_size = 1; + break; + + case HUMAN_SI_OPTION: + human_output_opts = human_autoscale | human_SI; + output_block_size = 1; + break; + + case 'k': + human_output_opts = 0; + output_block_size = 1024; + break; + + case 'd': /* --max-depth=N */ + { + unsigned long int tmp_ulong; + if (xstrtoul (optarg, NULL, 0, &tmp_ulong, NULL) == LONGINT_OK + && tmp_ulong <= SIZE_MAX) + { + max_depth_specified = true; + max_depth = tmp_ulong; + } + else + { + error (0, 0, _("invalid maximum depth %s"), + quote (optarg)); + ok = false; + } + } + break; + + case 'm': + human_output_opts = 0; + output_block_size = 1024 * 1024; + break; + + case 'l': + opt_count_all = true; + break; + + case 's': + opt_summarize_only = true; + break; + + case 't': + { + enum strtol_error e; + e = xstrtoimax (optarg, NULL, 0, &opt_threshold, "kKmMGTPEZY0"); + if (e != LONGINT_OK) + xstrtol_fatal (e, oi, c, long_options, optarg); + if (opt_threshold == 0 && *optarg == '-') + { + /* Do not allow -0, as this wouldn't make sense anyway. */ + error (EXIT_FAILURE, 0, _("invalid --threshold argument '-0'")); + } + } + break; + + case 'x': + bit_flags |= FTS_XDEV; + break; + + case 'B': + { + enum strtol_error e = human_options (optarg, &human_output_opts, + &output_block_size); + if (e != LONGINT_OK) + xstrtol_fatal (e, oi, c, long_options, optarg); + } + break; + + case 'H': /* NOTE: before 2008-12, -H was equivalent to --si. */ + case 'D': + symlink_deref_bits = FTS_COMFOLLOW | FTS_PHYSICAL; + break; + + case 'L': /* --dereference */ + symlink_deref_bits = FTS_LOGICAL; + break; + + case 'P': /* --no-dereference */ + symlink_deref_bits = FTS_PHYSICAL; + break; + + case 'S': + opt_separate_dirs = true; + break; + + case 'X': + if (add_exclude_file (add_exclude, exclude, optarg, + EXCLUDE_WILDCARDS, '\n')) + { + error (0, errno, "%s", quotef (optarg)); + ok = false; + } + break; + + case FILES0_FROM_OPTION: + files_from = optarg; + break; + + case EXCLUDE_OPTION: + add_exclude (exclude, optarg, EXCLUDE_WILDCARDS); + break; + + case INODES_OPTION: + opt_inodes = true; + break; + + case TIME_OPTION: + opt_time = true; + time_type = + (optarg + ? XARGMATCH ("--time", optarg, time_args, time_types) + : time_mtime); + break; + + case TIME_STYLE_OPTION: + time_style = optarg; + break; + + case_GETOPT_HELP_CHAR; + + case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); + + default: + ok = false; + } } if (!ok) usage (EXIT_FAILURE); - if (opt_all & opt_summarize_only) + if (opt_all && opt_summarize_only) { error (0, 0, _("cannot both summarize and show all entries")); usage (EXIT_FAILURE); @@ -875,7 +932,7 @@ main (int argc, char **argv) if (opt_summarize_only && max_depth_specified && max_depth == 0) { error (0, 0, - _("warning: summarizing is the same as using --max-depth=0")); + _("warning: summarizing is the same as using --max-depth=0")); } if (opt_summarize_only && max_depth_specified && max_depth != 0) @@ -888,134 +945,192 @@ main (int argc, char **argv) if (opt_summarize_only) max_depth = 0; + if (opt_inodes) + { + if (apparent_size) + { + error (0, 0, _("warning: options --apparent-size and -b are " + "ineffective with --inodes")); + } + output_block_size = 1; + } + /* Process time style if printing last times. */ if (opt_time) { if (! time_style) - { - time_style = getenv ("TIME_STYLE"); - - /* Ignore TIMESTYLE="locale", for compatibility with ls. */ - if (! time_style || STREQ (time_style, "locale")) - time_style = "long-iso"; - else if (*time_style == '+') - { - /* Ignore anything after a newline, for compatibility - with ls. */ - char *p = strchr (time_style, '\n'); - if (p) - *p = '\0'; - } - else - { - /* Ignore "posix-" prefix, for compatibility with ls. */ - static char const posix_prefix[] = "posix-"; - while (strncmp (time_style, posix_prefix, sizeof posix_prefix - 1) - == 0) - time_style += sizeof posix_prefix - 1; - } - } + { + time_style = getenv ("TIME_STYLE"); + + /* Ignore TIMESTYLE="locale", for compatibility with ls. */ + if (! time_style || STREQ (time_style, "locale")) + time_style = "long-iso"; + else if (*time_style == '+') + { + /* Ignore anything after a newline, for compatibility + with ls. */ + char *p = strchr (time_style, '\n'); + if (p) + *p = '\0'; + } + else + { + /* Ignore "posix-" prefix, for compatibility with ls. */ + static char const posix_prefix[] = "posix-"; + static const size_t prefix_len = sizeof posix_prefix - 1; + while (STREQ_LEN (time_style, posix_prefix, prefix_len)) + time_style += prefix_len; + } + } if (*time_style == '+') - time_format = time_style + 1; + time_format = time_style + 1; else { switch (XARGMATCH ("time style", time_style, time_style_args, time_style_types)) { - case full_iso_time_style: - time_format = "%Y-%m-%d %H:%M:%S.%N %z"; - break; + case full_iso_time_style: + time_format = "%Y-%m-%d %H:%M:%S.%N %z"; + break; - case long_iso_time_style: - time_format = "%Y-%m-%d %H:%M"; - break; + case long_iso_time_style: + time_format = "%Y-%m-%d %H:%M"; + break; - case iso_time_style: - time_format = "%Y-%m-%d"; - break; + case iso_time_style: + time_format = "%Y-%m-%d"; + break; } } } + struct argv_iterator *ai; if (files_from) { /* When using --files0-from=F, you may not specify any files - on the command-line. */ + on the command-line. */ if (optind < argc) - { - error (0, 0, _("extra operand %s"), quote (argv[optind])); - fprintf (stderr, "%s\n", - _("File operands cannot be combined with --files0-from.")); - usage (EXIT_FAILURE); - } + { + error (0, 0, _("extra operand %s"), quote (argv[optind])); + fprintf (stderr, "%s\n", + _("file operands cannot be combined with --files0-from")); + usage (EXIT_FAILURE); + } if (! (STREQ (files_from, "-") || freopen (files_from, "r", stdin))) - error (EXIT_FAILURE, errno, _("cannot open %s for reading"), - quote (files_from)); - - readtokens0_init (&tok); + error (EXIT_FAILURE, errno, _("cannot open %s for reading"), + quoteaf (files_from)); - if (! readtokens0 (stdin, &tok) || fclose (stdin) != 0) - error (EXIT_FAILURE, 0, _("cannot read file names from %s"), - quote (files_from)); + ai = argv_iter_init_stream (stdin); - files = tok.tok; + /* It's not easy here to count the arguments, so assume the + worst. */ + hash_all = true; } else { - files = (optind < argc ? argv + optind : cwd_only); + char **files = (optind < argc ? argv + optind : cwd_only); + ai = argv_iter_init_argv (files); + + /* Hash all dev,ino pairs if there are multiple arguments, or if + following non-command-line symlinks, because in either case a + file with just one hard link might be seen more than once. */ + hash_all = (optind + 1 < argc || symlink_deref_bits == FTS_LOGICAL); } - /* Initialize the hash structure for inode numbers. */ - hash_init (); + if (!ai) + xalloc_die (); - /* Report and filter out any empty file names before invoking fts. - This works around a glitch in fts, which fails immediately - (without looking at the other file names) when given an empty - file name. */ - { - size_t i = 0; - size_t j; - - for (j = 0; ; j++) - { - if (i != j) - files[i] = files[j]; - - if ( ! files[i]) - break; - - if (files[i][0]) - i++; - else - { - if (files_from) - { - /* Using the standard `filename:line-number:' prefix here is - not totally appropriate, since NUL is the separator, not NL, - but it might be better than nothing. */ - unsigned long int file_number = j + 1; - error (0, 0, "%s:%lu: %s", quotearg_colon (files_from), - file_number, _("invalid zero-length file name")); - } - else - error (0, 0, "%s", _("invalid zero-length file name")); - } - } - - ok = (i == j); - } + /* Initialize the set of dev,inode pairs. */ + di_files = di_set_alloc (); + if (!di_files) + xalloc_die (); + + /* If not hashing everything, process_file won't find cycles on its + own, so ask fts_read to check for them accurately. */ + if (opt_count_all || ! hash_all) + bit_flags |= FTS_TIGHT_CYCLE_CHECK; bit_flags |= symlink_deref_bits; - ok &= du_files (files, bit_flags); + static char *temp_argv[] = { NULL, NULL }; - /* This isn't really necessary, but it does ensure we - exercise this function. */ - if (files_from) - readtokens0_free (&tok); + while (true) + { + bool skip_file = false; + enum argv_iter_err ai_err; + char *file_name = argv_iter (ai, &ai_err); + if (!file_name) + { + switch (ai_err) + { + case AI_ERR_EOF: + goto argv_iter_done; + case AI_ERR_READ: + error (0, errno, _("%s: read error"), + quotef (files_from)); + ok = false; + goto argv_iter_done; + case AI_ERR_MEM: + xalloc_die (); + default: + assert (!"unexpected error code from argv_iter"); + } + } + if (files_from && STREQ (files_from, "-") && STREQ (file_name, "-")) + { + /* Give a better diagnostic in an unusual case: + printf - | du --files0-from=- */ + error (0, 0, _("when reading file names from stdin, " + "no file name of %s allowed"), + quoteaf (file_name)); + skip_file = true; + } + + /* Report and skip any empty file names before invoking fts. + This works around a glitch in fts, which fails immediately + (without looking at the other file names) when given an empty + file name. */ + if (!file_name[0]) + { + /* Diagnose a zero-length file name. When it's one + among many, knowing the record number may help. + FIXME: currently print the record number only with + --files0-from=FILE. Maybe do it for argv, too? */ + if (files_from == NULL) + error (0, 0, "%s", _("invalid zero-length file name")); + else + { + /* Using the standard 'filename:line-number:' prefix here is + not totally appropriate, since NUL is the separator, not NL, + but it might be better than nothing. */ + unsigned long int file_number = argv_iter_n_args (ai); + error (0, 0, "%s:%lu: %s", quotef (files_from), + file_number, _("invalid zero-length file name")); + } + skip_file = true; + } + + if (skip_file) + ok = false; + else + { + temp_argv[0] = file_name; + ok &= du_files (temp_argv, bit_flags); + } + } + argv_iter_done: + + argv_iter_free (ai); + di_set_free (di_files); + if (di_mnt) + di_set_free (di_mnt); - hash_free (htab); + if (files_from && (ferror (stdin) || fclose (stdin) != 0) && ok) + error (EXIT_FAILURE, 0, _("error reading %s"), quoteaf (files_from)); + + if (print_grand_total) + print_size (&tot_dui, _("total")); - exit (ok ? EXIT_SUCCESS : EXIT_FAILURE); + return ok ? EXIT_SUCCESS : EXIT_FAILURE; } |