summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJoel Rosdahl <joel@rosdahl.net>2023-03-02 18:14:49 +0100
committerJoel Rosdahl <joel@rosdahl.net>2023-03-04 10:10:21 +0100
commitac98dee75104f31c95ebe57909b43ff88e336c45 (patch)
tree32183a113a8a24ef3b55bef237eed3073e5af93c
parent32e35fc80784adbbfe6d35c001b1e6c36a599aaa (diff)
downloadccache-ac98dee75104f31c95ebe57909b43ff88e336c45.tar.gz
feat: Improve cache size presentation and specification
Aligned how cache size is presented (in "ccache --show-stats", "ccache --show-compression", "ccache --recompress", debug logs, etc.) and specified (in configuration files, "ccache --max-size" and "ccache --trim-max-size"). The size units are now formatted according to the type of size unit prefix used for the max_size/CCACHE_MAXSIZE setting: a decimal size unit prefix (k/M/G/T with or without B for bytes) in max_size means using decimal size unit prefix for presented sizes, and similar for binary size unit prefixes (Ki/Mi/Gi/Ti with or without B for bytes). If no unit is specified, GiB is assumed. For example, "ccache -M 10" means 10 GiB. Also aligned how cache sizes are calculated. Now all sizes are computed as "apparent size", i.e., rounded up to the disk block size. This means that the cache size in "--show-stats" and the sizes presented in "--show-compression" and "--recompress" now match.
-rw-r--r--doc/MANUAL.adoc12
-rw-r--r--src/Config.cpp14
-rw-r--r--src/Config.hpp19
-rw-r--r--src/core/FileRecompressor.cpp4
-rw-r--r--src/core/Statistics.cpp15
-rw-r--r--src/core/mainoptions.cpp90
-rw-r--r--src/storage/local/LocalStorage.cpp119
-rw-r--r--src/storage/local/LocalStorage.hpp10
-rw-r--r--src/util/string.cpp48
-rw-r--r--src/util/string.hpp14
-rw-r--r--unittest/test_Config.cpp6
-rw-r--r--unittest/test_util_string.cpp219
12 files changed, 371 insertions, 199 deletions
diff --git a/doc/MANUAL.adoc b/doc/MANUAL.adoc
index 4645267b..76a5663b 100644
--- a/doc/MANUAL.adoc
+++ b/doc/MANUAL.adoc
@@ -135,10 +135,10 @@ documentation.
*-M* _SIZE_, *--max-size* _SIZE_::
Set the maximum size of the files stored in the cache. _SIZE_ should be a
- number followed by an optional suffix: k, M, G, T (decimal), Ki, Mi, Gi or
- Ti (binary). The default suffix is G. Use 0 for no limit. The value is
- stored in a configuration file in the cache directory and applies to all
- future compilations.
+ number followed by an optional suffix: kB, MB, GB, TB (decimal), KiB, MiB,
+ GiB or TiB (binary). The default suffix is GiB. Use 0 for no limit. The
+ value is stored in a configuration file in the cache directory and applies
+ to all future compilations.
*-X* _LEVEL_, *--recompress* _LEVEL_::
@@ -210,8 +210,8 @@ directory to a certain size, use `CCACHE_MAXSIZE=_SIZE_ ccache -c`.
*--trim-max-size* _SIZE_::
Specify the maximum size for `--trim-dir`. _SIZE_ should be a number
- followed by an optional suffix: k, M, G, T (decimal), Ki, Mi, Gi or Ti
- (binary). The default suffix is G.
+ followed by an optional suffix: kB, MB, GB, TB (decimal), KiB, MiB, GiB or
+ TiB (binary). The default suffix is GiB.
*--trim-method* _METHOD_::
diff --git a/src/Config.cpp b/src/Config.cpp
index bf79acea..840925ca 100644
--- a/src/Config.cpp
+++ b/src/Config.cpp
@@ -247,9 +247,9 @@ format_bool(bool value)
}
std::string
-format_cache_size(uint64_t value)
+format_cache_size(uint64_t value, util::SizeUnitPrefixType prefix_type)
{
- return util::format_parsable_size_with_suffix(value);
+ return util::format_human_readable_size(value, prefix_type);
}
CompilerType
@@ -792,7 +792,7 @@ Config::get_string_value(const std::string& key) const
return FMT("{}", m_max_files);
case ConfigItem::max_size:
- return format_cache_size(m_max_size);
+ return format_cache_size(m_max_size, m_size_suffix_type);
case ConfigItem::msvc_dep_prefix:
return m_msvc_dep_prefix;
@@ -1036,9 +1036,13 @@ Config::set_item(const std::string& key,
util::parse_unsigned(value, std::nullopt, std::nullopt, "max_files"));
break;
- case ConfigItem::max_size:
- m_max_size = util::value_or_throw<core::Error>(util::parse_size(value));
+ case ConfigItem::max_size: {
+ const auto [size, prefix_type] =
+ util::value_or_throw<core::Error>(util::parse_size(value));
+ m_max_size = size;
+ m_size_suffix_type = prefix_type;
break;
+ }
case ConfigItem::msvc_dep_prefix:
m_msvc_dep_prefix = Util::expand_environment_variables(value);
diff --git a/src/Config.hpp b/src/Config.hpp
index 43c6a7fc..90569667 100644
--- a/src/Config.hpp
+++ b/src/Config.hpp
@@ -21,6 +21,7 @@
#include "NonCopyable.hpp"
#include <core/Sloppiness.hpp>
+#include <util/string.hpp>
#include <sys/types.h>
@@ -102,6 +103,7 @@ public:
// Return true for MSVC (cl.exe), clang-cl, and icl.
bool is_compiler_group_msvc() const;
+ util::SizeUnitPrefixType size_unit_prefix_type() const;
std::string default_temporary_dir() const;
void set_base_dir(const std::string& value);
@@ -117,7 +119,6 @@ public:
void set_ignore_options(const std::string& value);
void set_inode_cache(bool value);
void set_max_files(uint64_t value);
- void set_max_size(uint64_t value);
void set_msvc_dep_prefix(const std::string& value);
void set_run_second_cpp(bool value);
void set_temporary_dir(const std::string& value);
@@ -190,7 +191,7 @@ private:
bool m_keep_comments_cpp = false;
std::string m_log_file;
uint64_t m_max_files = 0;
- uint64_t m_max_size = 5ULL * 1000 * 1000 * 1000;
+ uint64_t m_max_size = 5ULL * 1024 * 1024 * 1024;
std::string m_msvc_dep_prefix = "Note: including file:";
std::string m_path;
bool m_pch_external_checksum = false;
@@ -211,6 +212,8 @@ private:
std::optional<mode_t> m_umask;
bool m_temporary_dir_configured_explicitly = false;
+ util::SizeUnitPrefixType m_size_suffix_type =
+ util::SizeUnitPrefixType::binary;
std::unordered_map<std::string /*key*/, std::string /*origin*/> m_origins;
@@ -494,6 +497,12 @@ Config::umask() const
return m_umask;
}
+inline util::SizeUnitPrefixType
+Config::size_unit_prefix_type() const
+{
+ return m_size_suffix_type;
+}
+
inline void
Config::set_base_dir(const std::string& value)
{
@@ -576,12 +585,6 @@ Config::set_max_files(uint64_t value)
}
inline void
-Config::set_max_size(uint64_t value)
-{
- m_max_size = value;
-}
-
-inline void
Config::set_msvc_dep_prefix(const std::string& value)
{
m_msvc_dep_prefix = value;
diff --git a/src/core/FileRecompressor.cpp b/src/core/FileRecompressor.cpp
index 636d6761..92aa6c7a 100644
--- a/src/core/FileRecompressor.cpp
+++ b/src/core/FileRecompressor.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2022 Joel Rosdahl and other contributors
+// Copyright (C) 2022-2023 Joel Rosdahl and other contributors
//
// See doc/AUTHORS.adoc for a complete list of contributors.
//
@@ -64,7 +64,7 @@ FileRecompressor::recompress(const Stat& stat,
util::set_timestamps(stat.path(), stat.mtime(), stat.atime());
}
- m_content_size += header.entry_size;
+ m_content_size += util::likely_size_on_disk(header.entry_size);
m_old_size += stat.size_on_disk();
m_new_size += (new_stat ? *new_stat : stat).size_on_disk();
diff --git a/src/core/Statistics.cpp b/src/core/Statistics.cpp
index 459d1f13..aa928a72 100644
--- a/src/core/Statistics.cpp
+++ b/src/core/Statistics.cpp
@@ -305,7 +305,13 @@ Statistics::format_human_readable(const Config& config,
add_ratio_row(table, " Preprocessed:", p_hits, p_hits + p_misses);
}
- const uint64_t g = 1'000'000'000;
+ const char* size_unit =
+ config.size_unit_prefix_type() == util::SizeUnitPrefixType::binary ? "GiB"
+ : "GB";
+ const uint64_t size_divider =
+ config.size_unit_prefix_type() == util::SizeUnitPrefixType::binary
+ ? 1024 * 1024 * 1024
+ : 1000 * 1000 * 1000;
const uint64_t local_hits = S(local_storage_hit);
const uint64_t local_misses = S(local_storage_miss);
const uint64_t local_reads =
@@ -326,12 +332,13 @@ Statistics::format_human_readable(const Config& config,
}
if (!from_log) {
std::vector<C> size_cells{
- " Cache size (GB):",
- C(FMT("{:.1f}", static_cast<double>(local_size) / g)).right_align()};
+ FMT(" Cache size ({}):", size_unit),
+ C(FMT("{:.1f}", static_cast<double>(local_size) / size_divider))
+ .right_align()};
if (config.max_size() != 0) {
size_cells.emplace_back("/");
size_cells.emplace_back(
- C(FMT("{:.1f}", static_cast<double>(config.max_size()) / g))
+ C(FMT("{:.1f}", static_cast<double>(config.max_size()) / size_divider))
.right_align());
size_cells.emplace_back(percent(local_size, config.max_size()));
}
diff --git a/src/core/mainoptions.cpp b/src/core/mainoptions.cpp
index 1fba40db..bf2f3deb 100644
--- a/src/core/mainoptions.cpp
+++ b/src/core/mainoptions.cpp
@@ -28,6 +28,7 @@
#include <TemporaryFile.hpp>
#include <ThreadPool.hpp>
#include <UmaskScope.hpp>
+#include <assertions.hpp>
#include <ccache.hpp>
#include <core/CacheEntry.hpp>
#include <core/FileRecompressor.hpp>
@@ -112,8 +113,9 @@ Common options:
-F, --max-files NUM set maximum number of files in cache to NUM (use
0 for no limit)
-M, --max-size SIZE set maximum size of cache to SIZE (use 0 for no
- limit); available suffixes: k, M, G, T (decimal)
- and Ki, Mi, Gi, Ti (binary); default suffix: G
+ limit); available suffixes: kB, MB, GB, TB
+ (decimal) and KiB, MiB, GiB, TiB (binary);
+ default suffix: GiB
-X, --recompress LEVEL recompress the cache to level LEVEL (integer or
"uncompressed")
--recompress-threads THREADS
@@ -140,8 +142,8 @@ Options for remote file-based storage:
(note: don't use this option to trim the local
cache)
--trim-max-size SIZE specify the maximum size for --trim-dir;
- available suffixes: k, M, G, T (decimal) and Ki,
- Mi, Gi, Ti (binary); default suffix: G
+ available suffixes: kB, MB, GB, TB (decimal) and
+ KiB, MiB, GiB, TiB (binary); default suffix: GiB
--trim-method METHOD specify the method (atime or mtime) for
--trim-dir; default: atime
--trim-recompress LEVEL
@@ -233,39 +235,61 @@ inspect_path(const std::string& path)
}
static void
-print_compression_statistics(const storage::local::CompressionStatistics& cs)
+print_compression_statistics(const Config& config,
+ const storage::local::CompressionStatistics& cs)
{
- const double ratio = cs.compr_size > 0
- ? static_cast<double>(cs.content_size) / cs.compr_size
+ const double ratio = cs.actual_size > 0
+ ? static_cast<double>(cs.content_size) / cs.actual_size
: 0.0;
const double savings = ratio > 0.0 ? 100.0 - (100.0 / ratio) : 0.0;
+ auto human_readable = [&](uint64_t size) {
+ return util::format_human_readable_size(size,
+ config.size_unit_prefix_type());
+ };
+
+ const auto [total_data_quantity, total_data_unit] = util::split_once(
+ human_readable(cs.actual_size + cs.incompressible_size), ' ');
+ ASSERT(total_data_unit);
+ const auto [compressed_data_quantity, compressed_data_unit] =
+ util::split_once(human_readable(cs.actual_size), ' ');
+ ASSERT(compressed_data_unit);
+ const auto [original_data_quantity, original_data_unit] =
+ util::split_once(human_readable(cs.content_size), ' ');
+ ASSERT(original_data_unit);
+ const auto [incompressible_data_quantity, incompressible_data_unit] =
+ util::split_once(human_readable(cs.incompressible_size), ' ');
+ ASSERT(incompressible_data_unit);
+
using C = util::TextTable::Cell;
- auto human_readable = util::format_human_readable_size;
util::TextTable table;
table.add_row({
"Total data:",
- C(human_readable(cs.compr_size + cs.incompr_size)).right_align(),
- FMT("({} disk blocks)", human_readable(cs.on_disk_size)),
+ C(total_data_quantity).right_align(),
+ *total_data_unit,
});
table.add_row({
"Compressed data:",
- C(human_readable(cs.compr_size)).right_align(),
+ C(compressed_data_quantity).right_align(),
+ *compressed_data_unit,
FMT("({:.1f}% of original size)", 100.0 - savings),
});
table.add_row({
" Original size:",
- C(human_readable(cs.content_size)).right_align(),
+ C(original_data_quantity).right_align(),
+ *original_data_unit,
});
table.add_row({
" Compression ratio:",
- C(FMT("{:.3f} x ", ratio)).right_align(),
+ C(FMT("{:.3f}", ratio)).right_align(),
+ "x",
FMT("({:.1f}% space savings)", savings),
});
table.add_row({
"Incompressible data:",
- C(human_readable(cs.incompr_size)).right_align(),
+ C(incompressible_data_quantity).right_align(),
+ *incompressible_data_unit,
});
PRINT_RAW(stdout, table.render());
@@ -274,6 +298,7 @@ print_compression_statistics(const storage::local::CompressionStatistics& cs)
static void
trim_dir(const std::string& dir,
const uint64_t trim_max_size,
+ const util::SizeUnitPrefixType suffix_type,
const bool trim_lru_mtime,
std::optional<std::optional<int8_t>> recompress_level,
uint32_t recompress_threads)
@@ -329,11 +354,11 @@ trim_dir(const std::string& dir,
recompression_diff = recompressor.new_size() - recompressor.old_size();
PRINT(stdout,
"Recompressed {} to {} ({})\n",
- util::format_human_readable_size(incompressible_size
- + recompressor.old_size()),
- util::format_human_readable_size(incompressible_size
- + recompressor.new_size()),
- util::format_human_readable_diff(recompression_diff));
+ util::format_human_readable_size(
+ incompressible_size + recompressor.old_size(), suffix_type),
+ util::format_human_readable_size(
+ incompressible_size + recompressor.new_size(), suffix_type),
+ util::format_human_readable_diff(recompression_diff, suffix_type));
}
uint64_t size_after_recompression = initial_size + recompression_diff;
@@ -352,9 +377,10 @@ trim_dir(const std::string& dir,
PRINT(stdout,
"Trimmed {} to {} ({}, {}{} file{})\n",
- util::format_human_readable_size(size_after_recompression),
- util::format_human_readable_size(final_size),
- util::format_human_readable_diff(final_size - size_after_recompression),
+ util::format_human_readable_size(size_after_recompression, suffix_type),
+ util::format_human_readable_size(final_size, suffix_type),
+ util::format_human_readable_diff(final_size - size_after_recompression,
+ suffix_type),
removed_files == 0 ? "" : "-",
removed_files,
removed_files == 1 ? "" : "s");
@@ -452,6 +478,7 @@ process_main_options(int argc, const char* const* argv)
uint8_t verbosity = 0;
std::optional<uint64_t> trim_max_size;
+ std::optional<util::SizeUnitPrefixType> trim_suffix_type;
bool trim_lru_mtime = false;
std::optional<std::optional<int8_t>> trim_recompress;
uint32_t trim_recompress_threads = std::thread::hardware_concurrency();
@@ -484,9 +511,13 @@ process_main_options(int argc, const char* const* argv)
arg, 1, std::numeric_limits<uint32_t>::max(), "threads"));
break;
- case TRIM_MAX_SIZE:
- trim_max_size = util::value_or_throw<Error>(util::parse_size(arg));
+ case TRIM_MAX_SIZE: {
+ auto [size, suffix_type] =
+ util::value_or_throw<Error>(util::parse_size(arg));
+ trim_max_size = size;
+ trim_suffix_type = suffix_type;
break;
+ }
case TRIM_METHOD:
// The help text documents the accepted methods as "atime" (the default) or
// "mtime", so mtime-based LRU ordering must be selected by "mtime". Any
// other value keeps the atime default.
trim_lru_mtime = (arg == "mtime");
@@ -657,14 +688,16 @@ process_main_options(int argc, const char* const* argv)
}
case 'M': { // --max-size
- uint64_t size = util::value_or_throw<Error>(util::parse_size(arg));
+ auto [size, suffix_type] =
+ util::value_or_throw<Error>(util::parse_size(arg));
+ uint64_t max_size = size;
config.set_value_in_file(config.config_path(), "max_size", arg);
- if (size == 0) {
+ if (max_size == 0) {
PRINT_RAW(stdout, "Unset cache size limit\n");
} else {
PRINT(stdout,
"Set cache size limit to {}\n",
- util::format_human_readable_size(size));
+ util::format_human_readable_size(max_size, suffix_type));
}
break;
}
@@ -715,6 +748,7 @@ process_main_options(int argc, const char* const* argv)
}
trim_dir(arg,
*trim_max_size,
+ *trim_suffix_type,
trim_lru_mtime,
trim_recompress,
trim_recompress_threads);
@@ -739,7 +773,7 @@ process_main_options(int argc, const char* const* argv)
if (isatty(STDOUT_FILENO)) {
PRINT_RAW(stdout, "\n\n");
}
- print_compression_statistics(compression_statistics);
+ print_compression_statistics(config, compression_statistics);
break;
}
diff --git a/src/storage/local/LocalStorage.cpp b/src/storage/local/LocalStorage.cpp
index 706d1eae..fe423989 100644
--- a/src/storage/local/LocalStorage.cpp
+++ b/src/storage/local/LocalStorage.cpp
@@ -39,6 +39,7 @@
#include <storage/local/StatsFile.hpp>
#include <storage/local/util.hpp>
#include <util/Duration.hpp>
+#include <util/TextTable.hpp>
#include <util/expected.hpp>
#include <util/file.hpp>
#include <util/string.hpp>
@@ -51,6 +52,7 @@
#include <algorithm>
#include <atomic>
+#include <cstdlib>
#include <memory>
#include <numeric>
#include <string>
@@ -668,13 +670,12 @@ LocalStorage::get_compression_statistics(
for (size_t i = 0; i < files.size(); ++i) {
const auto& cache_file = files[i];
- cs.on_disk_size += cache_file.size_on_disk();
try {
core::CacheEntry::Header header(cache_file.path());
- cs.compr_size += cache_file.size();
- cs.content_size += header.entry_size;
+ cs.actual_size += cache_file.size_on_disk();
+ cs.content_size += util::likely_size_on_disk(header.entry_size);
} catch (core::Error&) {
- cs.incompr_size += cache_file.size();
+ cs.incompressible_size += cache_file.size_on_disk();
}
l2_progress_receiver(0.2 + 0.8 * i / files.size());
}
@@ -775,42 +776,69 @@ LocalStorage::recompress(const std::optional<int8_t> level,
: 0.0;
const double new_savings =
new_ratio > 0.0 ? 100.0 - (100.0 / new_ratio) : 0.0;
- const int64_t size_difference =
- static_cast<int64_t>(recompressor.new_size())
- - static_cast<int64_t>(recompressor.old_size());
-
- const std::string old_compr_size_str =
- util::format_human_readable_size(recompressor.old_size());
- const std::string new_compr_size_str =
- util::format_human_readable_size(recompressor.new_size());
- const std::string content_size_str =
- util::format_human_readable_size(recompressor.content_size());
- const std::string incompr_size_str =
- util::format_human_readable_size(incompressible_size);
- const std::string size_difference_str =
- FMT("{}{}",
- size_difference < 0 ? "-" : (size_difference > 0 ? "+" : " "),
- util::format_human_readable_size(
- size_difference < 0 ? -size_difference : size_difference));
-
- PRINT(stdout, "Original data: {:>8s}\n", content_size_str);
- PRINT(stdout,
- "Old compressed data: {:>8s} ({:.1f}% of original size)\n",
- old_compr_size_str,
- 100.0 - old_savings);
- PRINT(stdout,
- " - Compression ratio: {:>5.3f} x ({:.1f}% space savings)\n",
- old_ratio,
- old_savings);
- PRINT(stdout,
- "New compressed data: {:>8s} ({:.1f}% of original size)\n",
- new_compr_size_str,
- 100.0 - new_savings);
- PRINT(stdout,
- " - Compression ratio: {:>5.3f} x ({:.1f}% space savings)\n",
- new_ratio,
- new_savings);
- PRINT(stdout, "Size change: {:>9s}\n", size_difference_str);
+ const int64_t size_diff = static_cast<int64_t>(recompressor.new_size())
+ - static_cast<int64_t>(recompressor.old_size());
+
+ auto human_readable = [&](uint64_t size) {
+ return util::format_human_readable_size(size,
+ m_config.size_unit_prefix_type());
+ };
+
+ const auto [old_compr_size_quantity, old_compr_size_unit] =
+ util::split_once(human_readable(recompressor.old_size()), ' ');
+ ASSERT(old_compr_size_unit);
+ const auto [new_compr_size_quantity, new_compr_size_unit] =
+ util::split_once(human_readable(recompressor.new_size()), ' ');
+ ASSERT(new_compr_size_unit);
+ const auto [content_size_quantity, content_size_unit] =
+ util::split_once(human_readable(recompressor.content_size()), ' ');
+ ASSERT(content_size_unit);
+ const auto [incompr_size_quantity, incompr_size_unit] =
+ util::split_once(human_readable(incompressible_size), ' ');
+ ASSERT(incompr_size_unit);
+ const auto [size_diff_quantity, size_diff_unit] =
+ util::split_once(human_readable(std::abs(size_diff)), ' ');
+ ASSERT(size_diff_unit);
+
+ using C = util::TextTable::Cell;
+ util::TextTable table;
+
+ table.add_row({
+ "Original data:",
+ C(content_size_quantity).right_align(),
+ *content_size_unit,
+ });
+ table.add_row({
+ "Old compressed data:",
+ C(old_compr_size_quantity).right_align(),
+ *old_compr_size_unit,
+ FMT("({:.1f}% of original size)", 100.0 - old_savings),
+ });
+ table.add_row({
+ " Compression ratio:",
+ C(FMT("{:5.3f}", old_ratio)).right_align(),
+ "x",
+ FMT("({:.1f}% space savings)", old_savings),
+ });
+ table.add_row({
+ "New compressed data:",
+ C(new_compr_size_quantity).right_align(),
+ *new_compr_size_unit,
+ FMT("({:.1f}% of original size)", 100.0 - new_savings),
+ });
+ table.add_row({
+ " Compression ratio:",
+ C(FMT("{:5.3f}", new_ratio)).right_align(),
+ "x",
+ FMT("({:.1f}% space savings)", new_savings),
+ });
+ table.add_row({
+ "Size change:",
+ C(FMT("{}{}", size_diff < 0 ? "-" : "", size_diff_quantity)).right_align(),
+ *size_diff_unit,
+ });
+
+ PRINT_RAW(stdout, table.render());
}
// Private methods
@@ -1173,13 +1201,16 @@ LocalStorage::evaluate_cleanup()
});
std::string max_size_str =
- m_config.max_size() > 0 ? FMT(
- ", max size {}", util::format_human_readable_size(m_config.max_size()))
- : "";
+ m_config.max_size() > 0
+ ? FMT(", max size {}",
+ util::format_human_readable_size(m_config.max_size(),
+ m_config.size_unit_prefix_type()))
+ : "";
std::string max_files_str =
m_config.max_files() > 0 ? FMT(", max files {}", m_config.max_files()) : "";
std::string info_str = FMT("size {}, files {}{}{}",
- util::format_human_readable_size(total_size),
+ util::format_human_readable_size(
+ total_size, m_config.size_unit_prefix_type()),
total_files,
max_size_str,
max_files_str);
diff --git a/src/storage/local/LocalStorage.hpp b/src/storage/local/LocalStorage.hpp
index 4989ef09..29f8a028 100644
--- a/src/storage/local/LocalStorage.hpp
+++ b/src/storage/local/LocalStorage.hpp
@@ -42,10 +42,14 @@ namespace storage::local {
struct CompressionStatistics
{
- uint64_t compr_size;
+ // Storage that would be needed to store the content of compressible entries
+ // uncompressed (without headers), rounded up to disk blocks.
uint64_t content_size;
- uint64_t incompr_size;
- uint64_t on_disk_size;
+ // Actual size of compressible entries (including headers), rounded up to disk
+ // blocks.
+ uint64_t actual_size;
+ // Actual size of incompressible entries, rounded up to disk blocks.
+ uint64_t incompressible_size;
};
enum class FileType { result, manifest, raw, unknown };
diff --git a/src/util/string.cpp b/src/util/string.cpp
index c33bd9f5..48d4b8b0 100644
--- a/src/util/string.cpp
+++ b/src/util/string.cpp
@@ -28,21 +28,25 @@
namespace util {
std::string
-format_human_readable_diff(int64_t diff)
+format_human_readable_diff(int64_t diff, SizeUnitPrefixType prefix_type)
{
const char* sign = diff == 0 ? "" : (diff > 0 ? "+" : "-");
- return FMT("{}{}", sign, format_human_readable_size(std::abs(diff)));
+ return FMT(
+ "{}{}", sign, format_human_readable_size(std::abs(diff), prefix_type));
}
std::string
-format_human_readable_size(uint64_t size)
+format_human_readable_size(uint64_t size, SizeUnitPrefixType prefix_type)
{
- if (size >= 1000 * 1000 * 1000) {
- return FMT("{:.1f} GB", size / ((double)(1000 * 1000 * 1000)));
- } else if (size >= 1000 * 1000) {
- return FMT("{:.1f} MB", size / ((double)(1000 * 1000)));
- } else if (size >= 1000) {
- return FMT("{:.1f} kB", size / 1000.0);
+ const double factor = prefix_type == SizeUnitPrefixType::binary ? 1024 : 1000;
+ const char* infix = prefix_type == SizeUnitPrefixType::binary ? "i" : "";
+ if (size >= factor * factor * factor) {
+ return FMT("{:.1f} G{}B", size / (factor * factor * factor), infix);
+ } else if (size >= factor * factor) {
+ return FMT("{:.1f} M{}B", size / (factor * factor), infix);
+ } else if (size >= factor) {
+ const char* k = prefix_type == SizeUnitPrefixType::binary ? "K" : "k";
+ return FMT("{:.1f} {}{}B", size / factor, k, infix);
} else if (size == 1) {
return "1 byte";
} else {
@@ -50,18 +54,6 @@ format_human_readable_size(uint64_t size)
}
}
-std::string
-format_parsable_size_with_suffix(uint64_t size)
-{
- if (size >= 1000 * 1000 * 1000) {
- return FMT("{:.1f}G", size / ((double)(1000 * 1000 * 1000)));
- } else if (size >= 1000 * 1000) {
- return FMT("{:.1f}M", size / ((double)(1000 * 1000)));
- } else {
- return FMT("{}", size);
- }
-}
-
nonstd::expected<double, std::string>
parse_double(const std::string& value)
{
@@ -114,7 +106,7 @@ parse_signed(std::string_view value,
}
}
-nonstd::expected<uint64_t, std::string>
+nonstd::expected<std::pair<uint64_t, SizeUnitPrefixType>, std::string>
parse_size(const std::string& value)
{
errno = 0;
@@ -129,8 +121,12 @@ parse_size(const std::string& value)
++p;
}
+ SizeUnitPrefixType prefix_type;
if (*p != '\0') {
- unsigned multiplier = *(p + 1) == 'i' ? 1024 : 1000;
+ prefix_type = *(p + 1) == 'i' ? SizeUnitPrefixType::binary
+ : SizeUnitPrefixType::decimal;
+ unsigned multiplier =
+ prefix_type == SizeUnitPrefixType::binary ? 1024 : 1000;
switch (*p) {
case 'T':
result *= multiplier;
@@ -149,11 +145,11 @@ parse_size(const std::string& value)
return nonstd::make_unexpected(FMT("invalid size: \"{}\"", value));
}
} else {
- // Default suffix: G.
- result *= 1000 * 1000 * 1000;
+ result *= 1024 * 1024 * 1024;
+ prefix_type = SizeUnitPrefixType::binary;
}
- return static_cast<uint64_t>(result);
+ return std::make_pair(static_cast<uint64_t>(result), prefix_type);
}
nonstd::expected<mode_t, std::string>
diff --git a/src/util/string.hpp b/src/util/string.hpp
index 3b23fa4e..6e1fcf4d 100644
--- a/src/util/string.hpp
+++ b/src/util/string.hpp
@@ -36,17 +36,18 @@ namespace util {
// --- Interface ---
+enum class SizeUnitPrefixType { binary, decimal };
+
// Return true if `suffix` is a suffix of `string`.
bool ends_with(std::string_view string, std::string_view suffix);
// Format `diff` as a human-readable string.
-std::string format_human_readable_diff(int64_t diff);
+std::string format_human_readable_diff(int64_t diff,
+ SizeUnitPrefixType prefix_type);
// Format `size` as a human-readable string.
-std::string format_human_readable_size(uint64_t size);
-
-// Format `size` as a parsable string.
-std::string format_parsable_size_with_suffix(uint64_t size);
+std::string format_human_readable_size(uint64_t size,
+ SizeUnitPrefixType prefix_type);
// Join stringified elements of `container` delimited by `delimiter` into a
// string. There must exist an `std::string to_string(T::value_type)` function.
@@ -80,7 +81,8 @@ parse_signed(std::string_view value,
// Parse a "size value", i.e. a string that can end in k, M, G, T (10-based
// suffixes) or Ki, Mi, Gi, Ti (2-based suffixes). For backward compatibility, K
// is also recognized as a synonym of k.
-nonstd::expected<uint64_t, std::string> parse_size(const std::string& value);
+nonstd::expected<std::pair<uint64_t, util::SizeUnitPrefixType>, std::string>
+parse_size(const std::string& value);
// Parse `value` (an octal integer).
nonstd::expected<mode_t, std::string> parse_umask(std::string_view value);
diff --git a/unittest/test_Config.cpp b/unittest/test_Config.cpp
index 3bf27d19..2b48c570 100644
--- a/unittest/test_Config.cpp
+++ b/unittest/test_Config.cpp
@@ -1,4 +1,4 @@
-// Copyright (C) 2011-2022 Joel Rosdahl and other contributors
+// Copyright (C) 2011-2023 Joel Rosdahl and other contributors
//
// See doc/AUTHORS.adoc for a complete list of contributors.
//
@@ -62,7 +62,7 @@ TEST_CASE("Config: default values")
CHECK_FALSE(config.keep_comments_cpp());
CHECK(config.log_file().empty());
CHECK(config.max_files() == 0);
- CHECK(config.max_size() == static_cast<uint64_t>(5) * 1000 * 1000 * 1000);
+ CHECK(config.max_size() == static_cast<uint64_t>(5) * 1024 * 1024 * 1024);
CHECK(config.msvc_dep_prefix() == "Note: including file:");
CHECK(config.path().empty());
CHECK_FALSE(config.pch_external_checksum());
@@ -466,7 +466,7 @@ TEST_CASE("Config::visit_items")
"(test.conf) keep_comments_cpp = true",
"(test.conf) log_file = lf",
"(test.conf) max_files = 4711",
- "(test.conf) max_size = 98.7M",
+ "(test.conf) max_size = 98.7 MB",
"(test.conf) msvc_dep_prefix = mdp",
"(test.conf) namespace = ns",
"(test.conf) path = p",
diff --git a/unittest/test_util_string.cpp b/unittest/test_util_string.cpp
index e2fb2334..d6afeab7 100644
--- a/unittest/test_util_string.cpp
+++ b/unittest/test_util_string.cpp
@@ -59,59 +59,136 @@ TEST_CASE("util::ends_with")
TEST_CASE("util::format_human_readable_diff")
{
- CHECK(util::format_human_readable_diff(0) == "0 bytes");
- CHECK(util::format_human_readable_diff(1) == "+1 byte");
- CHECK(util::format_human_readable_diff(42) == "+42 bytes");
- CHECK(util::format_human_readable_diff(1949) == "+1.9 kB");
- CHECK(util::format_human_readable_diff(1951) == "+2.0 kB");
- CHECK(util::format_human_readable_diff(499.7 * 1000) == "+499.7 kB");
- CHECK(util::format_human_readable_diff(1000 * 1000) == "+1.0 MB");
- CHECK(util::format_human_readable_diff(1234 * 1000) == "+1.2 MB");
- CHECK(util::format_human_readable_diff(438.5 * 1000 * 1000) == "+438.5 MB");
- CHECK(util::format_human_readable_diff(1000 * 1000 * 1000) == "+1.0 GB");
- CHECK(util::format_human_readable_diff(17.11 * 1000 * 1000 * 1000)
- == "+17.1 GB");
-
- CHECK(util::format_human_readable_diff(-1) == "-1 byte");
- CHECK(util::format_human_readable_diff(-42) == "-42 bytes");
- CHECK(util::format_human_readable_diff(-1949) == "-1.9 kB");
- CHECK(util::format_human_readable_diff(-1951) == "-2.0 kB");
- CHECK(util::format_human_readable_diff(-499.7 * 1000) == "-499.7 kB");
- CHECK(util::format_human_readable_diff(-1000 * 1000) == "-1.0 MB");
- CHECK(util::format_human_readable_diff(-1234 * 1000) == "-1.2 MB");
- CHECK(util::format_human_readable_diff(-438.5 * 1000 * 1000) == "-438.5 MB");
- CHECK(util::format_human_readable_diff(-1000 * 1000 * 1000) == "-1.0 GB");
- CHECK(util::format_human_readable_diff(-17.11 * 1000 * 1000 * 1000)
- == "-17.1 GB");
+ using SUPT = util::SizeUnitPrefixType;
+
+ SUBCASE("binary")
+ {
+ CHECK(util::format_human_readable_diff(0, SUPT::binary) == "0 bytes");
+ CHECK(util::format_human_readable_diff(1, SUPT::binary) == "+1 byte");
+ CHECK(util::format_human_readable_diff(42, SUPT::binary) == "+42 bytes");
+ CHECK(util::format_human_readable_diff(1949, SUPT::binary) == "+1.9 KiB");
+ CHECK(util::format_human_readable_diff(1951, SUPT::binary) == "+1.9 KiB");
+ CHECK(util::format_human_readable_diff(499.7 * 1000, SUPT::binary)
+ == "+488.0 KiB");
+ CHECK(util::format_human_readable_diff(1000 * 1000, SUPT::binary)
+ == "+976.6 KiB");
+ CHECK(util::format_human_readable_diff(1234 * 1000, SUPT::binary)
+ == "+1.2 MiB");
+ CHECK(util::format_human_readable_diff(438.5 * 1000 * 1000, SUPT::binary)
+ == "+418.2 MiB");
+ CHECK(util::format_human_readable_diff(1000 * 1000 * 1000, SUPT::binary)
+ == "+953.7 MiB");
+ CHECK(
+ util::format_human_readable_diff(17.11 * 1000 * 1000 * 1000, SUPT::binary)
+ == "+15.9 GiB");
+
+ CHECK(util::format_human_readable_diff(-1, SUPT::binary) == "-1 byte");
+ CHECK(util::format_human_readable_diff(-42, SUPT::binary) == "-42 bytes");
+ CHECK(util::format_human_readable_diff(-1949, SUPT::binary) == "-1.9 KiB");
+ CHECK(util::format_human_readable_diff(-1951, SUPT::binary) == "-1.9 KiB");
+ CHECK(util::format_human_readable_diff(-499.7 * 1000, SUPT::binary)
+ == "-488.0 KiB");
+ CHECK(util::format_human_readable_diff(-1000 * 1000, SUPT::binary)
+ == "-976.6 KiB");
+ CHECK(util::format_human_readable_diff(-1234 * 1000, SUPT::binary)
+ == "-1.2 MiB");
+ CHECK(util::format_human_readable_diff(-438.5 * 1000 * 1000, SUPT::binary)
+ == "-418.2 MiB");
+ CHECK(util::format_human_readable_diff(-1000 * 1000 * 1000, SUPT::binary)
+ == "-953.7 MiB");
+ CHECK(util::format_human_readable_diff(-17.11 * 1000 * 1000 * 1000,
+ SUPT::binary)
+ == "-15.9 GiB");
+ }
+
+ SUBCASE("decimal")
+ {
+ CHECK(util::format_human_readable_diff(0, SUPT::decimal) == "0 bytes");
+ CHECK(util::format_human_readable_diff(1, SUPT::decimal) == "+1 byte");
+ CHECK(util::format_human_readable_diff(42, SUPT::decimal) == "+42 bytes");
+ CHECK(util::format_human_readable_diff(1949, SUPT::decimal) == "+1.9 kB");
+ CHECK(util::format_human_readable_diff(1951, SUPT::decimal) == "+2.0 kB");
+ CHECK(util::format_human_readable_diff(499.7 * 1000, SUPT::decimal)
+ == "+499.7 kB");
+ CHECK(util::format_human_readable_diff(1000 * 1000, SUPT::decimal)
+ == "+1.0 MB");
+ CHECK(util::format_human_readable_diff(1234 * 1000, SUPT::decimal)
+ == "+1.2 MB");
+ CHECK(util::format_human_readable_diff(438.5 * 1000 * 1000, SUPT::decimal)
+ == "+438.5 MB");
+ CHECK(util::format_human_readable_diff(1000 * 1000 * 1000, SUPT::decimal)
+ == "+1.0 GB");
+ CHECK(util::format_human_readable_diff(17.11 * 1000 * 1000 * 1000,
+ SUPT::decimal)
+ == "+17.1 GB");
+
+ CHECK(util::format_human_readable_diff(-1, SUPT::decimal) == "-1 byte");
+ CHECK(util::format_human_readable_diff(-42, SUPT::decimal) == "-42 bytes");
+ CHECK(util::format_human_readable_diff(-1949, SUPT::decimal) == "-1.9 kB");
+ CHECK(util::format_human_readable_diff(-1951, SUPT::decimal) == "-2.0 kB");
+ CHECK(util::format_human_readable_diff(-499.7 * 1000, SUPT::decimal)
+ == "-499.7 kB");
+ CHECK(util::format_human_readable_diff(-1000 * 1000, SUPT::decimal)
+ == "-1.0 MB");
+ CHECK(util::format_human_readable_diff(-1234 * 1000, SUPT::decimal)
+ == "-1.2 MB");
+ CHECK(util::format_human_readable_diff(-438.5 * 1000 * 1000, SUPT::decimal)
+ == "-438.5 MB");
+ CHECK(util::format_human_readable_diff(-1000 * 1000 * 1000, SUPT::decimal)
+ == "-1.0 GB");
+ CHECK(util::format_human_readable_diff(-17.11 * 1000 * 1000 * 1000,
+ SUPT::decimal)
+ == "-17.1 GB");
+ }
}
TEST_CASE("util::format_human_readable_size")
{
- CHECK(util::format_human_readable_size(0) == "0 bytes");
- CHECK(util::format_human_readable_size(1) == "1 byte");
- CHECK(util::format_human_readable_size(42) == "42 bytes");
- CHECK(util::format_human_readable_size(1949) == "1.9 kB");
- CHECK(util::format_human_readable_size(1951) == "2.0 kB");
- CHECK(util::format_human_readable_size(499.7 * 1000) == "499.7 kB");
- CHECK(util::format_human_readable_size(1000 * 1000) == "1.0 MB");
- CHECK(util::format_human_readable_size(1234 * 1000) == "1.2 MB");
- CHECK(util::format_human_readable_size(438.5 * 1000 * 1000) == "438.5 MB");
- CHECK(util::format_human_readable_size(1000 * 1000 * 1000) == "1.0 GB");
- CHECK(util::format_human_readable_size(17.11 * 1000 * 1000 * 1000)
- == "17.1 GB");
-}
+ using SUPT = util::SizeUnitPrefixType;
-TEST_CASE("util::format_parsable_size_with_suffix")
-{
- CHECK(util::format_parsable_size_with_suffix(0) == "0");
- CHECK(util::format_parsable_size_with_suffix(42 * 1000) == "42000");
- CHECK(util::format_parsable_size_with_suffix(1000 * 1000) == "1.0M");
- CHECK(util::format_parsable_size_with_suffix(1234 * 1000) == "1.2M");
- CHECK(util::format_parsable_size_with_suffix(438.5 * 1000 * 1000)
- == "438.5M");
- CHECK(util::format_parsable_size_with_suffix(1000 * 1000 * 1000) == "1.0G");
- CHECK(util::format_parsable_size_with_suffix(17.11 * 1000 * 1000 * 1000)
- == "17.1G");
+ SUBCASE("binary")
+ {
+ CHECK(util::format_human_readable_size(0, SUPT::binary) == "0 bytes");
+ CHECK(util::format_human_readable_size(1, SUPT::binary) == "1 byte");
+ CHECK(util::format_human_readable_size(42, SUPT::binary) == "42 bytes");
+ CHECK(util::format_human_readable_size(1949, SUPT::binary) == "1.9 KiB");
+ CHECK(util::format_human_readable_size(1951, SUPT::binary) == "1.9 KiB");
+ CHECK(util::format_human_readable_size(499.7 * 1000, SUPT::binary)
+ == "488.0 KiB");
+ CHECK(util::format_human_readable_size(1000 * 1000, SUPT::binary)
+ == "976.6 KiB");
+ CHECK(util::format_human_readable_size(1234 * 1000, SUPT::binary)
+ == "1.2 MiB");
+ CHECK(util::format_human_readable_size(438.5 * 1000 * 1000, SUPT::binary)
+ == "418.2 MiB");
+ CHECK(util::format_human_readable_size(1000 * 1000 * 1000, SUPT::binary)
+ == "953.7 MiB");
+ CHECK(
+ util::format_human_readable_size(17.11 * 1000 * 1000 * 1000, SUPT::binary)
+ == "15.9 GiB");
+ }
+
+ SUBCASE("decimal")
+ {
+ CHECK(util::format_human_readable_size(0, SUPT::decimal) == "0 bytes");
+ CHECK(util::format_human_readable_size(1, SUPT::decimal) == "1 byte");
+ CHECK(util::format_human_readable_size(42, SUPT::decimal) == "42 bytes");
+ CHECK(util::format_human_readable_size(1949, SUPT::decimal) == "1.9 kB");
+ CHECK(util::format_human_readable_size(1951, SUPT::decimal) == "2.0 kB");
+ CHECK(util::format_human_readable_size(499.7 * 1000, SUPT::decimal)
+ == "499.7 kB");
+ CHECK(util::format_human_readable_size(1000 * 1000, SUPT::decimal)
+ == "1.0 MB");
+ CHECK(util::format_human_readable_size(1234 * 1000, SUPT::decimal)
+ == "1.2 MB");
+ CHECK(util::format_human_readable_size(438.5 * 1000 * 1000, SUPT::decimal)
+ == "438.5 MB");
+ CHECK(util::format_human_readable_size(1000 * 1000 * 1000, SUPT::decimal)
+ == "1.0 GB");
+ CHECK(util::format_human_readable_size(17.11 * 1000 * 1000 * 1000,
+ SUPT::decimal)
+ == "17.1 GB");
+ }
}
TEST_CASE("util::join")
@@ -192,26 +269,40 @@ TEST_CASE("util::parse_signed")
TEST_CASE("util::parse_size")
{
- CHECK(*util::parse_size("0") == 0);
- CHECK(*util::parse_size("42") // Default suffix: G
- == static_cast<uint64_t>(42) * 1000 * 1000 * 1000);
- CHECK(*util::parse_size("78k") == 78 * 1000);
- CHECK(*util::parse_size("78K") == 78 * 1000);
- CHECK(*util::parse_size("1.1 M") == (int64_t(1.1 * 1000 * 1000)));
- CHECK(*util::parse_size("438.55M") == (int64_t(438.55 * 1000 * 1000)));
- CHECK(*util::parse_size("1 G") == 1 * 1000 * 1000 * 1000);
+ using SUPT = util::SizeUnitPrefixType;
+
+ auto u64 = [](auto i) { return static_cast<uint64_t>(i); };
+ auto h = [&](auto size, auto st) { return std::make_pair(u64(size), st); };
+
+ // Default suffix: Gi
+ CHECK(*util::parse_size("0") == h(0, SUPT::binary));
+ CHECK(*util::parse_size("42")
+ == h(u64(42) * 1024 * 1024 * 1024, SUPT::binary));
+
+ // Decimal suffixes
+ CHECK(*util::parse_size("78k") == h(78 * 1000, SUPT::decimal));
+ CHECK(*util::parse_size("78K") == h(78 * 1000, SUPT::decimal));
+ CHECK(*util::parse_size("1.1 M") == h(u64(1.1 * 1000 * 1000), SUPT::decimal));
+ CHECK(*util::parse_size("438.55M")
+ == h(u64(438.55 * 1000 * 1000), SUPT::decimal));
+ CHECK(*util::parse_size("1 G") == h(1 * 1000 * 1000 * 1000, SUPT::decimal));
CHECK(*util::parse_size("2T")
- == static_cast<uint64_t>(2) * 1000 * 1000 * 1000 * 1000);
- CHECK(*util::parse_size("78 Ki") == 78 * 1024);
- CHECK(*util::parse_size("1.1Mi") == (int64_t(1.1 * 1024 * 1024)));
- CHECK(*util::parse_size("438.55 Mi") == (int64_t(438.55 * 1024 * 1024)));
- CHECK(*util::parse_size("1Gi") == 1 * 1024 * 1024 * 1024);
+ == h(u64(2) * 1000 * 1000 * 1000 * 1000, SUPT::decimal));
+
+ // Binary suffixes
+ CHECK(*util::parse_size("78 Ki") == h(78 * 1024, SUPT::binary));
+ CHECK(*util::parse_size("1.1Mi") == h(u64(1.1 * 1024 * 1024), SUPT::binary));
+ CHECK(*util::parse_size("438.55 Mi")
+ == h(u64(438.55 * 1024 * 1024), SUPT::binary));
+ CHECK(*util::parse_size("1Gi") == h(1 * 1024 * 1024 * 1024, SUPT::binary));
CHECK(*util::parse_size("2 Ti")
- == static_cast<uint64_t>(2) * 1024 * 1024 * 1024 * 1024);
+ == h(u64(2) * 1024 * 1024 * 1024 * 1024, SUPT::binary));
- CHECK(*util::parse_size("9MB") == 9 * 1000 * 1000);
- CHECK(*util::parse_size("9MiB") == 9 * 1024 * 1024);
+ // With B suffix
+ CHECK(*util::parse_size("9MB") == h(9 * 1000 * 1000, SUPT::decimal));
+ CHECK(*util::parse_size("9MiB") == h(9 * 1024 * 1024, SUPT::binary));
+ // Errors
CHECK(util::parse_size("").error() == "invalid size: \"\"");
CHECK(util::parse_size("x").error() == "invalid size: \"x\"");
CHECK(util::parse_size("10x").error() == "invalid size: \"10x\"");