summary refs log tree commit diff
diff options
context:
space:
mode:
author Joel Rosdahl <joel@rosdahl.net> 2022-04-05 19:50:52 +0200
committer GitHub <noreply@github.com> 2022-04-05 19:50:52 +0200
commit 870894585eb4f764c03ddf0c6e0e76a9d591d33f (patch)
tree ac4376b317e97eaa0043b78ed2f790fe88ec1ee9
parent ef2e922f9642f943199138447b29ec53fa63ea68 (diff)
parent ff10b34851050a24fa7a13fa08064010c37bcd78 (diff)
download ccache-870894585eb4f764c03ddf0c6e0e76a9d591d33f.tar.gz
Merge pull request #1037 from orgads/cl-strip-lf
-rw-r--r-- src/Util.cpp 18
-rw-r--r-- src/Util.hpp 14
-rw-r--r-- src/ccache.cpp 8
-rw-r--r-- src/util/Tokenizer.cpp 12
-rw-r--r-- src/util/Tokenizer.hpp 20
-rw-r--r-- unittest/test_util_Tokenizer.cpp 184
6 files changed, 133 insertions, 123 deletions
diff --git a/src/Util.cpp b/src/Util.cpp
index 0a4d4304..e68ef79f 100644
--- a/src/Util.cpp
+++ b/src/Util.cpp
@@ -103,6 +103,7 @@ extern "C" {
using nonstd::nullopt;
using nonstd::optional;
using nonstd::string_view;
+using IncludeDelimiter = util::Tokenizer::IncludeDelimiter;
namespace {
@@ -159,10 +160,13 @@ template<typename T>
std::vector<T>
split_into(string_view string,
const char* separators,
- util::Tokenizer::Mode mode)
+ util::Tokenizer::Mode mode,
+ IncludeDelimiter include_delimiter)
+
{
std::vector<T> result;
- for (const auto token : util::Tokenizer(string, separators, mode)) {
+ for (const auto token :
+ util::Tokenizer(string, separators, mode, include_delimiter)) {
result.emplace_back(token);
}
return result;
@@ -1313,17 +1317,19 @@ setenv(const std::string& name, const std::string& value)
std::vector<string_view>
split_into_views(string_view string,
const char* separators,
- util::Tokenizer::Mode mode)
+ util::Tokenizer::Mode mode,
+ IncludeDelimiter include_delimiter)
{
- return split_into<string_view>(string, separators, mode);
+ return split_into<string_view>(string, separators, mode, include_delimiter);
}
std::vector<std::string>
split_into_strings(string_view string,
const char* separators,
- util::Tokenizer::Mode mode)
+ util::Tokenizer::Mode mode,
+ IncludeDelimiter include_delimiter)
{
- return split_into<std::string>(string, separators, mode);
+ return split_into<std::string>(string, separators, mode, include_delimiter);
}
std::string
diff --git a/src/Util.hpp b/src/Util.hpp
index 9ad678c7..6325463b 100644
--- a/src/Util.hpp
+++ b/src/Util.hpp
@@ -357,16 +357,20 @@ size_change_kibibyte(const Stat& old_stat, const Stat& new_stat)
// Split `string` into tokens at any of the characters in `separators`. These
// tokens are views into `string`. `separators` must neither be the empty string
// nor a nullptr.
-std::vector<nonstd::string_view> split_into_views(
- nonstd::string_view string,
- const char* separators,
- util::Tokenizer::Mode mode = util::Tokenizer::Mode::skip_empty);
+std::vector<nonstd::string_view>
+split_into_views(nonstd::string_view string,
+ const char* separators,
+ util::Tokenizer::Mode mode = util::Tokenizer::Mode::skip_empty,
+ util::Tokenizer::IncludeDelimiter include_delimiter =
+ util::Tokenizer::IncludeDelimiter::no);
// Same as `split_into_views` but the tokens are copied from `string`.
std::vector<std::string> split_into_strings(
nonstd::string_view string,
const char* separators,
- util::Tokenizer::Mode mode = util::Tokenizer::Mode::skip_empty);
+ util::Tokenizer::Mode mode = util::Tokenizer::Mode::skip_empty,
+ util::Tokenizer::IncludeDelimiter include_delimiter =
+ util::Tokenizer::IncludeDelimiter::no);
// Returns a copy of string with the specified ANSI CSI sequences removed.
[[nodiscard]] std::string strip_ansi_csi_seqs(nonstd::string_view string);
diff --git a/src/ccache.cpp b/src/ccache.cpp
index 0b2fbd22..2598d02b 100644
--- a/src/ccache.cpp
+++ b/src/ccache.cpp
@@ -906,6 +906,9 @@ write_result(Context& ctx,
static std::string
rewrite_stdout_from_compiler(const Context& ctx, std::string&& stdout_data)
{
+ using util::Tokenizer;
+ using Mode = Tokenizer::Mode;
+ using IncludeDelimiter = Tokenizer::IncludeDelimiter;
// distcc-pump outputs lines like this:
//
// __________Using # distcc servers in pump mode
@@ -913,13 +916,12 @@ rewrite_stdout_from_compiler(const Context& ctx, std::string&& stdout_data)
// We don't want to cache those.
if (!stdout_data.empty()) {
std::string new_stdout_text;
- for (const auto line : util::Tokenizer(
- stdout_data, "\n", util::Tokenizer::Mode::include_empty)) {
+ for (const auto line : Tokenizer(
+ stdout_data, "\n", Mode::include_empty, IncludeDelimiter::yes)) {
if (util::starts_with(line, "__________")) {
Util::send_to_fd(ctx, std::string(line), STDOUT_FILENO);
} else {
new_stdout_text.append(line.data(), line.length());
- new_stdout_text.append("\n");
}
}
return new_stdout_text;
diff --git a/src/util/Tokenizer.cpp b/src/util/Tokenizer.cpp
index 9b27c8fb..b1d3e7e8 100644
--- a/src/util/Tokenizer.cpp
+++ b/src/util/Tokenizer.cpp
@@ -50,4 +50,16 @@ Tokenizer::Iterator::advance(bool initial)
}
}
+nonstd::sv_lite::string_view
+Tokenizer::Iterator::operator*() const
+{
+ DEBUG_ASSERT(m_left <= m_right);
+ DEBUG_ASSERT(m_right <= m_tokenizer.m_string.length());
+ const bool include_delim =
+ m_tokenizer.m_include_delimiter == IncludeDelimiter::yes;
+ const int with_delim =
+ include_delim && m_right < m_tokenizer.m_string.length() ? 1 : 0;
+ return m_tokenizer.m_string.substr(m_left, m_right - m_left + with_delim);
+}
+
} // namespace util
diff --git a/src/util/Tokenizer.hpp b/src/util/Tokenizer.hpp
index 90cb0c09..e57d4c2c 100644
--- a/src/util/Tokenizer.hpp
+++ b/src/util/Tokenizer.hpp
@@ -37,11 +37,14 @@ public:
skip_last_empty, // Include empty tokens except the last one.
};
+ enum class IncludeDelimiter { no, yes };
+
// Split `string` into tokens at any of the characters in `separators` which
// must neither be the empty string nor a nullptr.
Tokenizer(nonstd::string_view string,
const char* delimiters,
- Mode mode = Mode::skip_empty);
+ Mode mode = Mode::skip_empty,
+ IncludeDelimiter include_delimiter = IncludeDelimiter::no);
class Iterator
{
@@ -69,14 +72,17 @@ private:
const nonstd::string_view m_string;
const char* const m_delimiters;
const Mode m_mode;
+ const IncludeDelimiter m_include_delimiter;
};
inline Tokenizer::Tokenizer(const nonstd::string_view string,
const char* const delimiters,
- const Tokenizer::Mode mode)
+ Tokenizer::Mode mode,
+ Tokenizer::IncludeDelimiter include_delimiter)
: m_string(string),
m_delimiters(delimiters),
- m_mode(mode)
+ m_mode(mode),
+ m_include_delimiter(include_delimiter)
{
DEBUG_ASSERT(delimiters != nullptr && delimiters[0] != '\0');
}
@@ -107,14 +113,6 @@ Tokenizer::Iterator::operator!=(const Iterator& other) const
return &m_tokenizer != &other.m_tokenizer || m_left != other.m_left;
}
-inline nonstd::string_view
-Tokenizer::Iterator::operator*() const
-{
- DEBUG_ASSERT(m_left <= m_right);
- DEBUG_ASSERT(m_right <= m_tokenizer.m_string.length());
- return m_tokenizer.m_string.substr(m_left, m_right - m_left);
-}
-
inline Tokenizer::Iterator
Tokenizer::begin()
{
diff --git a/unittest/test_util_Tokenizer.cpp b/unittest/test_util_Tokenizer.cpp
index 76c9e027..c5efea34 100644
--- a/unittest/test_util_Tokenizer.cpp
+++ b/unittest/test_util_Tokenizer.cpp
@@ -23,116 +23,104 @@
TEST_CASE("util::Tokenizer")
{
using Mode = util::Tokenizer::Mode;
-
- SUBCASE("include empty tokens")
+ using IncludeDelimiter = util::Tokenizer::IncludeDelimiter;
+ struct SplitTest
{
+ SplitTest(Mode mode,
+ IncludeDelimiter includeDelimiter = IncludeDelimiter::no)
+ : m_mode(mode),
+ m_includeDelimiter(includeDelimiter)
{
- const auto s = Util::split_into_views("", "/", Mode::include_empty);
- REQUIRE(s.size() == 1);
- CHECK(s[0] == "");
- }
- {
- const auto s = Util::split_into_views("/", "/", Mode::include_empty);
- REQUIRE(s.size() == 2);
- CHECK(s[0] == "");
- CHECK(s[1] == "");
- }
- {
- const auto s = Util::split_into_views("a/", "/", Mode::include_empty);
- REQUIRE(s.size() == 2);
- CHECK(s[0] == "a");
- CHECK(s[1] == "");
- }
- {
- const auto s = Util::split_into_views("/b", "/", Mode::include_empty);
- REQUIRE(s.size() == 2);
- CHECK(s[0] == "");
- CHECK(s[1] == "b");
- }
- {
- const auto s = Util::split_into_views("a/b", "/", Mode::include_empty);
- REQUIRE(s.size() == 2);
- CHECK(s[0] == "a");
- CHECK(s[1] == "b");
}
+
+ void
+ operator()(const char* input,
+ const char* separators,
+ const std::vector<std::string>& expected)
{
- const auto s = Util::split_into_views("/a:", "/:", Mode::include_empty);
- REQUIRE(s.size() == 3);
- CHECK(s[0] == "");
- CHECK(s[1] == "a");
- CHECK(s[2] == "");
+ const auto res =
+ Util::split_into_views(input, separators, m_mode, m_includeDelimiter);
+ REQUIRE(res.size() == expected.size());
+ for (int i = 0, total = expected.size(); i < total; ++i)
+ CHECK(res[i] == expected[i]);
}
+
+ Mode m_mode;
+ IncludeDelimiter m_includeDelimiter;
+ };
+
+ SUBCASE("include empty tokens")
+ {
+ SplitTest split(Mode::include_empty);
+ split("", "/", {""});
+ split("/", "/", {"", ""});
+ split("a/", "/", {"a", ""});
+ split("/b", "/", {"", "b"});
+ split("a/b", "/", {"a", "b"});
+ split("/a:", "/:", {"", "a", ""});
}
SUBCASE("skip empty")
{
- CHECK(Util::split_into_views("", "/", Mode::skip_empty).empty());
- CHECK(Util::split_into_views("///", "/", Mode::skip_empty).empty());
- {
- const auto s = Util::split_into_views("a/b", "/", Mode::skip_empty);
- REQUIRE(s.size() == 2);
- CHECK(s[0] == "a");
- CHECK(s[1] == "b");
- }
- {
- const auto s = Util::split_into_views("a/b", "x", Mode::skip_empty);
- REQUIRE(s.size() == 1);
- CHECK(s[0] == "a/b");
- }
- {
- const auto s = Util::split_into_views("a/b:c", "/:", Mode::skip_empty);
- REQUIRE(s.size() == 3);
- CHECK(s[0] == "a");
- CHECK(s[1] == "b");
- CHECK(s[2] == "c");
- }
- {
- const auto s =
- Util::split_into_views(":a//b..:.c/:/.", "/:.", Mode::skip_empty);
- REQUIRE(s.size() == 3);
- CHECK(s[0] == "a");
- CHECK(s[1] == "b");
- CHECK(s[2] == "c");
- }
- {
- const auto s = Util::split_into_views(
- ".0.1.2.3.4.5.6.7.8.9.", "/:.+_abcdef", Mode::skip_empty);
- REQUIRE(s.size() == 10);
- CHECK(s[0] == "0");
- CHECK(s[9] == "9");
- }
+ SplitTest split(Mode::skip_empty);
+ split("", "/", {});
+ split("///", "/", {});
+ split("a/b", "/", {"a", "b"});
+ split("a/b", "x", {"a/b"});
+ split("a/b:c", "/:", {"a", "b", "c"});
+ split("/a:", "/:", {"a"});
+ split(":a//b..:.c/:/.", "/:.", {"a", "b", "c"});
+ split(".0.1.2.3.4.5.6.7.8.9.",
+ "/:.+_abcdef",
+ {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9"});
}
SUBCASE("skip last empty token")
{
- CHECK(Util::split_into_views("", "/", Mode::skip_last_empty).empty());
- {
- const auto s = Util::split_into_views("/", "/", Mode::skip_last_empty);
- REQUIRE(s.size() == 1);
- CHECK(s[0] == "");
- }
- {
- const auto s = Util::split_into_views("a/", "/", Mode::skip_last_empty);
- REQUIRE(s.size() == 1);
- CHECK(s[0] == "a");
- }
- {
- const auto s = Util::split_into_views("/b", "/", Mode::skip_last_empty);
- REQUIRE(s.size() == 2);
- CHECK(s[0] == "");
- CHECK(s[1] == "b");
- }
- {
- const auto s = Util::split_into_views("a/b", "/", Mode::skip_last_empty);
- REQUIRE(s.size() == 2);
- CHECK(s[0] == "a");
- CHECK(s[1] == "b");
- }
- {
- const auto s = Util::split_into_views("/a:", "/:", Mode::skip_last_empty);
- REQUIRE(s.size() == 2);
- CHECK(s[0] == "");
- CHECK(s[1] == "a");
- }
+ SplitTest split(Mode::skip_last_empty);
+ split("", "/", {});
+ split("/", "/", {""});
+ split("a/", "/", {"a"});
+ split("/b", "/", {"", "b"});
+ split("a/b", "/", {"a", "b"});
+ split("/a:", "/:", {"", "a"});
+ }
+
+ SUBCASE("include empty and delimiter")
+ {
+ SplitTest split(Mode::include_empty, IncludeDelimiter::yes);
+ split("", "/", {""});
+ split("/", "/", {"/", ""});
+ split("a/", "/", {"a/", ""});
+ split("/b", "/", {"/", "b"});
+ split("a/b", "/", {"a/", "b"});
+ split("/a:", "/:", {"/", "a:", ""});
+ split("a//b/", "/", {"a/", "/", "b/", ""});
+ }
+
+ SUBCASE("skip empty and include delimiter")
+ {
+ SplitTest split(Mode::skip_empty, IncludeDelimiter::yes);
+ split("", "/", {});
+ split("///", "/", {});
+ split("a/b", "/", {"a/", "b"});
+ split("a/b", "x", {"a/b"});
+ split("a/b:c", "/:", {"a/", "b:", "c"});
+ split("/a:", "/:", {"a:"});
+ split(":a//b..:.c/:/.", "/:.", {"a/", "b.", "c/"});
+ split(".0.1.2.3.4.5.6.7.8.9.",
+ "/:.+_abcdef",
+ {"0.", "1.", "2.", "3.", "4.", "5.", "6.", "7.", "8.", "9."});
+ }
+
+ SUBCASE("skip last empty and include delimiter")
+ {
+ SplitTest split(Mode::skip_last_empty, IncludeDelimiter::yes);
+ split("", "/", {});
+ split("/", "/", {"/"});
+ split("a/", "/", {"a/"});
+ split("/b", "/", {"/", "b"});
+ split("a/b", "/", {"a/", "b"});
+ split("/a:", "/:", {"/", "a:"});
}
}