diff options
author | Joel Rosdahl <joel@rosdahl.net> | 2022-04-05 19:50:52 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-04-05 19:50:52 +0200 |
commit | 870894585eb4f764c03ddf0c6e0e76a9d591d33f (patch) | |
tree | ac4376b317e97eaa0043b78ed2f790fe88ec1ee9 | |
parent | ef2e922f9642f943199138447b29ec53fa63ea68 (diff) | |
parent | ff10b34851050a24fa7a13fa08064010c37bcd78 (diff) | |
download | ccache-870894585eb4f764c03ddf0c6e0e76a9d591d33f.tar.gz |
Merge pull request #1037 from orgads/cl-strip-lf
-rw-r--r-- | src/Util.cpp | 18 | ||||
-rw-r--r-- | src/Util.hpp | 14 | ||||
-rw-r--r-- | src/ccache.cpp | 8 | ||||
-rw-r--r-- | src/util/Tokenizer.cpp | 12 | ||||
-rw-r--r-- | src/util/Tokenizer.hpp | 20 | ||||
-rw-r--r-- | unittest/test_util_Tokenizer.cpp | 184 |
6 files changed, 133 insertions, 123 deletions
diff --git a/src/Util.cpp b/src/Util.cpp index 0a4d4304..e68ef79f 100644 --- a/src/Util.cpp +++ b/src/Util.cpp @@ -103,6 +103,7 @@ extern "C" { using nonstd::nullopt; using nonstd::optional; using nonstd::string_view; +using IncludeDelimiter = util::Tokenizer::IncludeDelimiter; namespace { @@ -159,10 +160,13 @@ template<typename T> std::vector<T> split_into(string_view string, const char* separators, - util::Tokenizer::Mode mode) + util::Tokenizer::Mode mode, + IncludeDelimiter include_delimiter) + { std::vector<T> result; - for (const auto token : util::Tokenizer(string, separators, mode)) { + for (const auto token : + util::Tokenizer(string, separators, mode, include_delimiter)) { result.emplace_back(token); } return result; @@ -1313,17 +1317,19 @@ setenv(const std::string& name, const std::string& value) std::vector<string_view> split_into_views(string_view string, const char* separators, - util::Tokenizer::Mode mode) + util::Tokenizer::Mode mode, + IncludeDelimiter include_delimiter) { - return split_into<string_view>(string, separators, mode); + return split_into<string_view>(string, separators, mode, include_delimiter); } std::vector<std::string> split_into_strings(string_view string, const char* separators, - util::Tokenizer::Mode mode) + util::Tokenizer::Mode mode, + IncludeDelimiter include_delimiter) { - return split_into<std::string>(string, separators, mode); + return split_into<std::string>(string, separators, mode, include_delimiter); } std::string diff --git a/src/Util.hpp b/src/Util.hpp index 9ad678c7..6325463b 100644 --- a/src/Util.hpp +++ b/src/Util.hpp @@ -357,16 +357,20 @@ size_change_kibibyte(const Stat& old_stat, const Stat& new_stat) // Split `string` into tokens at any of the characters in `separators`. These // tokens are views into `string`. `separators` must neither be the empty string // nor a nullptr. -std::vector<nonstd::string_view> split_into_views( - nonstd::string_view string, - const char* separators, - util::Tokenizer::Mode mode = util::Tokenizer::Mode::skip_empty); +std::vector<nonstd::string_view> +split_into_views(nonstd::string_view string, + const char* separators, + util::Tokenizer::Mode mode = util::Tokenizer::Mode::skip_empty, + util::Tokenizer::IncludeDelimiter include_delimiter = + util::Tokenizer::IncludeDelimiter::no); // Same as `split_into_views` but the tokens are copied from `string`. std::vector<std::string> split_into_strings( nonstd::string_view string, const char* separators, - util::Tokenizer::Mode mode = util::Tokenizer::Mode::skip_empty); + util::Tokenizer::Mode mode = util::Tokenizer::Mode::skip_empty, + util::Tokenizer::IncludeDelimiter include_delimiter = + util::Tokenizer::IncludeDelimiter::no); // Returns a copy of string with the specified ANSI CSI sequences removed. [[nodiscard]] std::string strip_ansi_csi_seqs(nonstd::string_view string); diff --git a/src/ccache.cpp b/src/ccache.cpp index 0b2fbd22..2598d02b 100644 --- a/src/ccache.cpp +++ b/src/ccache.cpp @@ -906,6 +906,9 @@ write_result(Context& ctx, static std::string rewrite_stdout_from_compiler(const Context& ctx, std::string&& stdout_data) { + using util::Tokenizer; + using Mode = Tokenizer::Mode; + using IncludeDelimiter = Tokenizer::IncludeDelimiter; // distcc-pump outputs lines like this: // // __________Using # distcc servers in pump mode @@ -913,13 +916,12 @@ rewrite_stdout_from_compiler(const Context& ctx, std::string&& stdout_data) // We don't want to cache those. if (!stdout_data.empty()) { std::string new_stdout_text; - for (const auto line : util::Tokenizer( - stdout_data, "\n", util::Tokenizer::Mode::include_empty)) { + for (const auto line : Tokenizer( + stdout_data, "\n", Mode::include_empty, IncludeDelimiter::yes)) { if (util::starts_with(line, "__________")) { Util::send_to_fd(ctx, std::string(line), STDOUT_FILENO); } else { new_stdout_text.append(line.data(), line.length()); - new_stdout_text.append("\n"); } } return new_stdout_text; diff --git a/src/util/Tokenizer.cpp b/src/util/Tokenizer.cpp index 9b27c8fb..b1d3e7e8 100644 --- a/src/util/Tokenizer.cpp +++ b/src/util/Tokenizer.cpp @@ -50,4 +50,16 @@ Tokenizer::Iterator::advance(bool initial) } } +nonstd::sv_lite::string_view +Tokenizer::Iterator::operator*() const +{ + DEBUG_ASSERT(m_left <= m_right); + DEBUG_ASSERT(m_right <= m_tokenizer.m_string.length()); + const bool include_delim = + m_tokenizer.m_include_delimiter == IncludeDelimiter::yes; + const int with_delim = + include_delim && m_right < m_tokenizer.m_string.length() ? 1 : 0; + return m_tokenizer.m_string.substr(m_left, m_right - m_left + with_delim); +} + } // namespace util diff --git a/src/util/Tokenizer.hpp b/src/util/Tokenizer.hpp index 90cb0c09..e57d4c2c 100644 --- a/src/util/Tokenizer.hpp +++ b/src/util/Tokenizer.hpp @@ -37,11 +37,14 @@ public: skip_last_empty, // Include empty tokens except the last one. }; + enum class IncludeDelimiter { no, yes }; + // Split `string` into tokens at any of the characters in `separators` which // must neither be the empty string nor a nullptr. Tokenizer(nonstd::string_view string, const char* delimiters, - Mode mode = Mode::skip_empty); + Mode mode = Mode::skip_empty, + IncludeDelimiter include_delimiter = IncludeDelimiter::no); class Iterator { @@ -69,14 +72,17 @@ private: const nonstd::string_view m_string; const char* const m_delimiters; const Mode m_mode; + const IncludeDelimiter m_include_delimiter; }; inline Tokenizer::Tokenizer(const nonstd::string_view string, const char* const delimiters, - const Tokenizer::Mode mode) + Tokenizer::Mode mode, + Tokenizer::IncludeDelimiter include_delimiter) : m_string(string), m_delimiters(delimiters), - m_mode(mode) + m_mode(mode), + m_include_delimiter(include_delimiter) { DEBUG_ASSERT(delimiters != nullptr && delimiters[0] != '\0'); } @@ -107,14 +113,6 @@ Tokenizer::Iterator::operator!=(const Iterator& other) const return &m_tokenizer != &other.m_tokenizer || m_left != other.m_left; } -inline nonstd::string_view -Tokenizer::Iterator::operator*() const -{ - DEBUG_ASSERT(m_left <= m_right); - DEBUG_ASSERT(m_right <= m_tokenizer.m_string.length()); - return m_tokenizer.m_string.substr(m_left, m_right - m_left); -} - inline Tokenizer::Iterator Tokenizer::begin() { diff --git a/unittest/test_util_Tokenizer.cpp b/unittest/test_util_Tokenizer.cpp index 76c9e027..c5efea34 100644 --- a/unittest/test_util_Tokenizer.cpp +++ b/unittest/test_util_Tokenizer.cpp @@ -23,116 +23,104 @@ TEST_CASE("util::Tokenizer") { using Mode = util::Tokenizer::Mode; - - SUBCASE("include empty tokens") + using IncludeDelimiter = util::Tokenizer::IncludeDelimiter; + struct SplitTest { + SplitTest(Mode mode, + IncludeDelimiter includeDelimiter = IncludeDelimiter::no) + : m_mode(mode), + m_includeDelimiter(includeDelimiter) { - const auto s = Util::split_into_views("", "/", Mode::include_empty); - REQUIRE(s.size() == 1); - CHECK(s[0] == ""); - } - { - const auto s = Util::split_into_views("/", "/", Mode::include_empty); - REQUIRE(s.size() == 2); - CHECK(s[0] == ""); - CHECK(s[1] == ""); - } - { - const auto s = Util::split_into_views("a/", "/", Mode::include_empty); - REQUIRE(s.size() == 2); - CHECK(s[0] == "a"); - CHECK(s[1] == ""); - } - { - const auto s = Util::split_into_views("/b", "/", Mode::include_empty); - REQUIRE(s.size() == 2); - CHECK(s[0] == ""); - CHECK(s[1] == "b"); - } - { - const auto s = Util::split_into_views("a/b", "/", Mode::include_empty); - REQUIRE(s.size() == 2); - CHECK(s[0] == "a"); - CHECK(s[1] == "b"); } + + void + operator()(const char* input, + const char* separators, + const std::vector<std::string>& expected) { - const auto s = Util::split_into_views("/a:", "/:", Mode::include_empty); - REQUIRE(s.size() == 3); - CHECK(s[0] == ""); - CHECK(s[1] == "a"); - CHECK(s[2] == ""); + const auto res = + Util::split_into_views(input, separators, m_mode, m_includeDelimiter); + REQUIRE(res.size() == expected.size()); + for (int i = 0, total = expected.size(); i < total; ++i) + CHECK(res[i] == expected[i]); } + + Mode m_mode; + IncludeDelimiter m_includeDelimiter; + }; + + SUBCASE("include empty tokens") + { + SplitTest split(Mode::include_empty); + split("", "/", {""}); + split("/", "/", {"", ""}); + split("a/", "/", {"a", ""}); + split("/b", "/", {"", "b"}); + split("a/b", "/", {"a", "b"}); + split("/a:", "/:", {"", "a", ""}); } SUBCASE("skip empty") { - CHECK(Util::split_into_views("", "/", Mode::skip_empty).empty()); - CHECK(Util::split_into_views("///", "/", Mode::skip_empty).empty()); - { - const auto s = Util::split_into_views("a/b", "/", Mode::skip_empty); - REQUIRE(s.size() == 2); - CHECK(s[0] == "a"); - CHECK(s[1] == "b"); - } - { - const auto s = Util::split_into_views("a/b", "x", Mode::skip_empty); - REQUIRE(s.size() == 1); - CHECK(s[0] == "a/b"); - } - { - const auto s = Util::split_into_views("a/b:c", "/:", Mode::skip_empty); - REQUIRE(s.size() == 3); - CHECK(s[0] == "a"); - CHECK(s[1] == "b"); - CHECK(s[2] == "c"); - } - { - const auto s = - Util::split_into_views(":a//b..:.c/:/.", "/:.", Mode::skip_empty); - REQUIRE(s.size() == 3); - CHECK(s[0] == "a"); - CHECK(s[1] == "b"); - CHECK(s[2] == "c"); - } - { - const auto s = Util::split_into_views( - ".0.1.2.3.4.5.6.7.8.9.", "/:.+_abcdef", Mode::skip_empty); - REQUIRE(s.size() == 10); - CHECK(s[0] == "0"); - CHECK(s[9] == "9"); - } + SplitTest split(Mode::skip_empty); + split("", "/", {}); + split("///", "/", {}); + split("a/b", "/", {"a", "b"}); + split("a/b", "x", {"a/b"}); + split("a/b:c", "/:", {"a", "b", "c"}); + split("/a:", "/:", {"a"}); + split(":a//b..:.c/:/.", "/:.", {"a", "b", "c"}); + split(".0.1.2.3.4.5.6.7.8.9.", + "/:.+_abcdef", + {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9"}); } SUBCASE("skip last empty token") { - CHECK(Util::split_into_views("", "/", Mode::skip_last_empty).empty()); - { - const auto s = Util::split_into_views("/", "/", Mode::skip_last_empty); - REQUIRE(s.size() == 1); - CHECK(s[0] == ""); - } - { - const auto s = Util::split_into_views("a/", "/", Mode::skip_last_empty); - REQUIRE(s.size() == 1); - CHECK(s[0] == "a"); - } - { - const auto s = Util::split_into_views("/b", "/", Mode::skip_last_empty); - REQUIRE(s.size() == 2); - CHECK(s[0] == ""); - CHECK(s[1] == "b"); - } - { - const auto s = Util::split_into_views("a/b", "/", Mode::skip_last_empty); - REQUIRE(s.size() == 2); - CHECK(s[0] == "a"); - CHECK(s[1] == "b"); - } - { - const auto s = Util::split_into_views("/a:", "/:", Mode::skip_last_empty); - REQUIRE(s.size() == 2); - CHECK(s[0] == ""); - CHECK(s[1] == "a"); - } + SplitTest split(Mode::skip_last_empty); + split("", "/", {}); + split("/", "/", {""}); + split("a/", "/", {"a"}); + split("/b", "/", {"", "b"}); + split("a/b", "/", {"a", "b"}); + split("/a:", "/:", {"", "a"}); + } + + SUBCASE("include empty and delimiter") + { + SplitTest split(Mode::include_empty, IncludeDelimiter::yes); + split("", "/", {""}); + split("/", "/", {"/", ""}); + split("a/", "/", {"a/", ""}); + split("/b", "/", {"/", "b"}); + split("a/b", "/", {"a/", "b"}); + split("/a:", "/:", {"/", "a:", ""}); + split("a//b/", "/", {"a/", "/", "b/", ""}); + } + + SUBCASE("skip empty and include delimiter") + { + SplitTest split(Mode::skip_empty, IncludeDelimiter::yes); + split("", "/", {}); + split("///", "/", {}); + split("a/b", "/", {"a/", "b"}); + split("a/b", "x", {"a/b"}); + split("a/b:c", "/:", {"a/", "b:", "c"}); + split("/a:", "/:", {"a:"}); + split(":a//b..:.c/:/.", "/:.", {"a/", "b.", "c/"}); + split(".0.1.2.3.4.5.6.7.8.9.", + "/:.+_abcdef", + {"0.", "1.", "2.", "3.", "4.", "5.", "6.", "7.", "8.", "9."}); + } + + SUBCASE("skip last empty and include delimiter") + { + SplitTest split(Mode::skip_last_empty, IncludeDelimiter::yes); + split("", "/", {}); + split("/", "/", {"/"}); + split("a/", "/", {"a/"}); + split("/b", "/", {"/", "b"}); + split("a/b", "/", {"a/", "b"}); + split("/a:", "/:", {"/", "a:"}); } } |