Merge pull request #1037 from orgads/cl-strip-lf

author: Joel Rosdahl <joel@rosdahl.net> 2022-04-05 19:50:52 +0200
committer: GitHub <noreply@github.com> 2022-04-05 19:50:52 +0200
commit: 870894585eb4f764c03ddf0c6e0e76a9d591d33f (patch)
tree: ac4376b317e97eaa0043b78ed2f790fe88ec1ee9
parent: ef2e922f9642f943199138447b29ec53fa63ea68 (diff)
parent: ff10b34851050a24fa7a13fa08064010c37bcd78 (diff)
download: ccache-870894585eb4f764c03ddf0c6e0e76a9d591d33f.tar.gz
6 files changed, 133 insertions, 123 deletions
diff --git a/src/Util.cpp b/src/Util.cpp
index 0a4d4304..e68ef79f 100644
--- a/src/Util.cpp
+++ b/src/Util.cpp
@@ -103,6 +103,7 @@ extern "C" {
 using nonstd::nullopt;
 using nonstd::optional;
 using nonstd::string_view;
+using IncludeDelimiter = util::Tokenizer::IncludeDelimiter;
 
 namespace {
 
@@ -159,10 +160,13 @@ template<typename T>
 std::vector<T>
 split_into(string_view string,
            const char* separators,
-           util::Tokenizer::Mode mode)
+           util::Tokenizer::Mode mode,
+           IncludeDelimiter include_delimiter)
+
 {
   std::vector<T> result;
-  for (const auto token : util::Tokenizer(string, separators, mode)) {
+  for (const auto token :
+       util::Tokenizer(string, separators, mode, include_delimiter)) {
     result.emplace_back(token);
   }
   return result;
@@ -1313,17 +1317,19 @@ setenv(const std::string& name, const std::string& value)
 std::vector<string_view>
 split_into_views(string_view string,
                  const char* separators,
-                 util::Tokenizer::Mode mode)
+                 util::Tokenizer::Mode mode,
+                 IncludeDelimiter include_delimiter)
 {
-  return split_into<string_view>(string, separators, mode);
+  return split_into<string_view>(string, separators, mode, include_delimiter);
 }
 
 std::vector<std::string>
 split_into_strings(string_view string,
                    const char* separators,
-                   util::Tokenizer::Mode mode)
+                   util::Tokenizer::Mode mode,
+                   IncludeDelimiter include_delimiter)
 {
-  return split_into<std::string>(string, separators, mode);
+  return split_into<std::string>(string, separators, mode, include_delimiter);
 }
 
 std::string
diff --git a/src/Util.hpp b/src/Util.hpp
index 9ad678c7..6325463b 100644
--- a/src/Util.hpp
+++ b/src/Util.hpp
@@ -357,16 +357,20 @@ size_change_kibibyte(const Stat& old_stat, const Stat& new_stat)
 // Split `string` into tokens at any of the characters in `separators`. These
 // tokens are views into `string`. `separators` must neither be the empty string
 // nor a nullptr.
-std::vector<nonstd::string_view> split_into_views(
-  nonstd::string_view string,
-  const char* separators,
-  util::Tokenizer::Mode mode = util::Tokenizer::Mode::skip_empty);
+std::vector<nonstd::string_view>
+split_into_views(nonstd::string_view string,
+                 const char* separators,
+                 util::Tokenizer::Mode mode = util::Tokenizer::Mode::skip_empty,
+                 util::Tokenizer::IncludeDelimiter include_delimiter =
+                   util::Tokenizer::IncludeDelimiter::no);
 
 // Same as `split_into_views` but the tokens are copied from `string`.
 std::vector<std::string> split_into_strings(
   nonstd::string_view string,
   const char* separators,
-  util::Tokenizer::Mode mode = util::Tokenizer::Mode::skip_empty);
+  util::Tokenizer::Mode mode = util::Tokenizer::Mode::skip_empty,
+  util::Tokenizer::IncludeDelimiter include_delimiter =
+    util::Tokenizer::IncludeDelimiter::no);
 
 // Returns a copy of string with the specified ANSI CSI sequences removed.
 [[nodiscard]] std::string strip_ansi_csi_seqs(nonstd::string_view string);
diff --git a/src/ccache.cpp b/src/ccache.cpp
index 0b2fbd22..2598d02b 100644
--- a/src/ccache.cpp
+++ b/src/ccache.cpp
@@ -906,6 +906,9 @@ write_result(Context& ctx,
 static std::string
 rewrite_stdout_from_compiler(const Context& ctx, std::string&& stdout_data)
 {
+  using util::Tokenizer;
+  using Mode = Tokenizer::Mode;
+  using IncludeDelimiter = Tokenizer::IncludeDelimiter;
   // distcc-pump outputs lines like this:
   //
   //   __________Using # distcc servers in pump mode
@@ -913,13 +916,12 @@ rewrite_stdout_from_compiler(const Context& ctx, std::string&& stdout_data)
   // We don't want to cache those.
   if (!stdout_data.empty()) {
     std::string new_stdout_text;
-    for (const auto line : util::Tokenizer(
-           stdout_data, "\n", util::Tokenizer::Mode::include_empty)) {
+    for (const auto line : Tokenizer(
+           stdout_data, "\n", Mode::include_empty, IncludeDelimiter::yes)) {
       if (util::starts_with(line, "__________")) {
         Util::send_to_fd(ctx, std::string(line), STDOUT_FILENO);
       } else {
         new_stdout_text.append(line.data(), line.length());
-        new_stdout_text.append("\n");
       }
     }
     return new_stdout_text;
diff --git a/src/util/Tokenizer.cpp b/src/util/Tokenizer.cpp
index 9b27c8fb..b1d3e7e8 100644
--- a/src/util/Tokenizer.cpp
+++ b/src/util/Tokenizer.cpp
@@ -50,4 +50,16 @@ Tokenizer::Iterator::advance(bool initial)
   }
 }
 
+nonstd::sv_lite::string_view
+Tokenizer::Iterator::operator*() const
+{
+  DEBUG_ASSERT(m_left <= m_right);
+  DEBUG_ASSERT(m_right <= m_tokenizer.m_string.length());
+  const bool include_delim =
+    m_tokenizer.m_include_delimiter == IncludeDelimiter::yes;
+  const int with_delim =
+    include_delim && m_right < m_tokenizer.m_string.length() ? 1 : 0;
+  return m_tokenizer.m_string.substr(m_left, m_right - m_left + with_delim);
+}
+
 } // namespace util
diff --git a/src/util/Tokenizer.hpp b/src/util/Tokenizer.hpp
index 90cb0c09..e57d4c2c 100644
--- a/src/util/Tokenizer.hpp
+++ b/src/util/Tokenizer.hpp
@@ -37,11 +37,14 @@ public:
     skip_last_empty, // Include empty tokens except the last one.
   };
 
+  enum class IncludeDelimiter { no, yes };
+
   // Split `string` into tokens at any of the characters in `separators` which
   // must neither be the empty string nor a nullptr.
   Tokenizer(nonstd::string_view string,
             const char* delimiters,
-            Mode mode = Mode::skip_empty);
+            Mode mode = Mode::skip_empty,
+            IncludeDelimiter include_delimiter = IncludeDelimiter::no);
 
   class Iterator
   {
@@ -69,14 +72,17 @@ private:
   const nonstd::string_view m_string;
   const char* const m_delimiters;
   const Mode m_mode;
+  const IncludeDelimiter m_include_delimiter;
 };
 
 inline Tokenizer::Tokenizer(const nonstd::string_view string,
                             const char* const delimiters,
-                            const Tokenizer::Mode mode)
+                            Tokenizer::Mode mode,
+                            Tokenizer::IncludeDelimiter include_delimiter)
   : m_string(string),
     m_delimiters(delimiters),
-    m_mode(mode)
+    m_mode(mode),
+    m_include_delimiter(include_delimiter)
 {
   DEBUG_ASSERT(delimiters != nullptr && delimiters[0] != '\0');
 }
@@ -107,14 +113,6 @@ Tokenizer::Iterator::operator!=(const Iterator& other) const
   return &m_tokenizer != &other.m_tokenizer || m_left != other.m_left;
 }
 
-inline nonstd::string_view
-Tokenizer::Iterator::operator*() const
-{
-  DEBUG_ASSERT(m_left <= m_right);
-  DEBUG_ASSERT(m_right <= m_tokenizer.m_string.length());
-  return m_tokenizer.m_string.substr(m_left, m_right - m_left);
-}
-
 inline Tokenizer::Iterator
 Tokenizer::begin()
 {
diff --git a/unittest/test_util_Tokenizer.cpp b/unittest/test_util_Tokenizer.cpp
index 76c9e027..c5efea34 100644
--- a/unittest/test_util_Tokenizer.cpp
+++ b/unittest/test_util_Tokenizer.cpp
@@ -23,116 +23,104 @@
 TEST_CASE("util::Tokenizer")
 {
   using Mode = util::Tokenizer::Mode;
-
-  SUBCASE("include empty tokens")
+  using IncludeDelimiter = util::Tokenizer::IncludeDelimiter;
+  struct SplitTest
   {
+    SplitTest(Mode mode,
+              IncludeDelimiter includeDelimiter = IncludeDelimiter::no)
+      : m_mode(mode),
+        m_includeDelimiter(includeDelimiter)
     {
-      const auto s = Util::split_into_views("", "/", Mode::include_empty);
-      REQUIRE(s.size() == 1);
-      CHECK(s[0] == "");
-    }
-    {
-      const auto s = Util::split_into_views("/", "/", Mode::include_empty);
-      REQUIRE(s.size() == 2);
-      CHECK(s[0] == "");
-      CHECK(s[1] == "");
-    }
-    {
-      const auto s = Util::split_into_views("a/", "/", Mode::include_empty);
-      REQUIRE(s.size() == 2);
-      CHECK(s[0] == "a");
-      CHECK(s[1] == "");
-    }
-    {
-      const auto s = Util::split_into_views("/b", "/", Mode::include_empty);
-      REQUIRE(s.size() == 2);
-      CHECK(s[0] == "");
-      CHECK(s[1] == "b");
-    }
-    {
-      const auto s = Util::split_into_views("a/b", "/", Mode::include_empty);
-      REQUIRE(s.size() == 2);
-      CHECK(s[0] == "a");
-      CHECK(s[1] == "b");
     }
+
+    void
+    operator()(const char* input,
+               const char* separators,
+               const std::vector<std::string>& expected)
     {
-      const auto s = Util::split_into_views("/a:", "/:", Mode::include_empty);
-      REQUIRE(s.size() == 3);
-      CHECK(s[0] == "");
-      CHECK(s[1] == "a");
-      CHECK(s[2] == "");
+      const auto res =
+        Util::split_into_views(input, separators, m_mode, m_includeDelimiter);
+      REQUIRE(res.size() == expected.size());
+      for (int i = 0, total = expected.size(); i < total; ++i)
+        CHECK(res[i] == expected[i]);
     }
+
+    Mode m_mode;
+    IncludeDelimiter m_includeDelimiter;
+  };
+
+  SUBCASE("include empty tokens")
+  {
+    SplitTest split(Mode::include_empty);
+    split("", "/", {""});
+    split("/", "/", {"", ""});
+    split("a/", "/", {"a", ""});
+    split("/b", "/", {"", "b"});
+    split("a/b", "/", {"a", "b"});
+    split("/a:", "/:", {"", "a", ""});
   }
 
   SUBCASE("skip empty")
   {
-    CHECK(Util::split_into_views("", "/", Mode::skip_empty).empty());
-    CHECK(Util::split_into_views("///", "/", Mode::skip_empty).empty());
-    {
-      const auto s = Util::split_into_views("a/b", "/", Mode::skip_empty);
-      REQUIRE(s.size() == 2);
-      CHECK(s[0] == "a");
-      CHECK(s[1] == "b");
-    }
-    {
-      const auto s = Util::split_into_views("a/b", "x", Mode::skip_empty);
-      REQUIRE(s.size() == 1);
-      CHECK(s[0] == "a/b");
-    }
-    {
-      const auto s = Util::split_into_views("a/b:c", "/:", Mode::skip_empty);
-      REQUIRE(s.size() == 3);
-      CHECK(s[0] == "a");
-      CHECK(s[1] == "b");
-      CHECK(s[2] == "c");
-    }
-    {
-      const auto s =
-        Util::split_into_views(":a//b..:.c/:/.", "/:.", Mode::skip_empty);
-      REQUIRE(s.size() == 3);
-      CHECK(s[0] == "a");
-      CHECK(s[1] == "b");
-      CHECK(s[2] == "c");
-    }
-    {
-      const auto s = Util::split_into_views(
-        ".0.1.2.3.4.5.6.7.8.9.", "/:.+_abcdef", Mode::skip_empty);
-      REQUIRE(s.size() == 10);
-      CHECK(s[0] == "0");
-      CHECK(s[9] == "9");
-    }
+    SplitTest split(Mode::skip_empty);
+    split("", "/", {});
+    split("///", "/", {});
+    split("a/b", "/", {"a", "b"});
+    split("a/b", "x", {"a/b"});
+    split("a/b:c", "/:", {"a", "b", "c"});
+    split("/a:", "/:", {"a"});
+    split(":a//b..:.c/:/.", "/:.", {"a", "b", "c"});
+    split(".0.1.2.3.4.5.6.7.8.9.",
+          "/:.+_abcdef",
+          {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9"});
   }
 
   SUBCASE("skip last empty token")
   {
-    CHECK(Util::split_into_views("", "/", Mode::skip_last_empty).empty());
-    {
-      const auto s = Util::split_into_views("/", "/", Mode::skip_last_empty);
-      REQUIRE(s.size() == 1);
-      CHECK(s[0] == "");
-    }
-    {
-      const auto s = Util::split_into_views("a/", "/", Mode::skip_last_empty);
-      REQUIRE(s.size() == 1);
-      CHECK(s[0] == "a");
-    }
-    {
-      const auto s = Util::split_into_views("/b", "/", Mode::skip_last_empty);
-      REQUIRE(s.size() == 2);
-      CHECK(s[0] == "");
-      CHECK(s[1] == "b");
-    }
-    {
-      const auto s = Util::split_into_views("a/b", "/", Mode::skip_last_empty);
-      REQUIRE(s.size() == 2);
-      CHECK(s[0] == "a");
-      CHECK(s[1] == "b");
-    }
-    {
-      const auto s = Util::split_into_views("/a:", "/:", Mode::skip_last_empty);
-      REQUIRE(s.size() == 2);
-      CHECK(s[0] == "");
-      CHECK(s[1] == "a");
-    }
+    SplitTest split(Mode::skip_last_empty);
+    split("", "/", {});
+    split("/", "/", {""});
+    split("a/", "/", {"a"});
+    split("/b", "/", {"", "b"});
+    split("a/b", "/", {"a", "b"});
+    split("/a:", "/:", {"", "a"});
+  }
+
+  SUBCASE("include empty and delimiter")
+  {
+    SplitTest split(Mode::include_empty, IncludeDelimiter::yes);
+    split("", "/", {""});
+    split("/", "/", {"/", ""});
+    split("a/", "/", {"a/", ""});
+    split("/b", "/", {"/", "b"});
+    split("a/b", "/", {"a/", "b"});
+    split("/a:", "/:", {"/", "a:", ""});
+    split("a//b/", "/", {"a/", "/", "b/", ""});
+  }
+
+  SUBCASE("skip empty and include delimiter")
+  {
+    SplitTest split(Mode::skip_empty, IncludeDelimiter::yes);
+    split("", "/", {});
+    split("///", "/", {});
+    split("a/b", "/", {"a/", "b"});
+    split("a/b", "x", {"a/b"});
+    split("a/b:c", "/:", {"a/", "b:", "c"});
+    split("/a:", "/:", {"a:"});
+    split(":a//b..:.c/:/.", "/:.", {"a/", "b.", "c/"});
+    split(".0.1.2.3.4.5.6.7.8.9.",
+          "/:.+_abcdef",
+          {"0.", "1.", "2.", "3.", "4.", "5.", "6.", "7.", "8.", "9."});
+  }
+
+  SUBCASE("skip last empty and include delimiter")
+  {
+    SplitTest split(Mode::skip_last_empty, IncludeDelimiter::yes);
+    split("", "/", {});
+    split("/", "/", {"/"});
+    split("a/", "/", {"a/"});
+    split("/b", "/", {"/", "b"});
+    split("a/b", "/", {"a/", "b"});
+    split("/a:", "/:", {"/", "a:"});
   }
 }
author	Joel Rosdahl <joel@rosdahl.net>	2022-04-05 19:50:52 +0200
committer	GitHub <noreply@github.com>	2022-04-05 19:50:52 +0200
commit	870894585eb4f764c03ddf0c6e0e76a9d591d33f (patch)
tree	ac4376b317e97eaa0043b78ed2f790fe88ec1ee9
parent	ef2e922f9642f943199138447b29ec53fa63ea68 (diff)
parent	ff10b34851050a24fa7a13fa08064010c37bcd78 (diff)
download	ccache-870894585eb4f764c03ddf0c6e0e76a9d591d33f.tar.gz