summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorOrgad Shaneh <orgad.shaneh@audiocodes.com>2022-03-25 17:27:39 +0300
committerOrgad Shaneh <orgad.shaneh@audiocodes.com>2022-04-05 10:22:34 +0300
commita5b5c5acacb4c0f610569f39a7d97f80535e56b2 (patch)
tree0c2ff07b31badaf29079304c211f06a7a3e45c82
parent79087328ec60236112a7eb5818f5114d4992638d (diff)
downloadccache-a5b5c5acacb4c0f610569f39a7d97f80535e56b2.tar.gz
feat: Support preserving the delimiters on tokenizer
-rw-r--r--src/Util.cpp18
-rw-r--r--src/Util.hpp14
-rw-r--r--src/util/Tokenizer.cpp12
-rw-r--r--src/util/Tokenizer.hpp20
-rw-r--r--unittest/test_util_Tokenizer.cpp48
5 files changed, 88 insertions, 24 deletions
diff --git a/src/Util.cpp b/src/Util.cpp
index 0a4d4304..e68ef79f 100644
--- a/src/Util.cpp
+++ b/src/Util.cpp
@@ -103,6 +103,7 @@ extern "C" {
using nonstd::nullopt;
using nonstd::optional;
using nonstd::string_view;
+using IncludeDelimiter = util::Tokenizer::IncludeDelimiter;
namespace {
@@ -159,10 +160,13 @@ template<typename T>
std::vector<T>
split_into(string_view string,
const char* separators,
- util::Tokenizer::Mode mode)
+ util::Tokenizer::Mode mode,
+ IncludeDelimiter include_delimiter)
+
{
std::vector<T> result;
- for (const auto token : util::Tokenizer(string, separators, mode)) {
+ for (const auto token :
+ util::Tokenizer(string, separators, mode, include_delimiter)) {
result.emplace_back(token);
}
return result;
@@ -1313,17 +1317,19 @@ setenv(const std::string& name, const std::string& value)
std::vector<string_view>
split_into_views(string_view string,
const char* separators,
- util::Tokenizer::Mode mode)
+ util::Tokenizer::Mode mode,
+ IncludeDelimiter include_delimiter)
{
- return split_into<string_view>(string, separators, mode);
+ return split_into<string_view>(string, separators, mode, include_delimiter);
}
std::vector<std::string>
split_into_strings(string_view string,
const char* separators,
- util::Tokenizer::Mode mode)
+ util::Tokenizer::Mode mode,
+ IncludeDelimiter include_delimiter)
{
- return split_into<std::string>(string, separators, mode);
+ return split_into<std::string>(string, separators, mode, include_delimiter);
}
std::string
diff --git a/src/Util.hpp b/src/Util.hpp
index 9ad678c7..6325463b 100644
--- a/src/Util.hpp
+++ b/src/Util.hpp
@@ -357,16 +357,20 @@ size_change_kibibyte(const Stat& old_stat, const Stat& new_stat)
// Split `string` into tokens at any of the characters in `separators`. These
// tokens are views into `string`. `separators` must neither be the empty string
// nor a nullptr.
-std::vector<nonstd::string_view> split_into_views(
- nonstd::string_view string,
- const char* separators,
- util::Tokenizer::Mode mode = util::Tokenizer::Mode::skip_empty);
+std::vector<nonstd::string_view>
+split_into_views(nonstd::string_view string,
+ const char* separators,
+ util::Tokenizer::Mode mode = util::Tokenizer::Mode::skip_empty,
+ util::Tokenizer::IncludeDelimiter include_delimiter =
+ util::Tokenizer::IncludeDelimiter::no);
// Same as `split_into_views` but the tokens are copied from `string`.
std::vector<std::string> split_into_strings(
nonstd::string_view string,
const char* separators,
- util::Tokenizer::Mode mode = util::Tokenizer::Mode::skip_empty);
+ util::Tokenizer::Mode mode = util::Tokenizer::Mode::skip_empty,
+ util::Tokenizer::IncludeDelimiter include_delimiter =
+ util::Tokenizer::IncludeDelimiter::no);
// Returns a copy of string with the specified ANSI CSI sequences removed.
[[nodiscard]] std::string strip_ansi_csi_seqs(nonstd::string_view string);
diff --git a/src/util/Tokenizer.cpp b/src/util/Tokenizer.cpp
index 9b27c8fb..b1d3e7e8 100644
--- a/src/util/Tokenizer.cpp
+++ b/src/util/Tokenizer.cpp
@@ -50,4 +50,16 @@ Tokenizer::Iterator::advance(bool initial)
}
}
+nonstd::sv_lite::string_view
+Tokenizer::Iterator::operator*() const // Returns the current token as a view into the tokenizer's m_string.
+{
+ DEBUG_ASSERT(m_left <= m_right);
+ DEBUG_ASSERT(m_right <= m_tokenizer.m_string.length());
+ const bool include_delim =
+ m_tokenizer.m_include_delimiter == IncludeDelimiter::yes;
+ const int with_delim = // 1 only when requested and a character exists at index m_right; otherwise 0.
+ include_delim && m_right < m_tokenizer.m_string.length() ? 1 : 0;
+ return m_tokenizer.m_string.substr(m_left, m_right - m_left + with_delim); // [m_left, m_right), plus the char at m_right when with_delim is 1 (presumably the delimiter; the unit tests expect e.g. "a/" from "a/b").
+}
+
} // namespace util
diff --git a/src/util/Tokenizer.hpp b/src/util/Tokenizer.hpp
index 90cb0c09..e57d4c2c 100644
--- a/src/util/Tokenizer.hpp
+++ b/src/util/Tokenizer.hpp
@@ -37,11 +37,14 @@ public:
skip_last_empty, // Include empty tokens except the last one.
};
+ enum class IncludeDelimiter { no, yes }; // yes: a dereferenced token is extended by the one character that follows it in the string, when there is one.
+
// Split `string` into tokens at any of the characters in `delimiters` which
// must neither be the empty string nor a nullptr.
Tokenizer(nonstd::string_view string,
const char* delimiters,
- Mode mode = Mode::skip_empty);
+ Mode mode = Mode::skip_empty,
+ IncludeDelimiter include_delimiter = IncludeDelimiter::no);
class Iterator
{
@@ -69,14 +72,17 @@ private:
const nonstd::string_view m_string;
const char* const m_delimiters;
const Mode m_mode;
+ const IncludeDelimiter m_include_delimiter;
};
inline Tokenizer::Tokenizer(const nonstd::string_view string,
const char* const delimiters,
- const Tokenizer::Mode mode)
+ Tokenizer::Mode mode,
+ Tokenizer::IncludeDelimiter include_delimiter)
: m_string(string),
m_delimiters(delimiters),
- m_mode(mode)
+ m_mode(mode),
+ m_include_delimiter(include_delimiter)
{
DEBUG_ASSERT(delimiters != nullptr && delimiters[0] != '\0');
}
@@ -107,14 +113,6 @@ Tokenizer::Iterator::operator!=(const Iterator& other) const
return &m_tokenizer != &other.m_tokenizer || m_left != other.m_left;
}
-inline nonstd::string_view
-Tokenizer::Iterator::operator*() const
-{
- DEBUG_ASSERT(m_left <= m_right);
- DEBUG_ASSERT(m_right <= m_tokenizer.m_string.length());
- return m_tokenizer.m_string.substr(m_left, m_right - m_left);
-}
-
inline Tokenizer::Iterator
Tokenizer::begin()
{
diff --git a/unittest/test_util_Tokenizer.cpp b/unittest/test_util_Tokenizer.cpp
index 0ada5a2a..c5efea34 100644
--- a/unittest/test_util_Tokenizer.cpp
+++ b/unittest/test_util_Tokenizer.cpp
@@ -23,9 +23,13 @@
TEST_CASE("util::Tokenizer")
{
using Mode = util::Tokenizer::Mode;
+ using IncludeDelimiter = util::Tokenizer::IncludeDelimiter;
struct SplitTest
{
- SplitTest(Mode mode) : m_mode(mode)
+ SplitTest(Mode mode,
+ IncludeDelimiter includeDelimiter = IncludeDelimiter::no)
+ : m_mode(mode),
+ m_includeDelimiter(includeDelimiter)
{
}
@@ -34,13 +38,15 @@ TEST_CASE("util::Tokenizer")
const char* separators,
const std::vector<std::string>& expected)
{
- const auto res = Util::split_into_views(input, separators, m_mode);
+ const auto res =
+ Util::split_into_views(input, separators, m_mode, m_includeDelimiter);
REQUIRE(res.size() == expected.size());
for (int i = 0, total = expected.size(); i < total; ++i)
CHECK(res[i] == expected[i]);
}
Mode m_mode;
+ IncludeDelimiter m_includeDelimiter;
};
SUBCASE("include empty tokens")
@@ -79,4 +85,42 @@ TEST_CASE("util::Tokenizer")
split("a/b", "/", {"a", "b"});
split("/a:", "/:", {"", "a"});
}
+
+ SUBCASE("include empty and delimiter")
+ {
+ SplitTest split(Mode::include_empty, IncludeDelimiter::yes);
+ split("", "/", {""});
+ split("/", "/", {"/", ""});
+ split("a/", "/", {"a/", ""});
+ split("/b", "/", {"/", "b"});
+ split("a/b", "/", {"a/", "b"});
+ split("/a:", "/:", {"/", "a:", ""});
+ split("a//b/", "/", {"a/", "/", "b/", ""});
+ }
+
+ SUBCASE("skip empty and include delimiter")
+ {
+ SplitTest split(Mode::skip_empty, IncludeDelimiter::yes);
+ split("", "/", {});
+ split("///", "/", {});
+ split("a/b", "/", {"a/", "b"});
+ split("a/b", "x", {"a/b"});
+ split("a/b:c", "/:", {"a/", "b:", "c"});
+ split("/a:", "/:", {"a:"});
+ split(":a//b..:.c/:/.", "/:.", {"a/", "b.", "c/"});
+ split(".0.1.2.3.4.5.6.7.8.9.",
+ "/:.+_abcdef",
+ {"0.", "1.", "2.", "3.", "4.", "5.", "6.", "7.", "8.", "9."});
+ }
+
+ SUBCASE("skip last empty and include delimiter")
+ {
+ SplitTest split(Mode::skip_last_empty, IncludeDelimiter::yes);
+ split("", "/", {});
+ split("/", "/", {"/"});
+ split("a/", "/", {"a/"});
+ split("/b", "/", {"/", "b"});
+ split("a/b", "/", {"a/", "b"});
+ split("/a:", "/:", {"/", "a:"});
+ }
}