diff options
author | Orgad Shaneh <orgad.shaneh@audiocodes.com> | 2022-03-25 17:27:39 +0300 |
---|---|---|
committer | Orgad Shaneh <orgad.shaneh@audiocodes.com> | 2022-04-05 10:22:34 +0300 |
commit | a5b5c5acacb4c0f610569f39a7d97f80535e56b2 (patch) | |
tree | 0c2ff07b31badaf29079304c211f06a7a3e45c82 | |
parent | 79087328ec60236112a7eb5818f5114d4992638d (diff) | |
download | ccache-a5b5c5acacb4c0f610569f39a7d97f80535e56b2.tar.gz |
feat: Support preserving the delimiters on tokenizer
-rw-r--r-- | src/Util.cpp | 18 | ||||
-rw-r--r-- | src/Util.hpp | 14 | ||||
-rw-r--r-- | src/util/Tokenizer.cpp | 12 | ||||
-rw-r--r-- | src/util/Tokenizer.hpp | 20 | ||||
-rw-r--r-- | unittest/test_util_Tokenizer.cpp | 48 |
5 files changed, 88 insertions, 24 deletions
diff --git a/src/Util.cpp b/src/Util.cpp index 0a4d4304..e68ef79f 100644 --- a/src/Util.cpp +++ b/src/Util.cpp @@ -103,6 +103,7 @@ extern "C" { using nonstd::nullopt; using nonstd::optional; using nonstd::string_view; +using IncludeDelimiter = util::Tokenizer::IncludeDelimiter; namespace { @@ -159,10 +160,13 @@ template<typename T> std::vector<T> split_into(string_view string, const char* separators, - util::Tokenizer::Mode mode) + util::Tokenizer::Mode mode, + IncludeDelimiter include_delimiter) + { std::vector<T> result; - for (const auto token : util::Tokenizer(string, separators, mode)) { + for (const auto token : + util::Tokenizer(string, separators, mode, include_delimiter)) { result.emplace_back(token); } return result; @@ -1313,17 +1317,19 @@ setenv(const std::string& name, const std::string& value) std::vector<string_view> split_into_views(string_view string, const char* separators, - util::Tokenizer::Mode mode) + util::Tokenizer::Mode mode, + IncludeDelimiter include_delimiter) { - return split_into<string_view>(string, separators, mode); + return split_into<string_view>(string, separators, mode, include_delimiter); } std::vector<std::string> split_into_strings(string_view string, const char* separators, - util::Tokenizer::Mode mode) + util::Tokenizer::Mode mode, + IncludeDelimiter include_delimiter) { - return split_into<std::string>(string, separators, mode); + return split_into<std::string>(string, separators, mode, include_delimiter); } std::string diff --git a/src/Util.hpp b/src/Util.hpp index 9ad678c7..6325463b 100644 --- a/src/Util.hpp +++ b/src/Util.hpp @@ -357,16 +357,20 @@ size_change_kibibyte(const Stat& old_stat, const Stat& new_stat) // Split `string` into tokens at any of the characters in `separators`. These // tokens are views into `string`. `separators` must neither be the empty string // nor a nullptr. -std::vector<nonstd::string_view> split_into_views( - nonstd::string_view string, - const char* separators, - util::Tokenizer::Mode mode = util::Tokenizer::Mode::skip_empty); +std::vector<nonstd::string_view> +split_into_views(nonstd::string_view string, + const char* separators, + util::Tokenizer::Mode mode = util::Tokenizer::Mode::skip_empty, + util::Tokenizer::IncludeDelimiter include_delimiter = + util::Tokenizer::IncludeDelimiter::no); // Same as `split_into_views` but the tokens are copied from `string`. std::vector<std::string> split_into_strings( nonstd::string_view string, const char* separators, - util::Tokenizer::Mode mode = util::Tokenizer::Mode::skip_empty); + util::Tokenizer::Mode mode = util::Tokenizer::Mode::skip_empty, + util::Tokenizer::IncludeDelimiter include_delimiter = + util::Tokenizer::IncludeDelimiter::no); // Returns a copy of string with the specified ANSI CSI sequences removed. [[nodiscard]] std::string strip_ansi_csi_seqs(nonstd::string_view string); diff --git a/src/util/Tokenizer.cpp b/src/util/Tokenizer.cpp index 9b27c8fb..b1d3e7e8 100644 --- a/src/util/Tokenizer.cpp +++ b/src/util/Tokenizer.cpp @@ -50,4 +50,16 @@ Tokenizer::Iterator::advance(bool initial) } } +nonstd::sv_lite::string_view +Tokenizer::Iterator::operator*() const +{ + DEBUG_ASSERT(m_left <= m_right); + DEBUG_ASSERT(m_right <= m_tokenizer.m_string.length()); + const bool include_delim = + m_tokenizer.m_include_delimiter == IncludeDelimiter::yes; + const int with_delim = + include_delim && m_right < m_tokenizer.m_string.length() ? 1 : 0; + return m_tokenizer.m_string.substr(m_left, m_right - m_left + with_delim); +} + } // namespace util diff --git a/src/util/Tokenizer.hpp b/src/util/Tokenizer.hpp index 90cb0c09..e57d4c2c 100644 --- a/src/util/Tokenizer.hpp +++ b/src/util/Tokenizer.hpp @@ -37,11 +37,14 @@ public: skip_last_empty, // Include empty tokens except the last one. }; + enum class IncludeDelimiter { no, yes }; + // Split `string` into tokens at any of the characters in `separators` which // must neither be the empty string nor a nullptr. Tokenizer(nonstd::string_view string, const char* delimiters, - Mode mode = Mode::skip_empty); + Mode mode = Mode::skip_empty, + IncludeDelimiter include_delimiter = IncludeDelimiter::no); class Iterator { @@ -69,14 +72,17 @@ private: const nonstd::string_view m_string; const char* const m_delimiters; const Mode m_mode; + const IncludeDelimiter m_include_delimiter; }; inline Tokenizer::Tokenizer(const nonstd::string_view string, const char* const delimiters, - const Tokenizer::Mode mode) + Tokenizer::Mode mode, + Tokenizer::IncludeDelimiter include_delimiter) : m_string(string), m_delimiters(delimiters), - m_mode(mode) + m_mode(mode), + m_include_delimiter(include_delimiter) { DEBUG_ASSERT(delimiters != nullptr && delimiters[0] != '\0'); } @@ -107,14 +113,6 @@ Tokenizer::Iterator::operator!=(const Iterator& other) const return &m_tokenizer != &other.m_tokenizer || m_left != other.m_left; } -inline nonstd::string_view -Tokenizer::Iterator::operator*() const -{ - DEBUG_ASSERT(m_left <= m_right); - DEBUG_ASSERT(m_right <= m_tokenizer.m_string.length()); - return m_tokenizer.m_string.substr(m_left, m_right - m_left); -} - inline Tokenizer::Iterator Tokenizer::begin() { diff --git a/unittest/test_util_Tokenizer.cpp b/unittest/test_util_Tokenizer.cpp index 0ada5a2a..c5efea34 100644 --- a/unittest/test_util_Tokenizer.cpp +++ b/unittest/test_util_Tokenizer.cpp @@ -23,9 +23,13 @@ TEST_CASE("util::Tokenizer") { using Mode = util::Tokenizer::Mode; + using IncludeDelimiter = util::Tokenizer::IncludeDelimiter; struct SplitTest { - SplitTest(Mode mode) : m_mode(mode) + SplitTest(Mode mode, + IncludeDelimiter includeDelimiter = IncludeDelimiter::no) + : m_mode(mode), + m_includeDelimiter(includeDelimiter) { } @@ -34,13 +38,15 @@ TEST_CASE("util::Tokenizer") const char* separators, const std::vector<std::string>& expected) { - const auto res = Util::split_into_views(input, separators, m_mode); + const auto res = + Util::split_into_views(input, separators, m_mode, m_includeDelimiter); REQUIRE(res.size() == expected.size()); for (int i = 0, total = expected.size(); i < total; ++i) CHECK(res[i] == expected[i]); } Mode m_mode; + IncludeDelimiter m_includeDelimiter; }; SUBCASE("include empty tokens") @@ -79,4 +85,42 @@ TEST_CASE("util::Tokenizer") split("a/b", "/", {"a", "b"}); split("/a:", "/:", {"", "a"}); } + + SUBCASE("include empty and delimiter") + { + SplitTest split(Mode::include_empty, IncludeDelimiter::yes); + split("", "/", {""}); + split("/", "/", {"/", ""}); + split("a/", "/", {"a/", ""}); + split("/b", "/", {"/", "b"}); + split("a/b", "/", {"a/", "b"}); + split("/a:", "/:", {"/", "a:", ""}); + split("a//b/", "/", {"a/", "/", "b/", ""}); + } + + SUBCASE("skip empty and include delimiter") + { + SplitTest split(Mode::skip_empty, IncludeDelimiter::yes); + split("", "/", {}); + split("///", "/", {}); + split("a/b", "/", {"a/", "b"}); + split("a/b", "x", {"a/b"}); + split("a/b:c", "/:", {"a/", "b:", "c"}); + split("/a:", "/:", {"a:"}); + split(":a//b..:.c/:/.", "/:.", {"a/", "b.", "c/"}); + split(".0.1.2.3.4.5.6.7.8.9.", + "/:.+_abcdef", + {"0.", "1.", "2.", "3.", "4.", "5.", "6.", "7.", "8.", "9."}); + } + + SUBCASE("skip last empty and include delimiter") + { + SplitTest split(Mode::skip_last_empty, IncludeDelimiter::yes); + split("", "/", {}); + split("/", "/", {"/"}); + split("a/", "/", {"a/"}); + split("/b", "/", {"/", "b"}); + split("a/b", "/", {"a/", "b"}); + split("/a:", "/:", {"/", "a:"}); + } } |