// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by an MIT-style license that can be
// found in the LICENSE file or at https://opensource.org/licenses/MIT.

#include "third_party/liburlpattern/tokenize.h"

#include "testing/gtest/include/gtest/gtest.h"

namespace liburlpattern {

void RunTokenizeTest(absl::string_view pattern,
                     absl::StatusOr<std::vector<Token>> expected) {
  auto result = Tokenize(pattern);
  ASSERT_EQ(result.ok(), expected.ok()) << "lexer status for: " << pattern;
  if (!expected.ok()) {
    ASSERT_EQ(result.status().code(), expected.status().code())
        << "lexer status code for: " << pattern;
    EXPECT_NE(result.status().message().find(expected.status().message()),
              std::string::npos)
        << "lexer message '" << result.status().message()
        << "' does not contain '" << expected.status().message()
        << "' for: " << pattern;
    return;
  }
  const auto& expected_token_list = expected.value();
  const auto& token_list = result.value();
  EXPECT_EQ(token_list.size(), expected_token_list.size())
      << "lexer should produce expected number of tokens for: " << pattern;
  for (size_t i = 0; i < token_list.size() && i < expected_token_list.size();
       ++i) {
    EXPECT_EQ(token_list[i], expected_token_list[i])
        << "token at index " << i << " wrong for: " << pattern;
  }
}

TEST(TokenizeTest, EmptyPattern) {
  std::vector<Token> expected_tokens = {
      Token(TokenType::kEnd, 0, absl::string_view()),
  };
  RunTokenizeTest("", expected_tokens);
}

TEST(TokenizeTest, Chars) {
  std::vector<Token> expected_tokens = {
      Token(TokenType::kChar, 0, "/"),
      Token(TokenType::kChar, 1, "f"),
      Token(TokenType::kChar, 2, "o"),
      Token(TokenType::kChar, 3, "o"),
      Token(TokenType::kEnd, 4, absl::string_view()),
  };
  RunTokenizeTest("/foo", expected_tokens);
}

TEST(TokenizeTest, CharsWithClosingParen) {
  std::vector<Token> expected_tokens = {
      Token(TokenType::kChar, 0, "/"),
      Token(TokenType::kChar, 1, "f"),
      Token(TokenType::kChar, 2, "o"),
      Token(TokenType::kChar, 3, "o"),
      Token(TokenType::kChar, 4, ")"),
      Token(TokenType::kEnd, 5, absl::string_view()),
  };
  RunTokenizeTest("/foo)", expected_tokens);
}

TEST(TokenizeTest, EscapedChar) {
  std::vector<Token> expected_tokens = {
      Token(TokenType::kChar, 0, "/"),
      Token(TokenType::kEscapedChar, 1, "f"),
      Token(TokenType::kChar, 3, "o"),
      Token(TokenType::kChar, 4, "o"),
      Token(TokenType::kEnd, 5, absl::string_view()),
  };
  RunTokenizeTest("/\\foo", expected_tokens);
}

TEST(TokenizeTest, EscapedColon) {
  std::vector<Token> expected_tokens = {
      Token(TokenType::kChar, 0, "/"),
      Token(TokenType::kEscapedChar, 1, ":"),
      Token(TokenType::kChar, 3, "f"),
      Token(TokenType::kChar, 4, "o"),
      Token(TokenType::kChar, 5, "o"),
      Token(TokenType::kEnd, 6, absl::string_view()),
  };
  RunTokenizeTest("/\\:foo", expected_tokens);
}

TEST(TokenizeTest, EscapedParen) {
  std::vector<Token> expected_tokens = {
      Token(TokenType::kChar, 0, "/"),
      Token(TokenType::kEscapedChar, 1, "("),
      Token(TokenType::kChar, 3, "f"),
      Token(TokenType::kChar, 4, "o"),
      Token(TokenType::kChar, 5, "o"),
      Token(TokenType::kEscapedChar, 6, ")"),
      Token(TokenType::kEnd, 8, absl::string_view()),
  };
  RunTokenizeTest("/\\(foo\\)", expected_tokens);
}

TEST(TokenizeTest, EscapedCurlyBrace) {
  std::vector<Token> expected_tokens = {
      Token(TokenType::kChar, 0, "/"),
      Token(TokenType::kEscapedChar, 1, "{"),
      Token(TokenType::kChar, 3, "f"),
      Token(TokenType::kChar, 4, "o"),
      Token(TokenType::kChar, 5, "o"),
      Token(TokenType::kEscapedChar, 6, "}"),
      Token(TokenType::kEnd, 8, absl::string_view()),
  };
  RunTokenizeTest("/\\{foo\\}", expected_tokens);
}

TEST(TokenizeTest, EscapedCharAtEnd) {
  RunTokenizeTest("/foo\\",
absl::InvalidArgumentError("Trailing escape character")); } TEST(TokenizeTest, EscapedInvalidChar) { // Use a single byte invalid character since the escape only applies to the // next byte character. RunTokenizeTest("\\\xff", absl::InvalidArgumentError("Invalid character")); } TEST(TokenizeTest, Name) { std::vector expected_tokens = { Token(TokenType::kName, 0, "Foo_1"), Token(TokenType::kEnd, 6, absl::string_view()), }; RunTokenizeTest(":Foo_1", expected_tokens); } TEST(TokenizeTest, NameWithZeroLength) { RunTokenizeTest("/:/foo", absl::InvalidArgumentError("Missing parameter name")); } TEST(TokenizeTest, NameWithInvalidChar) { RunTokenizeTest("/:fooßar", absl::InvalidArgumentError("Invalid character")); } TEST(TokenizeTest, NameAndFileExtension) { std::vector expected_tokens = { Token(TokenType::kName, 0, "foo"), Token(TokenType::kChar, 4, "."), Token(TokenType::kChar, 5, "j"), Token(TokenType::kChar, 6, "p"), Token(TokenType::kChar, 7, "g"), Token(TokenType::kEnd, 8, absl::string_view()), }; RunTokenizeTest(":foo.jpg", expected_tokens); } TEST(TokenizeTest, NameInPath) { std::vector expected_tokens = { Token(TokenType::kChar, 0, "/"), Token(TokenType::kName, 1, "foo"), Token(TokenType::kChar, 5, "/"), Token(TokenType::kChar, 6, "b"), Token(TokenType::kChar, 7, "a"), Token(TokenType::kChar, 8, "r"), Token(TokenType::kEnd, 9, absl::string_view()), }; RunTokenizeTest("/:foo/bar", expected_tokens); } TEST(TokenizeTest, Regex) { std::vector expected_tokens = { Token(TokenType::kRegex, 0, "foo"), Token(TokenType::kEnd, 5, absl::string_view()), }; RunTokenizeTest("(foo)", expected_tokens); } TEST(TokenizeTest, RegexWithZeroLength) { RunTokenizeTest("()", absl::InvalidArgumentError("Missing regex")); } TEST(TokenizeTest, RegexWithInvalidChar) { RunTokenizeTest("(ßar)", absl::InvalidArgumentError("Invalid character")); } TEST(TokenizeTest, RegexWithoutClosingParen) { RunTokenizeTest("(foo", absl::InvalidArgumentError("Unbalanced regex")); } TEST(TokenizeTest, RegexWithNestedCapturingGroup) { RunTokenizeTest("(f(oo))", absl::InvalidArgumentError( "Unnamed capturing groups are not allowed")); } TEST(TokenizeTest, RegexWithNestedNamedCapturingGroup) { std::vector expected_tokens = { Token(TokenType::kRegex, 0, "f(?oo)"), Token(TokenType::kEnd, 8, absl::string_view()), }; RunTokenizeTest("(f(?oo))", expected_tokens); } TEST(TokenizeTest, RegexWithNestedNonCapturingGroup) { std::vector expected_tokens = { Token(TokenType::kRegex, 0, "f(?:oo)"), Token(TokenType::kEnd, 9, absl::string_view()), }; RunTokenizeTest("(f(?:oo))", expected_tokens); } TEST(TokenizeTest, RegexWithAssertion) { std::vector expected_tokens = { Token(TokenType::kRegex, 0, "f(? expected_tokens = { Token(TokenType::kRegex, 0, "f\\(oo"), Token(TokenType::kEnd, 7, absl::string_view()), }; RunTokenizeTest("(f\\(oo)", expected_tokens); } TEST(TokenizeTest, RegexWithTrailingEscapedChar) { RunTokenizeTest("(foo\\", absl::InvalidArgumentError("Trailing escape character")); } TEST(TokenizeTest, RegexWithEscapedInvalidChar) { // Use a single byte invalid character since the escape only applies to the // next byte character. 
RunTokenizeTest("(\\\xff)", absl::InvalidArgumentError("Invalid character")); } TEST(TokenizeTest, RegexWithLeadingQuestion) { RunTokenizeTest("(?foo)", absl::InvalidArgumentError("Regex cannot start with '?'")); } TEST(TokenizeTest, RegexInPath) { std::vector expected_tokens = { Token(TokenType::kChar, 0, "/"), Token(TokenType::kChar, 1, "f"), Token(TokenType::kChar, 2, "o"), Token(TokenType::kChar, 3, "o"), Token(TokenType::kChar, 4, "/"), Token(TokenType::kRegex, 5, ".*"), Token(TokenType::kChar, 9, "/"), Token(TokenType::kChar, 10, "b"), Token(TokenType::kChar, 11, "a"), Token(TokenType::kChar, 12, "r"), Token(TokenType::kEnd, 13, absl::string_view()), }; RunTokenizeTest("/foo/(.*)/bar", expected_tokens); } TEST(TokenizeTest, ModifierStar) { std::vector expected_tokens = { Token(TokenType::kChar, 0, "/"), Token(TokenType::kOpen, 1, "{"), Token(TokenType::kChar, 2, "f"), Token(TokenType::kChar, 3, "o"), Token(TokenType::kChar, 4, "o"), Token(TokenType::kClose, 5, "}"), Token(TokenType::kModifier, 6, "*"), Token(TokenType::kEnd, 7, absl::string_view()), }; RunTokenizeTest("/{foo}*", expected_tokens); } TEST(TokenizeTest, ModifierPlus) { std::vector expected_tokens = { Token(TokenType::kChar, 0, "/"), Token(TokenType::kOpen, 1, "{"), Token(TokenType::kChar, 2, "f"), Token(TokenType::kChar, 3, "o"), Token(TokenType::kChar, 4, "o"), Token(TokenType::kClose, 5, "}"), Token(TokenType::kModifier, 6, "+"), Token(TokenType::kEnd, 7, absl::string_view()), }; RunTokenizeTest("/{foo}+", expected_tokens); } TEST(TokenizeTest, ModifierQuestion) { std::vector expected_tokens = { Token(TokenType::kChar, 0, "/"), Token(TokenType::kOpen, 1, "{"), Token(TokenType::kChar, 2, "f"), Token(TokenType::kChar, 3, "o"), Token(TokenType::kChar, 4, "o"), Token(TokenType::kClose, 5, "}"), Token(TokenType::kModifier, 6, "?"), Token(TokenType::kEnd, 7, absl::string_view()), }; RunTokenizeTest("/{foo}?", expected_tokens); } TEST(TokenizeTest, Everything) { std::vector expected_tokens = { Token(TokenType::kChar, 0, "/"), Token(TokenType::kEscapedChar, 1, "f"), Token(TokenType::kChar, 3, "o"), Token(TokenType::kChar, 4, "o"), Token(TokenType::kChar, 5, "/"), Token(TokenType::kRegex, 6, "a(?.*)"), Token(TokenType::kOpen, 14, "{"), Token(TokenType::kChar, 15, "/"), Token(TokenType::kName, 16, "bar"), Token(TokenType::kClose, 20, "}"), Token(TokenType::kModifier, 21, "*"), Token(TokenType::kEnd, 22, absl::string_view()), }; RunTokenizeTest("/\\foo/(a(?.*)){/:bar}*", expected_tokens); } } // namespace liburlpattern