summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJoel Rosdahl <joel@rosdahl.net>2022-10-05 21:34:17 +0200
committerJoel Rosdahl <joel@rosdahl.net>2022-10-06 18:31:47 +0200
commite505f1a57a784a4c58934b0e1dea3facfd326f3d (patch)
tree59634a9b2f6bc11e167baab58390965217b91459
parenta6072098415e3ccf8a2ac6542db35d1f72f8d08b (diff)
downloadccache-e505f1a57a784a4c58934b0e1dea3facfd326f3d.tar.gz
chore: Clean up code slightly
-rw-r--r--src/Depfile.cpp74
-rw-r--r--src/Depfile.hpp2
-rw-r--r--src/util/file.cpp11
-rw-r--r--test/suites/depend.bash8
-rw-r--r--unittest/test_Depfile.cpp94
5 files changed, 84 insertions, 105 deletions
diff --git a/src/Depfile.cpp b/src/Depfile.cpp
index c5accfe1..2d721a6c 100644
--- a/src/Depfile.cpp
+++ b/src/Depfile.cpp
@@ -146,30 +146,30 @@ tokenize(std::string_view file_content)
{
// A dependency file uses Makefile syntax. This is not perfect parser but
// should be enough for parsing a regular dependency file.
- // enhancement:
- // - space between target and colon
- // - no space between colon and first pre-requisite
- // the later is pretty complex because of the windows paths which are
+ //
+ // Note that this is pretty complex because of Windows paths that can be
// identical to a target-colon-prerequisite without spaces (e.g. cat:/meow vs.
- // c:/meow) here are the tests on windows gnu make 4.3 how it handles this:
- // + cat:/meow -> sees "cat" and "/meow"
- // + cat:\meow -> sees "cat" and "\meow"
- // + cat:\ meow -> sees "cat" and " meow"
- // + cat:c:/meow -> sees "cat" and "c:/meow"
- // + cat:c:\meow -> sees "cat" and "c:\meow"
- // + cat:c: -> target pattern contains no '%'. Stop.
- // + cat:c:\ -> target pattern contains no '%'. Stop.
- // + cat:c:/ -> sees "cat" and "c:/"
- // + cat:c:meow -> target pattern contains no '%'. Stop.
- // + c:c:/meow -> sees "c" and "c:/meow"
- // + c:c:\meow -> sees "c" and "c:\meow"
- // + c:z:\meow -> sees "c" and "z:\meow"
- // + c:cd:\meow -> target pattern contains no '%'. Stop.
-
- // the logic for a windows path is:
- // - if there is a colon, if the previous token is 1 char long
- // and that the following char is a slash (fw or bw), then it is
- // a windows path
+ // c:/meow).
+ //
+ // Here are tests on Windows on how GNU Make 4.3 handles different scenarios:
+ //
+ // cat:/meow -> sees "cat" and "/meow"
+ // cat:\meow -> sees "cat" and "\meow"
+ // cat:\ meow -> sees "cat" and " meow"
+ // cat:c:/meow -> sees "cat" and "c:/meow"
+ // cat:c:\meow -> sees "cat" and "c:\meow"
+ // cat:c: -> target pattern contains no '%'. Stop.
+ // cat:c:\ -> target pattern contains no '%'. Stop.
+ // cat:c:/ -> sees "cat" and "c:/"
+ // cat:c:meow -> target pattern contains no '%'. Stop.
+ // c:c:/meow -> sees "c" and "c:/meow"
+ // c:c:\meow -> sees "c" and "c:\meow"
+ // c:z:\meow -> sees "c" and "z:\meow"
+ // c:cd:\meow -> target pattern contains no '%'. Stop.
+ //
+ // Thus, if there is a colon and the previous token is one character long and
+ // the following character is a slash (forward or backward), then it is
+ // interpreted as a Windows path.
std::vector<std::string> result;
const size_t length = file_content.size();
@@ -179,34 +179,32 @@ tokenize(std::string_view file_content)
while (p < length) {
char c = file_content[p];
- if (c == ':') {
- if (p + 1 < length && !is_blank(token) && token.length() == 1) {
- const char next = file_content[p + 1];
- if (next == '/' || next == '\\') {
- // only in this case, this is not a separator and colon is
- // added to token
- token.push_back(c);
- ++p;
- continue;
- }
+ if (c == ':' && p + 1 < length && !is_blank(token) && token.length() == 1) {
+ const char next = file_content[p + 1];
+ if (next == '/' || next == '\\') {
+ // It's a Windows path, so the colon is not a separator and instead
+ // added to the token.
+ token.push_back(c);
+ ++p;
+ continue;
}
}
+
// Each token is separated by whitespace or a colon.
if (isspace(c) || c == ':') {
- // chomp all spaces before next char
+ // Chomp all spaces before next character.
while (p < length && isspace(file_content[p])) {
++p;
}
if (!is_blank(token)) {
- // if there were spaces between a token and the : sign, the :
- // must be added to the same token to make sure it is seen as
- // a target and not as a dependency (ccache requirement)
+ // If there were spaces between a token and the colon, add the colon the
+ // token to make sure it is seen as a target and not as a dependency.
if (p < length) {
const char next = file_content[p];
if (next == ':') {
token.push_back(next);
++p;
- // chomp all spaces before next char
+ // Chomp all spaces before next character.
while (p < length && isspace(file_content[p])) {
++p;
}
diff --git a/src/Depfile.hpp b/src/Depfile.hpp
index d45bf539..af73ff8b 100644
--- a/src/Depfile.hpp
+++ b/src/Depfile.hpp
@@ -37,6 +37,8 @@ std::optional<std::string> rewrite_source_paths(const Context& ctx,
void make_paths_relative_in_output_dep(const Context& ctx);
+// Tokenize `file_content` into a list of files, where the first token is the
+// target and ends with a colon.
std::vector<std::string> tokenize(std::string_view file_content);
} // namespace Depfile
diff --git a/src/util/file.cpp b/src/util/file.cpp
index 964d6846..5ddc05bb 100644
--- a/src/util/file.cpp
+++ b/src/util/file.cpp
@@ -165,18 +165,14 @@ read_file(const std::string& path, size_t size_hint)
#ifdef _WIN32
if constexpr (std::is_same<T, std::string>::value) {
// Convert to UTF-8 if the content starts with a UTF-16 little-endian BOM.
- //
- // Note that this code assumes a little-endian machine, which is why it's
- // #ifdef-ed to only run on Windows (which is always little-endian) where
- // it's actually needed.
if (has_utf16_le_bom(result)) {
result.erase(0, 2); // Remove BOM.
- if (result.empty())
+ if (result.empty()) {
return result;
+ }
std::wstring result_as_u16((result.size() / 2) + 1, '\0');
result_as_u16 = reinterpret_cast<const wchar_t*>(result.c_str());
-
const int size = WideCharToMultiByte(CP_UTF8,
WC_ERR_INVALID_CHARS,
result_as_u16.c_str(),
@@ -185,11 +181,12 @@ read_file(const std::string& path, size_t size_hint)
0,
nullptr,
nullptr);
- if (size <= 0)
+ if (size <= 0) {
return nonstd::make_unexpected(
FMT("Failed to convert {} from UTF-16LE to UTF-8: {}",
path,
Win32Util::error_message(GetLastError())));
+ }
result = std::string(size, '\0');
WideCharToMultiByte(CP_UTF8,
diff --git a/test/suites/depend.bash b/test/suites/depend.bash
index 99c090c4..88bf786d 100644
--- a/test/suites/depend.bash
+++ b/test/suites/depend.bash
@@ -90,8 +90,7 @@ EOF
generate_reference_compiler_output() {
local filename
- if [[ $# -gt 0 ]]
- then
+ if [[ $# -gt 0 ]]; then
filename=$1
else
filename=test.c
@@ -204,7 +203,7 @@ EOF
# dir2 has a change in header which affects object file
# dir3 has a change in header which does not affect object file
# dir4 has an additional include header which should change the dependency file
- # dir5 has no changes, only a different base dir
+ # dir5 has no changes, only a different base directory
TEST "Different sets of headers for the same source code"
set_up_different_sets_of_headers_test
@@ -329,7 +328,8 @@ EOF
generate_reference_compiler_output `pwd`/test.c
CCACHE_DEPEND=1 CCACHE_BASEDIR=$BASEDIR5 $CCACHE_COMPILE $DEPFLAGS -c `pwd`/test.c
expect_equal_object_files reference_test.o test.o
- # from the ccache doc: One known issue is that absolute paths are not reproduced in dependency files
+ # From the manual: "One known issue is that absolute paths are not
+ # reproduced in dependency files":
# expect_equal_content reference_test.d test.d
expect_equal_content test.d $BASEDIR1/test.d
expect_stat direct_cache_hit 7
diff --git a/unittest/test_Depfile.cpp b/unittest/test_Depfile.cpp
index e8ae88df..7f636e01 100644
--- a/unittest/test_Depfile.cpp
+++ b/unittest/test_Depfile.cpp
@@ -78,13 +78,13 @@ TEST_CASE("Depfile::rewrite_source_paths")
TEST_CASE("Depfile::tokenize")
{
- SUBCASE("Parse empty depfile")
+ SUBCASE("Empty")
{
std::vector<std::string> result = Depfile::tokenize("");
CHECK(result.size() == 0);
}
- SUBCASE("Parse simple depfile")
+ SUBCASE("Simple")
{
std::vector<std::string> result =
Depfile::tokenize("cat.o: meow meow purr");
@@ -95,7 +95,7 @@ TEST_CASE("Depfile::tokenize")
CHECK(result[3] == "purr");
}
- SUBCASE("Parse depfile with a dollar sign followed by a dollar sign")
+ SUBCASE("Dollar sign followed by a dollar sign")
{
std::vector<std::string> result = Depfile::tokenize("cat.o: meow$$");
REQUIRE(result.size() == 2);
@@ -103,7 +103,7 @@ TEST_CASE("Depfile::tokenize")
CHECK(result[1] == "meow$");
}
- SUBCASE("Parse depfile with a dollar sign followed by an alphabet")
+ SUBCASE("Dollar sign followed by an alphabet")
{
std::vector<std::string> result = Depfile::tokenize("cat.o: meow$w");
REQUIRE(result.size() == 2);
@@ -111,7 +111,7 @@ TEST_CASE("Depfile::tokenize")
CHECK(result[1] == "meow$w");
}
- SUBCASE("Parse depfile with a backslash followed by a number sign or a colon")
+ SUBCASE("Backslash followed by a number sign or a colon")
{
std::vector<std::string> result =
Depfile::tokenize("cat.o: meow\\# meow\\:");
@@ -121,7 +121,7 @@ TEST_CASE("Depfile::tokenize")
CHECK(result[2] == "meow:");
}
- SUBCASE("Parse depfile with a backslash followed by an alphabet")
+ SUBCASE("Backslash followed by an alphabet")
{
std::vector<std::string> result =
Depfile::tokenize("cat.o: meow\\w purr\\r");
@@ -131,7 +131,7 @@ TEST_CASE("Depfile::tokenize")
CHECK(result[2] == "purr\\r");
}
- SUBCASE("Parse depfile with a backslash followed by a space or a tab")
+ SUBCASE("Backslash followed by a space or a tab")
{
std::vector<std::string> result =
Depfile::tokenize("cat.o: meow\\ meow purr\\\tpurr");
@@ -141,7 +141,7 @@ TEST_CASE("Depfile::tokenize")
CHECK(result[2] == "purr\tpurr");
}
- SUBCASE("Parse depfile with backslashes followed by a space or a tab")
+ SUBCASE("Backslashes followed by a space or a tab")
{
std::vector<std::string> result =
Depfile::tokenize("cat.o: meow\\\\\\ meow purr\\\\ purr");
@@ -152,7 +152,7 @@ TEST_CASE("Depfile::tokenize")
CHECK(result[3] == "purr");
}
- SUBCASE("Parse depfile with a backslash newline")
+ SUBCASE("Backslash newline")
{
std::vector<std::string> result =
Depfile::tokenize("cat.o: meow\\\nmeow\\\n purr\\\n\tpurr");
@@ -164,12 +164,11 @@ TEST_CASE("Depfile::tokenize")
CHECK(result[4] == "purr");
}
- SUBCASE("Parse depfile with a new line")
+ SUBCASE("Newlines")
{
- // This is invalid depfile because it has multiple lines without backslash,
- // which is not valid in Makefile syntax.
- // However, Depfile::tokenize is parsing it to each token, which is
- // expected.
+ // This is an invalid dependency file since it has multiple lines without
+ // backslash, which is not valid Makefile syntax. However, the
+ // Depfile::tokenize's simplistic parser accepts them.
std::vector<std::string> result =
Depfile::tokenize("cat.o: meow\nmeow\npurr\n");
REQUIRE(result.size() == 4);
@@ -179,7 +178,7 @@ TEST_CASE("Depfile::tokenize")
CHECK(result[3] == "purr");
}
- SUBCASE("Parse depfile with a trailing dollar sign")
+ SUBCASE("Trailing dollar sign")
{
std::vector<std::string> result = Depfile::tokenize("cat.o: meow$");
REQUIRE(result.size() == 2);
@@ -187,7 +186,7 @@ TEST_CASE("Depfile::tokenize")
CHECK(result[1] == "meow$");
}
- SUBCASE("Parse depfile with a trailing backslash")
+ SUBCASE("Trailing backslash")
{
std::vector<std::string> result = Depfile::tokenize("cat.o: meow\\");
REQUIRE(result.size() == 2);
@@ -195,7 +194,7 @@ TEST_CASE("Depfile::tokenize")
CHECK(result[1] == "meow\\");
}
- SUBCASE("Parse depfile with a trailing backslash newline")
+ SUBCASE("Trailing backslash newline")
{
std::vector<std::string> result = Depfile::tokenize("cat.o: meow\\\n");
REQUIRE(result.size() == 2);
@@ -203,23 +202,15 @@ TEST_CASE("Depfile::tokenize")
CHECK(result[1] == "meow");
}
- SUBCASE("Parse depfile with a one space before colon")
+ SUBCASE("Space before the colon but not after")
{
- std::vector<std::string> result = Depfile::tokenize("cat.o : meow");
+ std::vector<std::string> result = Depfile::tokenize("cat.o :meow");
REQUIRE(result.size() == 2);
CHECK(result[0] == "cat.o:");
CHECK(result[1] == "meow");
}
- SUBCASE("Parse depfile with a two spaces before colon")
- {
- std::vector<std::string> result = Depfile::tokenize("cat.o : meow");
- REQUIRE(result.size() == 2);
- CHECK(result[0] == "cat.o:");
- CHECK(result[1] == "meow");
- }
-
- SUBCASE("Parse depfile with a plenty of spaces before colon")
+ SUBCASE("Space around the colon")
{
std::vector<std::string> result = Depfile::tokenize("cat.o : meow");
REQUIRE(result.size() == 2);
@@ -227,7 +218,7 @@ TEST_CASE("Depfile::tokenize")
CHECK(result[1] == "meow");
}
- SUBCASE("Parse depfile with no space between colon and dependency")
+ SUBCASE("No space between colon and dependency")
{
std::vector<std::string> result = Depfile::tokenize("cat.o:meow");
REQUIRE(result.size() == 2);
@@ -235,9 +226,7 @@ TEST_CASE("Depfile::tokenize")
CHECK(result[1] == "meow");
}
- SUBCASE(
- "Parse depfile with windows formatted filename (with backslashes in "
- "target)")
+ SUBCASE("Windows filename (with backslashes in target)")
{
std::vector<std::string> result = Depfile::tokenize("e:\\cat.o: meow");
REQUIRE(result.size() == 2);
@@ -245,9 +234,7 @@ TEST_CASE("Depfile::tokenize")
CHECK(result[1] == "meow");
}
- SUBCASE(
- "Parse depfile with windows formatted filename (with backslashes in "
- "prerequisite)")
+ SUBCASE("Windows filename (with backslashes in prerequisite)")
{
std::vector<std::string> result =
Depfile::tokenize("cat.o: c:\\meow\\purr");
@@ -256,8 +243,7 @@ TEST_CASE("Depfile::tokenize")
CHECK(result[1] == "c:\\meow\\purr");
}
- SUBCASE(
- "Parse depfile with windows formatted filename (with slashes in target)")
+ SUBCASE("Windows filename (with slashes in target)")
{
std::vector<std::string> result = Depfile::tokenize("e:/cat.o: meow");
REQUIRE(result.size() == 2);
@@ -265,9 +251,7 @@ TEST_CASE("Depfile::tokenize")
CHECK(result[1] == "meow");
}
- SUBCASE(
- "Parse depfile with windows formatted filename (with slashes in "
- "prerequisite)")
+ SUBCASE("Windows filename (with slashes in prerequisite)")
{
std::vector<std::string> result = Depfile::tokenize("cat.o: c:/meow/purr");
REQUIRE(result.size() == 2);
@@ -275,9 +259,7 @@ TEST_CASE("Depfile::tokenize")
CHECK(result[1] == "c:/meow/purr");
}
- SUBCASE(
- "Parse depfile with windows formatted filename (with slashes and trailing "
- "colon)")
+ SUBCASE("Windows filename (with slashes and trailing colon)")
{
std::vector<std::string> result = Depfile::tokenize("cat.o: c: /meow/purr");
REQUIRE(result.size() == 3);
@@ -286,7 +268,7 @@ TEST_CASE("Depfile::tokenize")
CHECK(result[2] == "/meow/purr");
}
- SUBCASE("Parse depfile with windows formatted filename (subtest1)")
+ SUBCASE("Windows filename: cat:/meow")
{
std::vector<std::string> result = Depfile::tokenize("cat:/meow");
REQUIRE(result.size() == 2);
@@ -294,7 +276,7 @@ TEST_CASE("Depfile::tokenize")
CHECK(result[1] == "/meow");
}
- SUBCASE("Parse depfile with windows formatted filename (subtest2)")
+ SUBCASE("Windows filename: cat:\\meow")
{
std::vector<std::string> result = Depfile::tokenize("cat:\\meow");
REQUIRE(result.size() == 2);
@@ -302,7 +284,7 @@ TEST_CASE("Depfile::tokenize")
CHECK(result[1] == "\\meow");
}
- SUBCASE("Parse depfile with windows formatted filename (subtest3)")
+ SUBCASE("Windows filename: cat:\\ meow")
{
std::vector<std::string> result = Depfile::tokenize("cat:\\ meow");
REQUIRE(result.size() == 2);
@@ -310,7 +292,7 @@ TEST_CASE("Depfile::tokenize")
CHECK(result[1] == " meow");
}
- SUBCASE("Parse depfile with windows formatted filename (subtest4)")
+ SUBCASE("Windows filename: cat:c:/meow")
{
std::vector<std::string> result = Depfile::tokenize("cat:c:/meow");
REQUIRE(result.size() == 2);
@@ -318,7 +300,7 @@ TEST_CASE("Depfile::tokenize")
CHECK(result[1] == "c:/meow");
}
- SUBCASE("Parse depfile with windows formatted filename (subtest5)")
+ SUBCASE("Windows filename: cat:c:\\meow")
{
std::vector<std::string> result = Depfile::tokenize("cat:c:\\meow");
REQUIRE(result.size() == 2);
@@ -326,7 +308,7 @@ TEST_CASE("Depfile::tokenize")
CHECK(result[1] == "c:\\meow");
}
- SUBCASE("Parse depfile with windows formatted filename (subtest6)")
+ SUBCASE("Windows filename: cat:c:")
{
std::vector<std::string> result = Depfile::tokenize("cat:c:");
REQUIRE(result.size() == 2);
@@ -334,7 +316,7 @@ TEST_CASE("Depfile::tokenize")
CHECK(result[1] == "c:");
}
- SUBCASE("Parse depfile with windows formatted filename (subtest7)")
+ SUBCASE("Windows filename: cat:c:\\")
{
std::vector<std::string> result = Depfile::tokenize("cat:c:\\");
REQUIRE(result.size() == 2);
@@ -342,7 +324,7 @@ TEST_CASE("Depfile::tokenize")
CHECK(result[1] == "c:\\");
}
- SUBCASE("Parse depfile with windows formatted filename (subtest8)")
+ SUBCASE("Windows filename: cat:c:/")
{
std::vector<std::string> result = Depfile::tokenize("cat:c:/");
REQUIRE(result.size() == 2);
@@ -350,7 +332,7 @@ TEST_CASE("Depfile::tokenize")
CHECK(result[1] == "c:/");
}
- SUBCASE("Parse depfile with windows formatted filename (subtest9)")
+ SUBCASE("Windows filename: cat:c:meow")
{
std::vector<std::string> result = Depfile::tokenize("cat:c:meow");
REQUIRE(result.size() == 3);
@@ -359,7 +341,7 @@ TEST_CASE("Depfile::tokenize")
CHECK(result[2] == "meow");
}
- SUBCASE("Parse depfile with windows formatted filename (subtest10)")
+ SUBCASE("Windows filename: c:c:/meow")
{
std::vector<std::string> result = Depfile::tokenize("c:c:/meow");
REQUIRE(result.size() == 2);
@@ -367,7 +349,7 @@ TEST_CASE("Depfile::tokenize")
CHECK(result[1] == "c:/meow");
}
- SUBCASE("Parse depfile with windows formatted filename (subtest11)")
+ SUBCASE("Windows filename: c:c:\\meow")
{
std::vector<std::string> result = Depfile::tokenize("c:c:\\meow");
REQUIRE(result.size() == 2);
@@ -375,7 +357,7 @@ TEST_CASE("Depfile::tokenize")
CHECK(result[1] == "c:\\meow");
}
- SUBCASE("Parse depfile with windows formatted filename (subtest12)")
+ SUBCASE("Windows filename: c:z:\\meow")
{
std::vector<std::string> result = Depfile::tokenize("c:z:\\meow");
REQUIRE(result.size() == 2);
@@ -383,7 +365,7 @@ TEST_CASE("Depfile::tokenize")
CHECK(result[1] == "z:\\meow");
}
- SUBCASE("Parse depfile with windows formatted filename (subtest13)")
+ SUBCASE("Windows filename: c:cd:\\meow")
{
std::vector<std::string> result = Depfile::tokenize("c:cd:\\meow");
REQUIRE(result.size() == 3);