diff options
author | Jan Niklas Hasse <jhasse@bixense.com> | 2019-06-18 11:15:46 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-06-18 11:15:46 +0200 |
commit | e6aeab857894226475489f55bba4ec6e01672a41 (patch) | |
tree | f90b0347e6019f5b9adf06c7e9136e073b88df8b | |
parent | 28a7d1491367de7b39c854d166114f76d272f04f (diff) | |
parent | fba5ce07367ce63ade61a560feed36ea9d315b0f (diff) | |
download | ninja-e6aeab857894226475489f55bba4ec6e01672a41.tar.gz |
Merge pull request #1342 from Lekensteyn/depfile-fixes
Follow GCC/Clang behavior wrt depfiles
-rw-r--r-- | src/depfile_parser.cc | 127 | ||||
-rw-r--r-- | src/depfile_parser.in.cc | 49 | ||||
-rw-r--r-- | src/depfile_parser_test.cc | 27 |
3 files changed, 157 insertions, 46 deletions
diff --git a/src/depfile_parser.cc b/src/depfile_parser.cc index 405289f..6faeac6 100644 --- a/src/depfile_parser.cc +++ b/src/depfile_parser.cc @@ -30,9 +30,15 @@ DepfileParser::DepfileParser(DepfileParserOptions options) // How do you end a line with a backslash? The netbsd Make docs suggest // reading the result of a shell command echoing a backslash! // -// Rather than implement all of above, we do a simpler thing here: -// Backslashes escape a set of characters (see "escapes" defined below), -// otherwise they are passed through verbatim. +// Rather than implement all of above, we follow what GCC/Clang produces: +// Backslashes escape a space or hash sign. +// When a space is preceded by 2N+1 backslashes, it represents N backslashes +// followed by space. +// When a space is preceded by 2N backslashes, it represents 2N backslashes at +// the end of a filename. +// A hash sign is escaped by a single backslash. All other backslashes remain +// unchanged. +// // If anyone actually has depfiles that rely on the more complicated // behavior we can adjust this. bool DepfileParser::Parse(string* content, string* err) { @@ -72,7 +78,7 @@ bool DepfileParser::Parse(string* content, string* err) { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 0, 0, 0, 0, 128, + 128, 128, 128, 128, 0, 128, 0, 128, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, @@ -111,7 +117,8 @@ bool DepfileParser::Parse(string* content, string* err) { if (yych <= '#') goto yy4; goto yy12; } else { - if (yych == '\\') goto yy13; + if (yych <= '?') goto yy4; + if (yych <= '\\') goto yy13; goto yy4; } } @@ -143,6 +150,7 @@ yy9: if (yybm[0+yych] & 128) { goto yy9; } +yy11: { // Got a span of plain text. 
int len = (int)(in - start); @@ -158,24 +166,22 @@ yy12: goto yy5; yy13: yych = *(yymarker = ++in); - if (yych <= '"') { - if (yych <= '\f') { + if (yych <= 0x1F) { + if (yych <= '\n') { if (yych <= 0x00) goto yy5; - if (yych == '\n') goto yy18; - goto yy16; + if (yych <= '\t') goto yy16; + goto yy17; } else { - if (yych <= '\r') goto yy20; - if (yych == ' ') goto yy22; + if (yych == '\r') goto yy19; goto yy16; } } else { - if (yych <= 'Z') { - if (yych <= '#') goto yy22; - if (yych == '*') goto yy22; - goto yy16; + if (yych <= '#') { + if (yych <= ' ') goto yy21; + if (yych <= '"') goto yy16; + goto yy23; } else { - if (yych <= ']') goto yy22; - if (yych == '|') goto yy22; + if (yych == '\\') goto yy25; goto yy16; } } @@ -188,30 +194,93 @@ yy14: } yy16: ++in; - { - // Let backslash before other characters through verbatim. - *out++ = '\\'; - *out++ = yych; - continue; - } -yy18: + goto yy11; +yy17: ++in; { // A line continuation ends the current file name. break; } -yy20: +yy19: yych = *++in; - if (yych == '\n') goto yy18; + if (yych == '\n') goto yy17; in = yymarker; goto yy5; -yy22: +yy21: ++in; { - // De-escape backslashed character. - *out++ = yych; + // 2N+1 backslashes plus space -> N backslashes plus space. + int len = (int)(in - start); + int n = len / 2 - 1; + if (out < start) + memset(out, '\\', n); + out += n; + *out++ = ' '; continue; } +yy23: + ++in; + { + // De-escape hash sign, but preserve other leading backslashes. 
+ int len = (int)(in - start); + if (len > 2 && out < start) + memset(out, '\\', len - 2); + out += len - 2; + *out++ = '#'; + continue; + } +yy25: + yych = *++in; + if (yych <= 0x1F) { + if (yych <= '\n') { + if (yych <= 0x00) goto yy11; + if (yych <= '\t') goto yy16; + goto yy11; + } else { + if (yych == '\r') goto yy11; + goto yy16; + } + } else { + if (yych <= '#') { + if (yych <= ' ') goto yy26; + if (yych <= '"') goto yy16; + goto yy23; + } else { + if (yych == '\\') goto yy28; + goto yy16; + } + } +yy26: + ++in; + { + // 2N backslashes plus space -> 2N backslashes, end of filename. + int len = (int)(in - start); + if (out < start) + memset(out, '\\', len - 1); + out += len - 1; + break; + } +yy28: + yych = *++in; + if (yych <= 0x1F) { + if (yych <= '\n') { + if (yych <= 0x00) goto yy11; + if (yych <= '\t') goto yy16; + goto yy11; + } else { + if (yych == '\r') goto yy11; + goto yy16; + } + } else { + if (yych <= '#') { + if (yych <= ' ') goto yy21; + if (yych <= '"') goto yy16; + goto yy23; + } else { + if (yych == '\\') goto yy25; + goto yy16; + } + } } } diff --git a/src/depfile_parser.in.cc b/src/depfile_parser.in.cc index f8c94b3..735a0c3 100644 --- a/src/depfile_parser.in.cc +++ b/src/depfile_parser.in.cc @@ -29,9 +29,15 @@ DepfileParser::DepfileParser(DepfileParserOptions options) // How do you end a line with a backslash? The netbsd Make docs suggest // reading the result of a shell command echoing a backslash! // -// Rather than implement all of above, we do a simpler thing here: -// Backslashes escape a set of characters (see "escapes" defined below), -// otherwise they are passed through verbatim. +// Rather than implement all of above, we follow what GCC/Clang produces: +// Backslashes escape a space or hash sign. +// When a space is preceded by 2N+1 backslashes, it represents N backslashes +// followed by space. +// When a space is preceded by 2N backslashes, it represents 2N backslashes at +// the end of a filename. 
+// A hash sign is escaped by a single backslash. All other backslashes remain +// unchanged. +// // If anyone actually has depfiles that rely on the more complicated // behavior we can adjust this. bool DepfileParser::Parse(string* content, string* err) { @@ -68,12 +74,33 @@ bool DepfileParser::Parse(string* content, string* err) { re2c:indent:string = " "; nul = "\000"; - escape = [ \\#*[|\]]; newline = '\r'?'\n'; - '\\' escape { - // De-escape backslashed character. - *out++ = yych; + '\\\\'* '\\ ' { + // 2N+1 backslashes plus space -> N backslashes plus space. + int len = (int)(in - start); + int n = len / 2 - 1; + if (out < start) + memset(out, '\\', n); + out += n; + *out++ = ' '; + continue; + } + '\\\\'+ ' ' { + // 2N backslashes plus space -> 2N backslashes, end of filename. + int len = (int)(in - start); + if (out < start) + memset(out, '\\', len - 1); + out += len - 1; + break; + } + '\\'+ '#' { + // De-escape hash sign, but preserve other leading backslashes. + int len = (int)(in - start); + if (len > 2 && out < start) + memset(out, '\\', len - 2); + out += len - 2; + *out++ = '#'; continue; } '$$' { @@ -81,13 +108,7 @@ bool DepfileParser::Parse(string* content, string* err) { *out++ = '$'; continue; } - '\\' [^\000\r\n] { - // Let backslash before other characters through verbatim. - *out++ = '\\'; - *out++ = yych; - continue; - } - [a-zA-Z0-9+,/_:.~()}{%@=!\x80-\xFF-]+ { + '\\'+ [^\000\r\n] | [a-zA-Z0-9+,/_:.~()}{%=@\x5B\x5D!\x80-\xFF-]+ { // Got a span of plain text. int len = (int)(in - start); // Need to shift it over if we're overwriting backslashes. 
diff --git a/src/depfile_parser_test.cc b/src/depfile_parser_test.cc index 52fe7cd..19224f3 100644 --- a/src/depfile_parser_test.cc +++ b/src/depfile_parser_test.cc @@ -101,15 +101,36 @@ TEST_F(DepfileParserTest, Spaces) { parser_.ins_[2].AsString()); } +TEST_F(DepfileParserTest, MultipleBackslashes) { + // Successive 2N+1 backslashes followed by space (' ') are replaced by N >= 0 + // backslashes and the space. A single backslash before hash sign is removed. + // Other backslashes remain untouched (including 2N backslashes followed by + // space). + string err; + EXPECT_TRUE(Parse( +"a\\ b\\#c.h: \\\\\\\\\\ \\\\\\\\ \\\\share\\info\\\\#1", + &err)); + ASSERT_EQ("", err); + EXPECT_EQ("a b#c.h", + parser_.out_.AsString()); + ASSERT_EQ(3u, parser_.ins_.size()); + EXPECT_EQ("\\\\ ", + parser_.ins_[0].AsString()); + EXPECT_EQ("\\\\\\\\", + parser_.ins_[1].AsString()); + EXPECT_EQ("\\\\share\\info\\#1", + parser_.ins_[2].AsString()); +} + TEST_F(DepfileParserTest, Escapes) { // Put backslashes before a variety of characters, see which ones make // it through. string err; EXPECT_TRUE(Parse( -"\\!\\@\\#$$\\%\\^\\&\\\\:", +"\\!\\@\\#$$\\%\\^\\&\\[\\]\\\\:", &err)); ASSERT_EQ("", err); - EXPECT_EQ("\\!\\@#$\\%\\^\\&\\", + EXPECT_EQ("\\!\\@#$\\%\\^\\&\\[\\]\\\\", parser_.out_.AsString()); ASSERT_EQ(0u, parser_.ins_.size()); } @@ -123,7 +144,7 @@ TEST_F(DepfileParserTest, SpecialChars) { " en@quot.header~ t+t-x!=1 \\\n" " openldap/slapd.d/cn=config/cn=schema/cn={0}core.ldif\\\n" " Fu\303\244ball\\\n" -" a\\[1\\]b@2%c", +" a[1]b@2%c", &err)); ASSERT_EQ("", err); EXPECT_EQ("C:/Program Files (x86)/Microsoft crtdefs.h", |