From ef49ade6a6462bb4a9f673b20b28d79da9cea129 Mon Sep 17 00:00:00 2001 From: ph10 Date: Fri, 4 Jan 2008 19:44:00 +0000 Subject: Craig's patch for the double-counting bug in global replace. git-svn-id: svn://vcs.exim.org/pcre/code/trunk@297 2f5784b3-3f2a-0410-8824-cb99058d5e15 --- ChangeLog | 6 ++++-- pcrecpp.cc | 4 ++-- pcrecpp_unittest.cc | 55 +++++++++++++++++++++++++++++++++++------------------ 3 files changed, 42 insertions(+), 23 deletions(-) diff --git a/ChangeLog b/ChangeLog index 6f516c7..50a0d2a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,7 +1,7 @@ ChangeLog for PCRE ------------------ -Version 7.5 31-Dec-07 +Version 7.5 04-Jan-08 --------------------- 1. Applied a patch from Craig: "This patch makes it possible to 'ignore' @@ -99,7 +99,9 @@ Version 7.5 31-Dec-07 20. In pcrecpp.cc, the variable 'count' was incremented twice in RE::GlobalReplace(). As a result, the number of replacements returned was - double what it should be. I have removed one of the increments. + double what it should be. I removed one of the increments, but Craig sent a + later patch that removed the other one (the right fix) and added unit tests + that check the return values (which was not done before). 21. 
Several CMake things: diff --git a/pcrecpp.cc b/pcrecpp.cc index 498e0b3..a318d19 100644 --- a/pcrecpp.cc +++ b/pcrecpp.cc @@ -369,7 +369,7 @@ int RE::GlobalReplace(const StringPiece& rewrite, int start = 0; int lastend = -1; - for (; start <= static_cast<int>(str->length()); count++) { + while (start <= static_cast<int>(str->length())) { int matches = TryMatch(*str, start, UNANCHORED, vec, kVecSize); if (matches <= 0) break; @@ -409,7 +409,7 @@ int RE::GlobalReplace(const StringPiece& rewrite, Rewrite(&out, rewrite, *str, vec, matches); start = matchend; lastend = matchend; - // count++; // Removed by PH 19-Dec-2007: duplicate count increment + count++; } } diff --git a/pcrecpp_unittest.cc b/pcrecpp_unittest.cc index 90351bf..4f49cb2 100644 --- a/pcrecpp_unittest.cc +++ b/pcrecpp_unittest.cc @@ -213,87 +213,103 @@ static void TestReplace() { const char *original; const char *single; const char *global; + int global_count; // the expected return value from ReplaceAll }; static const ReplaceTest tests[] = { { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)", "\\2\\1ay", "the quick brown fox jumps over the lazy dogs.", "ethay quick brown fox jumps over the lazy dogs.", - "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday." 
}, + "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.", + 9 }, { "\\w+", "\\0-NOSPAM", "paul.haahr@google.com", "paul-NOSPAM.haahr@google.com", - "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM" }, + "paul-NOSPAM.haahr-NOSPAM@google-NOSPAM.com-NOSPAM", + 4 }, { "^", "(START)", "foo", "(START)foo", - "(START)foo" }, + "(START)foo", + 1 }, { "^", "(START)", "", "(START)", - "(START)" }, + "(START)", + 1 }, { "$", "(END)", "", "(END)", - "(END)" }, + "(END)", + 1 }, { "b", "bb", "ababababab", "abbabababab", - "abbabbabbabbabb" }, + "abbabbabbabbabb", + 5 }, { "b", "bb", "bbbbbb", "bbbbbbb", - "bbbbbbbbbbbb" }, + "bbbbbbbbbbbb", + 6 }, { "b+", "bb", "bbbbbb", "bb", - "bb" }, + "bb", + 1 }, { "b*", "bb", "bbbbbb", "bb", - "bb" }, + "bb", + 1 }, { "b*", "bb", "aaaaa", "bbaaaaa", - "bbabbabbabbabbabb" }, + "bbabbabbabbabbabb", + 6 }, { "b*", "bb", "aa\naa\n", "bbaa\naa\n", - "bbabbabb\nbbabbabb\nbb" }, + "bbabbabb\nbbabbabb\nbb", + 7 }, { "b*", "bb", "aa\raa\r", "bbaa\raa\r", - "bbabbabb\rbbabbabb\rbb" }, + "bbabbabb\rbbabbabb\rbb", + 7 }, { "b*", "bb", "aa\r\naa\r\n", "bbaa\r\naa\r\n", - "bbabbabb\r\nbbabbabb\r\nbb" }, + "bbabbabb\r\nbbabbabb\r\nbb", + 7 }, #ifdef SUPPORT_UTF8 { "b*", "bb", "\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", // utf8 "bb\xE3\x83\x9B\xE3\x83\xBC\xE3\x83\xA0\xE3\x81\xB8", - "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb" }, + "bb\xE3\x83\x9B""bb""\xE3\x83\xBC""bb""\xE3\x83\xA0""bb""\xE3\x81\xB8""bb", + 5 }, { "b*", "bb", "\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", // utf8 "bb\xE3\x83\x9B\r\n\xE3\x83\xBC\r\xE3\x83\xA0\n\xE3\x81\xB8\r\n", ("bb\xE3\x83\x9B""bb\r\nbb""\xE3\x83\xBC""bb\rbb""\xE3\x83\xA0" - "bb\nbb""\xE3\x81\xB8""bb\r\nbb") }, + "bb\nbb""\xE3\x81\xB8""bb\r\nbb"), + 9 }, #endif - { "", NULL, NULL, NULL, NULL } + { "", NULL, NULL, NULL, NULL, 0 } }; #ifdef SUPPORT_UTF8 @@ -309,8 +325,9 @@ static void TestReplace() { CHECK(re.Replace(t->rewrite, &one)); CHECK_EQ(one, 
t->single); string all(t->original); - CHECK(re.GlobalReplace(t->rewrite, &all) > 0); + const int replace_count = re.GlobalReplace(t->rewrite, &all); CHECK_EQ(all, t->global); + CHECK_EQ(replace_count, t->global_count); } // One final test: test \r\n replacement when we're not in CRLF mode @@ -318,14 +335,14 @@ static void TestReplace() { RE re("b*", RE_Options(PCRE_NEWLINE_CR).set_utf8(support_utf8)); assert(re.error().empty()); string all("aa\r\naa\r\n"); - CHECK(re.GlobalReplace("bb", &all) > 0); + CHECK_EQ(re.GlobalReplace("bb", &all), 9); CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb")); } { RE re("b*", RE_Options(PCRE_NEWLINE_LF).set_utf8(support_utf8)); assert(re.error().empty()); string all("aa\r\naa\r\n"); - CHECK(re.GlobalReplace("bb", &all) > 0); + CHECK_EQ(re.GlobalReplace("bb", &all), 9); CHECK_EQ(all, string("bbabbabb\rbb\nbbabbabb\rbb\nbb")); } // TODO: test what happens when no PCRE_NEWLINE_* flag is set. -- cgit v1.2.1