summary | refs | log | tree | commit | diff
path: root/pcrecpp.cc
diff options
context:
space:
mode:
author: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2010-01-02 16:30:46 +0000
committer: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2010-01-02 16:30:46 +0000
commit: 24654bb77f039f6e0ef4abd80c48bd49fe771557 (patch)
tree: abb93fa4cc7f71a340edb5b9765778856f952c89 /pcrecpp.cc
parent: 1d5dad0b7e2087e3667cbb402e74636970cea259 (diff)
download: pcre-24654bb77f039f6e0ef4abd80c48bd49fe771557.tar.gz
Fix C++ wrapper GlobalReplace function for empty matches.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@474 2f5784b3-3f2a-0410-8824-cb99058d5e15
Diffstat (limited to 'pcrecpp.cc')
-rw-r--r-- pcrecpp.cc 107
1 files changed, 65 insertions, 42 deletions
diff --git a/pcrecpp.cc b/pcrecpp.cc
index e8e8a12..9b4ac17 100644
--- a/pcrecpp.cc
+++ b/pcrecpp.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2005, Google Inc.
+// Copyright (c) 2010, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
@@ -331,7 +331,7 @@ bool RE::FindAndConsume(StringPiece* input,
bool RE::Replace(const StringPiece& rewrite,
string *str) const {
int vec[kVecSize];
- int matches = TryMatch(*str, 0, UNANCHORED, vec, kVecSize);
+ int matches = TryMatch(*str, 0, UNANCHORED, true, vec, kVecSize);
if (matches == 0)
return false;
@@ -384,49 +384,64 @@ int RE::GlobalReplace(const StringPiece& rewrite,
string out;
int start = 0;
int lastend = -1;
+ bool last_match_was_empty_string = false;
while (start <= static_cast<int>(str->length())) {
- int matches = TryMatch(*str, start, UNANCHORED, vec, kVecSize);
- if (matches <= 0)
- break;
- int matchstart = vec[0], matchend = vec[1];
- assert(matchstart >= start);
- assert(matchend >= matchstart);
- if (matchstart == matchend && matchstart == lastend) {
- // advance one character if we matched an empty string at the same
- // place as the last match occurred
- matchend = start + 1;
- // If the current char is CR and we're in CRLF mode, skip LF too.
- // Note it's better to call pcre_fullinfo() than to examine
- // all_options(), since options_ could have changed bewteen
- // compile-time and now, but this is simpler and safe enough.
- // Modified by PH to add ANY and ANYCRLF.
- if (start+1 < static_cast<int>(str->length()) &&
- (*str)[start] == '\r' && (*str)[start+1] == '\n' &&
- (NewlineMode(options_.all_options()) == PCRE_NEWLINE_CRLF ||
- NewlineMode(options_.all_options()) == PCRE_NEWLINE_ANY ||
- NewlineMode(options_.all_options()) == PCRE_NEWLINE_ANYCRLF)
- ) {
- matchend++;
- }
- // We also need to advance more than one char if we're in utf8 mode.
-#ifdef SUPPORT_UTF8
- if (options_.utf8()) {
- while (matchend < static_cast<int>(str->length()) &&
- ((*str)[matchend] & 0xc0) == 0x80)
+ // If the previous match was for the empty string, we shouldn't
+ // just match again: we'll match in the same way and get an
+ // infinite loop. Instead, we do the match in a special way:
+ // anchored -- to force another try at the same position --
+ // and with a flag saying that this time, ignore empty matches.
+ // If this special match returns, that means there's a non-empty
+ // match at this position as well, and we can continue. If not,
+ // we do what perl does, and just advance by one.
+ // Notice that perl prints '@@@' for this;
+ // perl -le '$_ = "aa"; s/b*|aa/@/g; print'
+ int matches;
+ if (last_match_was_empty_string) {
+ matches = TryMatch(*str, start, ANCHOR_START, false, vec, kVecSize);
+ if (matches <= 0) {
+ int matchend = start + 1; // advance one character.
+ // If the current char is CR and we're in CRLF mode, skip LF too.
+ // Note it's better to call pcre_fullinfo() than to examine
+ // all_options(), since options_ could have changed between
+ // compile-time and now, but this is simpler and safe enough.
+ // Modified by PH to add ANY and ANYCRLF.
+ if (matchend < static_cast<int>(str->length()) &&
+ (*str)[start] == '\r' && (*str)[matchend] == '\n' &&
+ (NewlineMode(options_.all_options()) == PCRE_NEWLINE_CRLF ||
+ NewlineMode(options_.all_options()) == PCRE_NEWLINE_ANY ||
+ NewlineMode(options_.all_options()) == PCRE_NEWLINE_ANYCRLF)) {
matchend++;
- }
+ }
+ // We also need to advance more than one char if we're in utf8 mode.
+#ifdef SUPPORT_UTF8
+ if (options_.utf8()) {
+ while (matchend < static_cast<int>(str->length()) &&
+ ((*str)[matchend] & 0xc0) == 0x80)
+ matchend++;
+ }
#endif
- if (matchend <= static_cast<int>(str->length()))
- out.append(*str, start, matchend - start);
- start = matchend;
+ if (start < static_cast<int>(str->length()))
+ out.append(*str, start, matchend - start);
+ start = matchend;
+ last_match_was_empty_string = false;
+ continue;
+ }
} else {
- out.append(*str, start, matchstart - start);
- Rewrite(&out, rewrite, *str, vec, matches);
- start = matchend;
- lastend = matchend;
- count++;
+ matches = TryMatch(*str, start, UNANCHORED, true, vec, kVecSize);
+ if (matches <= 0)
+ break;
}
+ int matchstart = vec[0], matchend = vec[1];
+ assert(matchstart >= start);
+ assert(matchend >= matchstart);
+ out.append(*str, start, matchstart - start);
+ Rewrite(&out, rewrite, *str, vec, matches);
+ start = matchend;
+ lastend = matchend;
+ count++;
+ last_match_was_empty_string = (matchstart == matchend);
}
if (count == 0)
@@ -442,7 +457,7 @@ bool RE::Extract(const StringPiece& rewrite,
const StringPiece& text,
string *out) const {
int vec[kVecSize];
- int matches = TryMatch(text, 0, UNANCHORED, vec, kVecSize);
+ int matches = TryMatch(text, 0, UNANCHORED, true, vec, kVecSize);
if (matches == 0)
return false;
out->erase();
@@ -488,6 +503,7 @@ bool RE::Extract(const StringPiece& rewrite,
int RE::TryMatch(const StringPiece& text,
int startpos,
Anchor anchor,
+ bool empty_ok,
int *vec,
int vecsize) const {
pcre* re = (anchor == ANCHOR_BOTH) ? re_full_ : re_partial_;
@@ -505,12 +521,19 @@ int RE::TryMatch(const StringPiece& text,
extra.flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
extra.match_limit_recursion = options_.match_limit_recursion();
}
+
+ int options = 0;
+ if (anchor != UNANCHORED)
+ options |= PCRE_ANCHORED;
+ if (!empty_ok)
+ options |= PCRE_NOTEMPTY;
+
int rc = pcre_exec(re, // The regular expression object
&extra,
(text.data() == NULL) ? "" : text.data(),
text.size(),
startpos,
- (anchor == UNANCHORED) ? 0 : PCRE_ANCHORED,
+ options,
vec,
vecsize);
@@ -540,7 +563,7 @@ bool RE::DoMatchImpl(const StringPiece& text,
int* vec,
int vecsize) const {
assert((1 + n) * 3 <= vecsize); // results + PCRE workspace
- int matches = TryMatch(text, 0, anchor, vec, vecsize);
+ int matches = TryMatch(text, 0, anchor, true, vec, vecsize);
assert(matches >= 0); // TryMatch never returns negatives
if (matches == 0)
return false;