summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Karl Williamson <khw@cpan.org> 2015-10-19 09:56:27 -0600
committer: Steve Hay <steve.m.hay@googlemail.com> 2015-10-30 21:04:25 +0000
commit: 356a09776e833f02957b5372cd17d46307a9efed (patch)
tree: be69ec6e1e3d9c8cd70d24f7c1903e7615dc534a
parent: 4866d497e88693df3d50c8701d7cc6ec23fc0006 (diff)
download: perl-356a09776e833f02957b5372cd17d46307a9efed.tar.gz
Fix look-behind bug with \b{wb}
The algorithm for determining a word break requires look-behind in some cases. Certain characters are ignored in the look-behind, but until this commit the parse pointer was left unchanged, causing things to get out of sync in some edge cases.

(cherry picked from commit b4b2ec55cecba4503a6d679455fa72e5d81eda59)
-rw-r--r-- regexec.c 20
1 files changed, 17 insertions, 3 deletions
diff --git a/regexec.c b/regexec.c
index e526486f2c..230d6210ea 100644
--- a/regexec.c
+++ b/regexec.c
@@ -4750,10 +4750,24 @@ S_backup_one_WB(pTHX_ WB_enum * previous, const U8 * const strbeg, U8 ** curpos,
* to look it up */
if (*previous != WB_UNKNOWN) {
wb = *previous;
- *previous = WB_UNKNOWN;
- /* XXX Note that doesn't change curpos, and maybe should */
- /* But we always back up over these two types */
+ /* But we need to move backwards by one */
+ if (utf8_target) {
+ *curpos = reghopmaybe3(*curpos, -1, strbeg);
+ if (! *curpos) {
+ *previous = WB_EDGE;
+ *curpos = (U8 *) strbeg;
+ }
+ else {
+ *previous = WB_UNKNOWN;
+ }
+ }
+ else {
+ (*curpos)--;
+ *previous = (*curpos <= strbeg) ? WB_EDGE : WB_UNKNOWN;
+ }
+
+ /* And we always back up over these two types */
if (wb != WB_Extend && wb != WB_Format) {
return wb;
}