summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Karl Williamson <khw@cpan.org> 2015-10-19 09:56:27 -0600
committer: Karl Williamson <khw@cpan.org> 2015-10-19 12:40:24 -0600
commit: b4b2ec55cecba4503a6d679455fa72e5d81eda59 (patch)
tree: 2f95bd0dfe650c2211d87b64048443d71663f249
parent: da271c5413343d25ab18d217cdc225c0be1633d9 (diff)
download: perl-b4b2ec55cecba4503a6d679455fa72e5d81eda59.tar.gz
Fix look-behind bug with \b{wb}
The algorithm for determining a word break requires look-behind in some cases. Certain characters are ignored in the look-behind, but until this commit, the parse pointer was left unchanged when a cached previous character type was consumed, causing the pointer and the cached look-behind state to get out of sync in some edge cases.
-rw-r--r-- regexec.c 20
1 files changed, 17 insertions, 3 deletions
diff --git a/regexec.c b/regexec.c
index 9f4d395fa7..489a1be5ad 100644
--- a/regexec.c
+++ b/regexec.c
@@ -4743,10 +4743,24 @@ S_backup_one_WB(pTHX_ WB_enum * previous, const U8 * const strbeg, U8 ** curpos,
* to look it up */
if (*previous != WB_UNKNOWN) {
wb = *previous;
- *previous = WB_UNKNOWN;
- /* XXX Note that doesn't change curpos, and maybe should */
- /* But we always back up over these two types */
+ /* But we need to move backwards by one */
+ if (utf8_target) {
+ *curpos = reghopmaybe3(*curpos, -1, strbeg);
+ if (! *curpos) {
+ *previous = WB_EDGE;
+ *curpos = (U8 *) strbeg;
+ }
+ else {
+ *previous = WB_UNKNOWN;
+ }
+ }
+ else {
+ (*curpos)--;
+ *previous = (*curpos <= strbeg) ? WB_EDGE : WB_UNKNOWN;
+ }
+
+ /* And we always back up over these two types */
if (wb != WB_Extend && wb != WB_Format) {
return wb;
}