"" =~ /\b{gcb}/ should fail; same \b{wb}, \b{sb}

The Unicode standard indicates that these breaks should succeed at the beginning and end of text. It appears to me to be an oversight on their part not to make an exception when there is no actual text, i.e. when the string being matched is empty. (Their test suite does not cover this case.) I blindly implemented their algorithm for 5.22, but having these breaks match in an empty string really is the wrong thing to do. (cherry picked from commit a7a8bd1ed56dbdb7e63735924945bbb66b7e2e5c)
author: Karl Williamson <khw@cpan.org> 2015-10-19 12:14:36 -0600
committer: Steve Hay <steve.m.hay@googlemail.com> 2015-10-30 21:15:52 +0000
commit: 23dba96959ea693ba5c974f57c08db2d1d66bd4b (patch)
tree: d80e05e7591ccc5ef900fd747f0a3a87f54e0603
parent: 9e053d5265641a359970d9a120a0247e29e78337 (diff)
download: perl-23dba96959ea693ba5c974f57c08db2d1d66bd4b.tar.gz
1 files changed, 17 insertions, 7 deletions
diff --git a/regexec.c b/regexec.c
index e7dd6046dc..e38c6ca6f1 100644
--- a/regexec.c
+++ b/regexec.c
@@ -2073,13 +2073,17 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                 FBC_BOUND(isWORDCHAR_L1, isWORDCHAR_uni, isWORDCHAR_utf8);
                 break;
             case GCB_BOUND:
-                if (s == reginfo->strbeg) { /* GCB always matches at begin and
-                                               end */
+                if (s == reginfo->strbeg) {
                     if (reginfo->intuit || regtry(reginfo, &s))
                     {
                         goto got_it;
                     }
+
+                    /* Didn't match.  Try at the next position (if there is one) */
                     s += (utf8_target) ? UTF8SKIP(s) : 1;
+                    if (UNLIKELY(s >= reginfo->strend)) {
+                        break;
+                    }
                 }
 
                 if (utf8_target) {
@@ -2118,13 +2122,14 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                 break;
 
             case SB_BOUND:
-                if (s == reginfo->strbeg) { /* SB always matches at beginning */
+                if (s == reginfo->strbeg) {
                     if (reginfo->intuit || regtry(reginfo, &s)) {
                         goto got_it;
                     }
-
-                    /* Didn't match.  Go try at the next position */
                     s += (utf8_target) ? UTF8SKIP(s) : 1;
+                    if (UNLIKELY(s >= reginfo->strend)) {
+                        break;
+                    }
                 }
 
                 if (utf8_target) {
@@ -2183,6 +2188,9 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
                         goto got_it;
                     }
                     s += (utf8_target) ? UTF8SKIP(s) : 1;
+                    if (UNLIKELY(s >= reginfo->strend)) {
+                        break;
+                    }
                 }
 
                 if (utf8_target) {
@@ -5617,8 +5625,10 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
 	case BOUNDU:  /*  /\b/u  */
 
           boundu:
-	    if (utf8_target) {
-
+            if (UNLIKELY(reginfo->strbeg >= reginfo->strend)) {
+                match = FALSE;
+            }
+            else if (utf8_target) {
               bound_utf8:
                 switch((bound_type) FLAGS(scan)) {
                     case TRADITIONAL_BOUND:
author	Karl Williamson <khw@cpan.org>	2015-10-19 12:14:36 -0600
committer	Steve Hay <steve.m.hay@googlemail.com>	2015-10-30 21:15:52 +0000
commit	23dba96959ea693ba5c974f57c08db2d1d66bd4b (patch)
tree	d80e05e7591ccc5ef900fd747f0a3a87f54e0603
parent	9e053d5265641a359970d9a120a0247e29e78337 (diff)
download	perl-23dba96959ea693ba5c974f57c08db2d1d66bd4b.tar.gz