summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Karl Williamson <khw@cpan.org> 2015-10-19 12:14:36 -0600
committer: Steve Hay <steve.m.hay@googlemail.com> 2015-10-30 21:15:52 +0000
commit: 23dba96959ea693ba5c974f57c08db2d1d66bd4b (patch)
tree: d80e05e7591ccc5ef900fd747f0a3a87f54e0603
parent: 9e053d5265641a359970d9a120a0247e29e78337 (diff)
download: perl-23dba96959ea693ba5c974f57c08db2d1d66bd4b.tar.gz
"" =~ /\b{gcb}/ should fail; same for \b{wb}, \b{sb}
The Unicode standard indicates that these breaks should succeed at the beginning and end of text. It appears to me to be an oversight on their part not to make an exception when there is no actual text. (Their test suite does not cover this case.) I blindly implemented their algorithm for 5.22, but it really is the wrong thing to do.

(cherry picked from commit a7a8bd1ed56dbdb7e63735924945bbb66b7e2e5c)
-rw-r--r-- regexec.c 24
1 files changed, 17 insertions, 7 deletions
diff --git a/regexec.c b/regexec.c
index e7dd6046dc..e38c6ca6f1 100644
--- a/regexec.c
+++ b/regexec.c
@@ -2073,13 +2073,17 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
FBC_BOUND(isWORDCHAR_L1, isWORDCHAR_uni, isWORDCHAR_utf8);
break;
case GCB_BOUND:
- if (s == reginfo->strbeg) { /* GCB always matches at begin and
- end */
+ if (s == reginfo->strbeg) {
if (reginfo->intuit || regtry(reginfo, &s))
{
goto got_it;
}
+
+ /* Didn't match. Try at the next position (if there is one) */
s += (utf8_target) ? UTF8SKIP(s) : 1;
+ if (UNLIKELY(s >= reginfo->strend)) {
+ break;
+ }
}
if (utf8_target) {
@@ -2118,13 +2122,14 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
break;
case SB_BOUND:
- if (s == reginfo->strbeg) { /* SB always matches at beginning */
+ if (s == reginfo->strbeg) {
if (reginfo->intuit || regtry(reginfo, &s)) {
goto got_it;
}
-
- /* Didn't match. Go try at the next position */
s += (utf8_target) ? UTF8SKIP(s) : 1;
+ if (UNLIKELY(s >= reginfo->strend)) {
+ break;
+ }
}
if (utf8_target) {
@@ -2183,6 +2188,9 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
goto got_it;
}
s += (utf8_target) ? UTF8SKIP(s) : 1;
+ if (UNLIKELY(s >= reginfo->strend)) {
+ break;
+ }
}
if (utf8_target) {
@@ -5617,8 +5625,10 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
case BOUNDU: /* /\b/u */
boundu:
- if (utf8_target) {
-
+ if (UNLIKELY(reginfo->strbeg >= reginfo->strend)) {
+ match = FALSE;
+ }
+ else if (utf8_target) {
bound_utf8:
switch((bound_type) FLAGS(scan)) {
case TRADITIONAL_BOUND: