summary refs log tree commit diff
path: root/regexec.c
diff options
context:
space:
mode:
author Karl Williamson <khw@cpan.org> 2015-10-19 12:14:36 -0600
committer Karl Williamson <khw@cpan.org> 2015-10-19 12:40:24 -0600
commit a7a8bd1ed56dbdb7e63735924945bbb66b7e2e5c (patch)
tree 533c7a08e31e28aa24888065b383635469d30617 /regexec.c
parent 67481c39e5c4241caaadaabb962ba281af64d9aa (diff)
download perl-a7a8bd1ed56dbdb7e63735924945bbb66b7e2e5c.tar.gz
"" =~ /\b{gcb}/ should fail; same for \b{wb} and \b{sb}
The Unicode standard indicates that these breaks should succeed at the beginning and end of text. It appears to me to be an oversight on their part not to make an exception for the case where there is no actual text, i.e. the string being matched is empty. (Their test suite does not cover this case.) I blindly implemented their algorithm for 5.22, but reporting a break in an empty string really is the wrong thing to do.
Diffstat (limited to 'regexec.c')
-rw-r--r-- regexec.c 24
1 file changed, 17 insertions, 7 deletions
diff --git a/regexec.c b/regexec.c
index 2af7653c60..85c31a69ba 100644
--- a/regexec.c
+++ b/regexec.c
@@ -2065,13 +2065,17 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
FBC_BOUND(isWORDCHAR_L1, isWORDCHAR_uni, isWORDCHAR_utf8);
break;
case GCB_BOUND:
- if (s == reginfo->strbeg) { /* GCB always matches at begin and
- end */
+ if (s == reginfo->strbeg) {
if (reginfo->intuit || regtry(reginfo, &s))
{
goto got_it;
}
+
+ /* Didn't match. Try at the next position (if there is one) */
s += (utf8_target) ? UTF8SKIP(s) : 1;
+ if (UNLIKELY(s >= reginfo->strend)) {
+ break;
+ }
}
if (utf8_target) {
@@ -2112,13 +2116,14 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
break;
case SB_BOUND:
- if (s == reginfo->strbeg) { /* SB always matches at beginning */
+ if (s == reginfo->strbeg) {
if (reginfo->intuit || regtry(reginfo, &s)) {
goto got_it;
}
-
- /* Didn't match. Go try at the next position */
s += (utf8_target) ? UTF8SKIP(s) : 1;
+ if (UNLIKELY(s >= reginfo->strend)) {
+ break;
+ }
}
if (utf8_target) {
@@ -2177,6 +2182,9 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
goto got_it;
}
s += (utf8_target) ? UTF8SKIP(s) : 1;
+ if (UNLIKELY(s >= reginfo->strend)) {
+ break;
+ }
}
if (utf8_target) {
@@ -5610,8 +5618,10 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
case BOUNDU: /* /\b/u */
boundu:
- if (utf8_target) {
-
+ if (UNLIKELY(reginfo->strbeg >= reginfo->strend)) {
+ match = FALSE;
+ }
+ else if (utf8_target) {
bound_utf8:
switch((bound_type) FLAGS(scan)) {
case TRADITIONAL_BOUND: