diff options
author | Karl Williamson <khw@cpan.org> | 2015-10-19 12:14:36 -0600 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2015-10-19 12:40:24 -0600 |
commit | a7a8bd1ed56dbdb7e63735924945bbb66b7e2e5c (patch) | |
tree | 533c7a08e31e28aa24888065b383635469d30617 /regexec.c | |
parent | 67481c39e5c4241caaadaabb962ba281af64d9aa (diff) | |
download | perl-a7a8bd1ed56dbdb7e63735924945bbb66b7e2e5c.tar.gz |
"" =~ /\b{gcb}/ should fail; same for \b{wb}, \b{sb}
The Unicode standard indicates that these breaks should succeed at the
beginning and end of text. It appears to me to be an oversight on their
part not to make an exception when there is no actual text, i.e. when the
string is empty. (Their test suite does not cover this case.) I blindly
implemented their algorithm for 5.22, but matching a boundary in an empty
string really is the wrong thing to do.
Diffstat (limited to 'regexec.c')
-rw-r--r-- | regexec.c | 24 |
1 file changed, 17 insertions, 7 deletions
@@ -2065,13 +2065,17 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, FBC_BOUND(isWORDCHAR_L1, isWORDCHAR_uni, isWORDCHAR_utf8); break; case GCB_BOUND: - if (s == reginfo->strbeg) { /* GCB always matches at begin and - end */ + if (s == reginfo->strbeg) { if (reginfo->intuit || regtry(reginfo, &s)) { goto got_it; } + + /* Didn't match. Try at the next position (if there is one) */ s += (utf8_target) ? UTF8SKIP(s) : 1; + if (UNLIKELY(s >= reginfo->strend)) { + break; + } } if (utf8_target) { @@ -2112,13 +2116,14 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, break; case SB_BOUND: - if (s == reginfo->strbeg) { /* SB always matches at beginning */ + if (s == reginfo->strbeg) { if (reginfo->intuit || regtry(reginfo, &s)) { goto got_it; } - - /* Didn't match. Go try at the next position */ s += (utf8_target) ? UTF8SKIP(s) : 1; + if (UNLIKELY(s >= reginfo->strend)) { + break; + } } if (utf8_target) { @@ -2177,6 +2182,9 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, goto got_it; } s += (utf8_target) ? UTF8SKIP(s) : 1; + if (UNLIKELY(s >= reginfo->strend)) { + break; + } } if (utf8_target) { @@ -5610,8 +5618,10 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) case BOUNDU: /* /\b/u */ boundu: - if (utf8_target) { - + if (UNLIKELY(reginfo->strbeg >= reginfo->strend)) { + match = FALSE; + } + else if (utf8_target) { bound_utf8: switch((bound_type) FLAGS(scan)) { case TRADITIONAL_BOUND: |