diff options
-rw-r--r-- | embed.fnc | 4 | ||||
-rw-r--r-- | embed.h | 2 | ||||
-rw-r--r-- | proto.h | 2 | ||||
-rw-r--r-- | regexec.c | 43 |
4 files changed, 13 insertions, 38 deletions
@@ -2006,8 +2006,8 @@ Es |U8 |regtail_study |NN struct RExC_state_t *pRExC_state \ ERs |I32 |regmatch |NN regmatch_info *reginfo|NN char *startpos|NN regnode *prog ERs |I32 |regrepeat |NN const regexp *prog|NN char **startposp|NN const regnode *p|I32 max|int depth ERs |I32 |regtry |NN regmatch_info *reginfo|NN char **startposp -ERs |bool |reginclass |NULLOK const regexp * const prog|NN const regnode * const n|NN const U8 * const p|NULLOK STRLEN *lenp\ - |bool const do_utf8sv_is_utf8 +ERs |bool |reginclass |NULLOK const regexp * const prog|NN const regnode * const n|NN const U8 * const p\ + |bool const utf8_target Es |CHECKPOINT|regcppush |NN const regexp *rex|I32 parenfloor Es |void |regcppop |NN regexp *rex ERsn |U8* |reghop3 |NN U8 *s|I32 off|NN const U8 *lim @@ -967,7 +967,7 @@ #define regcppush(a,b) S_regcppush(aTHX_ a,b) #define reghop3 S_reghop3 #define reghopmaybe3 S_reghopmaybe3 -#define reginclass(a,b,c,d,e) S_reginclass(aTHX_ a,b,c,d,e) +#define reginclass(a,b,c,d) S_reginclass(aTHX_ a,b,c,d) #define regmatch(a,b,c) S_regmatch(aTHX_ a,b,c) #define regrepeat(a,b,c,d,e) S_regrepeat(aTHX_ a,b,c,d,e) #define regtry(a,b) S_regtry(aTHX_ a,b) @@ -6786,7 +6786,7 @@ STATIC U8* S_reghopmaybe3(U8 *s, I32 off, const U8 *lim) #define PERL_ARGS_ASSERT_REGHOPMAYBE3 \ assert(s); assert(lim) -STATIC bool S_reginclass(pTHX_ const regexp * const prog, const regnode * const n, const U8 * const p, STRLEN *lenp, bool const do_utf8sv_is_utf8) +STATIC bool S_reginclass(pTHX_ const regexp * const prog, const regnode * const n, const U8 * const p, bool const utf8_target) __attribute__warn_unused_result__ __attribute__nonnull__(pTHX_2) __attribute__nonnull__(pTHX_3); @@ -104,7 +104,7 @@ const char* const non_utf8_target_but_utf8_required /* Valid for non-utf8 strings: avoids the reginclass * call if there are no complications: i.e., if everything matchable is * straight forward in the bitmap */ -#define REGINCLASS(prog,p,c) (ANYOF_FLAGS(p) ? reginclass(prog,p,c,0,0) \ +#define REGINCLASS(prog,p,c) (ANYOF_FLAGS(p) ? reginclass(prog,p,c,0) \ : ANYOF_BITMAP_TEST(p,*(c))) /* @@ -1458,9 +1458,8 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, switch (OP(c)) { case ANYOF: if (utf8_target) { - STRLEN inclasslen = strend - s; REXEC_FBC_UTF8_CLASS_SCAN( - reginclass(prog, c, (U8*)s, &inclasslen, utf8_target)); + reginclass(prog, c, (U8*)s, utf8_target)); } else { REXEC_FBC_CLASS_SCAN(REGINCLASS(prog, c, (U8*)s)); @@ -4315,10 +4314,9 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) if (NEXTCHR_IS_EOS) sayNO; if (utf8_target) { - STRLEN inclasslen = PL_regeol - locinput; - if (!reginclass(rex, scan, (U8*)locinput, &inclasslen, utf8_target)) + if (!reginclass(rex, scan, (U8*)locinput, utf8_target)) sayNO; - locinput += inclasslen; + locinput += UTF8SKIP(locinput); break; } else { @@ -6763,10 +6761,9 @@ S_regrepeat(pTHX_ const regexp *prog, char **startposp, const regnode *p, I32 ma case ANYOF: if (utf8_target) { STRLEN inclasslen; - inclasslen = loceol - scan; while (hardcount < max - && ((inclasslen = loceol - scan) > 0) - && reginclass(prog, p, (U8*)scan, &inclasslen, utf8_target)) + && scan + (inclasslen = UTF8SKIP(scan)) <= loceol + && reginclass(prog, p, (U8*)scan, utf8_target)) { scan += inclasslen; hardcount++; @@ -7321,15 +7318,9 @@ S_core_regclass_swash(pTHX_ const regexp *prog, register const regnode* node, bo n is the ANYOF regnode p is the target string - lenp is pointer to the maximum number of bytes of how far to go in p - (This is assumed wthout checking to always be at least the current - character's size) utf8_target tells whether p is in UTF-8. - Returns true if matched; false otherwise. If lenp is not NULL, on return - from a successful match, the value it points to will be updated to how many - bytes in p were matched. If there was no match, the value is undefined, - possibly changed from the input. + Returns true if matched; false otherwise. Note that this can be a synthetic start class, a combination of various nodes, so things you think might be mutually exclusive, such as locale, @@ -7338,19 +7329,18 @@ S_core_regclass_swash(pTHX_ const regexp *prog, register const regnode* node, bo */ STATIC bool -S_reginclass(pTHX_ const regexp * const prog, register const regnode * const n, register const U8* const p, STRLEN* lenp, register const bool utf8_target) +S_reginclass(pTHX_ const regexp * const prog, register const regnode * const n, register const U8* const p, register const bool utf8_target) { dVAR; const char flags = ANYOF_FLAGS(n); bool match = FALSE; UV c = *p; - STRLEN c_len = 0; - STRLEN maxlen; PERL_ARGS_ASSERT_REGINCLASS; /* If c is not already the code point, get it */ if (utf8_target && !UTF8_IS_INVARIANT(c)) { + STRLEN c_len = 0; c = utf8n_to_uvchr(p, UTF8_MAXBYTES, &c_len, (UTF8_ALLOW_DEFAULT & UTF8_ALLOW_ANYUV) | UTF8_ALLOW_FFFF | UTF8_CHECK_ONLY); @@ -7359,21 +7349,6 @@ S_reginclass(pTHX_ const regexp * const prog, register const regnode * const n, if (c_len == (STRLEN)-1) Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)"); } - else { - c_len = 1; - } - - /* Use passed in max length, or one character if none passed in or less - * than one character. And assume will match just one character. This is - * overwritten later if matched more. */ - if (lenp) { - maxlen = (*lenp > c_len) ? *lenp : c_len; - *lenp = c_len; - - } - else { - maxlen = c_len; - } /* If this character is potentially in the bitmap, check it */ if (c < 256) { |