summaryrefslogtreecommitdiff
path: root/regexec.c
diff options
context:
space:
mode:
author Karl Williamson <public@khwilliamson.com> 2010-10-31 10:21:14 -0600
committer Father Chrysostomos <sprout@cpan.org> 2010-10-31 14:50:59 -0700
commit 4b3cda86f0c2a54a1ac8bbbaf4a2412f98dff6c0 (patch)
tree f093d4345ec0025bbd802e3b6cc975a4363967de /regexec.c
parent 6698fab5a29c48acfd94f01ff8a587819f15be79 (diff)
download perl-4b3cda86f0c2a54a1ac8bbbaf4a2412f98dff6c0.tar.gz
reginclass: Return matched length even if not utf8
This also allows for less special case testing
Diffstat (limited to 'regexec.c')
-rw-r--r-- regexec.c 28
1 files changed, 18 insertions, 10 deletions
diff --git a/regexec.c b/regexec.c
index 2e897a9010..078b6707ad 100644
--- a/regexec.c
+++ b/regexec.c
@@ -6187,11 +6187,10 @@ Perl_regclass_swash(pTHX_ const regexp *prog, register const regnode* node, bool
character's size)
utf8_target tells whether p is in UTF-8.
- Returns true if matched; false otherwise. For utf8 strings, if lenp is not
- NULL, on return from a successful match, the value it points to will be
- updated to how many bytes in p were matched. The value is undefined,
- possibly changed from the input if there was no match.
- For non-utf8 strings, *lenp is unchanged.
+ Returns true if matched; false otherwise. If lenp is not NULL, on return
+ from a successful match, the value it points to will be updated to how many
+ bytes in p were matched. If there was no match, the value is undefined,
+ possibly changed from the input.
*/
@@ -6207,6 +6206,7 @@ S_reginclass(pTHX_ const regexp *prog, register const regnode *n, register const
PERL_ARGS_ASSERT_REGINCLASS;
+ /* If c is not already the code point, get it */
if (utf8_target && !UTF8_IS_INVARIANT(c)) {
c = utf8n_to_uvchr(p, UTF8_MAXBYTES, &len,
(UTF8_ALLOW_DEFAULT & UTF8_ALLOW_ANYUV)
@@ -6217,10 +6217,20 @@ S_reginclass(pTHX_ const regexp *prog, register const regnode *n, register const
Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
}
- maxlen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c));
+ /* Use passed in max length, or one character if none passed in. And
+ * assume will match just one character. This is overwritten later if
+ * matched more. (Note that the code makes an implicit assumption that any
+ * passed in max is at least one character) */
+ if (lenp) {
+ maxlen = *lenp;
+ *lenp = UNISKIP(NATIVE_TO_UNI(c));
+
+ }
+ else {
+ maxlen = UNISKIP(NATIVE_TO_UNI(c));
+ }
+
if (utf8_target || (flags & ANYOF_UNICODE)) {
- if (lenp)
- *lenp = 0;
if (utf8_target && !ANYOF_RUNTIME(n)) {
if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
match = TRUE;
@@ -6269,8 +6279,6 @@ S_reginclass(pTHX_ const regexp *prog, register const regnode *n, register const
if (! utf8_target) Safefree(utf8_p);
}
}
- if (match && lenp && *lenp == 0)
- *lenp = UNISKIP(NATIVE_TO_UNI(c));
}
if (!match && c < 256) {
if (ANYOF_BITMAP_TEST(n, c))