summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--embed.fnc2
-rw-r--r--embed.h4
-rw-r--r--proto.h18
-rw-r--r--regexec.c42
4 files changed, 34 insertions, 32 deletions
diff --git a/embed.fnc b/embed.fnc
index 64aa735f23..6f743e49af 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -2169,10 +2169,8 @@ ERsn |U8* |reghop3 |NN U8 *s|SSize_t off|NN const U8 *lim
ERsM |SV* |core_regclass_swash|NULLOK const regexp *prog \
|NN const struct regnode *node|bool doinit \
|NULLOK SV **listsvp
-#ifdef XXX_dmq
ERsn |U8* |reghop4 |NN U8 *s|SSize_t off|NN const U8 *llim \
|NN const U8 *rlim
-#endif
ERsn |U8* |reghopmaybe3 |NN U8 *s|SSize_t off|NN const U8 *lim
ERs |char* |find_byclass |NN regexp * prog|NN const regnode *c \
|NN char *s|NN const char *strend \
diff --git a/embed.h b/embed.h
index 2a6064cbcb..d1224eb200 100644
--- a/embed.h
+++ b/embed.h
@@ -1004,6 +1004,7 @@
#define regcppop(a,b) S_regcppop(aTHX_ a,b)
#define regcppush(a,b,c) S_regcppush(aTHX_ a,b,c)
#define reghop3 S_reghop3
+#define reghop4 S_reghop4
#define reghopmaybe3 S_reghopmaybe3
#define reginclass(a,b,c,d,e) S_reginclass(aTHX_ a,b,c,d,e)
#define regmatch(a,b,c) S_regmatch(aTHX_ a,b,c)
@@ -1011,9 +1012,6 @@
#define regtry(a,b) S_regtry(aTHX_ a,b)
#define to_byte_substr(a) S_to_byte_substr(aTHX_ a)
#define to_utf8_substr(a) S_to_utf8_substr(aTHX_ a)
-# if defined(XXX_dmq)
-#define reghop4 S_reghop4
-# endif
# endif
# if defined(PERL_IN_UTF8_C) || defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C)
#define _to_fold_latin1(a,b,c,d) Perl__to_fold_latin1(aTHX_ a,b,c,d)
diff --git a/proto.h b/proto.h
index 0115eab3ea..88e246a9b2 100644
--- a/proto.h
+++ b/proto.h
@@ -7175,6 +7175,14 @@ STATIC U8* S_reghop3(U8 *s, SSize_t off, const U8 *lim)
#define PERL_ARGS_ASSERT_REGHOP3 \
assert(s); assert(lim)
+STATIC U8* S_reghop4(U8 *s, SSize_t off, const U8 *llim, const U8 *rlim)
+ __attribute__warn_unused_result__
+ __attribute__nonnull__(1)
+ __attribute__nonnull__(3)
+ __attribute__nonnull__(4);
+#define PERL_ARGS_ASSERT_REGHOP4 \
+ assert(s); assert(llim); assert(rlim)
+
STATIC U8* S_reghopmaybe3(U8 *s, SSize_t off, const U8 *lim)
__attribute__warn_unused_result__
__attribute__nonnull__(1)
@@ -7224,16 +7232,6 @@ STATIC void S_to_utf8_substr(pTHX_ regexp * prog)
#define PERL_ARGS_ASSERT_TO_UTF8_SUBSTR \
assert(prog)
-# if defined(XXX_dmq)
-STATIC U8* S_reghop4(U8 *s, SSize_t off, const U8 *llim, const U8 *rlim)
- __attribute__warn_unused_result__
- __attribute__nonnull__(1)
- __attribute__nonnull__(3)
- __attribute__nonnull__(4);
-#define PERL_ARGS_ASSERT_REGHOP4 \
- assert(s); assert(llim); assert(rlim)
-
-# endif
#endif
#if defined(PERL_IN_SCOPE_C)
STATIC void S_save_pushptri32ptr(pTHX_ void *const ptr1, const I32 i, void *const ptr2, const int type);
diff --git a/regexec.c b/regexec.c
index f28ef7da0b..8b2d7c5389 100644
--- a/regexec.c
+++ b/regexec.c
@@ -135,6 +135,10 @@ static const char* const non_utf8_target_but_utf8_required
? reghop3((U8*)(pos), off, (U8*)(lim)) \
: (U8*)((pos + off) > lim ? lim : (pos + off)))
+#define HOP4(pos,off,llim, rlim) (reginfo->is_utf8_target \
+ ? reghop4((U8*)(pos), off, (U8*)(llim), (U8*)(rlim)) \
+ : (U8*)(pos + off))
+#define HOP4c(pos,off,llim, rlim) ((char*)HOP4(pos,off,llim, rlim))
#define NEXTCHR_EOS -10 /* nextchr has fallen off the end */
#define NEXTCHR_IS_EOS (nextchr < 0)
@@ -906,11 +910,24 @@ Perl_re_intuit_start(pTHX_
if (check == (utf8_target ? prog->float_utf8 : prog->float_substr)) {
do_other_anchored:
{
- char * const last = HOP3c(s, -start_shift, strbeg);
+ char * last;
char *last1, *last2;
char * const saved_s = s;
SV* must;
+ /* we've previously found a floating substr starting at s.
+ * This means that the regex origin must lie somewhere
+ * between min: HOP3(s, -check_offset_max)
+ * between max: HOP3(s, -check_offset_min)
+ * (except that min will be >= strpos)
+ * So the fixed substr must lie somewhere between
+ * HOP3(min, anchored_offset)
+ * HOP3(max, anchored_offset) + SvCUR(substr)
+ */
+ assert(strpos + start_shift <= s);
+ last = HOP4c(s, prog->anchored_offset-start_shift,
+ strbeg, strend);
+
t = s - prog->check_offset_max;
if (s - strpos > prog->check_offset_max /* signed-corrected t > strpos */
&& (!utf8_target
@@ -922,14 +939,12 @@ Perl_re_intuit_start(pTHX_
t = HOP3c(t, prog->anchored_offset, strend);
if (t < other_last) /* These positions already checked */
t = other_last;
- last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
+
+ assert(prog->minlen > prog->anchored_offset);
+ last2 = last1 = HOP3c(strend,
+ prog->anchored_offset-prog->minlen, strbeg);
if (last < last1)
last1 = last;
- /* XXXX It is not documented what units *_offsets are in.
- We assume bytes, but this is clearly wrong.
- Meaning this code needs to be carefully reviewed for errors.
- dmq.
- */
/* On end-of-str: see comment below. */
must = utf8_target ? prog->anchored_utf8 : prog->anchored_substr;
@@ -940,8 +955,7 @@ Perl_re_intuit_start(pTHX_
else
s = fbm_instr(
(unsigned char*)t,
- HOP3(HOP3(last1, prog->anchored_offset, strend)
- + SvCUR(must), -(SvTAIL(must)!=0), strbeg),
+ HOP3(last1 + SvCUR(must), -(SvTAIL(must)!=0), strbeg),
must,
multiline ? FBMrf_MULTILINE : 0
);
@@ -963,8 +977,8 @@ Perl_re_intuit_start(pTHX_
DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
", trying floating at offset %ld...\n",
(long)(HOP3c(saved_s, 1, strend) - i_strpos)));
- other_last = HOP3c(last1, prog->anchored_offset+1, strend);
- s = HOP3c(last, 1, strend);
+ other_last = HOP3c(last1, 1, strend);
+ s = HOP4c(last, 1 - prog->anchored_offset, strbeg, strend);
goto restart;
}
else {
@@ -7797,11 +7811,6 @@ S_reghop3(U8 *s, SSize_t off, const U8* lim)
return s;
}
-#ifdef XXX_dmq
-/* there are a bunch of places where we use two reghop3's that should
- be replaced with this routine. but since thats not done yet
- we ifdef it out - dmq
-*/
STATIC U8 *
S_reghop4(U8 *s, SSize_t off, const U8* llim, const U8* rlim)
{
@@ -7827,7 +7836,6 @@ S_reghop4(U8 *s, SSize_t off, const U8* llim, const U8* rlim)
}
return s;
}
-#endif
STATIC U8 *
S_reghopmaybe3(U8* s, SSize_t off, const U8* lim)