summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--MANIFEST3
-rw-r--r--embed.fnc8
-rw-r--r--embed.h8
-rw-r--r--pod/perlreguts.pod12
-rw-r--r--proto.h30
-rw-r--r--regcomp.c76
-rw-r--r--regcomp.h31
-rw-r--r--reginline.h64
8 files changed, 123 insertions, 109 deletions
diff --git a/MANIFEST b/MANIFEST
index b067853201..a11ad17320 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -5531,7 +5531,8 @@ regen/warnings.pl Program to write warnings.h and lib/warnings.pm
regen_perly.pl generate perly.{act,h,tab} from perly.y
regexec.c Regular expression evaluator
regexp.h Public declarations for the above
-regnodes.h Description of nodes of RE engine
+reginline.h Inline subs for the RE engine.
+regnodes.h Description of nodes of the RE engine
run.c The interpreter loop
runtests.SH A script that generates runtests
sbox32_hash.h SBox hash code (32 Bit SBOX based hash function)
diff --git a/embed.fnc b/embed.fnc
index d483eef519..4c04ed8693 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -2003,9 +2003,11 @@ Cp |I32 |regexec_flags |NN REGEXP *const rx|NN char *stringarg \
|NN char *strend|NN char *strbeg \
|SSize_t minend|NN SV *sv \
|NULLOK void *data|U32 flags
-CpR |regnode*|regnext |NULLOK regnode* p
-CpR |bool|check_regnode_after |NULLOK const regnode* p|const STRLEN extra
-CpR |regnode*|regnode_after |NULLOK regnode* p
+#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C)
+CipR |regnode*|regnext |NULLOK const regnode* p
+CipR |bool|check_regnode_after |NULLOK const regnode* p|const STRLEN extra
+CipR |regnode*|regnode_after |NULLOK const regnode* p|bool varies
+#endif
EXp |SV*|reg_named_buff |NN REGEXP * const rx|NULLOK SV * const key \
|NULLOK SV * const value|const U32 flags
EXp |SV*|reg_named_buff_iter |NN REGEXP * const rx|NULLOK const SV * const lastkey \
diff --git a/embed.h b/embed.h
index 8a56949c0c..16a8eebc4e 100644
--- a/embed.h
+++ b/embed.h
@@ -98,7 +98,6 @@
#define cast_iv Perl_cast_iv
#define cast_ulong Perl_cast_ulong
#define cast_uv Perl_cast_uv
-#define check_regnode_after(a,b) Perl_check_regnode_after(aTHX_ a,b)
#define ck_entersub_args_list(a) Perl_ck_entersub_args_list(aTHX_ a)
#define ck_entersub_args_proto(a,b,c) Perl_ck_entersub_args_proto(aTHX_ a,b,c)
#define ck_entersub_args_proto_or_list(a,b,c) Perl_ck_entersub_args_proto_or_list(aTHX_ a,b,c)
@@ -476,8 +475,6 @@
#define regexec_flags(a,b,c,d,e,f,g,h) Perl_regexec_flags(aTHX_ a,b,c,d,e,f,g,h)
#define regfree_internal(a) Perl_regfree_internal(aTHX_ a)
#define reginitcolors() Perl_reginitcolors(aTHX)
-#define regnext(a) Perl_regnext(aTHX_ a)
-#define regnode_after(a) Perl_regnode_after(aTHX_ a)
#define repeatcpy Perl_repeatcpy
#define require_pv(a) Perl_require_pv(aTHX_ a)
#define rninstr Perl_rninstr
@@ -820,6 +817,11 @@
#define dump_mstats(a) Perl_dump_mstats(aTHX_ a)
#define get_mstats(a,b,c) Perl_get_mstats(aTHX_ a,b,c)
#endif
+#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C)
+#define check_regnode_after(a,b) Perl_check_regnode_after(aTHX_ a,b)
+#define regnext(a) Perl_regnext(aTHX_ a)
+#define regnode_after(a,b) Perl_regnode_after(aTHX_ a,b)
+#endif
#if defined(PERL_IN_SV_C)
#define more_sv() Perl_more_sv(aTHX)
#endif
diff --git a/pod/perlreguts.pod b/pod/perlreguts.pod
index 3cc9e7f7ff..3b73d14355 100644
--- a/pod/perlreguts.pod
+++ b/pod/perlreguts.pod
@@ -239,12 +239,12 @@ select regnode types in the execution phase. It is also heavily used in
the code for dumping the regexp program for debugging.
There are a selection of macros which can be used to compute this as
-efficiently as possible depending on the circumstances, for instance if
-the code is dedicated to handling a specific type of regnode you can use
-C<REGNODE_AFTER_type()> to produce the most efficient code. If you have
-already extracted the nodes opcode then you can use
-C<REGNODE_AFTER_opcode()>, and if you just have a pointer to a regnode you
-can use C<REGNODE_AFTER_dynamic()>.
+efficiently as possible depending on the circumstances. The canonical
+macro is C<REGNODE_AFTER()>, which is the most powerful and should handle
+any case we have, but is also potentially the slowest. There are two
+additional macros for the special case that you KNOW the current regnode
+size is constant, and you know its type or opcode. In which case you can
+use C<REGNODE_AFTER_opcode()> or C<REGNODE_AFTER_type()>.
In older versions of the regex engine C<REGNODE_AFTER()> was called
C<NEXTOPER> but this was found to be confusing and it was renamed. There
diff --git a/proto.h b/proto.h
index a7e3d028ad..9231af4c10 100644
--- a/proto.h
+++ b/proto.h
@@ -481,10 +481,6 @@ PERL_CALLCONV UV Perl_cast_uv(NV f)
__attribute__warn_unused_result__;
#define PERL_ARGS_ASSERT_CAST_UV
-PERL_CALLCONV bool Perl_check_regnode_after(pTHX_ const regnode* p, const STRLEN extra)
- __attribute__warn_unused_result__;
-#define PERL_ARGS_ASSERT_CHECK_REGNODE_AFTER
-
PERL_CALLCONV bool Perl_check_utf8_print(pTHX_ const U8 *s, const STRLEN len)
__attribute__visibility__("hidden");
#define PERL_ARGS_ASSERT_CHECK_UTF8_PRINT \
@@ -3331,14 +3327,6 @@ PERL_CALLCONV void Perl_regfree_internal(pTHX_ REGEXP *const rx);
assert(rx)
PERL_CALLCONV void Perl_reginitcolors(pTHX);
#define PERL_ARGS_ASSERT_REGINITCOLORS
-PERL_CALLCONV regnode* Perl_regnext(pTHX_ regnode* p)
- __attribute__warn_unused_result__;
-#define PERL_ARGS_ASSERT_REGNEXT
-
-PERL_CALLCONV regnode* Perl_regnode_after(pTHX_ regnode* p)
- __attribute__warn_unused_result__;
-#define PERL_ARGS_ASSERT_REGNODE_AFTER
-
PERL_CALLCONV void Perl_repeatcpy(char* to, const char* from, I32 len, IV count);
#define PERL_ARGS_ASSERT_REPEATCPY \
assert(to); assert(from)
@@ -6674,12 +6662,30 @@ PERL_STATIC_INLINE const char * S_get_regex_charset_name(const U32 flags, STRLEN
#endif
#endif
#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C)
+#ifndef PERL_NO_INLINE_FUNCTIONS
+PERL_STATIC_INLINE bool Perl_check_regnode_after(pTHX_ const regnode* p, const STRLEN extra)
+ __attribute__warn_unused_result__;
+#define PERL_ARGS_ASSERT_CHECK_REGNODE_AFTER
+#endif
+
PERL_CALLCONV int Perl_re_printf(pTHX_ const char *fmt, ...)
__attribute__visibility__("hidden")
__attribute__format__(__printf__,pTHX_1,pTHX_2);
#define PERL_ARGS_ASSERT_RE_PRINTF \
assert(fmt)
+#ifndef PERL_NO_INLINE_FUNCTIONS
+PERL_STATIC_INLINE regnode* Perl_regnext(pTHX_ const regnode* p)
+ __attribute__warn_unused_result__;
+#define PERL_ARGS_ASSERT_REGNEXT
+#endif
+
+#ifndef PERL_NO_INLINE_FUNCTIONS
+PERL_STATIC_INLINE regnode* Perl_regnode_after(pTHX_ const regnode* p, bool varies)
+ __attribute__warn_unused_result__;
+#define PERL_ARGS_ASSERT_REGNODE_AFTER
+#endif
+
PERL_CALLCONV void Perl_regprop(pTHX_ const regexp *prog, SV* sv, const regnode* o, const regmatch_info *reginfo, const RExC_state_t *pRExC_state)
__attribute__visibility__("hidden");
#define PERL_ARGS_ASSERT_REGPROP \
diff --git a/regcomp.c b/regcomp.c
index 61d57cbcb6..a83bab9422 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -3806,9 +3806,9 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
}
trie->prefixlen = (state-1);
if (str) {
- regnode *n = convert+NODE_SZ_STR(convert);
- assert( NODE_SZ_STR(convert) <= U16_MAX );
- NEXT_OFF(convert) = (U16)(NODE_SZ_STR(convert));
+ regnode *n = REGNODE_AFTER(convert);
+ assert( n - convert <= U16_MAX );
+ NEXT_OFF(convert) = n - convert;
trie->startstate = state;
trie->minlen -= (state - 1);
trie->maxlen -= (state - 1);
@@ -4227,7 +4227,7 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
regnode *n = regnext(scan);
U32 stringok = 1;
- regnode *next = scan + NODE_SZ_STR(scan);
+ regnode *next = REGNODE_AFTER_varies(scan);
U32 merged = 0;
U32 stopnow = 0;
#ifdef DEBUGGING
@@ -4380,10 +4380,10 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
DEBUG_PEEP("merg", n, depth, 0);
merged++;
+ next = REGNODE_AFTER_varies(n);
NEXT_OFF(scan) += NEXT_OFF(n);
assert( ( STR_LEN(scan) + STR_LEN(n) ) < 256 );
setSTR_LEN(scan, (U8)(STR_LEN(scan) + STR_LEN(n)));
- next = n + NODE_SZ_STR(n);
/* Now we can overwrite *n : */
Move(STRING(n), STRING(scan) + oldl, STR_LEN(n), char);
#ifdef DEBUGGING
@@ -4612,7 +4612,7 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
#ifdef DEBUGGING
/* Allow dumping but overwriting the collection of skipped
* ops and/or strings with fake optimized ops */
- n = scan + NODE_SZ_STR(scan);
+ n = REGNODE_AFTER_varies(scan);
while (n <= stop) {
OP(n) = OPTIMIZED;
FLAGS(n) = 0;
@@ -20656,7 +20656,7 @@ S_optimize_regclass(pTHX_
Copy(low_utf8, /* Add the common bytes */
((struct regnode_anyofhs *) REGNODE_p(*ret))->string,
len, U8);
- RExC_emit += NODE_SZ_STR(REGNODE_p(*ret));
+ RExC_emit = REGNODE_OFFSET(REGNODE_AFTER_varies(REGNODE_p(*ret)));
set_ANYOF_arg(pRExC_state, REGNODE_p(*ret), cp_list,
NULL, only_utf8_locale_list);
return op;
@@ -22940,64 +22940,6 @@ Perl_regdupe_internal(pTHX_ REGEXP * const rx, CLONE_PARAMS *param)
#endif /* USE_ITHREADS */
-#ifndef PERL_IN_XSUB_RE
-
-/*
- - regnext - dig the "next" pointer out of a node
- */
-regnode *
-Perl_regnext(pTHX_ regnode *p)
-{
- I32 offset;
-
- if (!p)
- return(NULL);
-
- if (OP(p) > REGNODE_MAX) { /* regnode.type is unsigned */
- Perl_croak(aTHX_ "Corrupted regexp opcode %d > %d",
- (int)OP(p), (int)REGNODE_MAX);
- }
-
- offset = (PL_regnode_off_by_arg[OP(p)] ? ARG(p) : NEXT_OFF(p));
- if (offset == 0)
- return(NULL);
-
- return(p+offset);
-}
-
-/*
- - regnode_after - find the node physically following p in memory,
- taking into account the size of p as determined by OP(p), our
- sizing data, and possibly the STR_SZ() macro.
- */
-regnode *
-Perl_regnode_after(pTHX_ const regnode *p)
-{
- assert(p);
- const U8 op = OP(p);
- assert(op < REGNODE_MAX);
- const regnode *ret = p + NODE_STEP_REGNODE + PL_regnode_arg_len[op];
- if (PL_regnode_arg_len_varies[op])
- ret += STR_SZ(STR_LEN(p));
- return (regnode *)ret;
-}
-
-/* validate that the passed in node and extra length would match that
- * returned by regnode_after() */
-bool
-Perl_check_regnode_after(pTHX_ const regnode *p, const STRLEN extra)
-{
- const regnode *nextoper = regnode_after((regnode *)p,FALSE);
- /* this should be the ONLY place that REGNODE_AFTER_PLUS is used outside
- * of regcomp.h */
- const regnode *other= REGNODE_AFTER_PLUS(p, extra);
- if (nextoper != other) {
- return FALSE;
- }
- return TRUE;
-}
-#endif
-
STATIC void
S_re_croak(pTHX_ bool utf8, const char* pat,...)
{
@@ -23715,7 +23657,7 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
if (op == CLOSE || op == SRCLOSE || op == WHILEM)
indent--;
next = regnext((regnode *)node);
- const regnode *after = regnode_after((regnode *)node);
+ const regnode *after = regnode_after((regnode *)node,0);
/* Where, what. */
if (op == OPTIMIZED) {
@@ -23819,7 +23761,7 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
}
else if (PL_regnode_kind[op] == EXACT || op == ANYOFHs) {
/* Literal string, where present. */
- node += NODE_SZ_STR(node); /* NOT REGNODE_AFTER! */
+ node = (const regnode *)REGNODE_AFTER_varies(node);
}
else {
node = REGNODE_AFTER_opcode(node,op);
diff --git a/regcomp.h b/regcomp.h
index a5c0695e5b..01e37889c5 100644
--- a/regcomp.h
+++ b/regcomp.h
@@ -406,14 +406,9 @@ struct regnode_ssc {
#define OPERAND(p) STRING(p)
/* The number of (smallest) regnode equivalents that a string of length l bytes
- * occupies */
+ * occupies - Used by the REGNODE_AFTER() macros and functions. */
#define STR_SZ(l) (((l) + sizeof(regnode) - 1) / sizeof(regnode))
-/* The number of (smallest) regnode equivalents that the node 'p' which uses
- * 'struct regnode_string' occupies. (These are EXACTish nodes and a few
- * others.) */
-#define NODE_SZ_STR(p) (STR_SZ(STR_LEN(p)) + 1 + PL_regnode_arg_len[(p)->type])
-
#define setSTR_LEN(p,v) \
STMT_START{ \
if (OP(p) == LEXACT || OP(p) == LEXACT_REQ8) \
@@ -428,8 +423,6 @@ struct regnode_ssc {
#undef NODE_ALIGN
#undef ARG_LOC
-#undef REGNODE_AFTER
-#undef REGNODE_BEFORE
#define NODE_ALIGN(node)
#define ARG_LOC(p) (((struct regnode_1 *)p)->arg1)
@@ -470,7 +463,7 @@ struct regnode_ssc {
*
*/
#define REGNODE_AFTER_PLUS(p,extra) ((p) + NODE_STEP_REGNODE + (extra))
-/* under DEBUGGING we check that all REGNODE_AFTER style macro did the
+/* under DEBUGGING we check that all REGNODE_AFTER optimized macros did the
* same thing that Perl_regnode_after() would have done. Note that when
* not compiled under DEBUGGING the assert_() macro is empty. Thus we
* don't have to implement different versions for DEBUGGING and not DEBUGGING,
@@ -479,19 +472,19 @@ struct regnode_ssc {
#define REGNODE_AFTER_PLUS_DEBUG(p,extra) \
(assert_(check_regnode_after(p,extra)) REGNODE_AFTER_PLUS((p),(extra)))
-/* try to avoid using either of the following two directly. They exist for legacy
- * purposes only */
-#define REGNODE_AFTER_plus(p,extra) REGNODE_AFTER_PLUS_DEBUG((p),(extra))
-#define REGNODE_AFTER(p) REGNODE_AFTER_plus((p),0)
/* find the regnode after this p by using the opcode we previously extracted
* with OP(p) */
-#define REGNODE_AFTER_opcode(p,op) REGNODE_AFTER_plus((p),PL_regnode_arg_len[op])
-/* find the regnode after this p by using OP(p) to find the regnode type of p */
-#define REGNODE_AFTER_dynamic(p) REGNODE_AFTER_opcode((p),OP(p))
+#define REGNODE_AFTER_opcode(p,op) REGNODE_AFTER_PLUS_DEBUG((p),PL_regnode_arg_len[op])
+
/* find the regnode after this p by using the size of the struct associated with
+ * the opcode for p. use this when you *know* that p is pointer to a given type*/
+#define REGNODE_AFTER_type(p,t) REGNODE_AFTER_PLUS_DEBUG((p),EXTRA_SIZE(t))
/* find the regnode after this p by using OP(p) to find the regnode type of p */
-#define REGNODE_AFTER(p) regnode_after(p)
+#define REGNODE_AFTER_varies(p) regnode_after(p,TRUE)
+
+/* find the regnode after this p by using OP(p) to find the regnode type of p */
+#define REGNODE_AFTER(p) regnode_after(p,FALSE)
/* REGNODE_BEFORE() is trickier to deal with in terms of validation, execution.
@@ -1375,6 +1368,10 @@ typedef enum {
# define GET_REGCLASS_AUX_DATA(a,b,c,d,e,f) get_re_gclass_aux_data(a,b,c,d,e,f)
#endif
+#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C)
+#include "reginline.h"
+#endif
+
#endif /* PERL_REGCOMP_H_ */
/*
diff --git a/reginline.h b/reginline.h
new file mode 100644
index 0000000000..1d1d4ef80d
--- /dev/null
+++ b/reginline.h
@@ -0,0 +1,64 @@
+
+#ifndef PERL_REGINLINE_H
+
+/*
+ - regnext - dig the "next" pointer out of a node
+ */
+PERL_STATIC_INLINE
+regnode *
+Perl_regnext(pTHX_ const regnode *p)
+{
+ I32 offset;
+
+ if (!p)
+ return(NULL);
+
+ if (OP(p) > REGNODE_MAX) { /* regnode.type is unsigned */
+ Perl_croak(aTHX_ "Corrupted regexp opcode %d > %d",
+ (int)OP(p), (int)REGNODE_MAX);
+ }
+
+ offset = (PL_regnode_off_by_arg[OP(p)] ? ARG(p) : NEXT_OFF(p));
+ if (offset == 0)
+ return(NULL);
+
+ return(regnode *)(p+offset);
+}
+
+/*
+ - regnode_after - find the node physically following p in memory,
+ taking into account the size of p as determined by OP(p), our
+ sizing data, and possibly the STR_SZ() macro.
+ */
+PERL_STATIC_INLINE
+regnode *
+Perl_regnode_after(pTHX_ const regnode *p, const bool varies)
+{
+ assert(p);
+ const U8 op = OP(p);
+ assert(op < REGNODE_MAX);
+ const regnode *ret = p + NODE_STEP_REGNODE + PL_regnode_arg_len[op];
+ if (varies || PL_regnode_arg_len_varies[op])
+ ret += STR_SZ(STR_LEN(p));
+ return (regnode *)ret;
+}
+
+/* validate that the passed in node and extra length would match that
+ * returned by regnode_after() */
+PERL_STATIC_INLINE
+bool
+Perl_check_regnode_after(pTHX_ const regnode *p, const STRLEN extra)
+{
+ const regnode *nextoper = regnode_after((regnode *)p,FALSE);
+ const regnode *other = REGNODE_AFTER_PLUS(p, extra);
+ if (nextoper != other) {
+ return FALSE;
+ }
+ return TRUE;
+}
+
+#define PERL_REGINLINE_H
+#endif
+/*
+ * ex: set ts=8 sts=4 sw=4 et:
+ */