diff options
-rw-r--r-- | MANIFEST | 3 | ||||
-rw-r--r-- | embed.fnc | 8 | ||||
-rw-r--r-- | embed.h | 8 | ||||
-rw-r--r-- | pod/perlreguts.pod | 12 | ||||
-rw-r--r-- | proto.h | 30 | ||||
-rw-r--r-- | regcomp.c | 76 | ||||
-rw-r--r-- | regcomp.h | 31 | ||||
-rw-r--r-- | reginline.h | 64 |
8 files changed, 123 insertions, 109 deletions
@@ -5531,7 +5531,8 @@ regen/warnings.pl Program to write warnings.h and lib/warnings.pm regen_perly.pl generate perly.{act,h,tab} from perly.y regexec.c Regular expression evaluator regexp.h Public declarations for the above -regnodes.h Description of nodes of RE engine +reginline.h Inline subs for the RE engine. +regnodes.h Description of nodes of the RE engine run.c The interpreter loop runtests.SH A script that generates runtests sbox32_hash.h SBox hash code (32 Bit SBOX based hash function) @@ -2003,9 +2003,11 @@ Cp |I32 |regexec_flags |NN REGEXP *const rx|NN char *stringarg \ |NN char *strend|NN char *strbeg \ |SSize_t minend|NN SV *sv \ |NULLOK void *data|U32 flags -CpR |regnode*|regnext |NULLOK regnode* p -CpR |bool|check_regnode_after |NULLOK const regnode* p|const STRLEN extra -CpR |regnode*|regnode_after |NULLOK regnode* p +#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) +CipR |regnode*|regnext |NULLOK const regnode* p +CipR |bool|check_regnode_after |NULLOK const regnode* p|const STRLEN extra +CipR |regnode*|regnode_after |NULLOK const regnode* p|bool varies +#endif EXp |SV*|reg_named_buff |NN REGEXP * const rx|NULLOK SV * const key \ |NULLOK SV * const value|const U32 flags EXp |SV*|reg_named_buff_iter |NN REGEXP * const rx|NULLOK const SV * const lastkey \ @@ -98,7 +98,6 @@ #define cast_iv Perl_cast_iv #define cast_ulong Perl_cast_ulong #define cast_uv Perl_cast_uv -#define check_regnode_after(a,b) Perl_check_regnode_after(aTHX_ a,b) #define ck_entersub_args_list(a) Perl_ck_entersub_args_list(aTHX_ a) #define ck_entersub_args_proto(a,b,c) Perl_ck_entersub_args_proto(aTHX_ a,b,c) #define ck_entersub_args_proto_or_list(a,b,c) Perl_ck_entersub_args_proto_or_list(aTHX_ a,b,c) @@ -476,8 +475,6 @@ #define regexec_flags(a,b,c,d,e,f,g,h) Perl_regexec_flags(aTHX_ a,b,c,d,e,f,g,h) #define regfree_internal(a) Perl_regfree_internal(aTHX_ a) #define reginitcolors() Perl_reginitcolors(aTHX) -#define regnext(a) Perl_regnext(aTHX_ a) -#define regnode_after(a) Perl_regnode_after(aTHX_ a) #define repeatcpy Perl_repeatcpy #define require_pv(a) Perl_require_pv(aTHX_ a) #define rninstr Perl_rninstr @@ -820,6 +817,11 @@ #define dump_mstats(a) Perl_dump_mstats(aTHX_ a) #define get_mstats(a,b,c) Perl_get_mstats(aTHX_ a,b,c) #endif +#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) +#define check_regnode_after(a,b) Perl_check_regnode_after(aTHX_ a,b) +#define regnext(a) Perl_regnext(aTHX_ a) +#define regnode_after(a,b) Perl_regnode_after(aTHX_ a,b) +#endif #if defined(PERL_IN_SV_C) #define more_sv() Perl_more_sv(aTHX) #endif diff --git a/pod/perlreguts.pod b/pod/perlreguts.pod index 3cc9e7f7ff..3b73d14355 100644 --- a/pod/perlreguts.pod +++ b/pod/perlreguts.pod @@ -239,12 +239,12 @@ select regnode types in the execution phase. It is also heavily used in the code for dumping the regexp program for debugging. There are a selection of macros which can be used to compute this as -efficiently as possible depending on the circumstances, for instance if -the code is dedicated to handling a specific type of regnode you can use -C<REGNODE_AFTER_type()> to produce the most efficient code. If you have -already extracted the nodes opcode then you can use -C<REGNODE_AFTER_opcode()>, and if you just have a pointer to a regnode you -can use C<REGNODE_AFTER_dynamic()>. +efficiently as possible depending on the circumstances. The canonical +macro is C<REGNODE_AFTER()>, which is the most powerful and should handle +any case we have, but is also potentially the slowest. There are two +additional macros for the special case that you KNOW the current regnode +size is constant, and you know its type or opcode. In which case you can +use C<REGNODE_AFTER_opcode()> or C<REGNODE_AFTER_type()>. In older versions of the regex engine C<REGNODE_AFTER()> was called C<NEXTOPER> but this was found to be confusing and it was renamed. There @@ -481,10 +481,6 @@ PERL_CALLCONV UV Perl_cast_uv(NV f) __attribute__warn_unused_result__; #define PERL_ARGS_ASSERT_CAST_UV -PERL_CALLCONV bool Perl_check_regnode_after(pTHX_ const regnode* p, const STRLEN extra) - __attribute__warn_unused_result__; -#define PERL_ARGS_ASSERT_CHECK_REGNODE_AFTER - PERL_CALLCONV bool Perl_check_utf8_print(pTHX_ const U8 *s, const STRLEN len) __attribute__visibility__("hidden"); #define PERL_ARGS_ASSERT_CHECK_UTF8_PRINT \ @@ -3331,14 +3327,6 @@ PERL_CALLCONV void Perl_regfree_internal(pTHX_ REGEXP *const rx); assert(rx) PERL_CALLCONV void Perl_reginitcolors(pTHX); #define PERL_ARGS_ASSERT_REGINITCOLORS -PERL_CALLCONV regnode* Perl_regnext(pTHX_ regnode* p) - __attribute__warn_unused_result__; -#define PERL_ARGS_ASSERT_REGNEXT - -PERL_CALLCONV regnode* Perl_regnode_after(pTHX_ regnode* p) - __attribute__warn_unused_result__; -#define PERL_ARGS_ASSERT_REGNODE_AFTER - PERL_CALLCONV void Perl_repeatcpy(char* to, const char* from, I32 len, IV count); #define PERL_ARGS_ASSERT_REPEATCPY \ assert(to); assert(from) @@ -6674,12 +6662,30 @@ PERL_STATIC_INLINE const char * S_get_regex_charset_name(const U32 flags, STRLEN #endif #endif #if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) +#ifndef PERL_NO_INLINE_FUNCTIONS +PERL_STATIC_INLINE bool Perl_check_regnode_after(pTHX_ const regnode* p, const STRLEN extra) + __attribute__warn_unused_result__; +#define PERL_ARGS_ASSERT_CHECK_REGNODE_AFTER +#endif + PERL_CALLCONV int Perl_re_printf(pTHX_ const char *fmt, ...) __attribute__visibility__("hidden") __attribute__format__(__printf__,pTHX_1,pTHX_2); #define PERL_ARGS_ASSERT_RE_PRINTF \ assert(fmt) +#ifndef PERL_NO_INLINE_FUNCTIONS +PERL_STATIC_INLINE regnode* Perl_regnext(pTHX_ const regnode* p) + __attribute__warn_unused_result__; +#define PERL_ARGS_ASSERT_REGNEXT +#endif + +#ifndef PERL_NO_INLINE_FUNCTIONS +PERL_STATIC_INLINE regnode* Perl_regnode_after(pTHX_ const regnode* p, bool varies) + __attribute__warn_unused_result__; +#define PERL_ARGS_ASSERT_REGNODE_AFTER +#endif + PERL_CALLCONV void Perl_regprop(pTHX_ const regexp *prog, SV* sv, const regnode* o, const regmatch_info *reginfo, const RExC_state_t *pRExC_state) __attribute__visibility__("hidden"); #define PERL_ARGS_ASSERT_REGPROP \ @@ -3806,9 +3806,9 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, } trie->prefixlen = (state-1); if (str) { - regnode *n = convert+NODE_SZ_STR(convert); - assert( NODE_SZ_STR(convert) <= U16_MAX ); - NEXT_OFF(convert) = (U16)(NODE_SZ_STR(convert)); + regnode *n = REGNODE_AFTER(convert); + assert( n - convert <= U16_MAX ); + NEXT_OFF(convert) = n - convert; trie->startstate = state; trie->minlen -= (state - 1); trie->maxlen -= (state - 1); @@ -4227,7 +4227,7 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan, regnode *n = regnext(scan); U32 stringok = 1; - regnode *next = scan + NODE_SZ_STR(scan); + regnode *next = REGNODE_AFTER_varies(scan); U32 merged = 0; U32 stopnow = 0; #ifdef DEBUGGING @@ -4380,10 +4380,10 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan, DEBUG_PEEP("merg", n, depth, 0); merged++; + next = REGNODE_AFTER_varies(n); NEXT_OFF(scan) += NEXT_OFF(n); assert( ( STR_LEN(scan) + STR_LEN(n) ) < 256 ); setSTR_LEN(scan, (U8)(STR_LEN(scan) + STR_LEN(n))); - next = n + NODE_SZ_STR(n); /* Now we can overwrite *n : */ Move(STRING(n), STRING(scan) + oldl, STR_LEN(n), char); #ifdef DEBUGGING @@ -4612,7 +4612,7 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan, #ifdef DEBUGGING /* Allow dumping but overwriting the collection of skipped * ops and/or strings with fake optimized ops */ - n = scan + NODE_SZ_STR(scan); + n = REGNODE_AFTER_varies(scan); while (n <= stop) { OP(n) = OPTIMIZED; FLAGS(n) = 0; @@ -20656,7 +20656,7 @@ S_optimize_regclass(pTHX_ Copy(low_utf8, /* Add the common bytes */ ((struct regnode_anyofhs *) REGNODE_p(*ret))->string, len, U8); - RExC_emit += NODE_SZ_STR(REGNODE_p(*ret)); + RExC_emit = REGNODE_OFFSET(REGNODE_AFTER_varies(REGNODE_p(*ret))); set_ANYOF_arg(pRExC_state, REGNODE_p(*ret), cp_list, NULL, only_utf8_locale_list); return op; @@ -22940,64 +22940,6 @@ Perl_regdupe_internal(pTHX_ REGEXP * const rx, CLONE_PARAMS *param) #endif /* USE_ITHREADS */ -#ifndef PERL_IN_XSUB_RE - -/* - - regnext - dig the "next" pointer out of a node - */ -regnode * -Perl_regnext(pTHX_ regnode *p) -{ - I32 offset; - - if (!p) - return(NULL); - - if (OP(p) > REGNODE_MAX) { /* regnode.type is unsigned */ - Perl_croak(aTHX_ "Corrupted regexp opcode %d > %d", - (int)OP(p), (int)REGNODE_MAX); - } - - offset = (PL_regnode_off_by_arg[OP(p)] ? ARG(p) : NEXT_OFF(p)); - if (offset == 0) - return(NULL); - - return(p+offset); -} - -/* - - regnode_after - find the node physically following p in memory, - taking into account the size of p as determined by OP(p), our - sizing data, and possibly the STR_SZ() macro. - */ -regnode * -Perl_regnode_after(pTHX_ const regnode *p) -{ - assert(p); - const U8 op = OP(p); - assert(op < REGNODE_MAX); - const regnode *ret = p + NODE_STEP_REGNODE + PL_regnode_arg_len[op]; - if (PL_regnode_arg_len_varies[op]) - ret += STR_SZ(STR_LEN(p)); - return (regnode *)ret; -} - -/* validate that the passed in node and extra length would match that - * returned by regnode_after() */ -bool -Perl_check_regnode_after(pTHX_ const regnode *p, const STRLEN extra) -{ - const regnode *nextoper = regnode_after((regnode *)p,FALSE); - /* this should be the ONLY place that REGNODE_AFTER_PLUS is used outside - * of regcomp.h */ - const regnode *other= REGNODE_AFTER_PLUS(p, extra); - if (nextoper != other) { - return FALSE; - } - return TRUE; -} -#endif - STATIC void S_re_croak(pTHX_ bool utf8, const char* pat,...) { @@ -23715,7 +23657,7 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node, if (op == CLOSE || op == SRCLOSE || op == WHILEM) indent--; next = regnext((regnode *)node); - const regnode *after = regnode_after((regnode *)node); + const regnode *after = regnode_after((regnode *)node,0); /* Where, what. */ if (op == OPTIMIZED) { @@ -23819,7 +23761,7 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node, } else if (PL_regnode_kind[op] == EXACT || op == ANYOFHs) { /* Literal string, where present. */ - node += NODE_SZ_STR(node); /* NOT REGNODE_AFTER! */ + node = (const regnode *)REGNODE_AFTER_varies(node); } else { node = REGNODE_AFTER_opcode(node,op); @@ -406,14 +406,9 @@ struct regnode_ssc { #define OPERAND(p) STRING(p) /* The number of (smallest) regnode equivalents that a string of length l bytes - * occupies */ + * occupies - Used by the REGNODE_AFTER() macros and functions. */ #define STR_SZ(l) (((l) + sizeof(regnode) - 1) / sizeof(regnode)) -/* The number of (smallest) regnode equivalents that the node 'p' which uses - * 'struct regnode_string' occupies. (These are EXACTish nodes and a few - * others.) */ -#define NODE_SZ_STR(p) (STR_SZ(STR_LEN(p)) + 1 + PL_regnode_arg_len[(p)->type]) - #define setSTR_LEN(p,v) \ STMT_START{ \ if (OP(p) == LEXACT || OP(p) == LEXACT_REQ8) \ @@ -428,8 +423,6 @@ struct regnode_ssc { #undef NODE_ALIGN #undef ARG_LOC -#undef REGNODE_AFTER -#undef REGNODE_BEFORE #define NODE_ALIGN(node) #define ARG_LOC(p) (((struct regnode_1 *)p)->arg1) @@ -470,7 +463,7 @@ struct regnode_ssc { * */ #define REGNODE_AFTER_PLUS(p,extra) ((p) + NODE_STEP_REGNODE + (extra)) -/* under DEBUGGING we check that all REGNODE_AFTER style macro did the +/* under DEBUGGING we check that all REGNODE_AFTER optimized macros did the * same thing that Perl_regnode_after() would have done. Note that when * not compiled under DEBUGGING the assert_() macro is empty. Thus we * don't have to implement different versions for DEBUGGING and not DEBUGGING, @@ -479,19 +472,19 @@ struct regnode_ssc { #define REGNODE_AFTER_PLUS_DEBUG(p,extra) \ (assert_(check_regnode_after(p,extra)) REGNODE_AFTER_PLUS((p),(extra))) -/* try to avoid using either of the following two directly. They exist for legacy - * purposes only */ -#define REGNODE_AFTER_plus(p,extra) REGNODE_AFTER_PLUS_DEBUG((p),(extra)) -#define REGNODE_AFTER(p) REGNODE_AFTER_plus((p),0) /* find the regnode after this p by using the opcode we previously extracted * with OP(p) */ -#define REGNODE_AFTER_opcode(p,op) REGNODE_AFTER_plus((p),PL_regnode_arg_len[op]) -/* find the regnode after this p by using OP(p) to find the regnode type of p */ -#define REGNODE_AFTER_dynamic(p) REGNODE_AFTER_opcode((p),OP(p)) +#define REGNODE_AFTER_opcode(p,op) REGNODE_AFTER_PLUS_DEBUG((p),PL_regnode_arg_len[op]) + /* find the regnode after this p by using the size of the struct associated with + * the opcode for p. use this when you *know* that p is pointer to a given type*/ +#define REGNODE_AFTER_type(p,t) REGNODE_AFTER_PLUS_DEBUG((p),EXTRA_SIZE(t)) /* find the regnode after this p by using OP(p) to find the regnode type of p */ -#define REGNODE_AFTER(p) regnode_after(p) +#define REGNODE_AFTER_varies(p) regnode_after(p,TRUE) + +/* find the regnode after this p by using OP(p) to find the regnode type of p */ +#define REGNODE_AFTER(p) regnode_after(p,FALSE) /* REGNODE_BEFORE() is trickier to deal with in terms of validation, execution. @@ -1375,6 +1368,10 @@ typedef enum { # define GET_REGCLASS_AUX_DATA(a,b,c,d,e,f) get_re_gclass_aux_data(a,b,c,d,e,f) #endif +#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) +#include "reginline.h" +#endif + #endif /* PERL_REGCOMP_H_ */ /* diff --git a/reginline.h b/reginline.h new file mode 100644 index 0000000000..1d1d4ef80d --- /dev/null +++ b/reginline.h @@ -0,0 +1,64 @@ + +#ifndef PERL_REGINLINE_H + +/* + - regnext - dig the "next" pointer out of a node + */ +PERL_STATIC_INLINE +regnode * +Perl_regnext(pTHX_ const regnode *p) +{ + I32 offset; + + if (!p) + return(NULL); + + if (OP(p) > REGNODE_MAX) { /* regnode.type is unsigned */ + Perl_croak(aTHX_ "Corrupted regexp opcode %d > %d", + (int)OP(p), (int)REGNODE_MAX); + } + + offset = (PL_regnode_off_by_arg[OP(p)] ? ARG(p) : NEXT_OFF(p)); + if (offset == 0) + return(NULL); + + return(regnode *)(p+offset); +} + +/* + - regnode_after - find the node physically following p in memory, + taking into account the size of p as determined by OP(p), our + sizing data, and possibly the STR_SZ() macro. + */ +PERL_STATIC_INLINE +regnode * +Perl_regnode_after(pTHX_ const regnode *p, const bool varies) +{ + assert(p); + const U8 op = OP(p); + assert(op < REGNODE_MAX); + const regnode *ret = p + NODE_STEP_REGNODE + PL_regnode_arg_len[op]; + if (varies || PL_regnode_arg_len_varies[op]) + ret += STR_SZ(STR_LEN(p)); + return (regnode *)ret; +} + +/* validate that the passed in node and extra length would match that + * returned by regnode_after() */ +PERL_STATIC_INLINE +bool +Perl_check_regnode_after(pTHX_ const regnode *p, const STRLEN extra) +{ + const regnode *nextoper = regnode_after((regnode *)p,FALSE); + const regnode *other = REGNODE_AFTER_PLUS(p, extra); + if (nextoper != other) { + return FALSE; + } + return TRUE; +} + +#define PERL_REGINLINE_H +#endif +/* + * ex: set ts=8 sts=4 sw=4 et: + */ |