diff options
author | Karl Williamson <public@khwilliamson.com> | 2013-09-22 20:43:02 -0600 |
---|---|---|
committer | Karl Williamson <public@khwilliamson.com> | 2013-09-24 11:36:19 -0600 |
commit | fb38762fa113a105b623d0eb7681d2cc03b0c161 (patch) | |
tree | 856ded20e87ead281fe4e4f6ee428ebed2e09a7f | |
parent | 1867fb0b42a19559a61e89d01bda75f34e36b66a (diff) | |
download | perl-fb38762fa113a105b623d0eb7681d2cc03b0c161.tar.gz |
regcomp.c: Add some static functions
This commit adds some functions that are currently unused, but will be
used in a future commit. Its purpose is essentially to make the
differences shown for that future commit smaller, as 'diff' was getting
confused and not outputting the logical differences. The functions are
added in a block at the beginning of the file to avoid the 'diff' issues.
A later white-space-only commit will move them to more appropriate positions.
-rw-r--r-- | embed.fnc | 17 | ||||
-rw-r--r-- | embed.h | 9 | ||||
-rw-r--r-- | proto.h | 51 | ||||
-rw-r--r-- | regcomp.c | 221 | ||||
-rw-r--r-- | regcomp.h | 7 |
5 files changed, 305 insertions, 0 deletions
@@ -2059,12 +2059,29 @@ Esn |void |ssc_anything |NN const RExC_state_t *pRExC_state \ EsRn |int |ssc_is_anything|NN const regnode_ssc *ssc Esn |void |ssc_init |NN const RExC_state_t *pRExC_state \ |NN regnode_ssc *ssc +EsR |int |ssc_is_cp_posixl_init|NN const RExC_state_t *pRExC_state \ + |NN const regnode_ssc *ssc Es |void |ssc_and |NN const RExC_state_t *pRExC_state \ |NN regnode_ssc *ssc \ |NN const regnode_ssc *and_with +Esn |void |ssc_flags_and |NN regnode_ssc *ssc|const U8 and_with Esn |void |ssc_or |NN const RExC_state_t *pRExC_state \ |NN regnode_ssc *ssc \ |NN const regnode_ssc *or_with +Es |SV* |get_ANYOF_cp_list_for_ssc \ + |NN const RExC_state_t *pRExC_state \ + |NN const regnode_charclass_posixl* const node +Ei |void |ssc_intersection|NN regnode_ssc *ssc \ + |NN SV* const invlist|const bool invert_2nd +Ei |void |ssc_union |NN regnode_ssc *ssc \ + |NN SV* const invlist|const bool invert_2nd +Ei |void |ssc_add_range |NN regnode_ssc *ssc \ + |UV const start|UV const end +Ei |void |ssc_cp_and |NN regnode_ssc *ssc \ + |UV const cp +Ei |void |ssc_clear_locale|NN regnode_ssc *ssc +Es |void |ssc_finalize |NN RExC_state_t *pRExC_state \ + |NN regnode_ssc *ssc Es |SSize_t|study_chunk |NN RExC_state_t *pRExC_state \ |NN regnode **scanp|NN SSize_t *minlenp \ |NN SSize_t *deltap|NN regnode *last \ @@ -903,6 +903,7 @@ #define alloc_maybe_populate_EXACT(a,b,c,d,e) S_alloc_maybe_populate_EXACT(aTHX_ a,b,c,d,e) #define compute_EXACTish(a) S_compute_EXACTish(aTHX_ a) #define could_it_be_a_POSIX_class(a) S_could_it_be_a_POSIX_class(aTHX_ a) +#define get_ANYOF_cp_list_for_ssc(a,b) S_get_ANYOF_cp_list_for_ssc(aTHX_ a,b) #define get_invlist_iter_addr(a) S_get_invlist_iter_addr(aTHX_ a) #define get_invlist_previous_index_addr(a) S_get_invlist_previous_index_addr(aTHX_ a) #define grok_bslash_N(a,b,c,d,e,f,g) S_grok_bslash_N(aTHX_ a,b,c,d,e,f,g) @@ -944,11 +945,19 @@ #define regwhite S_regwhite #define scan_commit(a,b,c,d) S_scan_commit(aTHX_ a,b,c,d) #define 
set_ANYOF_arg(a,b,c,d,e,f) S_set_ANYOF_arg(aTHX_ a,b,c,d,e,f) +#define ssc_add_range(a,b,c) S_ssc_add_range(aTHX_ a,b,c) #define ssc_and(a,b,c) S_ssc_and(aTHX_ a,b,c) #define ssc_anything S_ssc_anything +#define ssc_clear_locale(a) S_ssc_clear_locale(aTHX_ a) +#define ssc_cp_and(a,b) S_ssc_cp_and(aTHX_ a,b) +#define ssc_finalize(a,b) S_ssc_finalize(aTHX_ a,b) +#define ssc_flags_and S_ssc_flags_and #define ssc_init S_ssc_init +#define ssc_intersection(a,b,c) S_ssc_intersection(aTHX_ a,b,c) #define ssc_is_anything S_ssc_is_anything +#define ssc_is_cp_posixl_init(a,b) S_ssc_is_cp_posixl_init(aTHX_ a,b) #define ssc_or S_ssc_or +#define ssc_union(a,b,c) S_ssc_union(aTHX_ a,b,c) #define study_chunk(a,b,c,d,e,f,g,h,i,j,k) S_study_chunk(aTHX_ a,b,c,d,e,f,g,h,i,j,k) # endif # if defined(PERL_IN_REGCOMP_C) || defined (PERL_IN_DUMP_C) @@ -6557,6 +6557,12 @@ STATIC bool S_could_it_be_a_POSIX_class(pTHX_ RExC_state_t *pRExC_state) #define PERL_ARGS_ASSERT_COULD_IT_BE_A_POSIX_CLASS \ assert(pRExC_state) +STATIC SV* S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state, const regnode_charclass_posixl* const node) + __attribute__nonnull__(pTHX_1) + __attribute__nonnull__(pTHX_2); +#define PERL_ARGS_ASSERT_GET_ANYOF_CP_LIST_FOR_SSC \ + assert(pRExC_state); assert(node) + PERL_STATIC_INLINE STRLEN* S_get_invlist_iter_addr(pTHX_ SV* invlist) __attribute__warn_unused_result__ __attribute__nonnull__(pTHX_1); @@ -6809,6 +6815,11 @@ STATIC void S_set_ANYOF_arg(pTHX_ RExC_state_t* const pRExC_state, regnode* cons #define PERL_ARGS_ASSERT_SET_ANYOF_ARG \ assert(pRExC_state); assert(node) +PERL_STATIC_INLINE void S_ssc_add_range(pTHX_ regnode_ssc *ssc, UV const start, UV const end) + __attribute__nonnull__(pTHX_1); +#define PERL_ARGS_ASSERT_SSC_ADD_RANGE \ + assert(ssc) + STATIC void S_ssc_and(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc, const regnode_ssc *and_with) __attribute__nonnull__(pTHX_1) __attribute__nonnull__(pTHX_2) @@ -6822,18 +6833,52 @@ STATIC void 
S_ssc_anything(const RExC_state_t *pRExC_state, regnode_ssc *ssc) #define PERL_ARGS_ASSERT_SSC_ANYTHING \ assert(pRExC_state); assert(ssc) +PERL_STATIC_INLINE void S_ssc_clear_locale(pTHX_ regnode_ssc *ssc) + __attribute__nonnull__(pTHX_1); +#define PERL_ARGS_ASSERT_SSC_CLEAR_LOCALE \ + assert(ssc) + +PERL_STATIC_INLINE void S_ssc_cp_and(pTHX_ regnode_ssc *ssc, UV const cp) + __attribute__nonnull__(pTHX_1); +#define PERL_ARGS_ASSERT_SSC_CP_AND \ + assert(ssc) + +STATIC void S_ssc_finalize(pTHX_ RExC_state_t *pRExC_state, regnode_ssc *ssc) + __attribute__nonnull__(pTHX_1) + __attribute__nonnull__(pTHX_2); +#define PERL_ARGS_ASSERT_SSC_FINALIZE \ + assert(pRExC_state); assert(ssc) + +STATIC void S_ssc_flags_and(regnode_ssc *ssc, const U8 and_with) + __attribute__nonnull__(1); +#define PERL_ARGS_ASSERT_SSC_FLAGS_AND \ + assert(ssc) + STATIC void S_ssc_init(const RExC_state_t *pRExC_state, regnode_ssc *ssc) __attribute__nonnull__(1) __attribute__nonnull__(2); #define PERL_ARGS_ASSERT_SSC_INIT \ assert(pRExC_state); assert(ssc) +PERL_STATIC_INLINE void S_ssc_intersection(pTHX_ regnode_ssc *ssc, SV* const invlist, const bool invert_2nd) + __attribute__nonnull__(pTHX_1) + __attribute__nonnull__(pTHX_2); +#define PERL_ARGS_ASSERT_SSC_INTERSECTION \ + assert(ssc); assert(invlist) + STATIC int S_ssc_is_anything(const regnode_ssc *ssc) __attribute__warn_unused_result__ __attribute__nonnull__(1); #define PERL_ARGS_ASSERT_SSC_IS_ANYTHING \ assert(ssc) +STATIC int S_ssc_is_cp_posixl_init(pTHX_ const RExC_state_t *pRExC_state, const regnode_ssc *ssc) + __attribute__warn_unused_result__ + __attribute__nonnull__(pTHX_1) + __attribute__nonnull__(pTHX_2); +#define PERL_ARGS_ASSERT_SSC_IS_CP_POSIXL_INIT \ + assert(pRExC_state); assert(ssc) + STATIC void S_ssc_or(const RExC_state_t *pRExC_state, regnode_ssc *ssc, const regnode_ssc *or_with) __attribute__nonnull__(1) __attribute__nonnull__(2) @@ -6841,6 +6886,12 @@ STATIC void S_ssc_or(const RExC_state_t *pRExC_state, regnode_ssc *ssc, 
const re #define PERL_ARGS_ASSERT_SSC_OR \ assert(pRExC_state); assert(ssc); assert(or_with) +PERL_STATIC_INLINE void S_ssc_union(pTHX_ regnode_ssc *ssc, SV* const invlist, const bool invert_2nd) + __attribute__nonnull__(pTHX_1) + __attribute__nonnull__(pTHX_2); +#define PERL_ARGS_ASSERT_SSC_UNION \ + assert(ssc); assert(invlist) + STATIC SSize_t S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, SSize_t *minlenp, SSize_t *deltap, regnode *last, struct scan_data_t *data, I32 stopparen, U8* recursed, regnode_ssc *and_withp, U32 flags, U32 depth) __attribute__nonnull__(pTHX_1) __attribute__nonnull__(pTHX_2) @@ -259,6 +259,227 @@ struct RExC_state_t { #define _invlist_intersection_complement_2nd(a, b, output) \ _invlist_intersection_maybe_complement_2nd(a, b, TRUE, output) +STATIC void +S_ssc_flags_and(regnode_ssc *ssc, const U8 and_with) +{ + /* Take the flags 'and_with' and accumulate them anded into the flags for + * the SSC 'ssc'. The non-SSC related flags in 'and_with' are ignored. */ + + const U8 ssc_only_flags = ANYOF_FLAGS(ssc) & ~ANYOF_LOCALE_FLAGS; + + PERL_ARGS_ASSERT_SSC_FLAGS_AND; + + /* Use just the SSC-related flags from 'and_with' */ + ANYOF_FLAGS(ssc) &= (and_with & ANYOF_LOCALE_FLAGS); + ANYOF_FLAGS(ssc) |= ssc_only_flags; +} + +STATIC int +S_ssc_is_cp_posixl_init(pTHX_ const RExC_state_t *pRExC_state, + const regnode_ssc *ssc) +{ + /* Returns TRUE if the SSC 'ssc' is in its initial state with regard only + * to the list of code points matched, and locale posix classes; hence does + * not check its flags) */ + + UV start, end; + bool ret; + + PERL_ARGS_ASSERT_SSC_IS_CP_POSIXL_INIT; + + assert(OP(ssc) == ANYOF_SYNTHETIC); + + invlist_iterinit(ssc->invlist); + ret = invlist_iternext(ssc->invlist, &start, &end) + && start == 0 + && end == UV_MAX; + + invlist_iterfinish(ssc->invlist); + + if (! ret) { + return FALSE; + } + + if (RExC_contains_locale) { + if (! (ANYOF_FLAGS(ssc) & ANYOF_LOCALE) + || ! (ANYOF_FLAGS(ssc) & ANYOF_POSIXL) + || ! 
ANYOF_POSIXL_TEST_ALL_SET(ssc)) + { + return FALSE; + } + } + + return TRUE; +} + +STATIC SV* +S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state, + const regnode_charclass_posixl* const node) +{ + /* Returns a mortal inversion list defining which code points are matched + * by 'node', which is of type ANYOF. Handles complementing the result if + * appropriate. If some code points aren't knowable at this time, the + * returned list must, and will, contain every possible code point. */ + + SV* invlist = sv_2mortal(_new_invlist(0)); + unsigned int i; + const U32 n = ARG(node); + + PERL_ARGS_ASSERT_GET_ANYOF_CP_LIST_FOR_SSC; + + /* Look at the data structure created by S_set_ANYOF_arg() */ + if (n != ANYOF_NONBITMAP_EMPTY) { + SV * const rv = MUTABLE_SV(RExC_rxi->data->data[n]); + AV * const av = MUTABLE_AV(SvRV(rv)); + SV **const ary = AvARRAY(av); + assert(RExC_rxi->data->what[n] == 's'); + + if (ary[1] && ary[1] != &PL_sv_undef) { /* Has compile-time swash */ + invlist = sv_2mortal(invlist_clone(_get_swash_invlist(ary[1]))); + } + else if (ary[0] && ary[0] != &PL_sv_undef) { + + /* Here, no compile-time swash, and there are things that won't be + * known until runtime -- we have to assume it could be anything */ + return _add_range_to_invlist(invlist, 0, UV_MAX); + } + else { + + /* Here no compile-time swash, and no run-time only data. Use the + * node's inversion list */ + invlist = sv_2mortal(invlist_clone(ary[2])); + } + } + + /* An ANYOF node contains a bitmap for the first 256 code points, and an + * inversion list for the others, but if there are code points that should + * match only conditionally on the target string being UTF-8, those are + * placed in the inversion list, and not the bitmap. Since there are + * circumstances under which they could match, they are included in the + * SSC. But if the ANYOF node is to be inverted, we have to exclude them + * here, so that when we invert below, the end result actually does include + * them. 
(Think about "\xe0" =~ /[^\xc0]/di;). We have to do this here + * before we add the unconditionally matched code points */ + if (ANYOF_FLAGS(node) & ANYOF_INVERT) { + _invlist_intersection_complement_2nd(invlist, + PL_UpperLatin1, + &invlist); + } + + /* Add in the points from the bit map */ + for (i = 0; i < 256; i++) { + if (ANYOF_BITMAP_TEST(node, i)) { + invlist = add_cp_to_invlist(invlist, i); + } + } + + /* If this can match all upper Latin1 code points, have to add them + * as well */ + if (ANYOF_FLAGS(node) & ANYOF_NON_UTF8_LATIN1_ALL) { + _invlist_union(invlist, PL_UpperLatin1, &invlist); + } + + /* Similarly for these */ + if (ANYOF_FLAGS(node) & ANYOF_ABOVE_LATIN1_ALL) { + invlist = _add_range_to_invlist(invlist, 256, UV_MAX); + } + + if (ANYOF_FLAGS(node) & ANYOF_INVERT) { + _invlist_invert(invlist); + } + + return invlist; +} + +PERL_STATIC_INLINE void +S_ssc_union(pTHX_ regnode_ssc *ssc, SV* const invlist, const bool invert2nd) +{ + PERL_ARGS_ASSERT_SSC_UNION; + + assert(OP(ssc) == ANYOF_SYNTHETIC); + + _invlist_union_maybe_complement_2nd(ssc->invlist, + invlist, + invert2nd, + &ssc->invlist); +} + +PERL_STATIC_INLINE void +S_ssc_intersection(pTHX_ regnode_ssc *ssc, + SV* const invlist, + const bool invert2nd) +{ + PERL_ARGS_ASSERT_SSC_INTERSECTION; + + assert(OP(ssc) == ANYOF_SYNTHETIC); + + _invlist_intersection_maybe_complement_2nd(ssc->invlist, + invlist, + invert2nd, + &ssc->invlist); +} + +PERL_STATIC_INLINE void +S_ssc_add_range(pTHX_ regnode_ssc *ssc, const UV start, const UV end) +{ + PERL_ARGS_ASSERT_SSC_ADD_RANGE; + + assert(OP(ssc) == ANYOF_SYNTHETIC); + + ssc->invlist = _add_range_to_invlist(ssc->invlist, start, end); +} + +PERL_STATIC_INLINE void +S_ssc_cp_and(pTHX_ regnode_ssc *ssc, const UV cp) +{ + /* AND just the single code point 'cp' into the SSC 'ssc' */ + + SV* cp_list = _new_invlist(2); + + PERL_ARGS_ASSERT_SSC_CP_AND; + + assert(OP(ssc) == ANYOF_SYNTHETIC); + + cp_list = add_cp_to_invlist(cp_list, cp); + ssc_intersection(ssc, 
cp_list, + FALSE /* Not inverted */ + ); + SvREFCNT_dec_NN(cp_list); +} + +PERL_STATIC_INLINE void +S_ssc_clear_locale(pTHX_ regnode_ssc *ssc) +{ + /* Set the SSC 'ssc' to not match any locale things */ + + PERL_ARGS_ASSERT_SSC_CLEAR_LOCALE; + + assert(OP(ssc) == ANYOF_SYNTHETIC); + + ANYOF_POSIXL_ZERO(ssc); + ANYOF_FLAGS(ssc) &= ~ANYOF_LOCALE_FLAGS; +} + +STATIC void +S_ssc_finalize(pTHX_ RExC_state_t *pRExC_state, regnode_ssc *ssc) +{ + /* The inversion list in the SSC is marked mortal; now we need a more + * permanent copy, which is stored the same way that is done in a regular + * ANYOF node, with the first 256 code points in a bit map */ + + SV* invlist = invlist_clone(ssc->invlist); + + PERL_ARGS_ASSERT_SSC_FINALIZE; + + assert(OP(ssc) == ANYOF_SYNTHETIC); + + populate_ANYOF_from_invlist( (regnode *) ssc, &invlist); + + set_ANYOF_arg(pRExC_state, (regnode *) ssc, invlist, NULL, NULL, FALSE); + + assert(! (ANYOF_FLAGS(ssc) & ANYOF_LOCALE) || RExC_contains_locale); +} + /* About scan_data_t. During optimisation we recurse through the regexp program performing @@ -365,6 +365,9 @@ struct regnode_ssc { |ANYOF_LOC_FOLD \ |ANYOF_POSIXL \ |ANYOF_NONBITMAP_NON_UTF8) +#define ANYOF_LOCALE_FLAGS (ANYOF_LOCALE \ + |ANYOF_LOC_FOLD \ + |ANYOF_POSIXL) /* Character classes for node->classflags of ANYOF */ /* Should be synchronized with a table in regprop() */ @@ -472,6 +475,10 @@ struct regnode_ssc { && (((regnode_charclass_posixl*)(p))->classflags)) #define ANYOF_CLASS_TEST_ANY_SET(p) ANYOF_POSIXL_TEST_ANY_SET(p) +#define ANYOF_POSIXL_TEST_ALL_SET(p) \ + ((ANYOF_FLAGS(p) & ANYOF_POSIXL) \ + && ((regnode_charclass_posixl*) (p))->classflags == ((1U << ((ANYOF_POSIXL_MAX) - 1))) - 1) + #define ANYOF_POSIXL_OR(source, dest) STMT_START { (dest)->classflags |= (source)->classflags ; } STMT_END #define ANYOF_CLASS_OR(source, dest) ANYOF_POSIXL_OR((source), (dest)) |