summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKarl Williamson <public@khwilliamson.com>2013-09-22 20:43:02 -0600
committerKarl Williamson <public@khwilliamson.com>2013-09-24 11:36:19 -0600
commitfb38762fa113a105b623d0eb7681d2cc03b0c161 (patch)
tree856ded20e87ead281fe4e4f6ee428ebed2e09a7f
parent1867fb0b42a19559a61e89d01bda75f34e36b66a (diff)
downloadperl-fb38762fa113a105b623d0eb7681d2cc03b0c161.tar.gz
regcomp.c: Add some static functions
This commit adds some functions that are currently unused, but will be used in a future commit. This commit is essentially to make the differences smaller in that commit, as 'diff' is getting confused and not outputting the logical differences. The functions are added in a block at the beginning of the file to avoid the 'diff' issues. A later white-space only commit will move them to more appropriate positions.
-rw-r--r--embed.fnc17
-rw-r--r--embed.h9
-rw-r--r--proto.h51
-rw-r--r--regcomp.c221
-rw-r--r--regcomp.h7
5 files changed, 305 insertions, 0 deletions
diff --git a/embed.fnc b/embed.fnc
index bb0805f6c4..ec203f9c1f 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -2059,12 +2059,29 @@ Esn |void |ssc_anything |NN const RExC_state_t *pRExC_state \
EsRn |int |ssc_is_anything|NN const regnode_ssc *ssc
Esn |void |ssc_init |NN const RExC_state_t *pRExC_state \
|NN regnode_ssc *ssc
+EsR |int |ssc_is_cp_posixl_init|NN const RExC_state_t *pRExC_state \
+ |NN const regnode_ssc *ssc
Es |void |ssc_and |NN const RExC_state_t *pRExC_state \
|NN regnode_ssc *ssc \
|NN const regnode_ssc *and_with
+Esn |void |ssc_flags_and |NN regnode_ssc *ssc|const U8 and_with
Esn |void |ssc_or |NN const RExC_state_t *pRExC_state \
|NN regnode_ssc *ssc \
|NN const regnode_ssc *or_with
+Es |SV* |get_ANYOF_cp_list_for_ssc \
+ |NN const RExC_state_t *pRExC_state \
+ |NN const regnode_charclass_posixl* const node
+Ei |void |ssc_intersection|NN regnode_ssc *ssc \
+ |NN SV* const invlist|const bool invert_2nd
+Ei |void |ssc_union |NN regnode_ssc *ssc \
+ |NN SV* const invlist|const bool invert_2nd
+Ei |void |ssc_add_range |NN regnode_ssc *ssc \
+ |UV const start|UV const end
+Ei |void |ssc_cp_and |NN regnode_ssc *ssc \
+ |UV const cp
+Ei |void |ssc_clear_locale|NN regnode_ssc *ssc
+Es |void |ssc_finalize |NN RExC_state_t *pRExC_state \
+ |NN regnode_ssc *ssc
Es |SSize_t|study_chunk |NN RExC_state_t *pRExC_state \
|NN regnode **scanp|NN SSize_t *minlenp \
|NN SSize_t *deltap|NN regnode *last \
diff --git a/embed.h b/embed.h
index 45d9f89767..fca8736feb 100644
--- a/embed.h
+++ b/embed.h
@@ -903,6 +903,7 @@
#define alloc_maybe_populate_EXACT(a,b,c,d,e) S_alloc_maybe_populate_EXACT(aTHX_ a,b,c,d,e)
#define compute_EXACTish(a) S_compute_EXACTish(aTHX_ a)
#define could_it_be_a_POSIX_class(a) S_could_it_be_a_POSIX_class(aTHX_ a)
+#define get_ANYOF_cp_list_for_ssc(a,b) S_get_ANYOF_cp_list_for_ssc(aTHX_ a,b)
#define get_invlist_iter_addr(a) S_get_invlist_iter_addr(aTHX_ a)
#define get_invlist_previous_index_addr(a) S_get_invlist_previous_index_addr(aTHX_ a)
#define grok_bslash_N(a,b,c,d,e,f,g) S_grok_bslash_N(aTHX_ a,b,c,d,e,f,g)
@@ -944,11 +945,19 @@
#define regwhite S_regwhite
#define scan_commit(a,b,c,d) S_scan_commit(aTHX_ a,b,c,d)
#define set_ANYOF_arg(a,b,c,d,e,f) S_set_ANYOF_arg(aTHX_ a,b,c,d,e,f)
+#define ssc_add_range(a,b,c) S_ssc_add_range(aTHX_ a,b,c)
#define ssc_and(a,b,c) S_ssc_and(aTHX_ a,b,c)
#define ssc_anything S_ssc_anything
+#define ssc_clear_locale(a) S_ssc_clear_locale(aTHX_ a)
+#define ssc_cp_and(a,b) S_ssc_cp_and(aTHX_ a,b)
+#define ssc_finalize(a,b) S_ssc_finalize(aTHX_ a,b)
+#define ssc_flags_and S_ssc_flags_and
#define ssc_init S_ssc_init
+#define ssc_intersection(a,b,c) S_ssc_intersection(aTHX_ a,b,c)
#define ssc_is_anything S_ssc_is_anything
+#define ssc_is_cp_posixl_init(a,b) S_ssc_is_cp_posixl_init(aTHX_ a,b)
#define ssc_or S_ssc_or
+#define ssc_union(a,b,c) S_ssc_union(aTHX_ a,b,c)
#define study_chunk(a,b,c,d,e,f,g,h,i,j,k) S_study_chunk(aTHX_ a,b,c,d,e,f,g,h,i,j,k)
# endif
# if defined(PERL_IN_REGCOMP_C) || defined (PERL_IN_DUMP_C)
diff --git a/proto.h b/proto.h
index 91949484fb..568cdf733c 100644
--- a/proto.h
+++ b/proto.h
@@ -6557,6 +6557,12 @@ STATIC bool S_could_it_be_a_POSIX_class(pTHX_ RExC_state_t *pRExC_state)
#define PERL_ARGS_ASSERT_COULD_IT_BE_A_POSIX_CLASS \
assert(pRExC_state)
+STATIC SV* S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state, const regnode_charclass_posixl* const node)
+ __attribute__nonnull__(pTHX_1)
+ __attribute__nonnull__(pTHX_2);
+#define PERL_ARGS_ASSERT_GET_ANYOF_CP_LIST_FOR_SSC \
+ assert(pRExC_state); assert(node)
+
PERL_STATIC_INLINE STRLEN* S_get_invlist_iter_addr(pTHX_ SV* invlist)
__attribute__warn_unused_result__
__attribute__nonnull__(pTHX_1);
@@ -6809,6 +6815,11 @@ STATIC void S_set_ANYOF_arg(pTHX_ RExC_state_t* const pRExC_state, regnode* cons
#define PERL_ARGS_ASSERT_SET_ANYOF_ARG \
assert(pRExC_state); assert(node)
+PERL_STATIC_INLINE void S_ssc_add_range(pTHX_ regnode_ssc *ssc, UV const start, UV const end)
+ __attribute__nonnull__(pTHX_1);
+#define PERL_ARGS_ASSERT_SSC_ADD_RANGE \
+ assert(ssc)
+
STATIC void S_ssc_and(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc, const regnode_ssc *and_with)
__attribute__nonnull__(pTHX_1)
__attribute__nonnull__(pTHX_2)
@@ -6822,18 +6833,52 @@ STATIC void S_ssc_anything(const RExC_state_t *pRExC_state, regnode_ssc *ssc)
#define PERL_ARGS_ASSERT_SSC_ANYTHING \
assert(pRExC_state); assert(ssc)
+PERL_STATIC_INLINE void S_ssc_clear_locale(pTHX_ regnode_ssc *ssc)
+ __attribute__nonnull__(pTHX_1);
+#define PERL_ARGS_ASSERT_SSC_CLEAR_LOCALE \
+ assert(ssc)
+
+PERL_STATIC_INLINE void S_ssc_cp_and(pTHX_ regnode_ssc *ssc, UV const cp)
+ __attribute__nonnull__(pTHX_1);
+#define PERL_ARGS_ASSERT_SSC_CP_AND \
+ assert(ssc)
+
+STATIC void S_ssc_finalize(pTHX_ RExC_state_t *pRExC_state, regnode_ssc *ssc)
+ __attribute__nonnull__(pTHX_1)
+ __attribute__nonnull__(pTHX_2);
+#define PERL_ARGS_ASSERT_SSC_FINALIZE \
+ assert(pRExC_state); assert(ssc)
+
+STATIC void S_ssc_flags_and(regnode_ssc *ssc, const U8 and_with)
+ __attribute__nonnull__(1);
+#define PERL_ARGS_ASSERT_SSC_FLAGS_AND \
+ assert(ssc)
+
STATIC void S_ssc_init(const RExC_state_t *pRExC_state, regnode_ssc *ssc)
__attribute__nonnull__(1)
__attribute__nonnull__(2);
#define PERL_ARGS_ASSERT_SSC_INIT \
assert(pRExC_state); assert(ssc)
+PERL_STATIC_INLINE void S_ssc_intersection(pTHX_ regnode_ssc *ssc, SV* const invlist, const bool invert_2nd)
+ __attribute__nonnull__(pTHX_1)
+ __attribute__nonnull__(pTHX_2);
+#define PERL_ARGS_ASSERT_SSC_INTERSECTION \
+ assert(ssc); assert(invlist)
+
STATIC int S_ssc_is_anything(const regnode_ssc *ssc)
__attribute__warn_unused_result__
__attribute__nonnull__(1);
#define PERL_ARGS_ASSERT_SSC_IS_ANYTHING \
assert(ssc)
+STATIC int S_ssc_is_cp_posixl_init(pTHX_ const RExC_state_t *pRExC_state, const regnode_ssc *ssc)
+ __attribute__warn_unused_result__
+ __attribute__nonnull__(pTHX_1)
+ __attribute__nonnull__(pTHX_2);
+#define PERL_ARGS_ASSERT_SSC_IS_CP_POSIXL_INIT \
+ assert(pRExC_state); assert(ssc)
+
STATIC void S_ssc_or(const RExC_state_t *pRExC_state, regnode_ssc *ssc, const regnode_ssc *or_with)
__attribute__nonnull__(1)
__attribute__nonnull__(2)
@@ -6841,6 +6886,12 @@ STATIC void S_ssc_or(const RExC_state_t *pRExC_state, regnode_ssc *ssc, const re
#define PERL_ARGS_ASSERT_SSC_OR \
assert(pRExC_state); assert(ssc); assert(or_with)
+PERL_STATIC_INLINE void S_ssc_union(pTHX_ regnode_ssc *ssc, SV* const invlist, const bool invert_2nd)
+ __attribute__nonnull__(pTHX_1)
+ __attribute__nonnull__(pTHX_2);
+#define PERL_ARGS_ASSERT_SSC_UNION \
+ assert(ssc); assert(invlist)
+
STATIC SSize_t S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, SSize_t *minlenp, SSize_t *deltap, regnode *last, struct scan_data_t *data, I32 stopparen, U8* recursed, regnode_ssc *and_withp, U32 flags, U32 depth)
__attribute__nonnull__(pTHX_1)
__attribute__nonnull__(pTHX_2)
diff --git a/regcomp.c b/regcomp.c
index cade9624ea..ec24583f1f 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -259,6 +259,227 @@ struct RExC_state_t {
#define _invlist_intersection_complement_2nd(a, b, output) \
_invlist_intersection_maybe_complement_2nd(a, b, TRUE, output)
+STATIC void
+S_ssc_flags_and(regnode_ssc *ssc, const U8 and_with)
+{
+    /* AND the locale-related flag bits of 'and_with' into the flags of the
+     * SSC 'ssc'.  Only the bits in ANYOF_LOCALE_FLAGS take part:  every other
+     * bit of 'and_with' is ignored, and every other bit already set in 'ssc'
+     * comes through unchanged. */
+
+    /* The bits of 'ssc' that must survive untouched */
+    const U8 preserved = ANYOF_FLAGS(ssc) & ~ANYOF_LOCALE_FLAGS;
+
+    /* The only bits of 'and_with' that are allowed to have an effect */
+    const U8 locale_mask = and_with & ANYOF_LOCALE_FLAGS;
+
+    PERL_ARGS_ASSERT_SSC_FLAGS_AND;
+
+    ANYOF_FLAGS(ssc) = (ANYOF_FLAGS(ssc) & locale_mask) | preserved;
+}
+
+STATIC int
+S_ssc_is_cp_posixl_init(pTHX_ const RExC_state_t *pRExC_state,
+                        const regnode_ssc *ssc)
+{
+    /* Returns TRUE if the SSC 'ssc' is in its initial state with regard only
+     * to the list of code points matched, and to the locale posix classes
+     * (hence the remaining flags are not checked).  That means the first
+     * range of its inversion list is 0..UV_MAX and, when the pattern
+     * contains locale, that ANYOF_LOCALE and ANYOF_POSIXL are set with every
+     * posix class bit on. */
+
+    UV first, last;
+    bool matches_everything;
+
+    PERL_ARGS_ASSERT_SSC_IS_CP_POSIXL_INIT;
+
+    assert(OP(ssc) == ANYOF_SYNTHETIC);
+
+    /* Only the first range of the list needs looking at.  The iteration is
+     * closed again before any return */
+    invlist_iterinit(ssc->invlist);
+    matches_everything = invlist_iternext(ssc->invlist, &first, &last)
+                         && first == 0
+                         && last == UV_MAX;
+    invlist_iterfinish(ssc->invlist);
+
+    if (! matches_everything) {
+        return FALSE;
+    }
+
+    /* Without locale in the pattern, there is nothing further to verify */
+    if (! RExC_contains_locale) {
+        return TRUE;
+    }
+
+    return (ANYOF_FLAGS(ssc) & ANYOF_LOCALE)
+           && (ANYOF_FLAGS(ssc) & ANYOF_POSIXL)
+           && ANYOF_POSIXL_TEST_ALL_SET(ssc);
+}
+
+STATIC SV*
+S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state,
+ const regnode_charclass_posixl* const node)
+{
+ /* Returns a mortal inversion list defining which code points are matched
+ * by 'node', which is of type ANYOF. Handles complementing the result if
+ * appropriate. If some code points aren't knowable at this time, the
+ * returned list must, and will, contain every possible code point.
+ *
+ * 'pRExC_state' gives access to the compilation's auxiliary data (through
+ * RExC_rxi), which is where the out-of-bitmap information for 'node' is
+ * looked up, indexed by ARG(node).
+ *
+ * The list is assembled in this order: the out-of-bitmap code points (if
+ * any); minus the upper Latin1 ones when the node is inverted; plus the
+ * code points set in the bitmap; plus the ranges implied by the
+ * ANYOF_NON_UTF8_LATIN1_ALL and ANYOF_ABOVE_LATIN1_ALL flags; and finally
+ * the whole list is inverted if ANYOF_INVERT is set. The one exception
+ * is the early return for run-time-only data, which yields 0..UV_MAX
+ * without looking at the bitmap or at ANYOF_INVERT. */
+
+ SV* invlist = sv_2mortal(_new_invlist(0));
+ unsigned int i;
+ const U32 n = ARG(node);
+
+ PERL_ARGS_ASSERT_GET_ANYOF_CP_LIST_FOR_SSC;
+
+ /* Look at the data structure created by S_set_ANYOF_arg(): data[n] is a
+ * reference to an array, of which element [1] is tested for a
+ * compile-time swash, element [0] for things known only at run time, and
+ * element [2] is otherwise taken to be the node's inversion list. An
+ * argument of ANYOF_NONBITMAP_EMPTY means there is no such data, and the
+ * list stays empty at this point */
+ if (n != ANYOF_NONBITMAP_EMPTY) {
+ SV * const rv = MUTABLE_SV(RExC_rxi->data->data[n]);
+ AV * const av = MUTABLE_AV(SvRV(rv));
+ SV **const ary = AvARRAY(av);
+ assert(RExC_rxi->data->what[n] == 's');
+
+ if (ary[1] && ary[1] != &PL_sv_undef) { /* Has compile-time swash */
+ invlist = sv_2mortal(invlist_clone(_get_swash_invlist(ary[1])));
+ }
+ else if (ary[0] && ary[0] != &PL_sv_undef) {
+
+ /* Here, no compile-time swash, and there are things that won't be
+ * known until runtime -- we have to assume it could be anything.
+ * The list is still the empty mortal one created above, so
+ * adding the full range to it gives "matches everything" */
+ return _add_range_to_invlist(invlist, 0, UV_MAX);
+ }
+ else {
+
+ /* Here no compile-time swash, and no run-time only data. Use the
+ * node's inversion list. A mortal clone is taken so that the
+ * changes made below don't affect the node's own copy */
+ invlist = sv_2mortal(invlist_clone(ary[2]));
+ }
+ }
+
+ /* An ANYOF node contains a bitmap for the first 256 code points, and an
+ * inversion list for the others, but if there are code points that should
+ * match only conditionally on the target string being UTF-8, those are
+ * placed in the inversion list, and not the bitmap. Since there are
+ * circumstances under which they could match, they are included in the
+ * SSC. But if the ANYOF node is to be inverted, we have to exclude them
+ * here, so that when we invert below, the end result actually does include
+ * them. (Think about "\xe0" =~ /[^\xc0]/di;). We have to do this here
+ * before we add the unconditionally matched code points */
+ if (ANYOF_FLAGS(node) & ANYOF_INVERT) {
+ _invlist_intersection_complement_2nd(invlist,
+ PL_UpperLatin1,
+ &invlist);
+ }
+
+ /* Add in the points from the bit map, which covers code points 0..255 */
+ for (i = 0; i < 256; i++) {
+ if (ANYOF_BITMAP_TEST(node, i)) {
+ invlist = add_cp_to_invlist(invlist, i);
+ }
+ }
+
+ /* If this can match all upper Latin1 code points, have to add them
+ * as well */
+ if (ANYOF_FLAGS(node) & ANYOF_NON_UTF8_LATIN1_ALL) {
+ _invlist_union(invlist, PL_UpperLatin1, &invlist);
+ }
+
+ /* Similarly for these: everything from 256 up to the maximum code point */
+ if (ANYOF_FLAGS(node) & ANYOF_ABOVE_LATIN1_ALL) {
+ invlist = _add_range_to_invlist(invlist, 256, UV_MAX);
+ }
+
+ if (ANYOF_FLAGS(node) & ANYOF_INVERT) {
+ _invlist_invert(invlist);
+ }
+
+ return invlist;
+}
+
+PERL_STATIC_INLINE void
+S_ssc_union(pTHX_ regnode_ssc *ssc, SV* const invlist, const bool invert_2nd)
+{
+    /* Replace the inversion list of the SSC 'ssc' by its union with
+     * 'invlist' or, if 'invert_2nd' is TRUE, with the complement of
+     * 'invlist'.  'invlist' itself is only read.  The parameter is spelled
+     * 'invert_2nd' to agree with the declaration in embed.fnc and proto.h */
+
+    PERL_ARGS_ASSERT_SSC_UNION;
+
+    assert(OP(ssc) == ANYOF_SYNTHETIC);
+
+    /* The SSC's list is both the first operand and the output */
+    _invlist_union_maybe_complement_2nd(ssc->invlist,
+                                        invlist,
+                                        invert_2nd,
+                                        &ssc->invlist);
+}
+
+PERL_STATIC_INLINE void
+S_ssc_intersection(pTHX_ regnode_ssc *ssc,
+                         SV* const invlist,
+                         const bool invert_2nd)
+{
+    /* Replace the inversion list of the SSC 'ssc' by its intersection with
+     * 'invlist' or, if 'invert_2nd' is TRUE, with the complement of
+     * 'invlist'.  'invlist' itself is only read.  The parameter is spelled
+     * 'invert_2nd' to agree with the declaration in embed.fnc and proto.h */
+
+    PERL_ARGS_ASSERT_SSC_INTERSECTION;
+
+    assert(OP(ssc) == ANYOF_SYNTHETIC);
+
+    /* The SSC's list is both the first operand and the output */
+    _invlist_intersection_maybe_complement_2nd(ssc->invlist,
+                                               invlist,
+                                               invert_2nd,
+                                               &ssc->invlist);
+}
+
+PERL_STATIC_INLINE void
+S_ssc_add_range(pTHX_ regnode_ssc *ssc, const UV start, const UV end)
+{
+    /* Add the range of code points 'start'..'end' to the ones matched by the
+     * SSC 'ssc' */
+
+    SV* widened;
+
+    PERL_ARGS_ASSERT_SSC_ADD_RANGE;
+
+    assert(OP(ssc) == ANYOF_SYNTHETIC);
+
+    /* The helper hands back the list to use from now on, so that is what
+     * gets stored in the SSC */
+    widened = _add_range_to_invlist(ssc->invlist, start, end);
+    ssc->invlist = widened;
+}
+
+PERL_STATIC_INLINE void
+S_ssc_cp_and(pTHX_ regnode_ssc *ssc, const UV cp)
+{
+    /* Intersect the SSC 'ssc' with the single code point 'cp', by way of a
+     * temporary inversion list that holds just 'cp' */
+
+    SV* only_cp = _new_invlist(2);
+
+    PERL_ARGS_ASSERT_SSC_CP_AND;
+
+    assert(OP(ssc) == ANYOF_SYNTHETIC);
+
+    only_cp = add_cp_to_invlist(only_cp, cp);
+
+    /* FALSE: intersect with the list as it is, not with its complement */
+    ssc_intersection(ssc, only_cp, FALSE);
+
+    /* The temporary list was created here, so it is released here */
+    SvREFCNT_dec_NN(only_cp);
+}
+
+PERL_STATIC_INLINE void
+S_ssc_clear_locale(pTHX_ regnode_ssc *ssc)
+{
+    /* Make the SSC 'ssc' match nothing that is locale-related:  its posix
+     * locale classes are zeroed, and each of the ANYOF_LOCALE_FLAGS bits is
+     * turned off in its flags.  The other flag bits are not affected */
+
+    PERL_ARGS_ASSERT_SSC_CLEAR_LOCALE;
+
+    assert(OP(ssc) == ANYOF_SYNTHETIC);
+
+    ANYOF_POSIXL_ZERO(ssc);
+    ANYOF_FLAGS(ssc) = ANYOF_FLAGS(ssc) & ~ANYOF_LOCALE_FLAGS;
+}
+
+STATIC void
+S_ssc_finalize(pTHX_ RExC_state_t *pRExC_state, regnode_ssc *ssc)
+{
+    /* The SSC's working inversion list is mortal, so it can't be what is kept
+     * long-term.  Here a clone is made of it, and stored in the way a regular
+     * ANYOF node stores things, with the first 256 code points in a bit
+     * map */
+
+    regnode * const node = (regnode *) ssc;
+    SV* lasting_list = invlist_clone(ssc->invlist);
+
+    PERL_ARGS_ASSERT_SSC_FINALIZE;
+
+    assert(OP(ssc) == ANYOF_SYNTHETIC);
+
+    /* The list is passed by address, so this call is able to change it
+     * before it is attached to the node */
+    populate_ANYOF_from_invlist(node, &lasting_list);
+
+    set_ANYOF_arg(pRExC_state, node, lasting_list, NULL, NULL, FALSE);
+
+    /* A finished SSC can be marked as locale only if the pattern has locale */
+    assert(! (ANYOF_FLAGS(ssc) & ANYOF_LOCALE) || RExC_contains_locale);
+}
+
/* About scan_data_t.
During optimisation we recurse through the regexp program performing
diff --git a/regcomp.h b/regcomp.h
index 0eb989fa7a..f0153fc12c 100644
--- a/regcomp.h
+++ b/regcomp.h
@@ -365,6 +365,9 @@ struct regnode_ssc {
|ANYOF_LOC_FOLD \
|ANYOF_POSIXL \
|ANYOF_NONBITMAP_NON_UTF8)
+#define ANYOF_LOCALE_FLAGS (ANYOF_LOCALE \
+ |ANYOF_LOC_FOLD \
+ |ANYOF_POSIXL)
/* Character classes for node->classflags of ANYOF */
/* Should be synchronized with a table in regprop() */
@@ -472,6 +475,10 @@ struct regnode_ssc {
&& (((regnode_charclass_posixl*)(p))->classflags))
#define ANYOF_CLASS_TEST_ANY_SET(p) ANYOF_POSIXL_TEST_ANY_SET(p)
+#define ANYOF_POSIXL_TEST_ALL_SET(p) \
+ ((ANYOF_FLAGS(p) & ANYOF_POSIXL) \
+ && ((regnode_charclass_posixl*) (p))->classflags == ((1U << ((ANYOF_POSIXL_MAX) - 1))) - 1)
+
#define ANYOF_POSIXL_OR(source, dest) STMT_START { (dest)->classflags |= (source)->classflags ; } STMT_END
#define ANYOF_CLASS_OR(source, dest) ANYOF_POSIXL_OR((source), (dest))