summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Karl Williamson <public@khwilliamson.com> 2013-02-11 15:23:40 -0700
committer: Karl Williamson <public@khwilliamson.com> 2013-07-03 19:21:16 -0600
commit: 2c3365de8c1168f115576a4976d067e3b911c490 (patch)
tree: 0d20a3928cee038e457071d03e5fa8404a3a256c
parent: 533c4e2f08b42d977e5004e823d4849f7473d2d0 (diff)
download: perl-2c3365de8c1168f115576a4976d067e3b911c490.tar.gz
regcomp.c: Make inversion lists SVt_PVLV
This is the 2nd step in separating the inversion list body from its header. This commit gives inversion lists the header of an SVt_PVLV, and repurposes one of that header's fields to hold the length of the inversion list. This is a temporary measure, in case binary compatibility is an issue. Future commits will create a new SV type just for inversion lists. SVt_PVLV was chosen because it has enough fields to accommodate all the header fields of an inversion list.
-rw-r--r-- embed.fnc | 2
-rw-r--r-- inline_invlist.c | 4
-rw-r--r-- proto.h | 2
-rw-r--r-- regcomp.c | 23
4 files changed, 19 insertions, 12 deletions
diff --git a/embed.fnc b/embed.fnc
index 8ad9a13aeb..d8a5c63783 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -1468,7 +1468,7 @@ EXp |SV* |_core_swash_init|NN const char* pkg|NN const char* name \
#endif
#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) || defined(PERL_IN_UTF8_C)
EXMpR |SV* |_invlist_contents|NN SV* const invlist
-EiMR |UV* |_get_invlist_len_addr |NN SV* invlist
+EiMR |STRLEN*|_get_invlist_len_addr |NN SV* invlist
EiMR |UV |_invlist_len |NN SV* const invlist
EMiR |bool |_invlist_contains_cp|NN SV* const invlist|const UV cp
EXpMR |IV |_invlist_search |NN SV* const invlist|const UV cp
diff --git a/inline_invlist.c b/inline_invlist.c
index b194c0d264..f20f6daae0 100644
--- a/inline_invlist.c
+++ b/inline_invlist.c
@@ -41,7 +41,7 @@
#define ELEMENT_RANGE_MATCHES_INVLIST(i) (! ((i) & 1))
#define PREV_RANGE_MATCHES_INVLIST(i) (! ELEMENT_RANGE_MATCHES_INVLIST(i))
-PERL_STATIC_INLINE UV*
+PERL_STATIC_INLINE STRLEN*
S__get_invlist_len_addr(pTHX_ SV* invlist)
{
/* Return the address of the UV that contains the current number
@@ -49,7 +49,7 @@ S__get_invlist_len_addr(pTHX_ SV* invlist)
PERL_ARGS_ASSERT__GET_INVLIST_LEN_ADDR;
- return (UV *) (SvPVX(invlist) + (INVLIST_LEN_OFFSET * sizeof (UV)));
+ return &(LvTARGLEN(invlist));
}
PERL_STATIC_INLINE UV
diff --git a/proto.h b/proto.h
index f5dc0dceeb..e4e43e177f 100644
--- a/proto.h
+++ b/proto.h
@@ -6743,7 +6743,7 @@ STATIC I32 S_study_chunk(pTHX_ struct RExC_state_t *pRExC_state, regnode **scanp
#endif
#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) || defined(PERL_IN_UTF8_C)
-PERL_STATIC_INLINE UV* S__get_invlist_len_addr(pTHX_ SV* invlist)
+PERL_STATIC_INLINE STRLEN* S__get_invlist_len_addr(pTHX_ SV* invlist)
__attribute__warn_unused_result__
__attribute__nonnull__(pTHX_1);
#define PERL_ARGS_ASSERT__GET_INVLIST_LEN_ADDR \
diff --git a/regcomp.c b/regcomp.c
index b2bf63c117..0ee7bc5b6b 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -7027,10 +7027,13 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags)
* interfaces are highly subject to change, so as much as possible is static to
* this file. An inversion list is here implemented as a malloc'd C UV array
* with some added info that is placed as UVs at the beginning in a header
- * portion. An inversion list for Unicode is an array of code points, sorted
- * by ordinal number. The zeroth element is the first code point in the list.
- * The 1th element is the first element beyond that not in the list. In other
- * words, the first range is
+ * portion. Currently it is a SVt_PVLV, with some of the header fields from
+ * that repurposed for uses here.
+ *
+ * An inversion list for Unicode is an array of code points, sorted by ordinal
+ * number. The zeroth element is the first code point in the list. The 1th
+ * element is the first element beyond that not in the list. In other words,
+ * the first range is
* invlist[0]..(invlist[1]-1)
* The other ranges follow. Thus every element whose index is divisible by two
* marks the beginning of a range that is in the list, and every element not
@@ -7127,7 +7130,7 @@ S_invlist_set_len(pTHX_ SV* const invlist, const UV len)
*_get_invlist_len_addr(invlist) = len;
- assert(len <= SvLEN(invlist));
+ assert(SvLEN(invlist) == 0 || len <= SvLEN(invlist));
SvCUR_set(invlist, TO_INTERNAL_SIZE(len));
/* Note that when inverting, SvCUR shouldn't change */
@@ -7207,7 +7210,9 @@ Perl__new_invlist(pTHX_ IV initial_size)
}
/* Allocate the initial space */
- new_list = newSV(TO_INTERNAL_SIZE(initial_size));
+ new_list = newSV_type(SVt_PVLV);
+ SvGROW(new_list, TO_INTERNAL_SIZE(initial_size) + 1); /* 1 is for trailing
+ NUL */
invlist_set_len(new_list, 0);
/* Force iterinit() to be used to get iteration to work */
@@ -7237,7 +7242,7 @@ S__new_invlist_C_array(pTHX_ UV* list)
* form, including internal fields. Thus this is a dangerous routine that
* should not be used in the wrong hands */
- SV* invlist = newSV_type(SVt_PV);
+ SV* invlist = newSV_type(SVt_PVLV);
PERL_ARGS_ASSERT__NEW_INVLIST_C_ARRAY;
@@ -7249,6 +7254,7 @@ S__new_invlist_C_array(pTHX_ UV* list)
if (*get_invlist_version_id_addr(invlist) != INVLIST_VERSION_ID) {
Perl_croak(aTHX_ "panic: Incorrect version for previously generated inversion list");
}
+ invlist_set_len(invlist, list[INVLIST_LEN_OFFSET]);
/* Initialize the iteration pointer.
* XXX This could be done at compile time in charclass_invlists.h, but I
@@ -8043,7 +8049,7 @@ Perl__invlist_invert(pTHX_ SV* const invlist)
* have a zero; removes it otherwise. As described above, the data
* structure is set up so that this is very efficient */
- UV* len_pos = _get_invlist_len_addr(invlist);
+ STRLEN* len_pos = _get_invlist_len_addr(invlist);
PERL_ARGS_ASSERT__INVLIST_INVERT;
@@ -8120,6 +8126,7 @@ S_invlist_clone(pTHX_ SV* const invlist)
PERL_ARGS_ASSERT_INVLIST_CLONE;
SvCUR_set(new_invlist, length); /* This isn't done automatically */
+ invlist_set_len(new_invlist, _invlist_len(invlist));
Copy(SvPVX(invlist), SvPVX(new_invlist), length, char);
return new_invlist;