summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKarl Williamson <public@khwilliamson.com>2013-01-10 16:42:19 -0700
committerKarl Williamson <public@khwilliamson.com>2013-01-11 11:50:38 -0700
commit410299687df9bab6276843b8261158e45e14f988 (patch)
treefb9b2162b4fab39dc126bb25223091dd2d848cd2
parentd71b76f689c577372aa8a0b376960b13c24d0a88 (diff)
downloadperl-410299687df9bab6276843b8261158e45e14f988.tar.gz
regcomp.c: Add capability for regclass() to return inversion list
This is currently unused, but will have regclass() return an inversion list instead of a node.
-rw-r--r--embed.fnc3
-rw-r--r--embed.h2
-rw-r--r--proto.h2
-rw-r--r--regcomp.c44
-rw-r--r--t/porting/diag.t1
5 files changed, 43 insertions, 9 deletions
diff --git a/embed.fnc b/embed.fnc
index 88a244469c..2972b6d359 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -1962,7 +1962,8 @@ Es |STRLEN |reguni |NN const struct RExC_state_t *pRExC_state \
Es |regnode*|regclass |NN struct RExC_state_t *pRExC_state \
|NN I32 *flagp|U32 depth|const bool stop_at_1 \
|bool allow_multi_fold \
- |const bool silence_non_portable
+ |const bool silence_non_portable \
+ |NULLOK SV** ret_invlist
Es |regnode*|reg_node |NN struct RExC_state_t *pRExC_state|U8 op
Es |UV |reg_recode |const char value|NN SV **encp
Es |regnode*|regpiece |NN struct RExC_state_t *pRExC_state \
diff --git a/embed.h b/embed.h
index eff78d4dbd..9a5b636bdf 100644
--- a/embed.h
+++ b/embed.h
@@ -935,7 +935,7 @@
#define reganode(a,b,c) S_reganode(aTHX_ a,b,c)
#define regatom(a,b,c) S_regatom(aTHX_ a,b,c)
#define regbranch(a,b,c,d) S_regbranch(aTHX_ a,b,c,d)
-#define regclass(a,b,c,d,e,f) S_regclass(aTHX_ a,b,c,d,e,f)
+#define regclass(a,b,c,d,e,f,g) S_regclass(aTHX_ a,b,c,d,e,f,g)
#define reginsert(a,b,c,d) S_reginsert(aTHX_ a,b,c,d)
#define regpatws S_regpatws
#define regpiece(a,b,c) S_regpiece(aTHX_ a,b,c)
diff --git a/proto.h b/proto.h
index 6816e56171..0cab673c77 100644
--- a/proto.h
+++ b/proto.h
@@ -6639,7 +6639,7 @@ STATIC regnode* S_regbranch(pTHX_ struct RExC_state_t *pRExC_state, I32 *flagp,
#define PERL_ARGS_ASSERT_REGBRANCH \
assert(pRExC_state); assert(flagp)
-STATIC regnode* S_regclass(pTHX_ struct RExC_state_t *pRExC_state, I32 *flagp, U32 depth, const bool stop_at_1, bool allow_multi_fold, const bool silence_non_portable)
+STATIC regnode* S_regclass(pTHX_ struct RExC_state_t *pRExC_state, I32 *flagp, U32 depth, const bool stop_at_1, bool allow_multi_fold, const bool silence_non_portable, SV** ret_invlist)
__attribute__nonnull__(pTHX_1)
__attribute__nonnull__(pTHX_2);
#define PERL_ARGS_ASSERT_REGCLASS \
diff --git a/regcomp.c b/regcomp.c
index 3d1d4ce257..bc4891581b 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -10108,7 +10108,8 @@ tryagain:
ret = regclass(pRExC_state, flagp,depth+1,
FALSE, /* means parse the whole char class */
TRUE, /* allow multi-char folds */
- FALSE); /* don't silence non-portable warnings. */
+ FALSE, /* don't silence non-portable warnings. */
+ NULL);
if (*RExC_parse != ']') {
RExC_parse = oregcomp_parse;
vFAIL("Unmatched [");
@@ -10305,9 +10306,10 @@ tryagain:
ret = regclass(pRExC_state, flagp,depth+1,
TRUE, /* means just parse this element */
FALSE, /* don't allow multi-char folds */
- FALSE); /* don't silence non-portable warnings.
+ FALSE, /* don't silence non-portable warnings.
It would be a bug if these returned
non-portables */
+ NULL);
RExC_parse--;
@@ -11291,7 +11293,7 @@ S_regpposixcc(pTHX_ RExC_state_t *pRExC_state, I32 value, SV *free_me,
STATIC regnode *
S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
const bool stop_at_1, bool allow_multi_folds,
- const bool silence_non_portable)
+ const bool silence_non_portable, SV** ret_invlist)
{
/* parse a bracketed class specification. Most of these will produce an
* ANYOF node; but something like [a] will produce an EXACT node; [aA], an
@@ -11607,8 +11609,13 @@ parseit:
}
/* Here didn't find it. It could be a user-defined
- * property that will be available at run-time. Add it
- * to the list to look up then */
+ * property that will be available at run-time. If we
+ * accept only compile-time properties, is an error;
+ * otherwise add it to the list for run-time look up */
+ if (ret_invlist) {
+ RExC_parse = e + 1;
+ vFAIL3("Property '%.*s' is unknown", (int) n, name);
+ }
Perl_sv_catpvf(aTHX_ listsv, "%cutf8::%s\n",
(value == 'p' ? '+' : '!'),
name);
@@ -11889,6 +11896,18 @@ parseit:
* class */
const char *Xname = swash_property_names[classnum];
+ /* If returning the inversion list, we can't defer
+ * getting this until runtime */
+ if (ret_invlist && ! PL_utf8_swash_ptrs[classnum]) {
+ PL_utf8_swash_ptrs[classnum] =
+ _core_swash_init("utf8", Xname, &PL_sv_undef,
+ 1, /* binary */
+ 0, /* not tr/// */
+ NULL, /* No inversion list */
+ NULL /* No flags */
+ );
+ assert(PL_utf8_swash_ptrs[classnum]);
+ }
if ( ! PL_utf8_swash_ptrs[classnum]) {
if (namedclass % 2 == 0) { /* A non-complemented
class */
@@ -12350,7 +12369,7 @@ parseit:
/* If the character class contains only a single element, it may be
* optimizable into another node type which is smaller and runs faster.
* Check if this is the case for this class */
- if (element_count == 1) {
+ if (element_count == 1 && ! ret_invlist) {
U8 op = END;
U8 arg = 0;
@@ -12859,6 +12878,19 @@ parseit:
invert = FALSE;
}
+ if (ret_invlist) {
+ *ret_invlist = cp_list;
+
+ /* Discard the generated node */
+ if (SIZE_ONLY) {
+ RExC_size = orig_size;
+ }
+ else {
+ RExC_emit = orig_emit;
+ }
+ return END;
+ }
+
/* If we didn't do folding, it's because some information isn't available
* until runtime; set the run-time fold flag for these. (We don't have to
* worry about properties folding, as that is taken care of by the swash
diff --git a/t/porting/diag.t b/t/porting/diag.t
index d86a870e00..8745044566 100644
--- a/t/porting/diag.t
+++ b/t/porting/diag.t
@@ -631,6 +631,7 @@ Useless (%sc) - %suse /gc modifier in regex; marked by <-- HERE in m/%s/
Useless use of (?-p) in regex; marked by <-- HERE in m/%s/
Unmatched '%c' in POSIX class in regex; marked by <-- HERE in m/%s/
Unmatched '[' in POSIX class in regex; marked by <-- HERE in m/%s/
+Property '%s' is unknown in regex; marked by <-- HERE in m/%s/
Need exactly 3 octal digits in regex; marked by <-- HERE in m/%s/
Unrecognized escape \%c in character class in regex; marked by <-- HERE in m/%s/