diff options
author | Karl Williamson <public@khwilliamson.com> | 2013-01-10 16:42:19 -0700 |
---|---|---|
committer | Karl Williamson <public@khwilliamson.com> | 2013-01-11 11:50:38 -0700 |
commit | 410299687df9bab6276843b8261158e45e14f988 (patch) | |
tree | fb9b2162b4fab39dc126bb25223091dd2d848cd2 | |
parent | d71b76f689c577372aa8a0b376960b13c24d0a88 (diff) | |
download | perl-410299687df9bab6276843b8261158e45e14f988.tar.gz |
regcomp.c: Add capability for regclass() to return inversion list
This is currently unused, but will have regclass() return an inversion
list instead of a node.
-rw-r--r-- | embed.fnc | 3 | ||||
-rw-r--r-- | embed.h | 2 | ||||
-rw-r--r-- | proto.h | 2 | ||||
-rw-r--r-- | regcomp.c | 44 | ||||
-rw-r--r-- | t/porting/diag.t | 1 |
5 files changed, 43 insertions, 9 deletions
@@ -1962,7 +1962,8 @@ Es |STRLEN |reguni |NN const struct RExC_state_t *pRExC_state \ Es |regnode*|regclass |NN struct RExC_state_t *pRExC_state \ |NN I32 *flagp|U32 depth|const bool stop_at_1 \ |bool allow_multi_fold \ - |const bool silence_non_portable + |const bool silence_non_portable \ + |NULLOK SV** ret_invlist Es |regnode*|reg_node |NN struct RExC_state_t *pRExC_state|U8 op Es |UV |reg_recode |const char value|NN SV **encp Es |regnode*|regpiece |NN struct RExC_state_t *pRExC_state \ @@ -935,7 +935,7 @@ #define reganode(a,b,c) S_reganode(aTHX_ a,b,c) #define regatom(a,b,c) S_regatom(aTHX_ a,b,c) #define regbranch(a,b,c,d) S_regbranch(aTHX_ a,b,c,d) -#define regclass(a,b,c,d,e,f) S_regclass(aTHX_ a,b,c,d,e,f) +#define regclass(a,b,c,d,e,f,g) S_regclass(aTHX_ a,b,c,d,e,f,g) #define reginsert(a,b,c,d) S_reginsert(aTHX_ a,b,c,d) #define regpatws S_regpatws #define regpiece(a,b,c) S_regpiece(aTHX_ a,b,c) @@ -6639,7 +6639,7 @@ STATIC regnode* S_regbranch(pTHX_ struct RExC_state_t *pRExC_state, I32 *flagp, #define PERL_ARGS_ASSERT_REGBRANCH \ assert(pRExC_state); assert(flagp) -STATIC regnode* S_regclass(pTHX_ struct RExC_state_t *pRExC_state, I32 *flagp, U32 depth, const bool stop_at_1, bool allow_multi_fold, const bool silence_non_portable) +STATIC regnode* S_regclass(pTHX_ struct RExC_state_t *pRExC_state, I32 *flagp, U32 depth, const bool stop_at_1, bool allow_multi_fold, const bool silence_non_portable, SV** ret_invlist) __attribute__nonnull__(pTHX_1) __attribute__nonnull__(pTHX_2); #define PERL_ARGS_ASSERT_REGCLASS \ @@ -10108,7 +10108,8 @@ tryagain: ret = regclass(pRExC_state, flagp,depth+1, FALSE, /* means parse the whole char class */ TRUE, /* allow multi-char folds */ - FALSE); /* don't silence non-portable warnings. */ + FALSE, /* don't silence non-portable warnings. */ + NULL); if (*RExC_parse != ']') { RExC_parse = oregcomp_parse; vFAIL("Unmatched ["); @@ -10305,9 +10306,10 @@ tryagain: ret = regclass(pRExC_state, flagp,depth+1, TRUE, /* means just parse this element */ FALSE, /* don't allow multi-char folds */ - FALSE); /* don't silence non-portable warnings. + FALSE, /* don't silence non-portable warnings. It would be a bug if these returned non-portables */ + NULL); RExC_parse--; @@ -11291,7 +11293,7 @@ S_regpposixcc(pTHX_ RExC_state_t *pRExC_state, I32 value, SV *free_me, STATIC regnode * S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, const bool stop_at_1, bool allow_multi_folds, - const bool silence_non_portable) + const bool silence_non_portable, SV** ret_invlist) { /* parse a bracketed class specification. Most of these will produce an * ANYOF node; but something like [a] will produce an EXACT node; [aA], an @@ -11607,8 +11609,13 @@ parseit: } /* Here didn't find it. It could be a user-defined - * property that will be available at run-time. Add it - * to the list to look up then */ + * property that will be available at run-time. If we + * accept only compile-time properties, is an error; + * otherwise add it to the list for run-time look up */ + if (ret_invlist) { + RExC_parse = e + 1; + vFAIL3("Property '%.*s' is unknown", (int) n, name); + } Perl_sv_catpvf(aTHX_ listsv, "%cutf8::%s\n", (value == 'p' ? '+' : '!'), name); @@ -11889,6 +11896,18 @@ parseit: * class */ const char *Xname = swash_property_names[classnum]; + /* If returning the inversion list, we can't defer + * getting this until runtime */ + if (ret_invlist && ! PL_utf8_swash_ptrs[classnum]) { + PL_utf8_swash_ptrs[classnum] = + _core_swash_init("utf8", Xname, &PL_sv_undef, + 1, /* binary */ + 0, /* not tr/// */ + NULL, /* No inversion list */ + NULL /* No flags */ + ); + assert(PL_utf8_swash_ptrs[classnum]); + } if ( ! PL_utf8_swash_ptrs[classnum]) { if (namedclass % 2 == 0) { /* A non-complemented class */ @@ -12350,7 +12369,7 @@ parseit: /* If the character class contains only a single element, it may be * optimizable into another node type which is smaller and runs faster. * Check if this is the case for this class */ - if (element_count == 1) { + if (element_count == 1 && ! ret_invlist) { U8 op = END; U8 arg = 0; @@ -12859,6 +12878,19 @@ parseit: invert = FALSE; } + if (ret_invlist) { + *ret_invlist = cp_list; + + /* Discard the generated node */ + if (SIZE_ONLY) { + RExC_size = orig_size; + } + else { + RExC_emit = orig_emit; + } + return END; + } + /* If we didn't do folding, it's because some information isn't available * until runtime; set the run-time fold flag for these. (We don't have to * worry about properties folding, as that is taken care of by the swash diff --git a/t/porting/diag.t b/t/porting/diag.t index d86a870e00..8745044566 100644 --- a/t/porting/diag.t +++ b/t/porting/diag.t @@ -631,6 +631,7 @@ Useless (%sc) - %suse /gc modifier in regex; marked by <-- HERE in m/%s/ Useless use of (?-p) in regex; marked by <-- HERE in m/%s/ Unmatched '%c' in POSIX class in regex; marked by <-- HERE in m/%s/ Unmatched '[' in POSIX class in regex; marked by <-- HERE in m/%s/ +Property '%s' is unknown in regex; marked by <-- HERE in m/%s/ Need exactly 3 octal digits in regex; marked by <-- HERE in m/%s/ Unrecognized escape \%c in character class in regex; marked by <-- HERE in m/%s/ |