diff options
author | Karl Williamson <public@khwilliamson.com> | 2012-07-20 10:23:14 -0600 |
---|---|---|
committer | Karl Williamson <public@khwilliamson.com> | 2012-07-24 21:13:49 -0600 |
commit | 0658cddeb9feb16c427ac50f4000b008516b9958 (patch) | |
tree | 6107b53e26cbdce9e42393b8aadf84dd14600772 /regcomp.c | |
parent | 3615ea5819e869b314f723b0ce28dfb1d80017ef (diff) | |
download | perl-0658cddeb9feb16c427ac50f4000b008516b9958.tar.gz |
regcomp.c: Use POSIXA, NPOSIXA
This commit optimizes character classes which are matched under /a or
/aa and consist of a single Posix class, into POSIXA or NPOSIXA regop
types. For example, /[[:word:]]/a is optimized this way. Since [:ascii:]
is always ASCII-restricted no matter what the charset modifier is, it is
optimized under any charset modifier except locale (/l).
These nodes should execute somewhat faster than a generic ANYOF node,
and are significantly smaller, taking 2 bytes instead of 12.
The flags field of the node structure is used to hold an enum indicating
which of the 15 Posix classes is being matched.
Diffstat (limited to 'regcomp.c')
-rw-r--r-- | regcomp.c | 40 |
1 files changed, 39 insertions, 1 deletions
@@ -11124,6 +11124,10 @@ S_add_alternate(pTHX_ AV** alternate_ptr, U8* string, STRLEN len) * changed since initialization, then there is a run-time definition. */ #define HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION (SvCUR(listsv) != initial_listsv_len) +/* This converts the named class defined in regcomp.h to its equivalent class + * number defined in handy.h. */ +#define namedclass_to_classnum(class) ((class) / 2) + /* parse a class specification and produce either an ANYOF node that matches the pattern or perhaps will be optimized into an EXACTish node @@ -11865,6 +11869,7 @@ parseit: * Check if this is the case for this class */ if (element_count == 1) { U8 op = END; + U8 arg = 0; if (namedclass > OOB_NAMEDCLASS) { /* this is a named class, like \w or [:digit:] or \p{foo} */ @@ -11942,7 +11947,26 @@ parseit: op = (invert) ? NVERTWS : VERTWS; break; + case ANYOF_MAX: + break; + default: + /* A generic posix class. All the /a ones can be handled + * by the POSIXA opcode. And all are closed under folding + * in the ASCII range, so FOLD doesn't matter */ + if (AT_LEAST_ASCII_RESTRICTED + || (! LOC && namedclass == ANYOF_ASCII)) + { + /* The odd numbered ones are the complements of the + * next-lower even number one */ + if (namedclass % 2 == 1) { + invert = ! invert; + namedclass--; + } + arg = namedclass_to_classnum(namedclass); + op = (invert) ? NPOSIXA : POSIXA; + } + break; } } else if (value == prevvalue) { @@ -11994,7 +12018,12 @@ parseit: ret = reg_node(pRExC_state, op); - if (PL_regkind[op] == EXACT) { + if (PL_regkind[op] == POSIXD) { + if (! 
SIZE_ONLY) { + FLAGS(ret) = arg; + } + } + else if (PL_regkind[op] == EXACT) { alloc_maybe_populate_EXACT(pRExC_state, ret, 0, value); } @@ -13543,6 +13572,15 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o) Perl_sv_catpvf(aTHX_ sv, "%s]", PL_colors[1]); } + else if (k == POSIXD) { + U8 index = FLAGS(o) * 2; + if (index > (sizeof(anyofs) / sizeof(anyofs[0]))) { + Perl_sv_catpvf(aTHX_ sv, "[illegal type=%d])", index); + } + else { + sv_catpv(sv, anyofs[index]); + } + } else if (k == BRANCHJ && (OP(o) == UNLESSM || OP(o) == IFMATCH)) Perl_sv_catpvf(aTHX_ sv, "[%d]", -(o->flags)); #else |