summaryrefslogtreecommitdiff
path: root/regcomp.c
diff options
context:
space:
mode:
authorKarl Williamson <public@khwilliamson.com>2012-07-20 10:23:14 -0600
committerKarl Williamson <public@khwilliamson.com>2012-07-24 21:13:49 -0600
commit0658cddeb9feb16c427ac50f4000b008516b9958 (patch)
tree6107b53e26cbdce9e42393b8aadf84dd14600772 /regcomp.c
parent3615ea5819e869b314f723b0ce28dfb1d80017ef (diff)
downloadperl-0658cddeb9feb16c427ac50f4000b008516b9958.tar.gz
regcomp.c: Use POSIXA, NPOSIXA
This commit optimizes character classes that are matched under /a or /aa and consist of a single POSIX class (for example, /[[:word:]]/a) into the POSIXA or NPOSIXA regop types. Because [:ascii:] is ASCII-restricted no matter what the charset modifier is, it is optimized under every modifier except locale (the code checks ! LOC). These nodes should execute somewhat faster than a generic ANYOF node, and they are significantly smaller, taking 2 bytes instead of 12. The flags field of the node structure holds an enum value indicating which of the 15 POSIX classes is being matched.
Diffstat (limited to 'regcomp.c')
-rw-r--r--regcomp.c40
1 files changed, 39 insertions, 1 deletions
diff --git a/regcomp.c b/regcomp.c
index 5a87e9cc61..8f4884499e 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -11124,6 +11124,10 @@ S_add_alternate(pTHX_ AV** alternate_ptr, U8* string, STRLEN len)
* changed since initialization, then there is a run-time definition. */
#define HAS_NONLOCALE_RUNTIME_PROPERTY_DEFINITION (SvCUR(listsv) != initial_listsv_len)
+/* Map a named class (regcomp.h) to its class number (handy.h).  Each class and
+ * its complement form an even/odd pair, so halving maps both to one number. */
+#define namedclass_to_classnum(class) ((class) / 2)
+
/*
parse a class specification and produce either an ANYOF node that
matches the pattern or perhaps will be optimized into an EXACTish node
@@ -11865,6 +11869,7 @@ parseit:
* Check if this is the case for this class */
if (element_count == 1) {
U8 op = END;
+ U8 arg = 0;
if (namedclass > OOB_NAMEDCLASS) { /* this is a named class, like \w or
[:digit:] or \p{foo} */
@@ -11942,7 +11947,26 @@ parseit:
op = (invert) ? NVERTWS : VERTWS;
break;
+ case ANYOF_MAX:
+ break;
+ default:
+ /* A generic posix class. All the /a ones can be handled
+ * by the POSIXA opcode. And all are closed under folding
+ * in the ASCII range, so FOLD doesn't matter */
+ if (AT_LEAST_ASCII_RESTRICTED
+ || (! LOC && namedclass == ANYOF_ASCII))
+ {
+ /* The odd numbered ones are the complements of the
+ * next-lower even number one */
+ if (namedclass % 2 == 1) {
+ invert = ! invert;
+ namedclass--;
+ }
+ arg = namedclass_to_classnum(namedclass);
+ op = (invert) ? NPOSIXA : POSIXA;
+ }
+ break;
}
}
else if (value == prevvalue) {
@@ -11994,7 +12018,12 @@ parseit:
ret = reg_node(pRExC_state, op);
- if (PL_regkind[op] == EXACT) {
+ if (PL_regkind[op] == POSIXD) {
+ if (! SIZE_ONLY) {
+ FLAGS(ret) = arg;
+ }
+ }
+ else if (PL_regkind[op] == EXACT) {
alloc_maybe_populate_EXACT(pRExC_state, ret, 0, value);
}
@@ -13543,6 +13572,15 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
Perl_sv_catpvf(aTHX_ sv, "%s]", PL_colors[1]);
}
+ else if (k == POSIXD) {
+ U8 index = FLAGS(o) * 2; /* flags holds the class number; doubling it gives the anyofs[] index */
+ if (index >= (sizeof(anyofs) / sizeof(anyofs[0]))) { /* >=: index == element count is already past the end */
+ Perl_sv_catpvf(aTHX_ sv, "[illegal type=%d])", index);
+ }
+ else {
+ sv_catpv(sv, anyofs[index]);
+ }
+ }
else if (k == BRANCHJ && (OP(o) == UNLESSM || OP(o) == IFMATCH))
Perl_sv_catpvf(aTHX_ sv, "[%d]", -(o->flags));
#else