diff options
author | Karl Williamson <khw@cpan.org> | 2015-09-07 22:18:55 -0600 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2015-09-08 10:05:56 -0600 |
commit | 992001bfb28aa89a918dfb566d0413ea40d9b0f5 (patch) | |
tree | 4a3674e24f8dd55b992fab482eb528a14c17b266 | |
parent | 308482c27259302fb2ca8c60b8383609a0e9f314 (diff) | |
download | perl-992001bfb28aa89a918dfb566d0413ea40d9b0f5.tar.gz |
Slightly shorten most regex patterns
A compiled pattern requires one byte for each non-default modifier, such
as /i. Previously, space was allocated for the worst case (every
modifier being non-default). Now, only the space actually needed is
reserved.
-rw-r--r-- | globvar.sym | 1 | ||||
-rw-r--r-- | regcomp.c | 16 |
2 files changed, 9 insertions, 8 deletions
diff --git a/globvar.sym b/globvar.sym
index 1183d6742b..2943fc6691 100644
--- a/globvar.sym
+++ b/globvar.sym
@@ -5,6 +5,7 @@
 PL_No
 PL_Yes
 PL_bincompat_options
+PL_bitcount
 PL_block_type
 PL_charclass
 PL_check
diff --git a/regcomp.c b/regcomp.c
--- a/regcomp.c
+++ b/regcomp.c
@@ -6794,25 +6794,25 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
                                                     || ! has_charset);
         bool has_runon = ((RExC_seen & REG_RUN_ON_COMMENT_SEEN)
                                                    == REG_RUN_ON_COMMENT_SEEN);
-        U16 reganch = (U16)((r->extflags & RXf_PMf_STD_PMMOD)
+        U8 reganch = (U8)((r->extflags & RXf_PMf_STD_PMMOD)
                             >> RXf_PMf_STD_PMMOD_SHIFT);
         const char *fptr = STD_PAT_MODS;        /*"msixn"*/
         char *p;
-        /* Allocate for the worst case, which is all the std flags are turned
-         * on. If more precision is desired, we could do a population count of
-         * the flags set. This could be done with a small lookup table, or by
-         * shifting, masking and adding, or even, when available, assembly
-         * language for a machine-language population count.
-         * We never output a minus, as all those are defaults, so are
+
+        /* We output all the necessary flags; we never output a minus, as all
+         * those are defaults, so are
          * covered by the caret */
         const STRLEN wraplen = plen + has_p + has_runon
             + has_default       /* If needs a caret */
+            + PL_bitcount[reganch] /* 1 char for each set standard flag */
 
             /* If needs a character set specifier */
             + ((has_charset) ? MAX_CHARSET_NAME_LENGTH : 0)
-            + (sizeof(STD_PAT_MODS) - 1)
             + (sizeof("(?:)") - 1);
 
+        /* make sure PL_bitcount bounds not exceeded */
+        assert(sizeof(STD_PAT_MODS) <= 8);
+
         Newx(p, wraplen + 1, char); /* +1 for the ending NUL */
         r->xpv_len_u.xpvlenu_pv = p;
         if (RExC_utf8)