summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Karl Williamson <khw@cpan.org> 2015-09-07 22:18:55 -0600
committer Karl Williamson <khw@cpan.org> 2015-09-08 10:05:56 -0600
commit 992001bfb28aa89a918dfb566d0413ea40d9b0f5 (patch)
tree 4a3674e24f8dd55b992fab482eb528a14c17b266
parent 308482c27259302fb2ca8c60b8383609a0e9f314 (diff)
download perl-992001bfb28aa89a918dfb566d0413ea40d9b0f5.tar.gz
Slightly shorten most regex patterns
A compiled pattern requires one byte for each non-default modifier, such as /i. Previously, space was allocated for the worst case, in which every modifier is non-default. Now only the space actually needed is reserved.
-rw-r--r-- globvar.sym 1
-rw-r--r-- regcomp.c 16
2 files changed, 9 insertions, 8 deletions
diff --git a/globvar.sym b/globvar.sym
index 1183d6742b..2943fc6691 100644
--- a/globvar.sym
+++ b/globvar.sym
@@ -5,6 +5,7 @@
PL_No
PL_Yes
PL_bincompat_options
+PL_bitcount
PL_block_type
PL_charclass
PL_check
diff --git a/regcomp.c b/regcomp.c
index ccbccf8faa..24af9d7aef 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -6794,25 +6794,25 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
|| ! has_charset);
bool has_runon = ((RExC_seen & REG_RUN_ON_COMMENT_SEEN)
== REG_RUN_ON_COMMENT_SEEN);
- U16 reganch = (U16)((r->extflags & RXf_PMf_STD_PMMOD)
+ U8 reganch = (U8)((r->extflags & RXf_PMf_STD_PMMOD)
>> RXf_PMf_STD_PMMOD_SHIFT);
const char *fptr = STD_PAT_MODS; /*"msixn"*/
char *p;
- /* Allocate for the worst case, which is all the std flags are turned
- * on. If more precision is desired, we could do a population count of
- * the flags set. This could be done with a small lookup table, or by
- * shifting, masking and adding, or even, when available, assembly
- * language for a machine-language population count.
- * We never output a minus, as all those are defaults, so are
+
+ /* We output all the necessary flags; we never output a minus, as all
+ * those are defaults, so are
* covered by the caret */
const STRLEN wraplen = plen + has_p + has_runon
+ has_default /* If needs a caret */
+ + PL_bitcount[reganch] /* 1 char for each set standard flag */
/* If needs a character set specifier */
+ ((has_charset) ? MAX_CHARSET_NAME_LENGTH : 0)
- + (sizeof(STD_PAT_MODS) - 1)
+ (sizeof("(?:)") - 1);
+ /* make sure PL_bitcount bounds not exceeded */
+ assert(sizeof(STD_PAT_MODS) <= 8);
+
Newx(p, wraplen + 1, char); /* +1 for the ending NUL */
r->xpv_len_u.xpvlenu_pv = p;
if (RExC_utf8)