summaryrefslogtreecommitdiff
path: root/regcomp.c
diff options
context:
space:
mode:
authorKarl Williamson <khw@cpan.org>2020-02-12 09:26:22 -0700
committerSawyer X <xsawyerx@cpan.org>2020-05-27 11:09:54 +0300
commitda3350397f4030c098c43eb8ce4ea964e886d2eb (patch)
treeeffb16b028366c320a2ef90de7c5116da5ccc49a /regcomp.c
parent28bfed14e7471c9d91c7ca03b095d6b8899c85ab (diff)
downloadperl-da3350397f4030c098c43eb8ce4ea964e886d2eb.tar.gz
regcomp.c: Improve dump of ANYOFR patterns
On ASCII platforms, where it's easy to calculate, don't output the lowest first UTF-8 byte when dumping a pattern whose entire range is ASCII. The info about this minimum byte is carried in the node, but is ignored unless the pattern is UTF-8, and in the case of UTF-8 invariant characters it gives no extra help. The information is quite useful for large code points, so we can quickly rule out large swaths of potential matches without having to convert the target UTF-8 string to code point format. But for ASCII matches it isn't helpful and dumping it is just extra noise.
Diffstat (limited to 'regcomp.c')
-rw-r--r--regcomp.c13
1 files changed, 9 insertions, 4 deletions
diff --git a/regcomp.c b/regcomp.c
index 01d96ec269..203dbdc2b7 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -21421,11 +21421,16 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
: (OP(o) == ANYOFH || OP(o) == ANYOFR)
? 0xFF
: lowest;
- Perl_sv_catpvf(aTHX_ sv, " (First UTF-8 byte=%02X", lowest);
- if (lowest != highest) {
- Perl_sv_catpvf(aTHX_ sv, "-%02X", highest);
+#ifndef EBCDIC
+ if (OP(o) != ANYOFR || ! isASCII(ANYOFRbase(o) + ANYOFRdelta(o)))
+#endif
+ {
+ Perl_sv_catpvf(aTHX_ sv, " (First UTF-8 byte=%02X", lowest);
+ if (lowest != highest) {
+ Perl_sv_catpvf(aTHX_ sv, "-%02X", highest);
+ }
+ Perl_sv_catpvf(aTHX_ sv, ")");
}
- Perl_sv_catpvf(aTHX_ sv, ")");
}
SvREFCNT_dec(unresolved);