summaryrefslogtreecommitdiff
path: root/regcomp.c
diff options
context:
space:
mode:
author: Karl Williamson <public@khwilliamson.com> 2013-07-27 18:45:18 -0600
committer: Karl Williamson <public@khwilliamson.com> 2013-07-30 12:05:47 -0600
commit: f202c207b253f1f19a37243471f35742d6bb309c (patch)
tree: 6427b46606be4436f4dc5c914d7ffe255ee176b2 /regcomp.c
parent: 9a1ec8a9cfbf5741c8f41cdf8d1f5fe0c3600696 (diff)
download: perl-f202c207b253f1f19a37243471f35742d6bb309c.tar.gz
regcomp.c: Debug output clearer ranges
It's not immediately obvious what the character class [!-~] matches; its hexadecimal equivalent, [\x{21}-\x{7e}], is clearer. This commit changes the debug output for character class matches to use the hexadecimal form, while keeping the current literal form for ranges whose contents are obvious, e.g., [J-R] (both ends are digits, both are uppercase letters, or both are lowercase letters). Ranges like [A-z] include more than just alphabetics, so they are now output as [\x{41}-\x{7a}]. (Debug output is produced, for example, when the command line option -Dr is specified.)
Diffstat (limited to 'regcomp.c')
-rw-r--r-- regcomp.c 30
1 files changed, 27 insertions, 3 deletions
diff --git a/regcomp.c b/regcomp.c
index 72dd943a20..b3c66b8bee 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -15526,13 +15526,37 @@ S_put_latin1_charclass_innards(pTHX_ SV *sv, char *bitmap)
if (rangestart == -1)
rangestart = i;
} else if (rangestart != -1) {
- if (i <= rangestart + 3)
+ int j = i - 1;
+ if (i <= rangestart + 3) { /* Individual chars in short ranges */
for (; rangestart < i; rangestart++)
put_byte(sv, rangestart);
- else {
+ }
+ else if ( j > 255
+ || ! isALPHANUMERIC(rangestart)
+ || ! isALPHANUMERIC(j)
+ || isDIGIT(rangestart) != isDIGIT(j)
+ || isUPPER(rangestart) != isUPPER(j)
+ || isLOWER(rangestart) != isLOWER(j)
+
+ /* This final test should get optimized out except
+ * on EBCDIC platforms, where it causes ranges that
+ * cross discontinuities like i/j to be shown as hex
+ * instead of the misleading, e.g. H-K (since that
+ * range includes more than H, I, J, K). */
+ || (j - rangestart)
+ != NATIVE_TO_ASCII(j) - NATIVE_TO_ASCII(rangestart))
+ {
+ Perl_sv_catpvf(aTHX_ sv, "\\x{%02x}-\\x{%02x}",
+ rangestart,
+ (j < 256) ? j : 255);
+ }
+ else { /* Here, the ends of the range are both digits, or both
+ uppercase, or both lowercase; and there's no
+ discontinuity in the range (which could happen on EBCDIC
+ platforms) */
put_byte(sv, rangestart);
sv_catpvs(sv, "-");
- put_byte(sv, i - 1);
+ put_byte(sv, j);
}
rangestart = -1;
has_output_anything = TRUE;