diff options
author | Karl Williamson <public@khwilliamson.com> | 2013-07-27 18:45:18 -0600 |
---|---|---|
committer | Karl Williamson <public@khwilliamson.com> | 2013-07-30 12:05:47 -0600 |
commit | f202c207b253f1f19a37243471f35742d6bb309c (patch) | |
tree | 6427b46606be4436f4dc5c914d7ffe255ee176b2 /regcomp.c | |
parent | 9a1ec8a9cfbf5741c8f41cdf8d1f5fe0c3600696 (diff) | |
download | perl-f202c207b253f1f19a37243471f35742d6bb309c.tar.gz |
regcomp.c: Debug output clearer ranges
It's not immediately obvious what the character class [!-~] matches.
Its equivalent, [\x{21}-\x{7e}], is clearer. This commit changes the debug
output to use the latter form for character class matches, while retaining
the current behavior where it is clear what the range matches, e.g.,
[J-R]. Ranges like [A-z] include more than just alphabetics, so they
are now output as [\x{41}-\x{7a}]. (Debug output is produced, for example,
when the command line option -Dr is specified.)
Diffstat (limited to 'regcomp.c')
-rw-r--r-- | regcomp.c | 30 |
1 file changed, 27 insertions, 3 deletions
@@ -15526,13 +15526,37 @@ S_put_latin1_charclass_innards(pTHX_ SV *sv, char *bitmap)
                 if (rangestart == -1)
                     rangestart = i;
             } else if (rangestart != -1) {
-                if (i <= rangestart + 3)
+                int j = i - 1;
+                if (i <= rangestart + 3) { /* Individual chars in short ranges */
                     for (; rangestart < i; rangestart++)
                         put_byte(sv, rangestart);
-                else {
+                }
+                else if (   j > 255
+                         || ! isALPHANUMERIC(rangestart)
+                         || ! isALPHANUMERIC(j)
+                         || isDIGIT(rangestart) != isDIGIT(j)
+                         || isUPPER(rangestart) != isUPPER(j)
+                         || isLOWER(rangestart) != isLOWER(j)
+
+                            /* This final test should get optimized out except
+                             * on EBCDIC platforms, where it causes ranges that
+                             * cross discontinuities like i/j to be shown as hex
+                             * instead of the misleading, e.g. H-K (since that
+                             * range includes more than H, I, J, K). */
+                         || (j - rangestart)
+                             != NATIVE_TO_ASCII(j) - NATIVE_TO_ASCII(rangestart))
+                {
+                    Perl_sv_catpvf(aTHX_ sv, "\\x{%02x}-\\x{%02x}",
+                                   rangestart,
+                                   (j < 256) ? j : 255);
+                }
+                else { /* Here, the ends of the range are both digits, or both
+                          uppercase, or both lowercase; and there's no
+                          discontinuity in the range (which could happen on EBCDIC
+                          platforms) */
                     put_byte(sv, rangestart);
                     sv_catpvs(sv, "-");
-                    put_byte(sv, i - 1);
+                    put_byte(sv, j);
                 }
                 rangestart = -1;
                 has_output_anything = TRUE;