diff options
author | Felipe Gasper <felipe@felipegasper.com> | 2022-01-28 21:35:34 -0500 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2022-03-04 09:17:54 -0700 |
commit | 8131b14be4bd2c1e114ba08e4a2cbe76db0a4e22 (patch) | |
tree | 642169683c1562fe1cc4d7fe352a6d47ec88686e | |
parent | 8a4f7e019ceeb33732fce1e96ddd60afa7ffd716 (diff) | |
download | perl-8131b14be4bd2c1e114ba08e4a2cbe76db0a4e22.tar.gz |
Make sv_dump() (and Devel::Peek) escape PV contents with hex, not octal.
This improves the readability of SV dumps for those more familiar with
hex than octal--which, in 2022, is probably an outsized majority of
programmers.
-rw-r--r-- | dump.c | 45 | ||||
-rw-r--r-- | ext/Devel-Peek/t/Peek.t | 12 | ||||
-rw-r--r-- | perl.h | 4 | ||||
-rw-r--r-- | pod/perldelta.pod | 7 |
4 files changed, 49 insertions, 19 deletions
@@ -91,6 +91,9 @@ S_append_flags(pTHX_ SV *sv, U32 flags, const struct flag_to_name *start, PERL_PV_ESCAPE_NONASCII | PERL_PV_ESCAPE_DWIM \ | ((utf8) ? PERL_PV_ESCAPE_UNI : 0) ) +#define _pv_display_for_dump(dsv, pv, cur, len, pvlim) \ + _pv_display_flags(aTHX_ dsv, pv, cur, len, pvlim, PERL_PV_ESCAPE_DWIM_ALL_HEX) + /* =for apidoc pv_escape @@ -153,11 +156,21 @@ Unused or not for public use */ #define PV_ESCAPE_OCTBUFSIZE 32 +#define PV_BYTE_HEX_UC "x%02" UVXf +#define PV_BYTE_HEX_LC "x%02" UVxf + char * Perl_pv_escape( pTHX_ SV *dsv, char const * const str, const STRLEN count, const STRLEN max, - STRLEN * const escaped, const U32 flags ) + STRLEN * const escaped, U32 flags ) { + + bool use_uc_hex = false; + if (flags & PERL_PV_ESCAPE_DWIM_ALL_HEX) { + use_uc_hex = true; + flags |= PERL_PV_ESCAPE_DWIM; + } + const char esc = (flags & PERL_PV_ESCAPE_RE) ? '%' : '\\'; const char dq = (flags & PERL_PV_ESCAPE_QUOTE) ? '"' : esc; char octbuf[PV_ESCAPE_OCTBUFSIZE] = "%123456789ABCDF"; @@ -193,7 +206,7 @@ Perl_pv_escape( pTHX_ SV *dsv, char const * const str, else chsize = my_snprintf( octbuf, PV_ESCAPE_OCTBUFSIZE, ((flags & PERL_PV_ESCAPE_DWIM) && !isuni) - ? "%cx%02" UVxf + ? ( use_uc_hex ? ("%c" PV_BYTE_HEX_UC) : ("%c" PV_BYTE_HEX_LC) ) : "%cx{%02" UVxf "}", esc, u); } else if (flags & PERL_PV_ESCAPE_NOBACKSLASH) { @@ -222,9 +235,9 @@ Perl_pv_escape( pTHX_ SV *dsv, char const * const str, chsize = 1; break; default: - if ( (flags & PERL_PV_ESCAPE_DWIM) && c != '\0' ) { + if ( (flags & PERL_PV_ESCAPE_DWIM_ALL_HEX) || ((flags & PERL_PV_ESCAPE_DWIM) && c != '\0') ) { chsize = my_snprintf( octbuf, PV_ESCAPE_OCTBUFSIZE, - isuni ? "%cx{%02" UVxf "}" : "%cx%02" UVxf, + isuni ? "%cx{%02" UVxf "}" : ( use_uc_hex ? ("%c" PV_BYTE_HEX_UC) : ("%c" PV_BYTE_HEX_LC) ), esc, u); } else if ((pv+readsize < end) && isDIGIT((U8)*(pv+readsize))) @@ -345,6 +358,17 @@ Perl_pv_pretty( pTHX_ SV *dsv, char const * const str, const STRLEN count, return SvPVX(dsv); } +STATIC char * +_pv_display_flags(pTHX_ SV *dsv, const char *pv, STRLEN cur, STRLEN len, STRLEN pvlim, I32 pretty_flags) +{ + PERL_ARGS_ASSERT_PV_DISPLAY; + + pv_pretty( dsv, pv, cur, pvlim, NULL, NULL, PERL_PV_PRETTY_DUMP | pretty_flags ); + if (len > cur && pv[cur] == '\0') + sv_catpvs( dsv, "\\0"); + return SvPVX(dsv); +} + /* =for apidoc pv_display @@ -363,12 +387,7 @@ Note that the final string may be up to 7 chars longer than pvlim. char * Perl_pv_display(pTHX_ SV *dsv, const char *pv, STRLEN cur, STRLEN len, STRLEN pvlim) { - PERL_ARGS_ASSERT_PV_DISPLAY; - - pv_pretty( dsv, pv, cur, pvlim, NULL, NULL, PERL_PV_PRETTY_DUMP); - if (len > cur && pv[cur] == '\0') - sv_catpvs( dsv, "\\0"); - return SvPVX(dsv); + return _pv_display_flags(aTHX_ dsv, pv, cur, len, pvlim, 0); } char * @@ -1912,7 +1931,7 @@ Perl_do_sv_dump(pTHX_ I32 level, PerlIO *file, SV *sv, I32 nest, I32 maxnest, bo PTR2UV(ptr)); if (SvOOK(sv)) { PerlIO_printf(file, "( %s . ) ", - pv_display(d, ptr - delta, delta, 0, + _pv_display_for_dump(d, ptr - delta, delta, 0, pvlim)); } if (type == SVt_INVLIST) { @@ -1921,7 +1940,7 @@ Perl_do_sv_dump(pTHX_ I32 level, PerlIO *file, SV *sv, I32 nest, I32 maxnest, bo _invlist_dump(file, level, " ", sv); } else { - PerlIO_printf(file, "%s", pv_display(d, ptr, SvCUR(sv), + PerlIO_printf(file, "%s", _pv_display_for_dump(d, ptr, SvCUR(sv), re ? 0 : SvLEN(sv), pvlim)); if (SvUTF8(sv)) /* the 6? \x{....} */ @@ -2229,7 +2248,7 @@ Perl_do_sv_dump(pTHX_ I32 level, PerlIO *file, SV *sv, I32 nest, I32 maxnest, bo keypv = SvPV_const(keysv, len); elt = HeVAL(he); - Perl_dump_indent(aTHX_ level+1, file, "Elt %s ", pv_display(d, keypv, len, 0, pvlim)); + Perl_dump_indent(aTHX_ level+1, file, "Elt %s ", _pv_display_for_dump(d, keypv, len, 0, pvlim)); if (SvUTF8(keysv)) PerlIO_printf(file, "[UTF8 \"%s\"] ", sv_uni_display(d, keysv, 6 * SvCUR(keysv), UNI_DISPLAY_QQ)); if (HvEITER_get(hv) == he) diff --git a/ext/Devel-Peek/t/Peek.t b/ext/Devel-Peek/t/Peek.t index fd8d0b44d4..634ab55c85 100644 --- a/ext/Devel-Peek/t/Peek.t +++ b/ext/Devel-Peek/t/Peek.t @@ -527,7 +527,7 @@ do_test('string with Unicode', REFCNT = 1 FLAGS = \\((?:PADTMP,)?POK,READONLY,pPOK,UTF8\\) # $] < 5.019003 FLAGS = \\((?:PADTMP,)?POK,(?:IsCOW,)?pPOK,UTF8\\) # $] >=5.019003 - PV = $ADDR "\\\214\\\101\\\0\\\235\\\101"\\\0 \[UTF8 "\\\x\{100\}\\\x\{0\}\\\x\{200\}"\] + PV = $ADDR "\\\\x8C\\\\x41\\x00\\\\x9D\\\\x41"\\\0 \[UTF8 "\\\x\{100\}\\\x\{0\}\\\x\{200\}"\] CUR = 5 LEN = \\d+ COW_REFCNT = 1 # $] < 5.019007 @@ -539,7 +539,7 @@ do_test('string with Unicode', REFCNT = 1 FLAGS = \\((?:PADTMP,)?POK,READONLY,pPOK,UTF8\\) # $] < 5.019003 FLAGS = \\((?:PADTMP,)?POK,(?:IsCOW,)?pPOK,UTF8\\) # $] >=5.019003 - PV = $ADDR "\\\304\\\200\\\0\\\310\\\200"\\\0 \[UTF8 "\\\x\{100\}\\\x\{0\}\\\x\{200\}"\] + PV = $ADDR "\\\\xC4\\\\x80\\\x00\\\\xC8\\\\x80"\\\0 \[UTF8 "\\\x\{100\}\\\x\{0\}\\\x\{200\}"\] CUR = 5 LEN = \\d+ COW_REFCNT = 1 # $] < 5.019007 @@ -561,11 +561,11 @@ do_test('reference to hash containing Unicode', KEYS = 1 FILL = 1 MAX = 7 - Elt "\\\214\\\101" \[UTF8 "\\\x\{100\}"\] HASH = $ADDR + Elt "\\\\x8C\\\\x41" \[UTF8 "\\\x\{100\}"\] HASH = $ADDR SV = PV\\($ADDR\\) at $ADDR REFCNT = 1 FLAGS = \\(POK,(?:IsCOW,)?pPOK,UTF8\\) - PV = $ADDR "\\\235\\\101"\\\0 \[UTF8 "\\\x\{200\}"\] + PV = $ADDR "\\\\x9D\\\\x41"\\\0 \[UTF8 "\\\x\{200\}"\] CUR = 2 LEN = \\d+ COW_REFCNT = 1 # $] < 5.019007 @@ -588,11 +588,11 @@ do_test('reference to hash containing Unicode', KEYS = 1 FILL = 1 MAX = 7 - Elt "\\\304\\\200" \[UTF8 "\\\x\{100\}"\] HASH = $ADDR + Elt "\\\\xC4\\\\x80" \[UTF8 "\\\x\{100\}"\] HASH = $ADDR SV = PV\\($ADDR\\) at $ADDR REFCNT = 1 FLAGS = \\(POK,(?:IsCOW,)?pPOK,UTF8\\) - PV = $ADDR "\\\310\\\200"\\\0 \[UTF8 "\\\x\{200\}"\] + PV = $ADDR "\\\\xC8\\\\x80"\\\0 \[UTF8 "\\\x\{200\}"\] CUR = 2 LEN = \\d+ COW_REFCNT = 1 # $] < 5.019007 @@ -7709,8 +7709,12 @@ Allows one ending \0 #define PERL_PV_PRETTY_NOCLEAR PERL_PV_ESCAPE_NOCLEAR #define PERL_PV_ESCAPE_RE 0x008000 +/* Escape PV with hex, except leave NULs as octal: */ #define PERL_PV_ESCAPE_DWIM 0x010000 +/* Escape PV with all hex, including NUL. */ +#define PERL_PV_ESCAPE_DWIM_ALL_HEX 0x020000 + /* used by pv_display in dump.c*/ #define PERL_PV_PRETTY_DUMP PERL_PV_PRETTY_ELLIPSES|PERL_PV_PRETTY_QUOTE diff --git a/pod/perldelta.pod b/pod/perldelta.pod index 9c7db69c4f..cf427e0f5e 100644 --- a/pod/perldelta.pod +++ b/pod/perldelta.pod @@ -364,6 +364,13 @@ well. XXX +=item * + +C<sv_dump> (and L<Devel::Peek>’s C<Dump> function) now escapes high-bit +octets in the PV as hex rather than octal. Since most folks understand hex +more readily than octal, this should make these dumps a bit more legible. +This does B<not> affect any other diagnostic interfaces like C<pv_display>. + =back =head1 Selected Bug Fixes |