summaryrefslogtreecommitdiff
path: root/dump.c
diff options
context:
space:
mode:
authorYves Orton <demerphq@gmail.com>2022-08-01 16:21:12 +0200
committerYves Orton <demerphq@gmail.com>2022-08-25 14:46:33 +0200
commit33ef5d2cdf69f4a7b22ff4c220758689b59b28a8 (patch)
tree82f059df72c6aac72eff8ff761051f69debba20e /dump.c
parent697eaf802a042beb1c1c6f1983a08a147f12eb72 (diff)
downloadperl-33ef5d2cdf69f4a7b22ff4c220758689b59b28a8.tar.gz
sv.c - add a _QUOTEDPREFIX version of SVf, UTF8f, and HEKf for use in error messages.
These new formats are intended to be used in error messages where we want to show the contents of a string without any possible hidden characters not rendering in the error message, and where it would be unreasonable to show every character of the string if it is very long. A good example would be when we want to say that a class name is illegal. Consider: "Foo\0"->thing() should not throw an error message about "Foo" being missing, the fact there is a null in there should be visible to the developer. Similarly if we had ("x" x 1000_000)->thing() we also do not want to throw a 1MB error message as it is generally just unhelpful, a class name that long is almost certainly a mistake. Currently this patch restricts it to showing 256 characters, the first 128 followed by an ellipses followed by the last 128 characters, but the docs are such that we can change that if we wish, I suspect something like 100 would be more reasonable. You can override the define PERL_QUOTEDPREFIX_LEN to a longer value in Configure if you wish. Example usage: other= newSVpvs("Some\0::Thing\n"); sv_catpvf(msg_sv,"%" SVf_QUOTEDPREFIX, SVfARG(other)); Should append "Some\0::Thing\n" to the msg_sv. If it were very long it would have ellipses infixed. The class name "x" x 1_000_000 would show Can't locate object method "non_existent_method" via \ package "x[repeated 128 times]"..."x[repeated 128 times]" \ (perhaps you forgot to load \ "x[repeated 128 times]"..."x[repeated 128 times]"?) at -e line 1. (but obviously as one line with the literal text of the class instead of "[repeated 128 times]") This patch changes a variety of error messages that used to output the full string always. I haven't changed every place that this could happen yet, just the main ones related to method calls, subroutine names and the like.
Diffstat (limited to 'dump.c')
-rw-r--r--dump.c94
1 files changed, 72 insertions, 22 deletions
diff --git a/dump.c b/dump.c
index 232130be39..7272e65ff8 100644
--- a/dump.c
+++ b/dump.c
@@ -110,28 +110,31 @@ will also be escaped.
Normally the SV will be cleared before the escaped string is prepared,
but when C<PERL_PV_ESCAPE_NOCLEAR> is set this will not occur.
-If C<PERL_PV_ESCAPE_UNI> is set then the input string is treated as UTF-8
-if C<PERL_PV_ESCAPE_UNI_DETECT> is set then the input string is scanned
+If C<PERL_PV_ESCAPE_UNI> is set then the input string is treated as UTF-8.
+If C<PERL_PV_ESCAPE_UNI_DETECT> is set then the input string is scanned
using C<is_utf8_string()> to determine if it is UTF-8.
If C<PERL_PV_ESCAPE_ALL> is set then all input chars will be output
-using C<\x01F1> style escapes, otherwise if C<PERL_PV_ESCAPE_NONASCII> is set, only
-non-ASCII chars will be escaped using this style; otherwise, only chars above
-255 will be so escaped; other non printable chars will use octal or
-common escaped patterns like C<\n>.
-Otherwise, if C<PERL_PV_ESCAPE_NOBACKSLASH>
-then all chars below 255 will be treated as printable and
-will be output as literals.
+using C<\x01F1> style escapes, otherwise if C<PERL_PV_ESCAPE_NONASCII>
+is set, only non-ASCII chars will be escaped using this style;
+otherwise, only chars above 255 will be so escaped; other non printable
+chars will use octal or common escaped patterns like C<\n>. Otherwise,
+if C<PERL_PV_ESCAPE_NOBACKSLASH> then all chars below 255 will be
+treated as printable and will be output as literals. The
+C<PERL_PV_ESCAPE_NON_WC> modifies the previous rules to cause word
+chars, unicode or otherwise, to be output as literals, note this uses
+the *unicode* rules for deciding on word characters.
If C<PERL_PV_ESCAPE_FIRSTCHAR> is set then only the first char of the
-string will be escaped, regardless of max. If the output is to be in hex,
-then it will be returned as a plain hex
-sequence. Thus the output will either be a single char,
-an octal escape sequence, a special escape like C<\n> or a hex value.
+string will be escaped, regardless of max. If the output is to be in
+hex, then it will be returned as a plain hex sequence. Thus the output
+will either be a single char, an octal escape sequence, a special escape
+like C<\n> or a hex value.
-If C<PERL_PV_ESCAPE_RE> is set then the escape char used will be a C<"%"> and
-not a C<"\\">. This is because regexes very often contain backslashed
-sequences, whereas C<"%"> is not a particularly common character in patterns.
+If C<PERL_PV_ESCAPE_RE> is set then the escape char used will be a
+C<"%"> and not a C<"\\">. This is because regexes very often contain
+backslashed sequences, whereas C<"%"> is not a particularly common
+character in patterns.
Returns a pointer to the escaped text as held by C<dsv>.
@@ -144,6 +147,7 @@ Returns a pointer to the escaped text as held by C<dsv>.
=for apidoc Amnh||PERL_PV_ESCAPE_RE
=for apidoc Amnh||PERL_PV_ESCAPE_UNI
=for apidoc Amnh||PERL_PV_ESCAPE_UNI_DETECT
+=for apidoc Amnh||PERL_PV_ESCAPE_NON_WC
=cut
@@ -161,7 +165,7 @@ Unused or not for public use
char *
Perl_pv_escape( pTHX_ SV *dsv, char const * const str,
- const STRLEN count, const STRLEN max,
+ const STRLEN count, STRLEN max,
STRLEN * const escaped, U32 flags )
{
@@ -173,13 +177,42 @@ Perl_pv_escape( pTHX_ SV *dsv, char const * const str,
const char esc = (flags & PERL_PV_ESCAPE_RE) ? '%' : '\\';
const char dq = (flags & PERL_PV_ESCAPE_QUOTE) ? '"' : esc;
+ const char *qs;
+ const char *qe;
+
char octbuf[PV_ESCAPE_OCTBUFSIZE] = "%123456789ABCDF";
STRLEN wrote = 0; /* chars written so far */
STRLEN chsize = 0; /* size of data to be written */
STRLEN readsize = 1; /* size of data just read */
- bool isuni= flags & PERL_PV_ESCAPE_UNI ? 1 : 0; /* is this UTF-8 */
+ bool isuni= (flags & PERL_PV_ESCAPE_UNI)
+ ? TRUE : FALSE; /* is this UTF-8 */
const char *pv = str;
const char * const end = pv + count; /* end of string */
+ const char *restart = NULL;
+ STRLEN extra_len = 0;
+ STRLEN tail = 0;
+ if ((flags & PERL_PV_ESCAPE_TRUNC_MIDDLE) && max > 3) {
+ if (flags & PERL_PV_ESCAPE_QUOTE) {
+ qs = qe = "\"";
+ extra_len = 5;
+ } else if (flags & PERL_PV_PRETTY_LTGT) {
+ qs = "<";
+ qe = ">";
+ extra_len = 5;
+ } else {
+ qs = qe = "";
+ extra_len = 3;
+ }
+ tail = max / 2;
+ restart = isuni ? (char *)utf8_hop_back((U8*)end,-tail,(U8*)pv) : end - tail;
+ if (restart > pv) {
+ max -= tail;
+ } else {
+ tail = 0;
+ restart = NULL;
+ }
+ }
+
octbuf[0] = esc;
PERL_ARGS_ASSERT_PV_ESCAPE;
@@ -192,9 +225,10 @@ Perl_pv_escape( pTHX_ SV *dsv, char const * const str,
if ((flags & PERL_PV_ESCAPE_UNI_DETECT) && is_utf8_string((U8*)pv, count))
isuni = 1;
- for ( ; (pv < end && (!max || (wrote < max))) ; pv += readsize ) {
+ for ( ; pv < end ; pv += readsize ) {
const UV u= (isuni) ? utf8_to_uvchr_buf((U8*)pv, (U8*) end, &readsize) : (U8)*pv;
const U8 c = (U8)u;
+ const char *source_buf = octbuf;
if ( ( u > 255 )
|| (flags & PERL_PV_ESCAPE_ALL)
@@ -204,6 +238,11 @@ Perl_pv_escape( pTHX_ SV *dsv, char const * const str,
chsize = my_snprintf( octbuf, PV_ESCAPE_OCTBUFSIZE,
"%" UVxf, u);
else
+ if ((flags & PERL_PV_ESCAPE_NON_WC) && isWORDCHAR_uvchr(u)) {
+ chsize = readsize;
+ source_buf = pv;
+ }
+ else
chsize = my_snprintf( octbuf, PV_ESCAPE_OCTBUFSIZE,
((flags & PERL_PV_ESCAPE_DWIM) && !isuni)
? ( use_uc_hex ? ("%c" PV_BYTE_HEX_UC) : ("%c" PV_BYTE_HEX_LC) )
@@ -251,11 +290,22 @@ Perl_pv_escape( pTHX_ SV *dsv, char const * const str,
chsize = 1;
}
}
- if ( max && (wrote + chsize > max) ) {
- break;
+ if (max && (wrote + chsize > max)) {
+ if (restart) {
+ /* this only happens with PERL_PV_ESCAPE_TRUNC_MIDDLE */
+ if (dsv)
+ Perl_sv_catpvf( aTHX_ dsv,"%s...%s", qe, qs);
+ wrote += extra_len;
+ pv = restart;
+ max = tail;
+ wrote = tail = 0;
+ restart = NULL;
+ } else {
+ break;
+ }
} else if (chsize > 1) {
if (dsv)
- sv_catpvn(dsv, octbuf, chsize);
+ sv_catpvn(dsv, source_buf, chsize);
wrote += chsize;
} else {
/* If PERL_PV_ESCAPE_NOBACKSLASH is set then non-ASCII bytes