summaryrefslogtreecommitdiff
path: root/sv.c
diff options
context:
space:
mode:
authorYves Orton <demerphq@gmail.com>2022-08-01 16:21:12 +0200
committerYves Orton <demerphq@gmail.com>2022-08-25 14:46:33 +0200
commit33ef5d2cdf69f4a7b22ff4c220758689b59b28a8 (patch)
tree82f059df72c6aac72eff8ff761051f69debba20e /sv.c
parent697eaf802a042beb1c1c6f1983a08a147f12eb72 (diff)
downloadperl-33ef5d2cdf69f4a7b22ff4c220758689b59b28a8.tar.gz
sv.c - add a _QUOTEDPREFIX version of SVf, UTF8f, and HEKf for use in error messages.
These new formats are intended to be used in error messages where we want to show the contents of a string without any possible hidden characters not rendering in the error message, and where it would be unreasonable to show every character of the string if it is very long. A good example would be when we want to say that a class name is illegal. Consider: "Foo\0"->thing() should not throw an error message about "Foo" being missing, the fact there is a null in there should be visible to the developer. Similarly if we had ("x" x 1000_000)->thing() we also do not want to throw a 1MB error message as it is generally just unhelpful, a class name that long is almost certainly a mistake. Currently this patch restricts it to showing 256 characters, the first 128 followed by an ellipses followed by the last 128 characters, but the docs are such that we can change that if we wish, I suspect something like 100 would be more reasonable. You can override the define PERL_QUOTEDPREFIX_LEN to a longer value in Configure if you wish. Example usage: other= newSVpvs("Some\0::Thing\n"); sv_catpvf(msg_sv,"%" SVf_QUOTEDPREFIX, SVfARG(other)); Should append "Some\0::Thing\n" to the msg_sv. If it were very long it would have ellipses infixed. The class name "x" x 1_000_000 would show Can't locate object method "non_existent_method" via \ package "x[repeated 128 times]"..."x[repeated 128 times]" \ (perhaps you forgot to load \ "x[repeated 128 times]"..."x[repeated 128 times]"?) at -e line 1. (but obviously as one line with the literal text of the class instead of "[repeated 128 times]") This patch changes a variety of error messages that used to output the full string always. I haven't changed every place that this could happen yet, just the main ones related to method calls, subroutine names and the like.
Diffstat (limited to 'sv.c')
-rw-r--r--sv.c102
1 files changed, 93 insertions, 9 deletions
diff --git a/sv.c b/sv.c
index 3bda95ce7a..e7991350e0 100644
--- a/sv.c
+++ b/sv.c
@@ -12027,6 +12027,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
#endif
/* we never change this unless USE_LOCALE_NUMERIC */
bool in_lc_numeric = FALSE;
+ SV *tmp_sv = NULL;
PERL_ARGS_ASSERT_SV_VCATPVFN_FLAGS;
PERL_UNUSED_ARG(maybe_tainted);
@@ -12132,6 +12133,8 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
char c; /* the actual format ('d', s' etc) */
+ bool escape_it = FALSE; /* if this is a string should we quote and escape it? */
+
/* echo everything up to the next format specification */
for (q = fmtstart; q < patend && *q != '%'; ++q)
@@ -12505,6 +12508,21 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
}
string:
+ if (escape_it) {
+ U32 flags = PERL_PV_PRETTY_QUOTEDPREFIX;
+ if (is_utf8)
+ flags |= PERL_PV_ESCAPE_UNI;
+
+ if (!tmp_sv) {
+ /* "blah"... where blah might be made up
+ * of characters like \x{1234} */
+ tmp_sv = newSV(1 + (PERL_QUOTEDPREFIX_LEN * 8) + 1 + 3);
+ sv_2mortal(tmp_sv);
+ }
+ pv_pretty(tmp_sv, eptr, elen, PERL_QUOTEDPREFIX_LEN,
+ NULL, NULL, flags);
+ eptr = SvPV_const(tmp_sv, elen);
+ }
if (has_precis && precis < elen)
elen = precis;
break;
@@ -12513,7 +12531,34 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
case 'p':
- /* %p extensions:
+ /* BEGIN NOTE
+ *
+ * We want to extend the C level sprintf format API with
+ * custom formats for specific types (eg SV*) and behavior.
+ * However some C compilers are "sprintf aware" and will
+ * throw compile time exceptions when an illegal sprintf is
+ * encountered, so we can't just add new format letters.
+ *
+ * However it turns out the length argument to the %p format
+ * is more or less useless (the size of a pointer does not
+ * change over time) and is not really used in the C level
+ * code. Accordingly we can map our special behavior to
+ * specific "length" options to the %p format. We hide these
+ * mappings behind defines anyway, so nobody needs to know
+ * that HEKf is actually %2p. This keeps the C compiler
+ * happy while allowing us to add new formats.
+ *
+ * Note the existing logic for which number is used for what
+ * is torturous. All negative values are used for SVf, and
+ * non-negative values have arbitrary meanings with no
+ * structure to them. This may change in the future.
+ *
+ * NEVER use the raw %p values directly. Always use the define
+ * as the underlying mapping may change in the future.
+ *
+ * END NOTE
+ *
+ * %p extensions:
*
* "%...p" is normally treated like "%...x", except that the
* number to print is the SV's address (or a pointer address
@@ -12523,23 +12568,42 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
* extensions. These are currently:
*
* %-p (SVf) Like %s, but gets the string from an SV*
- * arg rather than a char* arg.
+ * arg rather than a char* arg. Use C<SVfARG()>
+ * to set up the argument properly.
* (This was previously %_).
*
- * %-<num>p Ditto but like %.<num>s (i.e. num is max width)
+ * %-<num>p Ditto but like %.<num>s (i.e. num is max
+ * width), there is no escaped and quoted version
+ * of this.
+ *
+ * %1p (PVf_QUOTEDPREFIX). Like raw %s, but it is escaped
+ * and quoted.
+ *
+ * %5p (SVf_QUOTEDPREFIX) Like SVf, but length restricted,
+ * escaped and quoted with pv_pretty. Intended
+ * for error messages.
*
* %2p (HEKf) Like %s, but using the key string in a HEK
+ * %7p (HEKf_QUOTEDPREFIX) ... but escaped and quoted.
*
* %3p (HEKf256) Ditto but like %.256s
+ * %8p (HEKf256_QUOTEDPREFIX) ... but escaped and quoted
*
* %d%lu%4p (UTF8f) A utf8 string. Consumes 3 args:
* (cBOOL(utf8), len, string_buf).
* It's handled by the "case 'd'" branch
* rather than here.
+ * %d%lu%9p (UTF8f_QUOTEDPREFIX) .. but escaped and quoted.
+ *
*
- * %<num>p where num is 1 or > 4: reserved for future
+ * %<num>p where num is > 9: reserved for future
* extensions. Warns, but then is treated as a
* general %p (print hex address) format.
+ *
+ * NOTE: If you add a new magic %p value you will
+ * need to update F<t/porting/diag.t> to be aware of it
+ * on top of adding the various defines and etc. Do not
+ * forget to add it to F<pod/perlguts.pod> as well.
*/
if ( args
@@ -12551,10 +12615,12 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
&& q[-2] != '*'
&& q[-2] != '$'
) {
- if (left) { /* %-p (SVf), %-NNNp */
- if (width) {
+ if (left || width == 5) { /* %-p (SVf), %-NNNp, %5p */
+ if (left && width) {
precis = width;
has_precis = TRUE;
+ } else if (width == 5) {
+ escape_it = TRUE;
}
argsv = MUTABLE_SV(va_arg(*args, void*));
eptr = SvPV_const(argsv, elen);
@@ -12563,7 +12629,9 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
width = 0;
goto string;
}
- else if (width == 2 || width == 3) { /* HEKf, HEKf256 */
+ else if (width == 2 || width == 3 ||
+ width == 7 || width == 8)
+ { /* HEKf, HEKf256, HEKf_QUOTEDPREFIX, HEKf256_QUOTEDPREFIX */
HEK * const hek = va_arg(*args, HEK *);
eptr = HEK_KEY(hek);
elen = HEK_LEN(hek);
@@ -12573,10 +12641,20 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
precis = 256;
has_precis = TRUE;
}
+ if (width > 5)
+ escape_it = TRUE;
+ width = 0;
+ goto string;
+ }
+ else if (width == 1) {
+ eptr = va_arg(*args,char *);
+ elen = strlen(eptr);
+ escape_it = TRUE;
width = 0;
goto string;
}
else if (width) {
+ /* note width=4 or width=9 is handled under %d */
Perl_ck_warner_d(aTHX_ packWARN(WARN_INTERNAL),
"internal %%<num>p might conflict with future printf extensions");
}
@@ -12617,7 +12695,8 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
case 'd':
/* probably just a plain %d, but it might be the start of the
* special UTF8f format, which usually looks something like
- * "%d%lu%4p" (the lu may vary by platform)
+ * "%d%lu%4p" (the lu may vary by platform) or
+ * "%d%lu%9p" for an escaped version.
*/
assert((UTF8f)[0] == 'd');
assert((UTF8f)[1] == '%');
@@ -12626,10 +12705,15 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
&& q == fmtstart + 1 /* plain %d, not %....d */
&& patend >= fmtstart + sizeof(UTF8f) - 1 /* long enough */
&& *q == '%'
- && strnEQ(q + 1, (UTF8f) + 2, sizeof(UTF8f) - 3))
+ && strnEQ(q + 1, (UTF8f) + 2, sizeof(UTF8f) - 5)
+ && q[sizeof(UTF8f)-3] == 'p'
+ && (q[sizeof(UTF8f)-4] == '4' ||
+ q[sizeof(UTF8f)-4] == '9'))
{
/* The argument has already gone through cBOOL, so the cast
is safe. */
+ if (q[sizeof(UTF8f)-4] == '9')
+ escape_it = TRUE;
is_utf8 = (bool)va_arg(*args, int);
elen = va_arg(*args, UV);
/* if utf8 length is larger than 0x7ffff..., then it might