summaryrefslogtreecommitdiff
path: root/sv.c
diff options
context:
space:
mode:
Diffstat (limited to 'sv.c')
-rw-r--r--sv.c102
1 files changed, 93 insertions, 9 deletions
diff --git a/sv.c b/sv.c
index 3bda95ce7a..e7991350e0 100644
--- a/sv.c
+++ b/sv.c
@@ -12027,6 +12027,7 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
#endif
/* we never change this unless USE_LOCALE_NUMERIC */
bool in_lc_numeric = FALSE;
+ SV *tmp_sv = NULL;
PERL_ARGS_ASSERT_SV_VCATPVFN_FLAGS;
PERL_UNUSED_ARG(maybe_tainted);
@@ -12132,6 +12133,8 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
char c; /* the actual format ('d', s' etc) */
+ bool escape_it = FALSE; /* if this is a string should we quote and escape it? */
+
/* echo everything up to the next format specification */
for (q = fmtstart; q < patend && *q != '%'; ++q)
@@ -12505,6 +12508,21 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
}
string:
+ if (escape_it) {
+ U32 flags = PERL_PV_PRETTY_QUOTEDPREFIX;
+ if (is_utf8)
+ flags |= PERL_PV_ESCAPE_UNI;
+
+ if (!tmp_sv) {
+ /* "blah"... where blah might be made up
+ * of characters like \x{1234} */
+ tmp_sv = newSV(1 + (PERL_QUOTEDPREFIX_LEN * 8) + 1 + 3);
+ sv_2mortal(tmp_sv);
+ }
+ pv_pretty(tmp_sv, eptr, elen, PERL_QUOTEDPREFIX_LEN,
+ NULL, NULL, flags);
+ eptr = SvPV_const(tmp_sv, elen);
+ }
if (has_precis && precis < elen)
elen = precis;
break;
@@ -12513,7 +12531,34 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
case 'p':
- /* %p extensions:
+ /* BEGIN NOTE
+ *
+ * We want to extend the C level sprintf format API with
+ * custom formats for specific types (eg SV*) and behavior.
+ * However some C compilers are "sprintf aware" and will
+ * throw compile time exceptions when an illegal sprintf is
+ * encountered, so we can't just add new format letters.
+ *
+ * However it turns out the length argument to the %p format
+ * is more or less useless (the size of a pointer does not
+ * change over time) and is not really used in the C level
+ * code. Accordingly we can map our special behavior to
+ * specific "length" options to the %p format. We hide these
+ * mappings behind defines anyway, so nobody needs to know
+ * that HEKf is actually %2p. This keeps the C compiler
+ * happy while allowing us to add new formats.
+ *
+ * Note the existing logic for which number is used for what
+ * is torturous. All negative values are used for SVf, and
+ * non-negative values have arbitrary meanings with no
+ * structure to them. This may change in the future.
+ *
+ * NEVER use the raw %p values directly. Always use the define
+ * as the underlying mapping may change in the future.
+ *
+ * END NOTE
+ *
+ * %p extensions:
*
* "%...p" is normally treated like "%...x", except that the
* number to print is the SV's address (or a pointer address
@@ -12523,23 +12568,42 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
* extensions. These are currently:
*
* %-p (SVf) Like %s, but gets the string from an SV*
- * arg rather than a char* arg.
+ * arg rather than a char* arg. Use C<SVfARG()>
+ * to set up the argument properly.
* (This was previously %_).
*
- * %-<num>p Ditto but like %.<num>s (i.e. num is max width)
+ * %-<num>p Ditto but like %.<num>s (i.e. num is max
+ * width), there is no escaped and quoted version
+ * of this.
+ *
+ * %1p (PVf_QUOTEDPREFIX). Like raw %s, but it is escaped
+ * and quoted.
+ *
+ * %5p (SVf_QUOTEDPREFIX) Like SVf, but length restricted,
+ * escaped and quoted with pv_pretty. Intended
+ * for error messages.
*
* %2p (HEKf) Like %s, but using the key string in a HEK
+ * %7p (HEKf_QUOTEDPREFIX) ... but escaped and quoted.
*
* %3p (HEKf256) Ditto but like %.256s
+ * %8p (HEKf256_QUOTEDPREFIX) ... but escaped and quoted
*
* %d%lu%4p (UTF8f) A utf8 string. Consumes 3 args:
* (cBOOL(utf8), len, string_buf).
* It's handled by the "case 'd'" branch
* rather than here.
+ * %d%lu%9p (UTF8f_QUOTEDPREFIX) .. but escaped and quoted.
+ *
*
- * %<num>p where num is 1 or > 4: reserved for future
+ * %<num>p where num is > 9: reserved for future
* extensions. Warns, but then is treated as a
* general %p (print hex address) format.
+ *
+ * NOTE: If you add a new magic %p value you will
+ * need to update F<t/porting/diag.t> to be aware of it
+ * on top of adding the various defines and etc. Do not
+ * forget to add it to F<pod/perlguts.pod> as well.
*/
if ( args
@@ -12551,10 +12615,12 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
&& q[-2] != '*'
&& q[-2] != '$'
) {
- if (left) { /* %-p (SVf), %-NNNp */
- if (width) {
+ if (left || width == 5) { /* %-p (SVf), %-NNNp, %5p */
+ if (left && width) {
precis = width;
has_precis = TRUE;
+ } else if (width == 5) {
+ escape_it = TRUE;
}
argsv = MUTABLE_SV(va_arg(*args, void*));
eptr = SvPV_const(argsv, elen);
@@ -12563,7 +12629,9 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
width = 0;
goto string;
}
- else if (width == 2 || width == 3) { /* HEKf, HEKf256 */
+ else if (width == 2 || width == 3 ||
+ width == 7 || width == 8)
+ { /* HEKf, HEKf256, HEKf_QUOTEDPREFIX, HEKf256_QUOTEDPREFIX */
HEK * const hek = va_arg(*args, HEK *);
eptr = HEK_KEY(hek);
elen = HEK_LEN(hek);
@@ -12573,10 +12641,20 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
precis = 256;
has_precis = TRUE;
}
+ if (width > 5)
+ escape_it = TRUE;
+ width = 0;
+ goto string;
+ }
+ else if (width == 1) {
+ eptr = va_arg(*args,char *);
+ elen = strlen(eptr);
+ escape_it = TRUE;
width = 0;
goto string;
}
else if (width) {
+ /* note width=4 or width=9 is handled under %d */
Perl_ck_warner_d(aTHX_ packWARN(WARN_INTERNAL),
"internal %%<num>p might conflict with future printf extensions");
}
@@ -12617,7 +12695,8 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
case 'd':
/* probably just a plain %d, but it might be the start of the
* special UTF8f format, which usually looks something like
- * "%d%lu%4p" (the lu may vary by platform)
+ * "%d%lu%4p" (the lu may vary by platform) or
+ * "%d%lu%9p" for an escaped version.
*/
assert((UTF8f)[0] == 'd');
assert((UTF8f)[1] == '%');
@@ -12626,10 +12705,15 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
&& q == fmtstart + 1 /* plain %d, not %....d */
&& patend >= fmtstart + sizeof(UTF8f) - 1 /* long enough */
&& *q == '%'
- && strnEQ(q + 1, (UTF8f) + 2, sizeof(UTF8f) - 3))
+ && strnEQ(q + 1, (UTF8f) + 2, sizeof(UTF8f) - 5)
+ && q[sizeof(UTF8f)-3] == 'p'
+ && (q[sizeof(UTF8f)-4] == '4' ||
+ q[sizeof(UTF8f)-4] == '9'))
{
/* The argument has already gone through cBOOL, so the cast
is safe. */
+ if (q[sizeof(UTF8f)-4] == '9')
+ escape_it = TRUE;
is_utf8 = (bool)va_arg(*args, int);
elen = va_arg(*args, UV);
/* if utf8 length is larger than 0x7ffff..., then it might