summaryrefslogtreecommitdiff
path: root/dist/Data-Dumper/Dumper.xs
diff options
context:
space:
mode:
Diffstat (limited to 'dist/Data-Dumper/Dumper.xs')
-rw-r--r--dist/Data-Dumper/Dumper.xs100
1 files changed, 59 insertions, 41 deletions
diff --git a/dist/Data-Dumper/Dumper.xs b/dist/Data-Dumper/Dumper.xs
index 0bdcbe06f3..12c4ebd9f6 100644
--- a/dist/Data-Dumper/Dumper.xs
+++ b/dist/Data-Dumper/Dumper.xs
@@ -19,7 +19,9 @@
static I32 num_q (const char *s, STRLEN slen);
static I32 esc_q (char *dest, const char *src, STRLEN slen);
static I32 esc_q_utf8 (pTHX_ SV *sv, const char *src, STRLEN slen, I32 do_utf8, I32 useqq);
-static I32 needs_quote(const char *s, STRLEN len);
+static bool globname_needs_quote(const char *s, STRLEN len);
+static bool key_needs_quote(const char *s, STRLEN len);
+static bool safe_decimal_number(const char *p, STRLEN len);
static SV *sv_x (pTHX_ SV *sv, const char *str, STRLEN len, I32 n);
static I32 DD_dump (pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval,
HV *seenhv, AV *postav, I32 *levelp, I32 indent,
@@ -91,19 +93,19 @@ Perl_utf8_to_uvchr_buf(pTHX_ U8 *s, U8 *send, STRLEN *retlen)
#define DD_is_integer(sv) SvIOK(sv)
#endif
-/* does a string need to be protected? */
-static I32
-needs_quote(const char *s, STRLEN len)
+/* does a glob name need to be protected? */
+static bool
+globname_needs_quote(const char *s, STRLEN len)
{
const char *send = s+len;
TOP:
if (s[0] == ':') {
if (++s<send) {
if (*s++ != ':')
- return 1;
+ return TRUE;
}
else
- return 1;
+ return TRUE;
}
if (isIDFIRST(*s)) {
while (++s<send)
@@ -111,12 +113,35 @@ TOP:
if (*s == ':')
goto TOP;
else
- return 1;
+ return TRUE;
}
}
else
- return 1;
- return 0;
+ return TRUE;
+
+ return FALSE;
+}
+
+/* does a hash key need to be quoted (to the left of => ).
+ Previously this used (globname_)needs_quote() which accepted strings
+ like '::foo', but these aren't safe as unquoted keys under strict.
+*/
+static bool
+key_needs_quote(const char *s, STRLEN len) {
+ const char *send = s+len;
+
+ if (safe_decimal_number(s, len)) {
+ return FALSE;
+ }
+ else if (isIDFIRST(*s)) {
+ while (++s<send)
+ if (!isWORDCHAR(*s))
+ return TRUE;
+ }
+ else
+ return TRUE;
+
+ return FALSE;
}
/* Check that the SV can be represented as a simple decimal integer.
@@ -124,10 +149,7 @@ TOP:
* The perl code does this by matching against /^(?:0|-?[1-9]\d{0,8})\z/
*/
static bool
-safe_decimal_number(pTHX_ SV *val) {
- STRLEN len;
- const char *p = SvPV(val, len);
-
+safe_decimal_number(const char *p, STRLEN len) {
if (len == 1 && *p == '0')
return TRUE;
@@ -883,28 +905,24 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
sv_catsv(retval, totpad);
sv_catsv(retval, ipad);
- /* old logic was first to check utf8 flag, and if utf8 always
+ /* The (very)
+ old logic was first to check utf8 flag, and if utf8 always
call esc_q_utf8. This caused test to break under -Mutf8,
because there even strings like 'c' have utf8 flag on.
Hence with quotekeys == 0 the XS code would still '' quote
them based on flags, whereas the perl code would not,
based on regexps.
- The perl code is correct.
- needs_quote() decides that anything that isn't a valid
- perl identifier needs to be quoted, hence only correctly
- formed strings with no characters outside [A-Za-z0-9_:]
- won't need quoting. None of those characters are used in
- the byte encoding of utf8, so anything with utf8
- encoded characters in will need quoting. Hence strings
- with utf8 encoded characters in will end up inside do_utf8
- just like before, but now strings with utf8 flag set but
- only ascii characters will end up in the unquoted section.
-
- There should also be less tests for the (probably currently)
- more common doesn't need quoting case.
- The code is also smaller (22044 vs 22260) because I've been
- able to pull the common logic out to both sides. */
- if (quotekeys || needs_quote(key,keylen)) {
+
+ The old logic checked that the string was a valid
+ perl glob name (foo::bar), which isn't safe under
+ strict, and differs from the perl code which only
+ accepts simple identifiers.
+
+ With the fix for [perl #120384] I chose to make
+ their handling of key quoting compatible between XS
+ and perl.
+ */
+ if (quotekeys || key_needs_quote(key,keylen)) {
if (do_utf8 || useqq) {
STRLEN ocur = SvCUR(retval);
nlen = esc_q_utf8(aTHX_ retval, key, klen, do_utf8, useqq);
@@ -1094,7 +1112,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
#endif
i = 0; else i -= 4;
}
- if (needs_quote(c,i)) {
+ if (globname_needs_quote(c,i)) {
#ifdef GvNAMEUTF8
if (GvNAMEUTF8(val)) {
sv_grow(retval, SvCUR(retval)+2);
@@ -1188,18 +1206,18 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
}
#endif
- /* the pure perl and XS non-qq outputs have historically been
- * different in this case, but for useqq, let's try to match
- * the pure perl code.
- * see [perl #74798]
- */
- else if (useqq && safe_decimal_number(aTHX_ val)) {
- sv_catsv(retval, val);
- }
else {
integer_came_from_string:
- c = SvPV(val, i);
- if (DO_UTF8(val) || useqq)
+ c = SvPV(val, i);
+ /* the pure perl and XS non-qq outputs have historically been
+ * different in this case, but for useqq, let's try to match
+ * the pure perl code.
+ * see [perl #74798]
+ */
+ if (useqq && safe_decimal_number(c, i)) {
+ sv_catsv(retval, val);
+ }
+ else if (DO_UTF8(val) || useqq)
i += esc_q_utf8(aTHX_ retval, c, i, DO_UTF8(val), useqq);
else {
sv_grow(retval, SvCUR(retval)+3+2*i); /* 3: ""\0 */