diff options
author | Jarkko Hietaniemi <jhi@iki.fi> | 2001-03-21 03:48:24 +0000 |
---|---|---|
committer | Jarkko Hietaniemi <jhi@iki.fi> | 2001-03-21 03:48:24 +0000 |
commit | dc71dc59fcd7e213f3e4a961182792e0d7ceae78 (patch) | |
tree | a63a35bf0b8a62e607f5292f12b535e29a4cf921 /ext/Data/Dumper/Dumper.xs | |
parent | 933fb4e4b6dafdee7ff28c399d8aa8c4737b9819 (diff) | |
download | perl-dc71dc59fcd7e213f3e4a961182792e0d7ceae78.tar.gz |
First stab at making Data::Dumper grok Unicode.
TODO: tests, EBCDICify.
p4raw-id: //depot/perl@9274
Diffstat (limited to 'ext/Data/Dumper/Dumper.xs')
-rw-r--r-- | ext/Data/Dumper/Dumper.xs | 136 |
1 files changed, 104 insertions, 32 deletions
diff --git a/ext/Data/Dumper/Dumper.xs b/ext/Data/Dumper/Dumper.xs index 25e72b144c..2cb89e57d8 100644 --- a/ext/Data/Dumper/Dumper.xs +++ b/ext/Data/Dumper/Dumper.xs @@ -22,6 +22,7 @@ static I32 num_q (char *s, STRLEN slen); static I32 esc_q (char *dest, char *src, STRLEN slen); +static I32 esc_q_utf8 (SV *sv, char *src, STRLEN slen); static SV *sv_x (pTHX_ SV *sv, char *str, STRLEN len, I32 n); static I32 DD_dump (pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv, AV *postav, I32 *levelp, I32 indent, @@ -96,6 +97,52 @@ esc_q(register char *d, register char *s, register STRLEN slen) return ret; } +static I32 +esc_q_utf8(SV* sv, register char *src, register STRLEN slen) +{ + char *s, *send, *r; + STRLEN grow = 0, j = 1, l; + bool dquote = FALSE; + + /* this will need EBCDICification */ + for (s = src, send = src + slen; s < send; s += UTF8SKIP(s)) { + UV k = utf8_to_uvchr((U8*)s, &l); + + grow += + (*s == '"' || *s == '\\') ? 2 : + (k < 0x80 ? 1 : UNISKIP(k) + 1 + 4); /* 4: \x{} */ + } + sv_grow(sv, SvCUR(sv)+3+grow); /* 3: ""\0 */ + r = SvPVX(sv) + SvCUR(sv); + + for (s = src; s < send; s += UTF8SKIP(s)) { + UV k = utf8_to_uvchr((U8*)s, &l); + + if (*s == '"' || *s == '\\') { + r[j++] = '\\'; + r[j++] = *s; + } + else if (k < 0x80) + r[j++] = k; + else { + r[j++] = '\\'; + r[j++] = 'x'; + r[j++] = '{'; + j += sprintf(r + j, "%x", k); + r[j++] = '}'; + dquote = TRUE; + } + } + if (dquote) + r[0] = r[j++] = '"'; + else + r[0] = r[j++] = '\''; + r[j++] = '\0'; + SvCUR_set(sv, SvCUR(sv) + j); + + return j; +} + /* append a repeated string to an SV */ static SV * sv_x(pTHX_ SV *sv, register char *str, STRLEN len, I32 n) @@ -456,38 +503,59 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv, (void)hv_iterinit((HV*)ival); i = 0; while ((entry = hv_iternext((HV*)ival))) { - char *nkey; + char *nkey = NULL; I32 nticks = 0; + SV* keysv; + STRLEN keylen; + bool do_utf8 = FALSE; if (i) sv_catpvn(retval, ",", 1); i++; - key = 
hv_iterkey(entry, &klen); - hval = hv_iterval((HV*)ival, entry); - - if (quotekeys || needs_quote(key)) { - nticks = num_q(key, klen); - New(0, nkey, klen+nticks+3, char); - nkey[0] = '\''; - if (nticks) - klen += esc_q(nkey+1, key, klen); - else - (void)Copy(key, nkey+1, klen, char); - nkey[++klen] = '\''; - nkey[++klen] = '\0'; + keysv = hv_iterkeysv(entry); + hval = hv_iterval((HV*)ival, entry); + + do_utf8 = DO_UTF8(keysv); + key = SvPV(keysv, keylen); + klen = keylen; + + if (do_utf8) { + char *okey = SvPVX(retval) + SvCUR(retval); + I32 nlen; + + sv_catsv(retval, totpad); + sv_catsv(retval, ipad); + nlen = esc_q_utf8(retval, key, klen); + + sname = newSVsv(iname); + sv_catpvn(sname, okey, nlen); + sv_catpvn(sname, "}", 1); } else { - New(0, nkey, klen, char); - (void)Copy(key, nkey, klen, char); - } - - sname = newSVsv(iname); - sv_catpvn(sname, nkey, klen); - sv_catpvn(sname, "}", 1); + if (quotekeys || needs_quote(key)) { + nticks = num_q(key, klen); + New(0, nkey, klen+nticks+3, char); + nkey[0] = '\''; + if (nticks) + klen += esc_q(nkey+1, key, klen); + else + (void)Copy(key, nkey+1, klen, char); + nkey[++klen] = '\''; + nkey[++klen] = '\0'; + } + else { + New(0, nkey, klen, char); + (void)Copy(key, nkey, klen, char); + } - sv_catsv(retval, totpad); - sv_catsv(retval, ipad); - sv_catpvn(retval, nkey, klen); + sname = newSVsv(iname); + sv_catpvn(sname, nkey, klen); + sv_catpvn(sname, "}", 1); + + sv_catsv(retval, totpad); + sv_catsv(retval, ipad); + sv_catpvn(retval, nkey, klen); + } sv_catpvn(retval, " => ", 4); if (indent >= 2) { char *extra; @@ -662,14 +730,18 @@ DD_dump(pTHX_ SV *val, char *name, STRLEN namelen, SV *retval, HV *seenhv, } else { c = SvPV(val, i); - sv_grow(retval, SvCUR(retval)+3+2*i); - r = SvPVX(retval)+SvCUR(retval); - r[0] = '\''; - i += esc_q(r+1, c, i); - ++i; - r[i++] = '\''; - r[i] = '\0'; - SvCUR_set(retval, SvCUR(retval)+i); + if (DO_UTF8(val)) + i += esc_q_utf8(retval, c, i); + else { + sv_grow(retval, SvCUR(retval)+3+2*i); 
/* 3: ""\0 */ + r = SvPVX(retval) + SvCUR(retval); + r[0] = '\''; + i += esc_q(r+1, c, i); + ++i; + r[i++] = '\''; + r[i] = '\0'; + SvCUR_set(retval, SvCUR(retval)+i); + } } } |