summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJarkko Hietaniemi <jhi@iki.fi>2002-01-07 04:44:05 +0000
committerJarkko Hietaniemi <jhi@iki.fi>2002-01-07 04:44:05 +0000
commitc728cb41064fc29351e896c61d746060cee08d64 (patch)
tree3fa8c5e0794612f27141bbbd29c9e8ad76d6ffa0
parent108481f44f73c82d1f291486a8da7654b4d58fe6 (diff)
downloadperl-c728cb41064fc29351e896c61d746060cee08d64.tar.gz
More regex and utf8 debug dumping.
p4raw-id: //depot/perl@14114
-rw-r--r--dump.c7
-rw-r--r--regcomp.c10
-rw-r--r--regexec.c17
-rw-r--r--utf8.c29
-rw-r--r--utf8.h3
5 files changed, 49 insertions, 17 deletions
diff --git a/dump.c b/dump.c
index 290ee7a622..02791072e0 100644
--- a/dump.c
+++ b/dump.c
@@ -279,7 +279,8 @@ Perl_sv_peek(pTHX_ SV *sv)
Perl_sv_catpvf(aTHX_ t, "%s)", pv_display(tmp, SvPVX(sv), SvCUR(sv), SvLEN(sv), 127));
if (SvUTF8(sv))
Perl_sv_catpvf(aTHX_ t, " [UTF8 \"%s\"]",
- sv_uni_display(tmp, sv, 8 * sv_len_utf8(sv), 0));
+ sv_uni_display(tmp, sv, 8 * sv_len_utf8(sv),
+ UNI_DISPLAY_QQ));
SvREFCNT_dec(tmp);
}
}
@@ -1115,7 +1116,7 @@ Perl_do_sv_dump(pTHX_ I32 level, PerlIO *file, SV *sv, I32 nest, I32 maxnest, bo
PerlIO_printf(file, "( %s . ) ", pv_display(d, SvPVX(sv)-SvIVX(sv), SvIVX(sv), 0, pvlim));
PerlIO_printf(file, "%s", pv_display(d, SvPVX(sv), SvCUR(sv), SvLEN(sv), pvlim));
if (SvUTF8(sv)) /* the 8? \x{....} */
- PerlIO_printf(file, " [UTF8 \"%s\"]", sv_uni_display(d, sv, 8 * sv_len_utf8(sv), 0));
+ PerlIO_printf(file, " [UTF8 \"%s\"]", sv_uni_display(d, sv, 8 * sv_len_utf8(sv), UNI_DISPLAY_QQ));
PerlIO_printf(file, "\n");
Perl_dump_indent(aTHX_ level, file, " CUR = %"IVdf"\n", (IV)SvCUR(sv));
Perl_dump_indent(aTHX_ level, file, " LEN = %"IVdf"\n", (IV)SvLEN(sv));
@@ -1247,7 +1248,7 @@ Perl_do_sv_dump(pTHX_ I32 level, PerlIO *file, SV *sv, I32 nest, I32 maxnest, bo
elt = hv_iterval(hv, he);
Perl_dump_indent(aTHX_ level+1, file, "Elt %s ", pv_display(d, keypv, len, 0, pvlim));
if (SvUTF8(keysv))
- PerlIO_printf(file, "[UTF8 \"%s\"] ", sv_uni_display(d, keysv, 8 * sv_len_utf8(keysv), 0));
+ PerlIO_printf(file, "[UTF8 \"%s\"] ", sv_uni_display(d, keysv, 8 * sv_len_utf8(keysv), UNI_DISPLAY_QQ));
PerlIO_printf(file, "HASH = 0x%"UVxf"\n", (UV)hash);
do_sv_dump(level+1, file, elt, nest+1, maxnest, dumpops, pvlim);
}
diff --git a/regcomp.c b/regcomp.c
index 3459e0a7ee..07b11ee6dc 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -4557,9 +4557,13 @@ Perl_regprop(pTHX_ SV *sv, regnode *o)
if (k == EXACT) {
SV *dsv = sv_2mortal(newSVpvn("", 0));
- bool do_utf8 = DO_UTF8(sv);
+ /* Using is_utf8_string() is a crude hack but it may
+ * be the best for now since we have no flag "this EXACTish
+ * node was UTF-8" --jhi */
+ bool do_utf8 = is_utf8_string((U8*)STRING(o), STR_LEN(o));
char *s = do_utf8 ?
- pv_uni_display(dsv, (U8*)STRING(o), STR_LEN(o), 60, 0) :
+ pv_uni_display(dsv, (U8*)STRING(o), STR_LEN(o), 60,
+ UNI_DISPLAY_REGEX) :
STRING(o);
int len = do_utf8 ?
strlen(s) :
@@ -4750,7 +4754,7 @@ Perl_pregfree(pTHX_ struct regexp *r)
return;
DEBUG_r({
char *s = pv_uni_display(dsv, (U8*)r->precomp, r->prelen, 60,
- UNI_DISPLAY_ISPRINT);
+ UNI_DISPLAY_REGEX);
int len = SvCUR(dsv);
if (!PL_colorset)
reginitcolors();
diff --git a/regexec.c b/regexec.c
index ee8f602964..203c8e9687 100644
--- a/regexec.c
+++ b/regexec.c
@@ -401,7 +401,8 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
DEBUG_r({
char *s = PL_reg_match_utf8 ?
- sv_uni_display(dsv, sv, 60, 0) : strpos;
+ sv_uni_display(dsv, sv, 60, UNI_DISPLAY_REGEX) :
+ strpos;
int len = PL_reg_match_utf8 ?
strlen(s) : strend - strpos;
if (!PL_colorset)
@@ -1626,11 +1627,11 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
DEBUG_r({
char *s0 = UTF ?
pv_uni_display(dsv0, (U8*)prog->precomp, prog->prelen, 60,
- UNI_DISPLAY_ISPRINT) :
+ UNI_DISPLAY_REGEX) :
prog->precomp;
int len0 = UTF ? SvCUR(dsv0) : prog->prelen;
char *s1 = do_utf8 ? sv_uni_display(dsv1, sv, 60,
- UNI_DISPLAY_ISPRINT) : startpos;
+ UNI_DISPLAY_REGEX) : startpos;
int len1 = do_utf8 ? SvCUR(dsv1) : strend - startpos;
if (!PL_colorset)
reginitcolors();
@@ -1822,11 +1823,11 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
regprop(prop, c);
s0 = UTF ?
pv_uni_display(dsv0, (U8*)SvPVX(prop), SvCUR(prop), 60,
- UNI_DISPLAY_ISPRINT) :
+ UNI_DISPLAY_REGEX) :
SvPVX(prop);
len0 = UTF ? SvCUR(dsv0) : SvCUR(prop);
s1 = UTF ?
- sv_uni_display(dsv1, sv, 60, UNI_DISPLAY_ISPRINT) : s;
+ sv_uni_display(dsv1, sv, 60, UNI_DISPLAY_REGEX) : s;
len1 = UTF ? SvCUR(dsv1) : strend - s;
PerlIO_printf(Perl_debug_log,
"Matching stclass `%*.*s' against `%*.*s'\n",
@@ -2197,17 +2198,17 @@ S_regmatch(pTHX_ regnode *prog)
char *s0 =
do_utf8 ?
pv_uni_display(dsv0, (U8*)(locinput - pref_len),
- pref0_len, 60, 0) :
+ pref0_len, 60, UNI_DISPLAY_REGEX) :
locinput - pref_len;
int len0 = do_utf8 ? strlen(s0) : pref0_len;
char *s1 = do_utf8 ?
pv_uni_display(dsv1, (U8*)(locinput - pref_len + pref0_len),
- pref_len - pref0_len, 60, 0) :
+ pref_len - pref0_len, 60, UNI_DISPLAY_REGEX) :
locinput - pref_len + pref0_len;
int len1 = do_utf8 ? strlen(s1) : pref_len - pref0_len;
char *s2 = do_utf8 ?
pv_uni_display(dsv2, (U8*)locinput,
- PL_regeol - locinput, 60, 0) :
+ PL_regeol - locinput, 60, UNI_DISPLAY_REGEX) :
locinput;
int len2 = do_utf8 ? strlen(s2) : l;
PerlIO_printf(Perl_debug_log,
diff --git a/utf8.c b/utf8.c
index 0a25c03c31..8258ef5330 100644
--- a/utf8.c
+++ b/utf8.c
@@ -1677,14 +1677,37 @@ Perl_pv_uni_display(pTHX_ SV *dsv, U8 *spv, STRLEN len, STRLEN pvlim, UV flags)
sv_setpvn(dsv, "", 0);
for (s = (char *)spv, e = s + len; s < e; s += UTF8SKIP(s)) {
UV u;
+ bool ok = FALSE;
+
if (pvlim && SvCUR(dsv) >= pvlim) {
truncated++;
break;
}
u = utf8_to_uvchr((U8*)s, 0);
- if ((flags & UNI_DISPLAY_ISPRINT) && u < 256 && isprint(u))
- Perl_sv_catpvf(aTHX_ dsv, "%c", u);
- else
+ if (u < 256) {
+ if (!ok && (flags & UNI_DISPLAY_ISPRINT) && isprint(u & 0xFF)) {
+ Perl_sv_catpvf(aTHX_ dsv, "%c", u);
+ ok = TRUE;
+ }
+ if (!ok && (flags & UNI_DISPLAY_BACKSLASH)) {
+ switch (u & 0xFF) {
+ case '\n':
+ Perl_sv_catpvf(aTHX_ dsv, "\\n"); ok = TRUE; break;
+ case '\r':
+ Perl_sv_catpvf(aTHX_ dsv, "\\r"); ok = TRUE; break;
+ case '\t':
+ Perl_sv_catpvf(aTHX_ dsv, "\\t"); ok = TRUE; break;
+ case '\f':
+ Perl_sv_catpvf(aTHX_ dsv, "\\f"); ok = TRUE; break;
+ case '\a':
+ Perl_sv_catpvf(aTHX_ dsv, "\\a"); ok = TRUE; break;
+ case '\\':
+ Perl_sv_catpvf(aTHX_ dsv, "\\" ); ok = TRUE; break;
+ default: break;
+ }
+ }
+ }
+ if (!ok)
Perl_sv_catpvf(aTHX_ dsv, "\\x{%"UVxf"}", u);
}
if (truncated)
diff --git a/utf8.h b/utf8.h
index 96f1b741f4..8c27afab50 100644
--- a/utf8.h
+++ b/utf8.h
@@ -194,4 +194,7 @@ END_EXTERN_C
#define UNICODE_GREEK_SMALL_LETTER_SIGMA 0x03C3
#define UNI_DISPLAY_ISPRINT 0x0001
+#define UNI_DISPLAY_BACKSLASH 0x0002
+#define UNI_DISPLAY_QQ (UNI_DISPLAY_ISPRINT|UNI_DISPLAY_BACKSLASH)
+#define UNI_DISPLAY_REGEX (UNI_DISPLAY_ISPRINT|UNI_DISPLAY_BACKSLASH)