summary | refs | log | tree | commit | diff
path: root/dump.c
diff options
context:
space:
mode:
author: Hugo van der Sanden <hv@crypt.org> 2003-01-21 01:37:03 +0000
committer: hv <hv@crypt.org> 2003-01-21 01:37:03 +0000
commit: 7e8c5daceba7cb185532328a3b67d4ca7ba4811b (patch)
tree: 6fbcb357ec74beb075b5c99dbc3f3ac52e68114f /dump.c
parent: 388cc4de5f48b02cc9fe9b962f02cf603af02178 (diff)
download: perl-7e8c5daceba7cb185532328a3b67d4ca7ba4811b.tar.gz
integrate (by hand) #18353 and #18359 from maint-5.8:
Introduce a cache for UTF-8 data: the length and the byte<->char offset mapping are stored in a new type of magic. This speeds up length(), substr(), index(), rindex(), pos(), and some parts of s///. The speedup varies a lot (depending on the usual suspects: the access pattern of the data, the compiler, the CPU), but should be at least one order of magnitude, approaching the same magnitude as byte string speeds, and in some cases (length() on unchanged data) even reaching byte string speed. On the other hand, in some cases (index()) the byte string speed is still faster by a factor of five or so, but the bottleneck there no longer seems to be the byte<->char offset mapping (it is instead the speed of fbm_instr()). There is one cache slot for the length, and only two for the byte<->char offset mapping (the first for start->offset, and the second for offset->offset+length, in substr() terms). Code this hairy is bound to have hairy trolls hiding under it. [...] A small tweak on top of #18353: don't display mg_len bytes of mg_ptr for PERL_MAGIC_utf8, because that is not what is stored there. p4raw-id: //depot/perl@18530
Diffstat (limited to 'dump.c')
-rw-r--r-- dump.c 22
1 file changed, 19 insertions, 3 deletions
diff --git a/dump.c b/dump.c
index 60903250e2..759906e2dd 100644
--- a/dump.c
+++ b/dump.c
@@ -768,6 +768,7 @@ static struct { char type; char *name; } magic_names[] = {
{ PERL_MAGIC_uvar_elem, "uvar_elem(v)" },
{ PERL_MAGIC_vec, "vec(v)" },
{ PERL_MAGIC_vstring, "vstring(V)" },
+ { PERL_MAGIC_utf8, "utf8(w)" },
{ PERL_MAGIC_substr, "substr(x)" },
{ PERL_MAGIC_defelem, "defelem(y)" },
{ PERL_MAGIC_ext, "ext(~)" },
@@ -811,6 +812,7 @@ Perl_do_magic_dump(pTHX_ I32 level, PerlIO *file, MAGIC *mg, I32 nest, I32 maxne
else if (v == &PL_vtbl_amagic) s = "amagic";
else if (v == &PL_vtbl_amagicelem) s = "amagicelem";
else if (v == &PL_vtbl_backref) s = "backref";
+ else if (v == &PL_vtbl_utf8) s = "utf8";
if (s)
Perl_dump_indent(aTHX_ level, file, " MG_VIRTUAL = &PL_vtbl_%s\n", s);
else
@@ -862,9 +864,11 @@ Perl_do_magic_dump(pTHX_ I32 level, PerlIO *file, MAGIC *mg, I32 nest, I32 maxne
if (mg->mg_ptr) {
Perl_dump_indent(aTHX_ level, file, " MG_PTR = 0x%"UVxf, PTR2UV(mg->mg_ptr));
if (mg->mg_len >= 0) {
- SV *sv = newSVpvn("", 0);
- PerlIO_printf(file, " %s", pv_display(sv, mg->mg_ptr, mg->mg_len, 0, pvlim));
- SvREFCNT_dec(sv);
+ if (mg->mg_type != PERL_MAGIC_utf8) {
+ SV *sv = newSVpvn("", 0);
+ PerlIO_printf(file, " %s", pv_display(sv, mg->mg_ptr, mg->mg_len, 0, pvlim));
+ SvREFCNT_dec(sv);
+ }
}
else if (mg->mg_len == HEf_SVKEY) {
PerlIO_puts(file, " => HEf_SVKEY\n");
@@ -875,6 +879,18 @@ Perl_do_magic_dump(pTHX_ I32 level, PerlIO *file, MAGIC *mg, I32 nest, I32 maxne
PerlIO_puts(file, " ???? - please notify IZ");
PerlIO_putc(file, '\n');
}
+ if (mg->mg_type == PERL_MAGIC_utf8) {
+ STRLEN *cache = (STRLEN *) mg->mg_ptr;
+ if (cache) {
+ IV i;
+ for (i = 0; i < PERL_MAGIC_UTF8_CACHESIZE; i++)
+ Perl_dump_indent(aTHX_ level, file,
+ " %2"IVdf": %"UVuf" -> %"UVuf"\n",
+ i,
+ (UV)cache[i * 2],
+ (UV)cache[i * 2 + 1]);
+ }
+ }
}
}