diff options
author | Nicholas Clark <nick@ccl4.org> | 2011-06-28 15:20:56 +0200 |
---|---|---|
committer | Nicholas Clark <nick@ccl4.org> | 2011-07-01 14:05:41 +0200 |
commit | 72de20cdcc84ae35e0d8a55c7a92950fece19347 (patch) | |
tree | fde310b665c99a781df72f0aeab31f0e8ba8225e /util.c | |
parent | b606cf7f37b8b46206c7f521b29167e037397a62 (diff) | |
download | perl-72de20cdcc84ae35e0d8a55c7a92950fece19347.tar.gz |
For shorter strings, store C<study>'s data as U8s or U16s, instead of U32s.
The assumption is that most studied strings are fairly short, hence the pain
of the extra code is worth it, given the memory savings.
80 character string, 336 bytes as U8, down from 1344 as U32
800 character string, 2112 bytes as U16, down from 4224 as U32
Diffstat (limited to 'util.c')
-rw-r--r-- | util.c | 85 |
1 files changed, 69 insertions, 16 deletions
@@ -854,7 +854,7 @@ Perl_screaminstr(pTHX_ SV *bigstr, SV *littlestr, I32 start_shift, I32 end_shift { dVAR; register const unsigned char *big; - U32 pos; + U32 pos = 0; /* hush a gcc warning */ register I32 previous; register I32 first; register const unsigned char *little; @@ -862,9 +862,8 @@ Perl_screaminstr(pTHX_ SV *bigstr, SV *littlestr, I32 start_shift, I32 end_shift register const unsigned char *littleend; bool found = FALSE; const MAGIC * mg; - U32 *screamfirst; - U32 *screamnext; - U32 const nope = ~0; + const void *screamnext_raw = NULL; /* hush a gcc warning */ + bool cant_find = FALSE; /* hush a gcc warning */ PERL_ARGS_ASSERT_SCREAMINSTR; @@ -874,12 +873,37 @@ Perl_screaminstr(pTHX_ SV *bigstr, SV *littlestr, I32 start_shift, I32 end_shift assert(SvTYPE(littlestr) == SVt_PVMG); assert(SvVALID(littlestr)); - screamfirst = (U32 *)mg->mg_ptr; - screamnext = screamfirst + 256; + if (mg->mg_private == 1) { + const U8 *const screamfirst = (U8 *)mg->mg_ptr; + const U8 *const screamnext = screamfirst + 256; - pos = *old_posp == -1 - ? screamfirst[BmRARE(littlestr)] : screamnext[*old_posp]; - if (pos == nope) { + screamnext_raw = (const void *)screamnext; + + pos = *old_posp == -1 + ? screamfirst[BmRARE(littlestr)] : screamnext[*old_posp]; + cant_find = pos == (U8)~0; + } else if (mg->mg_private == 2) { + const U16 *const screamfirst = (U16 *)mg->mg_ptr; + const U16 *const screamnext = screamfirst + 256; + + screamnext_raw = (const void *)screamnext; + + pos = *old_posp == -1 + ? screamfirst[BmRARE(littlestr)] : screamnext[*old_posp]; + cant_find = pos == (U16)~0; + } else if (mg->mg_private == 4) { + const U32 *const screamfirst = (U32 *)mg->mg_ptr; + const U32 *const screamnext = screamfirst + 256; + + screamnext_raw = (const void *)screamnext; + + pos = *old_posp == -1 + ? screamfirst[BmRARE(littlestr)] : screamnext[*old_posp]; + cant_find = pos == (U32)~0; + } else + Perl_croak(aTHX_ "panic: unknown study size %u", mg->mg_private); + + if (cant_find) { cant_find: if ( BmRARE(littlestr) == '\n' && BmPREVIOUS(littlestr) == SvCUR(littlestr) - 1) { @@ -910,13 +934,30 @@ Perl_screaminstr(pTHX_ SV *bigstr, SV *littlestr, I32 start_shift, I32 end_shift #endif return NULL; } - while ((I32)pos < previous + start_shift) { - pos = screamnext[pos]; - if (pos == nope) - goto cant_find; + if (mg->mg_private == 1) { + const U8 *const screamnext = (const U8 *const) screamnext_raw; + while ((I32)pos < previous + start_shift) { + pos = screamnext[pos]; + if (pos == (U8)~0) + goto cant_find; + } + } else if (mg->mg_private == 2) { + const U16 *const screamnext = (const U16 *const) screamnext_raw; + while ((I32)pos < previous + start_shift) { + pos = screamnext[pos]; + if (pos == (U16)~0) + goto cant_find; + } + } else if (mg->mg_private == 4) { + const U32 *const screamnext = (const U32 *const) screamnext_raw; + while ((I32)pos < previous + start_shift) { + pos = screamnext[pos]; + if (pos == (U32)~0) + goto cant_find; + } } big -= previous; - do { + while (1) { if ((I32)pos >= stop_pos) break; if (big[pos] == first) { const unsigned char *s = little; @@ -932,8 +973,20 @@ Perl_screaminstr(pTHX_ SV *bigstr, SV *littlestr, I32 start_shift, I32 end_shift found = TRUE; } } - pos = screamnext[pos]; - } while (pos != nope); + if (mg->mg_private == 1) { + pos = ((const U8 *const)screamnext_raw)[pos]; + if (pos == (U8)~0) + break; + } else if (mg->mg_private == 2) { + pos = ((const U16 *const)screamnext_raw)[pos]; + if (pos == (U16)~0) + break; + } else if (mg->mg_private == 4) { + pos = ((const U32 *const)screamnext_raw)[pos]; + if (pos == (U32)~0) + break; + } + }; if (last && found) return (char *)(big+(*old_posp)); check_tail: |