diff options
author | Nicholas Clark <nick@ccl4.org> | 2006-02-06 22:28:41 +0000 |
---|---|---|
committer | Nicholas Clark <nick@ccl4.org> | 2006-02-06 22:28:41 +0000 |
commit | 2f040f7f3a7618c48a8d153deb2b7e4a59efefb0 (patch) | |
tree | 1e169da7ae2e5e4d7e0eab31f9db35252fb20e74 /pp.c | |
parent | 212542aaa22ee7b99a683bacf00fb323b1c34697 (diff) | |
download | perl-2f040f7f3a7618c48a8d153deb2b7e4a59efefb0.tar.gz |
Optimise index so that if the big string is ISO-8859-1 but the little
string is UTF-8, it tries to downgrade the little string, rather than
upgrade the big string. For half-meg big strings this is a fourfold
speed gain.
p4raw-id: //depot/perl@27113
Diffstat (limited to 'pp.c')
-rw-r--r-- | pp.c | 61 |
1 files changed, 42 insertions, 19 deletions
@@ -3117,8 +3117,8 @@ PP(pp_index) const char *tmps2; STRLEN biglen; const I32 arybase = PL_curcop->cop_arybase; - int big_utf8; - int little_utf8; + bool big_utf8; + bool little_utf8; if (MAXARG < 3) offset = 0; @@ -3130,22 +3130,43 @@ PP(pp_index) little_utf8 = DO_UTF8(little); if (big_utf8 ^ little_utf8) { /* One needs to be upgraded. */ - SV * const bytes = little_utf8 ? big : little; - STRLEN len; - const char * const p = SvPV_const(bytes, len); - - temp = newSVpvn(p, len); + if (little_utf8 && !PL_encoding) { + /* Well, maybe instead we might be able to downgrade the small + string? */ + STRLEN little_len; + const U8 * const little_pv = (U8*) SvPV_const(little, little_len); + char * const pv = (char*)bytes_from_utf8(little_pv, &little_len, + &little_utf8); + if (little_utf8) { + /* If the large string is ISO-8859-1, and it's not possible to + convert the small string to ISO-8859-1, then there is no + way that it could be found anywhere by index. */ + retval = -1; + goto fail; + } - if (PL_encoding) { - sv_recode_to_utf8(temp, PL_encoding); - } else { - sv_utf8_upgrade(temp); - } - if (little_utf8) { - big = temp; - big_utf8 = TRUE; + /* At this point, pv is a malloc()ed string. So donate it to temp + to ensure it will get free()d */ + little = temp = newSV(0); + sv_usepvn(temp, pv, little_len); } else { - little = temp; + SV * const bytes = little_utf8 ? big : little; + STRLEN len; + const char * const p = SvPV_const(bytes, len); + + temp = newSVpvn(p, len); + + if (PL_encoding) { + sv_recode_to_utf8(temp, PL_encoding); + } else { + sv_utf8_upgrade(temp); + } + if (little_utf8) { + big = temp; + big_utf8 = TRUE; + } else { + little = temp; + } } } if (big_utf8 && offset > 0) @@ -3158,12 +3179,14 @@ PP(pp_index) if (!(tmps2 = fbm_instr((unsigned char*)tmps + offset, (unsigned char*)tmps + biglen, little, 0))) retval = -1; - else + else { retval = tmps2 - tmps; - if (retval > 0 && big_utf8) - sv_pos_b2u(big, &retval); + if (big_utf8) + sv_pos_b2u(big, &retval); + } if (temp) SvREFCNT_dec(temp); + fail: PUSHi(retval + arybase); RETURN; } |