summaryrefslogtreecommitdiff
path: root/sv.c
diff options
context:
space:
mode:
authorNicholas Clark <nick@ccl4.org>2006-03-22 21:19:50 +0000
committerNicholas Clark <nick@ccl4.org>2006-03-22 21:19:50 +0000
commitc336ad0b0ffc3d19ec24d23d8bc7a30158ac1ae5 (patch)
treeaa15fdbfde74b7079a5843fb1100a5807ef56ce7 /sv.c
parent28ccbf949dabb67c68ce034cf8b582683d4ca2a5 (diff)
downloadperl-c336ad0b0ffc3d19ec24d23d8bc7a30158ac1ae5.tar.gz
Add S_sv_pos_u2b_midway for when we know the utf-8/bytes offsets on
both sides of the desired offset, so could count either way. Use cached utf-8 length in S_sv_pos_u2b_cached. Check the results if PL_utf8cache < 0. p4raw-id: //depot/perl@27573
Diffstat (limited to 'sv.c')
-rw-r--r--sv.c58
1 files changed, 53 insertions, 5 deletions
diff --git a/sv.c b/sv.c
index 27f2b8071e..69232be8c1 100644
--- a/sv.c
+++ b/sv.c
@@ -5517,17 +5517,65 @@ S_sv_pos_u2b_forwards(pTHX_ const U8 *const start, const U8 *const send,
return s - start;
}
+
+static STRLEN
+S_sv_pos_u2b_midway(pTHX_ const U8 *const start, const U8 *send,
+ STRLEN uoffset, STRLEN uend)
+{
+ STRLEN backw = uend - uoffset;
+ if (uoffset < 2 * backw) {
+ /* The assumption is that going fowards is twice the speed of going
+ forward (that's where the 2 * backw comes from).
+ (The real figure of course depends on the UTF-8 data.) */
+ return S_sv_pos_u2b_forwards(aTHX_ start, send, uoffset);
+ }
+
+ while (backw--) {
+ send--;
+ while (UTF8_IS_CONTINUATION(*send))
+ send--;
+ }
+ return send - start;
+}
+
static STRLEN
S_sv_pos_u2b_cached(pTHX_ SV *sv, MAGIC **mgp, const U8 *const start,
const U8 *const send, STRLEN uoffset,
STRLEN uoffset0, STRLEN boffset0) {
STRLEN boffset;
- if (uoffset >= uoffset0) {
- boffset = boffset0 + S_sv_pos_u2b_forwards(aTHX_ start + boffset0,
- send, uoffset - uoffset0);
+ bool found = FALSE;
+
+ if (SvMAGICAL(sv) && !SvREADONLY(sv) && PL_utf8cache
+ && (*mgp = mg_find(sv, PERL_MAGIC_utf8))) {
+ if ((*mgp)->mg_len != -1) {
+ boffset = S_sv_pos_u2b_midway(aTHX_ start, send, uoffset,
+ (*mgp)->mg_len);
+ found = TRUE;
+ }
}
- else {
- boffset = S_sv_pos_u2b_forwards(aTHX_ start, send, uoffset);
+
+ if (!found || PL_utf8cache < 0) {
+ STRLEN real_boffset;
+ if (uoffset >= uoffset0) {
+ real_boffset
+ = boffset0 + S_sv_pos_u2b_forwards(aTHX_ start + boffset0,
+ send, uoffset - uoffset0);
+ }
+ else {
+ real_boffset = S_sv_pos_u2b_forwards(aTHX_ start, send, uoffset);
+ }
+ if (found && PL_utf8cache < 0) {
+ if (real_boffset != boffset) {
+ /* Need to turn the assertions off otherwise we may recurse
+ infinitely while printing error messages. */
+ SAVEI8(PL_utf8cache);
+ PL_utf8cache = 0;
+ Perl_croak(aTHX_ "panic: sv_pos_u2b_cache cache %"UVf
+ " real %"UVf" for %"SVf,
+ (UV) boffset, (UV) real_boffset, sv);
+ }
+ }
+ boffset = real_boffset;
}
return boffset;
}