summaryrefslogtreecommitdiff
path: root/ext
diff options
context:
space:
mode:
authorKarl Williamson <khw@cpan.org>2022-07-10 10:06:17 -0600
committerKarl Williamson <khw@cpan.org>2022-12-07 09:16:58 -0700
commit76062242c5bacb046859572260d73cd6bc6f2004 (patch)
tree86846e053927a3f55e3f0624da52c8e2edefdd97 /ext
parentf0cb6a0886fcc9d9d34b3d80ffb90829db33e738 (diff)
downloadperl-76062242c5bacb046859572260d73cd6bc6f2004.tar.gz
utf8_hop forwards: Change continuation start behavior
Prior to this commit, when hopping forwards from an initial position that is a continuation byte, utf8_hop treated that byte and each successive continuation byte as a separate character (each one consuming one hop) until it reached a start byte, at which point it switched into normal mode. In contrast, when hopping backwards, all the consecutive continuation bytes are considered to be part of a single character (as they indeed are). Thus there was a discrepancy between forward and backward hopping, and the forward behavior seems wrong to me. This commit removes the discrepancy. There is no change in behavior if the starting position is at the beginning of a character. All calls in the core, except for the API test, are of this form. But if the initial position is in the middle of a character, hopping forwards now moves to the beginning of the next character, subtracting just 1 from the count of characters to hop (instead of subtracting however many continuation bytes there are). This is how I would have expected it to work all along. Succinctly, getting to the next character now consumes exactly one hop count, regardless of the direction or of which byte in the character is the starting position.
Diffstat (limited to 'ext')
-rw-r--r--ext/XS-APItest/t/utf8.t7
1 files changed, 4 insertions, 3 deletions
diff --git a/ext/XS-APItest/t/utf8.t b/ext/XS-APItest/t/utf8.t
index a05194cfcc..f4af4c4298 100644
--- a/ext/XS-APItest/t/utf8.t
+++ b/ext/XS-APItest/t/utf8.t
@@ -1207,9 +1207,10 @@ SKIP:
[ $utf, $utf_ch_len * 5, -4, $utf_ch_len, "utf in range b, backward" ],
[ $utf, $utf_ch_len * 5, 6, length($utf), "utf out of range, forward" ],
[ $utf, $utf_ch_len * 5, -6, 0, "utf out of range, backward" ],
- [ $bad_start, 0, 1, 1, "bad start, forward 1 from 0" ],
- [ $bad_start, 0, $utf_ch_len-1, $utf_ch_len-1, "bad start, forward ch_len-1 from 0" ],
- [ $bad_start, 0, $utf_ch_len, $utf_ch_len*2-1, "bad start, forward ch_len from 0" ],
+ [ $bad_start, 0, 1, $utf_ch_len-1, "bad start, forward 1 from 0" ],
+ [ $bad_start, 0, 5, 5 * $utf_ch_len-1, "bad start, forward 5 chars from 0" ],
+ [ $bad_start, 0, 9, length($bad_start)-$utf_ch_len, "bad start, forward 9 chars from 0" ],
+ [ $bad_start, 0, 10, length $bad_start, "bad start, forward 10 chars from 0" ],
[ $bad_start, $utf_ch_len-1, -1, 0, "bad start, back 1 from first start byte" ],
[ $bad_start, $utf_ch_len-2, -1, 0, "bad start, back 1 from before first start byte" ],
[ $bad_start, 0, -1, 0, "bad start, back 1 from 0" ],