summaryrefslogtreecommitdiff
path: root/utf8.c
diff options
context:
space:
mode:
authorKarl Williamson <khw@cpan.org>2021-06-10 22:12:11 -0600
committerTony Cook <tony@develop-help.com>2021-06-30 11:23:45 +1000
commit6a2e93d94928fd304ef9a1821ad2e0eb8adfccb3 (patch)
treead004047c9795eadad53d366efbc7c316c96084c /utf8.c
parenta53cc5a4d7455fe00bb9905602e3469144e6e468 (diff)
downloadperl-6a2e93d94928fd304ef9a1821ad2e0eb8adfccb3.tar.gz
utf8_length: Fix undefined C behavior
In C the comparison of two pointers is only legal if both point to within the same object, or to a virtual element one above the high edge of the object. The previous code was doing an addition potentially outside that range, and so the results would be undefined.
Diffstat (limited to 'utf8.c')
-rw-r--r--utf8.c29
1 files changed, 18 insertions, 11 deletions
diff --git a/utf8.c b/utf8.c
index 3b321be7cb..354f0789a8 100644
--- a/utf8.c
+++ b/utf8.c
@@ -2372,23 +2372,30 @@ Perl_utf8_length(pTHX_ const U8 *s, const U8 *e)
* the bitops (especially ~) can create illegal UTF-8.
* In other words: in Perl UTF-8 is not just for Unicode. */
- if (UNLIKELY(e < s))
- goto warn_and_return;
while (s < e) {
- s += UTF8SKIP(s);
+ Ptrdiff_t expected_byte_count = UTF8SKIP(s);
+
+ if (UNLIKELY(e - s < expected_byte_count)) {
+ goto warn_and_return;
+ }
+
len++;
+ s += expected_byte_count;
}
- if (UNLIKELY(e != s)) {
- len--;
- warn_and_return:
- if (PL_op)
- Perl_ck_warner_d(aTHX_ packWARN(WARN_UTF8),
- "%s in %s", unees, OP_DESC(PL_op));
- else
- Perl_ck_warner_d(aTHX_ packWARN(WARN_UTF8), "%s", unees);
+ if (LIKELY(e == s)) {
+ return len;
}
+ /* Here, s > e on entry */
+
+ warn_and_return:
+ if (PL_op)
+ Perl_ck_warner_d(aTHX_ packWARN(WARN_UTF8),
+ "%s in %s", unees, OP_DESC(PL_op));
+ else
+ Perl_ck_warner_d(aTHX_ packWARN(WARN_UTF8), "%s", unees);
+
return len;
}