diff options
author | Karl Williamson <khw@cpan.org> | 2021-06-10 22:12:11 -0600 |
---|---|---|
committer | Tony Cook <tony@develop-help.com> | 2021-06-30 11:23:45 +1000 |
commit | 6a2e93d94928fd304ef9a1821ad2e0eb8adfccb3 (patch) | |
tree | ad004047c9795eadad53d366efbc7c316c96084c /utf8.c | |
parent | a53cc5a4d7455fe00bb9905602e3469144e6e468 (diff) | |
download | perl-6a2e93d94928fd304ef9a1821ad2e0eb8adfccb3.tar.gz |
utf8_length: Fix undefined C behavior
In C the comparison of two pointers is only legal if both point to
within the same object, or to a virtual element one above the high edge
of the object.
The previous code was doing an addition potentially outside that range,
and so the results would be undefined.
Diffstat (limited to 'utf8.c')
-rw-r--r-- | utf8.c | 29 |
1 files changed, 18 insertions, 11 deletions
@@ -2372,23 +2372,30 @@ Perl_utf8_length(pTHX_ const U8 *s, const U8 *e) * the bitops (especially ~) can create illegal UTF-8. * In other words: in Perl UTF-8 is not just for Unicode. */ - if (UNLIKELY(e < s)) - goto warn_and_return; while (s < e) { - s += UTF8SKIP(s); + Ptrdiff_t expected_byte_count = UTF8SKIP(s); + + if (UNLIKELY(e - s < expected_byte_count)) { + goto warn_and_return; + } + len++; + s += expected_byte_count; } - if (UNLIKELY(e != s)) { - len--; - warn_and_return: - if (PL_op) - Perl_ck_warner_d(aTHX_ packWARN(WARN_UTF8), - "%s in %s", unees, OP_DESC(PL_op)); - else - Perl_ck_warner_d(aTHX_ packWARN(WARN_UTF8), "%s", unees); + if (LIKELY(e == s)) { + return len; } + /* Here, s > e on entry */ + + warn_and_return: + if (PL_op) + Perl_ck_warner_d(aTHX_ packWARN(WARN_UTF8), + "%s in %s", unees, OP_DESC(PL_op)); + else + Perl_ck_warner_d(aTHX_ packWARN(WARN_UTF8), "%s", unees); + return len; } |