From 1686c3fa47155d9c3884726b2ade4dede2eeb99f Mon Sep 17 00:00:00 2001 From: Anatol Belski Date: Mon, 1 Apr 2019 02:16:52 +0200 Subject: Improve ascii check --- win32/codepage.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) (limited to 'win32/codepage.c') diff --git a/win32/codepage.c b/win32/codepage.c index 302fcb8b45..636bc37992 100644 --- a/win32/codepage.c +++ b/win32/codepage.c @@ -105,6 +105,7 @@ PW32CP wchar_t *php_win32_cp_conv_ascii_to_w(const char* in, size_t in_len, size {/*{{{*/ wchar_t *ret, *ret_idx; const char *idx = in, *end; + char ch_err = 0; assert(in && in_len ? in[in_len] == '\0' : 1); @@ -123,29 +124,33 @@ PW32CP wchar_t *php_win32_cp_conv_ascii_to_w(const char* in, size_t in_len, size /* Process unaligned chunk. */ while (idx < aidx) { - if (!__isascii(*idx) && '\0' != *idx) { - ASCII_FAIL_RETURN() - } + ch_err |= *idx; idx++; } + if (ch_err & 0x80) { + ASCII_FAIL_RETURN() + } /* Process aligned chunk. */ + __m128i vec_err = _mm_setzero_si128(); while (end - idx > 15) { const __m128i block = _mm_load_si128((__m128i *)idx); - if (_mm_movemask_epi8(block)) { - ASCII_FAIL_RETURN() - } + vec_err = _mm_or_si128(vec_err, block); idx += 16; } + if (_mm_movemask_epi8(vec_err)) { + ASCII_FAIL_RETURN() + } } /* Process the trailing part, or otherwise process string < 16 bytes. */ while (idx < end) { - if (!__isascii(*idx) && '\0' != *idx) { - ASCII_FAIL_RETURN() - } + ch_err |= *idx; idx++; } + if (ch_err & 0x80) { + ASCII_FAIL_RETURN() + } ret = malloc((in_len+1)*sizeof(wchar_t)); if (!ret) { @@ -156,9 +161,6 @@ PW32CP wchar_t *php_win32_cp_conv_ascii_to_w(const char* in, size_t in_len, size ret_idx = ret; idx = in; - /* Check and conversion could be merged. This however would - be more expencive, if a non ASCII string was passed. - TODO check whether the impact is acceptable. */ if (in_len > 15) { const char *aidx = (const char *)ZEND_SLIDE_TO_ALIGNED16(in); -- cgit v1.2.1