diff options
author | Nikita Popov <nikita.ppv@gmail.com> | 2017-07-23 23:10:53 +0200 |
---|---|---|
committer | Nikita Popov <nikita.ppv@gmail.com> | 2017-07-23 23:17:12 +0200 |
commit | 445e13b149fe68faa9aa4cd7b0921519266dc2e5 (patch) | |
tree | e90cf31b7fcbff282d3c747ba01a7b3cdbd17739 | |
parent | 0e8346bd16af9d7ef26f61033ba979fc547d1d8b (diff) | |
download | php-git-445e13b149fe68faa9aa4cd7b0921519266dc2e5.tar.gz |
Add MBFL_SUBSTR_UNTIL_END mode to mbfl_substr
This takes the substr from the offset to the end of the string.
This avoids pointless searching for the end position and also
saves us a length calculation in the strstr family of functions.
-rw-r--r-- | ext/mbstring/libmbfl/mbfl/mbfilter.c | 95 | ||||
-rw-r--r-- | ext/mbstring/libmbfl/mbfl/mbfilter.h | 6 | ||||
-rw-r--r-- | ext/mbstring/mbstring.c | 58 |
3 files changed, 78 insertions, 81 deletions
diff --git a/ext/mbstring/libmbfl/mbfl/mbfilter.c b/ext/mbstring/libmbfl/mbfl/mbfilter.c index 73461028d0..55c5c1d27d 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfilter.c +++ b/ext/mbstring/libmbfl/mbfl/mbfilter.c @@ -1159,46 +1159,55 @@ mbfl_substr( if ((encoding->flag & (MBFL_ENCTYPE_SBCS | MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE | MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) || encoding->mblen_table != NULL) { len = string->len; - start = from; - end = from + length; - if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) { - start *= 2; - end = start + length*2; + if (encoding->flag & MBFL_ENCTYPE_SBCS) { + start = from; + } else if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) { + start = from*2; } else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) { - start *= 4; - end = start + length*4; - } else if (encoding->mblen_table != NULL) { + start = from*4; + } else { const unsigned char *mbtab = encoding->mblen_table; start = 0; - end = 0; n = 0; k = 0; p = string->val; - if (p != NULL) { - /* search start position */ - while (k <= from) { - start = n; - if (n >= len) { - break; - } - m = mbtab[*p]; - n += m; - p += m; - k++; + /* search start position */ + while (k <= from) { + start = n; + if (n >= len) { + break; } - /* detect end position */ - k = 0; - end = start; - while (k < length) { - end = n; - if (n >= len) { - break; - } - m = mbtab[*p]; - n += m; - p += m; - k++; + m = mbtab[*p]; + n += m; + p += m; + k++; + } + } + + if (length == MBFL_SUBSTR_UNTIL_END) { + end = len; + } else if (encoding->flag & MBFL_ENCTYPE_SBCS) { + end = start + length; + } else if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) { + end = start + length*2; + } else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) { + end = start + length*4; + } else { + const unsigned char *mbtab = encoding->mblen_table; + end = start; + n = start; + k = 0; + p = string->val + start; + /* detect end position */ + 
while (k <= length) { + end = n; + if (n >= len) { + break; } + m = mbtab[*p]; + n += m; + p += m; + k++; } } @@ -1215,21 +1224,11 @@ mbfl_substr( /* allocate memory and copy */ n = end - start; result->len = 0; - result->val = w = (unsigned char*)mbfl_malloc((n + 8)*sizeof(unsigned char)); + result->val = w = (unsigned char*)mbfl_malloc(n + 1); if (w != NULL) { - p = string->val; - if (p != NULL) { - p += start; - result->len = n; - while (n > 0) { - *w++ = *p++; - n--; - } - } - *w++ = '\0'; - *w++ = '\0'; - *w++ = '\0'; - *w = '\0'; + result->len = n; + memcpy(w, string->val + start, n); + w[n] = '\0'; } else { result = NULL; } @@ -1239,6 +1238,10 @@ mbfl_substr( mbfl_convert_filter *decoder; mbfl_convert_filter *encoder; + if (length == MBFL_SUBSTR_UNTIL_END) { + length = mbfl_strlen(string) - from; + } + mbfl_memory_device_init(&device, length + 1, 0); mbfl_string_init(result); result->no_language = string->no_language; diff --git a/ext/mbstring/libmbfl/mbfl/mbfilter.h b/ext/mbstring/libmbfl/mbfl/mbfilter.h index 0a27d3a265..54858bd919 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfilter.h +++ b/ext/mbstring/libmbfl/mbfl/mbfilter.h @@ -202,7 +202,6 @@ mbfl_oddlen(mbfl_string *string); MBFLAPI extern size_t mbfl_strpos(mbfl_string *haystack, mbfl_string *needle, ssize_t offset, int reverse); - /* * substr_count */ @@ -210,6 +209,11 @@ MBFLAPI extern size_t mbfl_substr_count(mbfl_string *haystack, mbfl_string *needle); /* + * If specified as length, the substr until the end of the string is taken. 
+ */ +#define MBFL_SUBSTR_UNTIL_END ((size_t) -1) + +/* * substr */ MBFLAPI extern mbfl_string * diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index 58556410d1..4f3b7dc08e 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -2595,7 +2595,6 @@ PHP_FUNCTION(mb_strstr) n = mbfl_strpos(&haystack, &needle, 0, 0); if (!mbfl_is_error(n)) { - size_t mblen = mbfl_strlen(&haystack); if (part) { ret = mbfl_substr(&haystack, &result, 0, n); if (ret != NULL) { @@ -2606,8 +2605,7 @@ PHP_FUNCTION(mb_strstr) RETVAL_FALSE; } } else { - size_t len = (mblen - n); - ret = mbfl_substr(&haystack, &result, n, len); + ret = mbfl_substr(&haystack, &result, n, MBFL_SUBSTR_UNTIL_END); if (ret != NULL) { // TODO: avoid reallocation ??? RETVAL_STRINGL((char *)ret->val, ret->len); @@ -2654,7 +2652,6 @@ PHP_FUNCTION(mb_strrchr) n = mbfl_strpos(&haystack, &needle, 0, 1); if (!mbfl_is_error(n)) { - size_t mblen = mbfl_strlen(&haystack); if (part) { ret = mbfl_substr(&haystack, &result, 0, n); if (ret != NULL) { @@ -2665,8 +2662,7 @@ PHP_FUNCTION(mb_strrchr) RETVAL_FALSE; } } else { - size_t len = (mblen - n); - ret = mbfl_substr(&haystack, &result, n, len); + ret = mbfl_substr(&haystack, &result, n, MBFL_SUBSTR_UNTIL_END); if (ret != NULL) { // TODO: avoid reallocation ??? 
RETVAL_STRINGL((char *)ret->val, ret->len); @@ -2686,7 +2682,7 @@ PHP_FUNCTION(mb_strrchr) PHP_FUNCTION(mb_stristr) { zend_bool part = 0; - size_t from_encoding_len, n, len, mblen; + size_t from_encoding_len, n; mbfl_string haystack, needle, result, *ret = NULL; const char *from_encoding = NULL; mbfl_string_init(&haystack); @@ -2712,8 +2708,6 @@ PHP_FUNCTION(mb_stristr) RETURN_FALSE; } - mblen = mbfl_strlen(&haystack); - if (part) { ret = mbfl_substr(&haystack, &result, 0, n); if (ret != NULL) { @@ -2724,8 +2718,7 @@ PHP_FUNCTION(mb_stristr) RETVAL_FALSE; } } else { - len = (mblen - n); - ret = mbfl_substr(&haystack, &result, n, len); + ret = mbfl_substr(&haystack, &result, n, MBFL_SUBSTR_UNTIL_END); if (ret != NULL) { // TODO: avoid reallocaton ??? RETVAL_STRINGL((char *)ret->val, ret->len); @@ -2742,7 +2735,7 @@ PHP_FUNCTION(mb_stristr) PHP_FUNCTION(mb_strrichr) { zend_bool part = 0; - size_t n, len, mblen; + size_t n; size_t from_encoding_len; mbfl_string haystack, needle, result, *ret = NULL; const char *from_encoding = NULL; @@ -2764,8 +2757,6 @@ PHP_FUNCTION(mb_strrichr) RETURN_FALSE; } - mblen = mbfl_strlen(&haystack); - if (part) { ret = mbfl_substr(&haystack, &result, 0, n); if (ret != NULL) { @@ -2776,8 +2767,7 @@ PHP_FUNCTION(mb_strrichr) RETVAL_FALSE; } } else { - len = (mblen - n); - ret = mbfl_substr(&haystack, &result, n, len); + ret = mbfl_substr(&haystack, &result, n, MBFL_SUBSTR_UNTIL_END); if (ret != NULL) { // TODO: avoid reallocation ??? 
RETVAL_STRINGL((char *)ret->val, ret->len); @@ -2831,7 +2821,7 @@ PHP_FUNCTION(mb_substr) { char *str, *encoding = NULL; zend_long from, len; - size_t mblen; + size_t mblen, real_from, real_len; size_t str_len, encoding_len; zend_bool len_is_null = 1; mbfl_string string, result, *ret; @@ -2850,42 +2840,42 @@ PHP_FUNCTION(mb_substr) string.val = (unsigned char *)str; string.len = str_len; - if (len_is_null) { - len = str_len; - } - /* measures length */ mblen = 0; - if (from < 0 || len < 0) { + if (from < 0 || (!len_is_null && len < 0)) { mblen = mbfl_strlen(&string); } /* if "from" position is negative, count start position from the end * of the string */ - if (from < 0) { - from = mblen + from; - if (from < 0) { - from = 0; - } + if (from >= 0) { + real_from = (size_t) from; + } else if (-from < mblen) { + real_from = mblen + from; + } else { + real_from = 0; } /* if "length" position is negative, set it to the length * needed to stop that many chars from the end of the string */ - if (len < 0) { - len = (mblen - from) + len; - if (len < 0) { - len = 0; - } + if (len_is_null) { + real_len = MBFL_SUBSTR_UNTIL_END; + } else if (len >= 0) { + real_len = (size_t) len; + } else if (real_from < mblen && -len < mblen - real_from) { + real_len = (mblen - real_from) + len; + } else { + real_len = 0; } if (((MBSTRG(func_overload) & MB_OVERLOAD_STRING) == MB_OVERLOAD_STRING) - && (from >= mbfl_strlen(&string))) { + && (real_from >= mbfl_strlen(&string))) { RETURN_FALSE; } - ret = mbfl_substr(&string, &result, from, len); + ret = mbfl_substr(&string, &result, real_from, real_len); if (NULL == ret) { RETURN_FALSE; } |