summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNikita Popov <nikita.ppv@gmail.com>2017-07-23 23:10:53 +0200
committerNikita Popov <nikita.ppv@gmail.com>2017-07-23 23:17:12 +0200
commit445e13b149fe68faa9aa4cd7b0921519266dc2e5 (patch)
treee90cf31b7fcbff282d3c747ba01a7b3cdbd17739
parent0e8346bd16af9d7ef26f61033ba979fc547d1d8b (diff)
downloadphp-git-445e13b149fe68faa9aa4cd7b0921519266dc2e5.tar.gz
Add MBFL_SUBSTR_UNTIL_END mode to mbfl_substr
This takes the substring from the given offset to the end of the string. It avoids a pointless search for the end position and also saves us a length calculation in the strstr family of functions.
-rw-r--r--ext/mbstring/libmbfl/mbfl/mbfilter.c95
-rw-r--r--ext/mbstring/libmbfl/mbfl/mbfilter.h6
-rw-r--r--ext/mbstring/mbstring.c58
3 files changed, 78 insertions, 81 deletions
diff --git a/ext/mbstring/libmbfl/mbfl/mbfilter.c b/ext/mbstring/libmbfl/mbfl/mbfilter.c
index 73461028d0..55c5c1d27d 100644
--- a/ext/mbstring/libmbfl/mbfl/mbfilter.c
+++ b/ext/mbstring/libmbfl/mbfl/mbfilter.c
@@ -1159,46 +1159,55 @@ mbfl_substr(
if ((encoding->flag & (MBFL_ENCTYPE_SBCS | MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE | MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) ||
encoding->mblen_table != NULL) {
len = string->len;
- start = from;
- end = from + length;
- if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
- start *= 2;
- end = start + length*2;
+ if (encoding->flag & MBFL_ENCTYPE_SBCS) {
+ start = from;
+ } else if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
+ start = from*2;
} else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
- start *= 4;
- end = start + length*4;
- } else if (encoding->mblen_table != NULL) {
+ start = from*4;
+ } else {
const unsigned char *mbtab = encoding->mblen_table;
start = 0;
- end = 0;
n = 0;
k = 0;
p = string->val;
- if (p != NULL) {
- /* search start position */
- while (k <= from) {
- start = n;
- if (n >= len) {
- break;
- }
- m = mbtab[*p];
- n += m;
- p += m;
- k++;
+ /* search start position */
+ while (k <= from) {
+ start = n;
+ if (n >= len) {
+ break;
}
- /* detect end position */
- k = 0;
- end = start;
- while (k < length) {
- end = n;
- if (n >= len) {
- break;
- }
- m = mbtab[*p];
- n += m;
- p += m;
- k++;
+ m = mbtab[*p];
+ n += m;
+ p += m;
+ k++;
+ }
+ }
+
+ if (length == MBFL_SUBSTR_UNTIL_END) {
+ end = len;
+ } else if (encoding->flag & MBFL_ENCTYPE_SBCS) {
+ end = start + length;
+ } else if (encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
+ end = start + length*2;
+ } else if (encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
+ end = start + length*4;
+ } else {
+ const unsigned char *mbtab = encoding->mblen_table;
+ end = start;
+ n = start;
+ k = 0;
+ p = string->val + start;
+ /* detect end position */
+ while (k <= length) {
+ end = n;
+ if (n >= len) {
+ break;
}
+ m = mbtab[*p];
+ n += m;
+ p += m;
+ k++;
}
}
@@ -1215,21 +1224,11 @@ mbfl_substr(
/* allocate memory and copy */
n = end - start;
result->len = 0;
- result->val = w = (unsigned char*)mbfl_malloc((n + 8)*sizeof(unsigned char));
+ result->val = w = (unsigned char*)mbfl_malloc(n + 1);
if (w != NULL) {
- p = string->val;
- if (p != NULL) {
- p += start;
- result->len = n;
- while (n > 0) {
- *w++ = *p++;
- n--;
- }
- }
- *w++ = '\0';
- *w++ = '\0';
- *w++ = '\0';
- *w = '\0';
+ result->len = n;
+ memcpy(w, string->val + start, n);
+ w[n] = '\0';
} else {
result = NULL;
}
@@ -1239,6 +1238,10 @@ mbfl_substr(
mbfl_convert_filter *decoder;
mbfl_convert_filter *encoder;
+ if (length == MBFL_SUBSTR_UNTIL_END) {
+ length = mbfl_strlen(string) - from;
+ }
+
mbfl_memory_device_init(&device, length + 1, 0);
mbfl_string_init(result);
result->no_language = string->no_language;
diff --git a/ext/mbstring/libmbfl/mbfl/mbfilter.h b/ext/mbstring/libmbfl/mbfl/mbfilter.h
index 0a27d3a265..54858bd919 100644
--- a/ext/mbstring/libmbfl/mbfl/mbfilter.h
+++ b/ext/mbstring/libmbfl/mbfl/mbfilter.h
@@ -202,7 +202,6 @@ mbfl_oddlen(mbfl_string *string);
MBFLAPI extern size_t
mbfl_strpos(mbfl_string *haystack, mbfl_string *needle, ssize_t offset, int reverse);
-
/*
* substr_count
*/
@@ -210,6 +209,11 @@ MBFLAPI extern size_t
mbfl_substr_count(mbfl_string *haystack, mbfl_string *needle);
/*
+ * If specified as length, the substr until the end of the string is taken.
+ */
+#define MBFL_SUBSTR_UNTIL_END ((size_t) -1)
+
+/*
* substr
*/
MBFLAPI extern mbfl_string *
diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c
index 58556410d1..4f3b7dc08e 100644
--- a/ext/mbstring/mbstring.c
+++ b/ext/mbstring/mbstring.c
@@ -2595,7 +2595,6 @@ PHP_FUNCTION(mb_strstr)
n = mbfl_strpos(&haystack, &needle, 0, 0);
if (!mbfl_is_error(n)) {
- size_t mblen = mbfl_strlen(&haystack);
if (part) {
ret = mbfl_substr(&haystack, &result, 0, n);
if (ret != NULL) {
@@ -2606,8 +2605,7 @@ PHP_FUNCTION(mb_strstr)
RETVAL_FALSE;
}
} else {
- size_t len = (mblen - n);
- ret = mbfl_substr(&haystack, &result, n, len);
+ ret = mbfl_substr(&haystack, &result, n, MBFL_SUBSTR_UNTIL_END);
if (ret != NULL) {
// TODO: avoid reallocation ???
RETVAL_STRINGL((char *)ret->val, ret->len);
@@ -2654,7 +2652,6 @@ PHP_FUNCTION(mb_strrchr)
n = mbfl_strpos(&haystack, &needle, 0, 1);
if (!mbfl_is_error(n)) {
- size_t mblen = mbfl_strlen(&haystack);
if (part) {
ret = mbfl_substr(&haystack, &result, 0, n);
if (ret != NULL) {
@@ -2665,8 +2662,7 @@ PHP_FUNCTION(mb_strrchr)
RETVAL_FALSE;
}
} else {
- size_t len = (mblen - n);
- ret = mbfl_substr(&haystack, &result, n, len);
+ ret = mbfl_substr(&haystack, &result, n, MBFL_SUBSTR_UNTIL_END);
if (ret != NULL) {
// TODO: avoid reallocation ???
RETVAL_STRINGL((char *)ret->val, ret->len);
@@ -2686,7 +2682,7 @@ PHP_FUNCTION(mb_strrchr)
PHP_FUNCTION(mb_stristr)
{
zend_bool part = 0;
- size_t from_encoding_len, n, len, mblen;
+ size_t from_encoding_len, n;
mbfl_string haystack, needle, result, *ret = NULL;
const char *from_encoding = NULL;
mbfl_string_init(&haystack);
@@ -2712,8 +2708,6 @@ PHP_FUNCTION(mb_stristr)
RETURN_FALSE;
}
- mblen = mbfl_strlen(&haystack);
-
if (part) {
ret = mbfl_substr(&haystack, &result, 0, n);
if (ret != NULL) {
@@ -2724,8 +2718,7 @@ PHP_FUNCTION(mb_stristr)
RETVAL_FALSE;
}
} else {
- len = (mblen - n);
- ret = mbfl_substr(&haystack, &result, n, len);
+ ret = mbfl_substr(&haystack, &result, n, MBFL_SUBSTR_UNTIL_END);
if (ret != NULL) {
// TODO: avoid reallocaton ???
RETVAL_STRINGL((char *)ret->val, ret->len);
@@ -2742,7 +2735,7 @@ PHP_FUNCTION(mb_stristr)
PHP_FUNCTION(mb_strrichr)
{
zend_bool part = 0;
- size_t n, len, mblen;
+ size_t n;
size_t from_encoding_len;
mbfl_string haystack, needle, result, *ret = NULL;
const char *from_encoding = NULL;
@@ -2764,8 +2757,6 @@ PHP_FUNCTION(mb_strrichr)
RETURN_FALSE;
}
- mblen = mbfl_strlen(&haystack);
-
if (part) {
ret = mbfl_substr(&haystack, &result, 0, n);
if (ret != NULL) {
@@ -2776,8 +2767,7 @@ PHP_FUNCTION(mb_strrichr)
RETVAL_FALSE;
}
} else {
- len = (mblen - n);
- ret = mbfl_substr(&haystack, &result, n, len);
+ ret = mbfl_substr(&haystack, &result, n, MBFL_SUBSTR_UNTIL_END);
if (ret != NULL) {
// TODO: avoid reallocation ???
RETVAL_STRINGL((char *)ret->val, ret->len);
@@ -2831,7 +2821,7 @@ PHP_FUNCTION(mb_substr)
{
char *str, *encoding = NULL;
zend_long from, len;
- size_t mblen;
+ size_t mblen, real_from, real_len;
size_t str_len, encoding_len;
zend_bool len_is_null = 1;
mbfl_string string, result, *ret;
@@ -2850,42 +2840,42 @@ PHP_FUNCTION(mb_substr)
string.val = (unsigned char *)str;
string.len = str_len;
- if (len_is_null) {
- len = str_len;
- }
-
/* measures length */
mblen = 0;
- if (from < 0 || len < 0) {
+ if (from < 0 || (!len_is_null && len < 0)) {
mblen = mbfl_strlen(&string);
}
/* if "from" position is negative, count start position from the end
* of the string
*/
- if (from < 0) {
- from = mblen + from;
- if (from < 0) {
- from = 0;
- }
+ if (from >= 0) {
+ real_from = (size_t) from;
+ } else if (-from < mblen) {
+ real_from = mblen + from;
+ } else {
+ real_from = 0;
}
/* if "length" position is negative, set it to the length
* needed to stop that many chars from the end of the string
*/
- if (len < 0) {
- len = (mblen - from) + len;
- if (len < 0) {
- len = 0;
- }
+ if (len_is_null) {
+ real_len = MBFL_SUBSTR_UNTIL_END;
+ } else if (len >= 0) {
+ real_len = (size_t) len;
+ } else if (real_from < mblen && -len < mblen - real_from) {
+ real_len = (mblen - real_from) + len;
+ } else {
+ real_len = 0;
}
if (((MBSTRG(func_overload) & MB_OVERLOAD_STRING) == MB_OVERLOAD_STRING)
- && (from >= mbfl_strlen(&string))) {
+ && (real_from >= mbfl_strlen(&string))) {
RETURN_FALSE;
}
- ret = mbfl_substr(&string, &result, from, len);
+ ret = mbfl_substr(&string, &result, real_from, real_len);
if (NULL == ret) {
RETURN_FALSE;
}