summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGustavo Lopes <glopes@nebm.ist.utl.pt>2013-01-15 21:05:21 +0100
committerGustavo Lopes <glopes@nebm.ist.utl.pt>2013-01-15 21:05:21 +0100
commit93e35137aaba98c0a000ed442320e3173bb0a3f2 (patch)
tree6ddcca5078aab6eaaa29c6631a25be3eaefd2d7c
parent83864b470b030a7d1bd0a1b46d3be75ce301304c (diff)
parent930ef9ddd663dcda5726b5d33c54c49a2f4f97d6 (diff)
downloadphp-git-93e35137aaba98c0a000ed442320e3173bb0a3f2.tar.gz
Merge remote-tracking branch 'remotes/cataphract/strtr_wu94_54' into PHP-5.4
* remotes/cataphract/strtr_wu94_54: Fixed inconsequential bug in strtr() Remove _GNU_SOURCE, add local heap sort The compiler can figure this out Remove unused block strtr() with 2nd param array - optimization Refactoring, bugs & leaks Optimize strtr w/ 2nd arg array
-rw-r--r--ext/standard/string.c447
1 files changed, 360 insertions, 87 deletions
diff --git a/ext/standard/string.c b/ext/standard/string.c
index 29115fea7a..58b5483651 100644
--- a/ext/standard/string.c
+++ b/ext/standard/string.c
@@ -23,6 +23,7 @@
/* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */
#include <stdio.h>
+#include <stdint.h>
#include "php.h"
#include "php_rand.h"
#include "php_string.h"
@@ -57,6 +58,7 @@
#include "php_globals.h"
#include "basic_functions.h"
#include "php_smart_str.h"
+#include <Zend/zend_exceptions.h>
#ifdef ZTS
#include "TSRM.h"
#endif
@@ -132,7 +134,7 @@ static char *php_bin2hex(const unsigned char *old, const size_t oldlen, size_t *
size_t i, j;
result = (unsigned char *) safe_emalloc(oldlen, 2 * sizeof(char), 1);
-
+
for (i = j = 0; i < oldlen; i++) {
result[j++] = hexconvtab[old[i] >> 4];
result[j++] = hexconvtab[old[i] & 15];
@@ -2772,112 +2774,383 @@ PHPAPI char *php_strtr(char *str, int len, char *str_from, char *str_to, int trl
}
/* }}} */
-/* {{{ php_strtr_array
- */
-static void php_strtr_array(zval *return_value, char *str, int slen, HashTable *hash)
+/* {{{ Definitions for php_strtr_array */
+typedef size_t STRLEN; /* STRLEN should be unsigned */
+typedef uint16_t HASH;
+typedef struct {
+ HASH table_mask;
+ STRLEN entries[1];
+} SHIFT_TAB;
+typedef struct {
+ HASH table_mask;
+ int entries[1];
+} HASH_TAB;
+typedef struct {
+ const char *s;
+ STRLEN l;
+} STR;
+typedef struct _pat_and_repl {
+ STR pat;
+ STR repl;
+} PATNREPL;
+
+#define S(a) ((a)->s)
+#define L(a) ((a)->l)
+
+#define SHIFT_TAB_BITS 13
+#define HASH_TAB_BITS 10 /* should be less than sizeof(HASH) * 8 */
+#define SHIFT_TAB_SIZE (1U << SHIFT_TAB_BITS)
+#define HASH_TAB_SIZE (1U << HASH_TAB_BITS)
+
+typedef struct {
+ int B; /* size of suffixes */
+ int Bp; /* size of prefixes */
+ STRLEN m; /* minimum pattern length */
+ int patnum; /* number of patterns */
+ SHIFT_TAB *shift; /* table mapping hash to allowed shift */
+ HASH_TAB *hash; /* table mapping hash to int (pair of pointers) */
+ HASH *prefix; /* array of hashes of prefixes by pattern suffix hash order */
+ PATNREPL *patterns; /* array of prefixes by pattern suffix hash order */
+} PPRES;
+/* }}} */
+
+/* {{{ php_strtr_hash */
+static inline HASH php_strtr_hash(const char *str, int len)
{
- zval **entry;
- char *string_key;
- uint string_key_len;
- zval **trans;
- zval ctmp;
- ulong num_key;
- int minlen = 128*1024;
- int maxlen = 0, pos, len, found;
- char *key;
- HashPosition hpos;
- smart_str result = {0};
- HashTable tmp_hash;
-
- zend_hash_init(&tmp_hash, zend_hash_num_elements(hash), NULL, NULL, 0);
- zend_hash_internal_pointer_reset_ex(hash, &hpos);
- while (zend_hash_get_current_data_ex(hash, (void **)&entry, &hpos) == SUCCESS) {
- switch (zend_hash_get_current_key_ex(hash, &string_key, &string_key_len, &num_key, 0, &hpos)) {
- case HASH_KEY_IS_STRING:
- len = string_key_len-1;
- if (len < 1) {
- zend_hash_destroy(&tmp_hash);
- RETURN_FALSE;
- }
- zend_hash_add(&tmp_hash, string_key, string_key_len, entry, sizeof(zval*), NULL);
- if (len > maxlen) {
- maxlen = len;
- }
- if (len < minlen) {
- minlen = len;
- }
- break;
+ HASH res = 0;
+ int i;
+ for (i = 0; i < len; i++) {
+ res = res * 33 + (unsigned char)str[i];
+ }
+
+ return res;
+}
+/* }}} */
+/* {{{ php_strtr_populate_shift */
+static inline void php_strtr_populate_shift(PATNREPL *patterns, int patnum, int B, STRLEN m, SHIFT_TAB *shift)
+{
+ int i;
+ STRLEN j,
+ max_shift;
+
+ max_shift = m - B + 1;
+ for (i = 0; i < SHIFT_TAB_SIZE; i++) {
+ shift->entries[i] = max_shift;
+ }
+ for (i = 0; i < patnum; i++) {
+ for (j = 0; j < m - B + 1; j++) {
+ HASH h = php_strtr_hash(&S(&patterns[i].pat)[j], B) & shift->table_mask;
+ assert((long long) m - (long long) j - B >= 0);
+ shift->entries[h] = MIN(shift->entries[h], m - j - B);
+ }
+ }
+}
+/* }}} */
+/* {{{ php_strtr_compare_hash_suffix */
+static int php_strtr_compare_hash_suffix(const void *a, const void *b, void *ctx_g)
+{
+ const PPRES *res = ctx_g;
+ const PATNREPL *pnr_a = a,
+ *pnr_b = b;
+ HASH hash_a = php_strtr_hash(&S(&pnr_a->pat)[res->m - res->B], res->B)
+ & res->hash->table_mask,
+ hash_b = php_strtr_hash(&S(&pnr_b->pat)[res->m - res->B], res->B)
+ & res->hash->table_mask;
+ /* TODO: don't recalculate the hashes all the time */
+ if (hash_a > hash_b) {
+ return 1;
+ } else if (hash_a < hash_b) {
+ return -1;
+ } else {
+ /* longer patterns must be sorted first */
+ if (L(&pnr_a->pat) > L(&pnr_b->pat)) {
+ return -1;
+ } else if (L(&pnr_a->pat) < L(&pnr_b->pat)) {
+ return 1;
+ } else {
+ return 0;
+ }
+ }
+}
+/* }}} */
+/* {{{ Sorting (no zend_qsort_r in this PHP version) */
+#define HS_LEFT(i) ((i) * 2 + 1)
+#define HS_RIGHT(i) ((i) * 2 + 2)
+#define HS_PARENT(i) (((i) - 1) / 2);
+#define HS_OFF(data, i) ((void *)(&((data)->arr)[i]))
+#define HS_CMP_CALL(data, i1, i2) \
+ (php_strtr_compare_hash_suffix(HS_OFF((data), (i1)), HS_OFF((data), (i2)), (data)->res))
+struct hs_data {
+ PATNREPL *arr;
+ size_t nel;
+ size_t heapel;
+ PPRES *res;
+};
+static inline void php_strtr_swap(PATNREPL *a, PATNREPL *b)
+{
+ PATNREPL tmp = *a;
+ *a = *b;
+ *b = tmp;
+}
+static inline void php_strtr_fix_heap(struct hs_data *data, size_t i)
+{
+ size_t li = HS_LEFT(i),
+ ri = HS_RIGHT(i),
+ largei;
+ if (li < data->heapel && HS_CMP_CALL(data, li, i) > 0) {
+ largei = li;
+ } else {
+ largei = i;
+ }
+ if (ri < data->heapel && HS_CMP_CALL(data, ri, largei) > 0) {
+ largei = ri;
+ }
+ if (largei != i) {
+ php_strtr_swap(HS_OFF(data, i), HS_OFF(data, largei));
+ php_strtr_fix_heap(data, largei);
+ }
+}
+static inline void php_strtr_build_heap(struct hs_data *data)
+{
+ size_t i;
+ for (i = data->nel / 2; i > 0; i--) {
+ php_strtr_fix_heap(data, i - 1);
+ }
+}
+static inline void php_strtr_heapsort(PATNREPL *arr, size_t nel, PPRES *res)
+{
+ struct hs_data data = { arr, nel, nel, res };
+ size_t i;
+ php_strtr_build_heap(&data);
+ for (i = nel; i > 1; i--) {
+ php_strtr_swap(arr, HS_OFF(&data, i - 1));
+ data.heapel--;
+ php_strtr_fix_heap(&data, 0);
+ }
+}
+/* }}} */
+/* {{{ php_strtr_free_strp */
+static void php_strtr_free_strp(void *strp)
+{
+ STR_FREE(*(char**)strp);
+}
+/* }}} */
+/* {{{ php_strtr_array_prepare_repls */
+static PATNREPL *php_strtr_array_prepare_repls(int slen, HashTable *pats, zend_llist **allocs, int *outsize)
+{
+ PATNREPL *patterns;
+ HashPosition hpos;
+ zval **entry;
+ int num_pats = zend_hash_num_elements(pats),
+ i;
+
+ patterns = safe_emalloc(num_pats, sizeof(*patterns), 0);
+ *allocs = emalloc(sizeof **allocs);
+ zend_llist_init(*allocs, sizeof(void*), &php_strtr_free_strp, 0);
+
+ for (i = 0, zend_hash_internal_pointer_reset_ex(pats, &hpos);
+ zend_hash_get_current_data_ex(pats, (void **)&entry, &hpos) == SUCCESS;
+ zend_hash_move_forward_ex(pats, &hpos)) {
+ char *string_key;
+ uint string_key_len;
+ ulong num_key;
+ zval *tzv = NULL;
+
+ switch (zend_hash_get_current_key_ex(pats, &string_key, &string_key_len, &num_key, 0, &hpos)) {
+ case HASH_KEY_IS_LONG:
+ string_key_len = 1 + zend_spprintf(&string_key, 0, "%ld", (long)num_key);
+ zend_llist_add_element(*allocs, &string_key);
+ /* break missing intentionally */
+
+ case HASH_KEY_IS_STRING:
+ string_key_len--; /* exclude final '\0' */
+ if (string_key_len == 0) { /* empty string given as pattern */
+ efree(patterns);
+ zend_llist_destroy(*allocs);
+ efree(*allocs);
+ *allocs = NULL;
+ return NULL;
+ }
+ if (string_key_len > slen) { /* this pattern can never match */
+ continue;
+ }
- case HASH_KEY_IS_LONG:
- Z_TYPE(ctmp) = IS_LONG;
- Z_LVAL(ctmp) = num_key;
+ if (Z_TYPE_PP(entry) != IS_STRING) {
+ tzv = *entry;
+ zval_addref_p(tzv);
+ SEPARATE_ZVAL(&tzv);
+ convert_to_string(tzv);
+ entry = &tzv;
+ zend_llist_add_element(*allocs, &Z_STRVAL_PP(entry));
+ }
- convert_to_string(&ctmp);
- len = Z_STRLEN(ctmp);
- zend_hash_add(&tmp_hash, Z_STRVAL(ctmp), len+1, entry, sizeof(zval*), NULL);
- zval_dtor(&ctmp);
+ S(&patterns[i].pat) = string_key;
+ L(&patterns[i].pat) = string_key_len;
+ S(&patterns[i].repl) = Z_STRVAL_PP(entry);
+ L(&patterns[i].repl) = Z_STRLEN_PP(entry);
+ i++;
- if (len > maxlen) {
- maxlen = len;
- }
- if (len < minlen) {
- minlen = len;
- }
- break;
+ if (tzv) {
+ efree(tzv);
+ }
}
- zend_hash_move_forward_ex(hash, &hpos);
}
- key = emalloc(maxlen+1);
- pos = 0;
+ *outsize = i;
+ return patterns;
+}
+/* }}} */
- while (pos < slen) {
- if ((pos + maxlen) > slen) {
- maxlen = slen - pos;
- }
+/* {{{ PPRES *php_strtr_array_prepare(STR *text, PATNREPL *patterns, int patnum, int B, int Bp) */
+static PPRES *php_strtr_array_prepare(STR *text, PATNREPL *patterns, int patnum, int B, int Bp)
+{
+ int i;
+ PPRES *res = emalloc(sizeof *res);
- found = 0;
- memcpy(key, str+pos, maxlen);
+ res->m = (STRLEN)-1;
+ for (i = 0; i < patnum; i++) {
+ if (L(&patterns[i].pat) < res->m) {
+ res->m = L(&patterns[i].pat);
+ }
+ }
+ assert(res->m > 0);
+ res->B = B = MIN(B, res->m);
+ res->Bp = Bp = MIN(Bp, res->m);
- for (len = maxlen; len >= minlen; len--) {
- key[len] = 0;
+ res->shift = safe_emalloc(SHIFT_TAB_SIZE, sizeof(*res->shift->entries), sizeof(*res->shift));
+ res->shift->table_mask = SHIFT_TAB_SIZE - 1;
+ php_strtr_populate_shift(patterns, patnum, B, res->m, res->shift);
- if (zend_hash_find(&tmp_hash, key, len+1, (void**)&trans) == SUCCESS) {
- char *tval;
- int tlen;
- zval tmp;
+ res->hash = safe_emalloc(HASH_TAB_SIZE, sizeof(*res->hash->entries), sizeof(*res->hash));
+ res->hash->table_mask = HASH_TAB_SIZE - 1;
- if (Z_TYPE_PP(trans) != IS_STRING) {
- tmp = **trans;
- zval_copy_ctor(&tmp);
- convert_to_string(&tmp);
- tval = Z_STRVAL(tmp);
- tlen = Z_STRLEN(tmp);
- } else {
- tval = Z_STRVAL_PP(trans);
- tlen = Z_STRLEN_PP(trans);
- }
+ res->patterns = safe_emalloc(patnum, sizeof(*res->patterns), 0);
+ memcpy(res->patterns, patterns, sizeof(*patterns) * patnum);
+ php_strtr_heapsort(res->patterns, patnum, res);
- smart_str_appendl(&result, tval, tlen);
- pos += len;
- found = 1;
+ res->prefix = safe_emalloc(patnum, sizeof(*res->prefix), 0);
+ for (i = 0; i < patnum; i++) {
+ res->prefix[i] = php_strtr_hash(S(&res->patterns[i].pat), Bp);
+ }
- if (Z_TYPE_PP(trans) != IS_STRING) {
- zval_dtor(&tmp);
- }
- break;
+ /* Initialize the rest of ->hash */
+ for (i = 0; i < HASH_TAB_SIZE; i++) {
+ res->hash->entries[i] = -1;
+ }
+ {
+ HASH last_h = -1; /* assumes not all bits are used in res->hash */
+ /* res->patterns is already ordered by hash.
+ * Make res->hash->entries[h] de index of the first pattern in
+ * res->patterns that has hash h */
+ for (i = 0; i < patnum; i++) {
+ HASH h = php_strtr_hash(&S(&res->patterns[i].pat)[res->m - res->B], res->B)
+ & res->hash->table_mask;
+ if (h != last_h) {
+ res->hash->entries[h] = i;
+ last_h = h;
}
}
+ }
+ res->hash->entries[HASH_TAB_SIZE] = patnum; /* OK, we effectively allocated SIZE+1 */
+ for (i = HASH_TAB_SIZE - 1; i >= 0; i--) {
+ if (res->hash->entries[i] == -1) {
+ res->hash->entries[i] = res->hash->entries[i + 1];
+ }
+ }
+
+ res->patnum = patnum;
- if (! found) {
- smart_str_appendc(&result, str[pos++]);
+ return res;
+}
+/* }}} */
+/* {{{ php_strtr_array_destroy_ppres(PPRES *d) */
+static void php_strtr_array_destroy_ppres(PPRES *d)
+{
+ efree(d->shift);
+ efree(d->hash);
+ efree(d->prefix);
+ efree(d->patterns);
+ efree(d);
+}
+/* }}} */
+
+/* {{{ php_strtr_array_do_repl(STR *text, PPRES *d, zval *return_value) */
+static void php_strtr_array_do_repl(STR *text, PPRES *d, zval *return_value)
+{
+ STRLEN pos = 0,
+ nextwpos = 0,
+ lastpos = L(text) - d->m;
+ smart_str result = {0};
+
+ while (pos <= lastpos) {
+ HASH h = php_strtr_hash(&S(text)[pos + d->m - d->B], d->B) & d->shift->table_mask;
+ STRLEN shift = d->shift->entries[h];
+
+ if (shift > 0) {
+ pos += shift;
+ } else {
+ HASH h2 = h & d->hash->table_mask,
+ prefix_h = php_strtr_hash(&S(text)[pos], d->Bp);
+
+ int offset_start = d->hash->entries[h2],
+ offset_end = d->hash->entries[h2 + 1], /* exclusive */
+ i = 0;
+
+ for (i = offset_start; i < offset_end; i++) {
+ PATNREPL *pnr;
+ if (d->prefix[i] != prefix_h)
+ continue;
+
+ pnr = &d->patterns[i];
+ if (L(&pnr->pat) > L(text) - pos ||
+ memcmp(S(&pnr->pat), &S(text)[pos], L(&pnr->pat)) != 0)
+ continue;
+
+ smart_str_appendl(&result, &S(text)[nextwpos], pos - nextwpos);
+ smart_str_appendl(&result, S(&pnr->repl), L(&pnr->repl));
+ pos += L(&pnr->pat);
+ nextwpos = pos;
+ goto end_outer_loop;
+ }
+
+ pos++;
+end_outer_loop: ;
}
}
- efree(key);
- zend_hash_destroy(&tmp_hash);
- smart_str_0(&result);
- RETVAL_STRINGL(result.c, result.len, 0);
+ smart_str_appendl(&result, &S(text)[nextwpos], L(text) - nextwpos);
+
+ if (result.c != NULL) {
+ smart_str_0(&result);
+ RETVAL_STRINGL(result.c, result.len, 0);
+ } else {
+ RETURN_EMPTY_STRING();
+ }
+}
+/* }}} */
+
+/* {{{ php_strtr_array */
+static void php_strtr_array(zval *return_value, char *str, int slen, HashTable *pats)
+{
+ PPRES *data;
+ STR text;
+ PATNREPL *patterns;
+ int patterns_len;
+ zend_llist *allocs;
+
+ S(&text) = str;
+ L(&text) = slen;
+
+ patterns = php_strtr_array_prepare_repls(slen, pats, &allocs, &patterns_len);
+ if (patterns == NULL) {
+ RETURN_FALSE;
+ }
+ data = php_strtr_array_prepare(&text, patterns, patterns_len, 2, 2);
+ efree(patterns);
+ php_strtr_array_do_repl(&text, data, return_value);
+ php_strtr_array_destroy_ppres(data);
+ zend_llist_destroy(allocs);
+ efree(allocs);
}
/* }}} */