summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ext/standard/string.c351
1 files changed, 265 insertions, 86 deletions
diff --git a/ext/standard/string.c b/ext/standard/string.c
index 29115fea7a..dc92e8e085 100644
--- a/ext/standard/string.c
+++ b/ext/standard/string.c
@@ -22,7 +22,9 @@
/* Synced with php 3.0 revision 1.193 1999-06-16 [ssb] */
+#define _GNU_SOURCE 1
#include <stdio.h>
+#include <stdint.h>
#include "php.h"
#include "php_rand.h"
#include "php_string.h"
@@ -57,6 +59,7 @@
#include "php_globals.h"
#include "basic_functions.h"
#include "php_smart_str.h"
+#include <Zend/zend_exceptions.h>
#ifdef ZTS
#include "TSRM.h"
#endif
@@ -2772,112 +2775,288 @@ PHPAPI char *php_strtr(char *str, int len, char *str_from, char *str_to, int trl
}
/* }}} */
-/* {{{ php_strtr_array
- */
-static void php_strtr_array(zval *return_value, char *str, int slen, HashTable *hash)
+/* {{{ Definitions for php_strtr_array */
+typedef size_t STRLEN; /* STRLEN should be unsigned */
+typedef uint16_t HASH;
+typedef struct {
+ HASH table_mask;
+ STRLEN entries[1];
+} SHIFT_TAB;
+typedef struct {
+ HASH table_mask;
+ int entries[1];
+} HASH_TAB;
+typedef struct {
+ const char *s;
+ STRLEN l;
+} STR;
+typedef struct _match_node MATCH_NODE;
+struct _match_node {
+ STRLEN pos;
+ MATCH_NODE *next;
+};
+typedef struct _pat_and_repl {
+ STR pat;
+ STR repl;
+} PATNREPL;
+
+#define S(a) ((a)->s)
+#define L(a) ((a)->l)
+
+#define SHIFT_TAB_BITS 13
+#define HASH_TAB_BITS 10 /* should be less than sizeof(HASH) * 8 */
+#define SHIFT_TAB_SIZE (1U << SHIFT_TAB_BITS)
+#define HASH_TAB_SIZE (1U << HASH_TAB_BITS)
+
+typedef struct {
+ int B; /* size of suffixes */
+ int Bp; /* size of prefixes */
+ STRLEN m; /* minimum pattern length */
+ int patnum; /* number of patterns */
+ SHIFT_TAB *shift; /* table mapping hash to allowed shift */
+ HASH_TAB *hash; /* table mapping hash to int (pair of pointers) */
+ HASH *prefix; /* array of hashes of prefixes by pattern suffix hash order */
+ PATNREPL *patterns; /* array of prefixes by pattern suffix hash order */
+} PPRES;
+/* }}} */
+
+/* {{{ php_strtr_hash */
+static inline HASH php_strtr_hash(const char *str, int len)
{
- zval **entry;
- char *string_key;
- uint string_key_len;
- zval **trans;
- zval ctmp;
- ulong num_key;
- int minlen = 128*1024;
- int maxlen = 0, pos, len, found;
- char *key;
- HashPosition hpos;
- smart_str result = {0};
- HashTable tmp_hash;
-
- zend_hash_init(&tmp_hash, zend_hash_num_elements(hash), NULL, NULL, 0);
- zend_hash_internal_pointer_reset_ex(hash, &hpos);
- while (zend_hash_get_current_data_ex(hash, (void **)&entry, &hpos) == SUCCESS) {
- switch (zend_hash_get_current_key_ex(hash, &string_key, &string_key_len, &num_key, 0, &hpos)) {
- case HASH_KEY_IS_STRING:
- len = string_key_len-1;
- if (len < 1) {
- zend_hash_destroy(&tmp_hash);
- RETURN_FALSE;
- }
- zend_hash_add(&tmp_hash, string_key, string_key_len, entry, sizeof(zval*), NULL);
- if (len > maxlen) {
- maxlen = len;
- }
- if (len < minlen) {
- minlen = len;
- }
- break;
+ HASH res = 0;
+ int i;
+ for (i = 0; i < len; i++) {
+ res = (res << 5) + res + (unsigned char)str[i];
+ }
- case HASH_KEY_IS_LONG:
- Z_TYPE(ctmp) = IS_LONG;
- Z_LVAL(ctmp) = num_key;
+ return res;
+}
+/* }}} */
+/* {{{ php_strtr_populate_shift */
+static inline void php_strtr_populate_shift(PATNREPL *patterns, int patnum, int B, STRLEN m, SHIFT_TAB *shift)
+{
+ int i;
+ STRLEN j,
+ max_shift;
+
+ max_shift = m - B + 1;
+ for (i = 0; i < SHIFT_TAB_SIZE; i++) {
+ shift->entries[i] = max_shift;
+ }
+ for (i = 0; i < patnum; i++) {
+ for (j = 0; j < m - B + 1; j++) {
+ HASH h = php_strtr_hash(&S(&patterns[i].pat)[j], B) & shift->table_mask;
+ assert((long long) m - (long long) j - B >= 0);
+ shift->entries[h] = MIN(shift->entries[h], m - j - B);
+ }
+ }
+}
+/* }}} */
+/* {{{ php_strtr_compare_hash_suffix */
+static int php_strtr_compare_hash_suffix(const void *a, const void *b, void *ctx_g)
+{
+ const PPRES *res = ctx_g;
+ const PATNREPL *pnr_a = a,
+ *pnr_b = b;
+ HASH hash_a = php_strtr_hash(&S(&pnr_a->pat)[res->m - res->B], res->B)
+ & res->hash->table_mask,
+ hash_b = php_strtr_hash(&S(&pnr_b->pat)[res->m - res->B], res->B)
+ & res->hash->table_mask;
+ /* TODO: don't recalculate the hashes all the time */
+ return hash_a - hash_b;
+}
+/* }}} */
- convert_to_string(&ctmp);
- len = Z_STRLEN(ctmp);
- zend_hash_add(&tmp_hash, Z_STRVAL(ctmp), len+1, entry, sizeof(zval*), NULL);
- zval_dtor(&ctmp);
+/* {{{ PPRES *php_strtr_array_prepare(STR *text, PATNREPL *patterns, int patnum, int B, int Bp) */
+static PPRES *php_strtr_array_prepare(STR *text, PATNREPL *patterns, int patnum, int B, int Bp)
+{
+ int i;
+ PPRES *res = emalloc(sizeof *res);
- if (len > maxlen) {
- maxlen = len;
- }
- if (len < minlen) {
- minlen = len;
- }
- break;
+ res->m = (STRLEN)-1;
+ for (i = 0; i < patnum; i++) {
+ if (L(&patterns[i].pat) < res->m) {
+ res->m = L(&patterns[i].pat);
}
- zend_hash_move_forward_ex(hash, &hpos);
}
+ assert(res->m > 0);
+ res->B = B = MIN(B, res->m);
+ res->Bp = Bp = MIN(Bp, res->m);
- key = emalloc(maxlen+1);
- pos = 0;
+ res->shift = safe_emalloc(SHIFT_TAB_SIZE, sizeof(*res->shift->entries), sizeof(*res->shift));
+ res->shift->table_mask = SHIFT_TAB_SIZE - 1;
+ php_strtr_populate_shift(patterns, patnum, B, res->m, res->shift);
- while (pos < slen) {
- if ((pos + maxlen) > slen) {
- maxlen = slen - pos;
- }
+ res->hash = safe_emalloc(HASH_TAB_SIZE, sizeof(*res->hash->entries), sizeof(*res->shift));
+ res->hash->table_mask = HASH_TAB_SIZE - 1;
+
+ res->patterns = safe_emalloc(patnum, sizeof(*res->patterns), 0);
+ memcpy(res->patterns, patterns, sizeof(*patterns) * patnum);
+ qsort_r(res->patterns, patnum, sizeof(*res->patterns), php_strtr_compare_hash_suffix, res);
+
+ res->prefix = safe_emalloc(patnum, sizeof(*res->prefix), 0);
+ for (i = 0; i < patnum; i++) {
+ res->prefix[i] = php_strtr_hash(S(&res->patterns[i].pat), Bp);
+ }
- found = 0;
- memcpy(key, str+pos, maxlen);
+ /* Initialize the rest of ->hash */
+ for (i = 0; i < HASH_TAB_SIZE; i++) {
+ res->hash->entries[i] = -1;
+ }
+ {
+ HASH last_h = -1; /* assumes not all bits are used in res->hash */
+ /* res->patterns is already ordered by hash.
+ * Make res->hash->entries[h] de index of the first pattern in
+ * res->patterns that has hash h */
+ for (i = 0; i < patnum; i++) {
+ HASH h = php_strtr_hash(&S(&res->patterns[i].pat)[res->m - res->B], res->B)
+ & res->hash->table_mask;
+ if (h != last_h) {
+ res->hash->entries[h] = i;
+ last_h = h;
+ }
+ }
+ }
+ res->hash->entries[HASH_TAB_SIZE] = patnum;
+ for (i = HASH_TAB_SIZE - 1; i >= 0; i--) {
+ if (res->hash->entries[i] == -1) {
+ res->hash->entries[i] = res->hash->entries[i + 1];
+ }
+ }
- for (len = maxlen; len >= minlen; len--) {
- key[len] = 0;
+ res->patnum = patnum;
- if (zend_hash_find(&tmp_hash, key, len+1, (void**)&trans) == SUCCESS) {
- char *tval;
- int tlen;
- zval tmp;
+ return res;
+}
+/* }}} */
+/* {{{ php_strtr_array_destroy_ppres(PPRES *d) */
+static void php_strtr_array_destroy_ppres(PPRES *d)
+{
+ efree(d->shift);
+ efree(d->hash);
+ efree(d->prefix);
+ efree(d->patterns);
+ efree(d);
+}
+/* }}} */
- if (Z_TYPE_PP(trans) != IS_STRING) {
- tmp = **trans;
- zval_copy_ctor(&tmp);
- convert_to_string(&tmp);
- tval = Z_STRVAL(tmp);
- tlen = Z_STRLEN(tmp);
- } else {
- tval = Z_STRVAL_PP(trans);
- tlen = Z_STRLEN_PP(trans);
- }
+/* {{{ php_strtr_array_do_repl(STR *text, PPRES *d, zval *return_value) */
+static void php_strtr_array_do_repl(STR *text, PPRES *d, zval *return_value)
+{
+ STRLEN pos = 0,
+ lastpos = L(text) - d->m;
+ smart_str result = {0};
- smart_str_appendl(&result, tval, tlen);
- pos += len;
- found = 1;
+ while (pos <= lastpos) {
+ HASH h = php_strtr_hash(&S(text)[pos + d->m - d->B], d->B) & d->shift->table_mask;
+ STRLEN shift = d->shift->entries[h];
- if (Z_TYPE_PP(trans) != IS_STRING) {
- zval_dtor(&tmp);
- }
- break;
+ if (shift > 0) {
+ smart_str_appendl(&result, &S(text)[pos], shift);
+ pos += shift;
+ } else {
+ HASH h2 = h & d->hash->table_mask,
+ prefix_h = php_strtr_hash(&S(text)[pos], d->Bp);
+
+ int offset_start = d->hash->entries[h2],
+ offset_end = d->hash->entries[h2 + 1], /* exclusive */
+ i = 0;
+
+ for (i = offset_start; i < offset_end; i++) {
+ PATNREPL *pnr;
+ if (d->prefix[i] != prefix_h)
+ continue;
+
+ pnr = &d->patterns[i];
+ if (L(&pnr->pat) > L(text) - pos ||
+ memcmp(S(&pnr->pat), &S(text)[pos], L(&pnr->pat)) != 0)
+ continue;
+
+ smart_str_appendl(&result, S(&pnr->repl), (int)L(&pnr->repl));
+ pos += L(&pnr->pat);
+ goto end_outer_loop;
}
+
+ smart_str_appendc(&result, S(text)[pos]);
+ pos++;
+end_outer_loop: ;
}
+ }
+
+ if (pos < L(text)) {
+ smart_str_appendl(&result, &S(text)[pos], (int)(L(text) - pos));
+ }
+
+ if (result.c != NULL) {
+ smart_str_0(&result);
+ RETVAL_STRINGL(result.c, result.len, 0);
+ } else {
+ RETURN_EMPTY_STRING();
+ }
+}
+/* }}} */
+
+/* {{{ php_strtr_array */
+static void php_strtr_array(zval *return_value, char *str, int slen, HashTable *pats)
+{
+ PPRES *data;
+ STR text;
+ PATNREPL *patterns;
+ HashPosition hpos;
+ zval **entry;
+ int num_pats = zend_hash_num_elements(pats),
+ i;
+
+ S(&text) = str;
+ L(&text) = slen;
+ patterns = safe_emalloc(num_pats, sizeof(*patterns), 0);
+
+ for (i = 0, zend_hash_internal_pointer_reset_ex(pats, &hpos);
+ zend_hash_get_current_data_ex(pats, (void **)&entry, &hpos) == SUCCESS;
+ i++, zend_hash_move_forward_ex(pats, &hpos)) {
+ char *string_key;
+ uint string_key_len;
+ ulong num_key;
+ int free_str = 0,
+ free_repl = 0;
+ zval *tzv;
+
+ switch (zend_hash_get_current_key_ex(pats, &string_key, &string_key_len, &num_key, 0, &hpos)) {
+ case HASH_KEY_IS_LONG:
+ string_key_len = 1 + zend_spprintf(&string_key, 0, "%ld", (long)num_key);
+ free_str = 1;
+ /* break missing intentionally */
+
+ case HASH_KEY_IS_STRING:
+ string_key_len--; /* exclude final '\0' */
+ if (string_key_len == 0) { /* empty string given as pattern */
+ efree(patterns);
+ RETURN_FALSE;
+ }
+ if (string_key_len > slen) { /* this pattern can never match */
+ continue;
+ }
+
+ if (Z_TYPE_PP(entry) != IS_STRING) {
+ tzv = *entry;
+ zval_addref_p(tzv);
+ SEPARATE_ZVAL(&tzv);
+ convert_to_string(tzv);
+ entry = &tzv;
+ free_repl = 1;
+ }
- if (! found) {
- smart_str_appendc(&result, str[pos++]);
+ S(&patterns[i].pat) = string_key;
+ L(&patterns[i].pat) = string_key_len;
+ S(&patterns[i].repl) = Z_STRVAL_PP(entry);
+ L(&patterns[i].repl) = Z_STRLEN_PP(entry);
}
}
- efree(key);
- zend_hash_destroy(&tmp_hash);
- smart_str_0(&result);
- RETVAL_STRINGL(result.c, result.len, 0);
+ data = php_strtr_array_prepare(&text, patterns, i, 2, 2);
+ efree(patterns);
+ php_strtr_array_do_repl(&text, data, return_value);
+ php_strtr_array_destroy_ppres(data);
}
/* }}} */