diff options
| author | Andrei Zmievski <andrei@php.net> | 2005-08-11 23:36:07 +0000 |
|---|---|---|
| committer | Andrei Zmievski <andrei@php.net> | 2005-08-11 23:36:07 +0000 |
| commit | 264cec8be641d1c2ea0c95d915d506c4fdbc751a (patch) | |
| tree | e9f980e16f707842248125cf22efabec782cfece /ext | |
| parent | b80cb7bd2f721dad13a97a1300c6dc56934daaf7 (diff) | |
| download | php-git-264cec8be641d1c2ea0c95d915d506c4fdbc751a.tar.gz | |
Unicode support.
Diffstat (limited to 'ext')
40 files changed, 2619 insertions, 569 deletions
diff --git a/ext/bz2/bz2_filter.c b/ext/bz2/bz2_filter.c index 6abf36d65a..fc28823d37 100644 --- a/ext/bz2/bz2_filter.c +++ b/ext/bz2/bz2_filter.c @@ -76,13 +76,20 @@ static php_stream_filter_status_t php_bz2_decompress_filter( while (buckets_in->head) { size_t bin = 0, desired; - bucket = php_stream_bucket_make_writeable(buckets_in->head TSRMLS_CC); - while (bin < bucket->buflen) { - desired = bucket->buflen - bin; + bucket = buckets_in->head; + + if (bucket->is_unicode) { + /* decompression not allowed for unicode data */ + return PSFS_ERR_FATAL; + } + + bucket = php_stream_bucket_make_writeable(bucket TSRMLS_CC); + while (bin < bucket->buf.str.len) { + desired = bucket->buf.str.len - bin; if (desired > data->inbuf_len) { desired = data->inbuf_len; } - memcpy(data->strm.next_in, bucket->buf + bin, desired); + memcpy(data->strm.next_in, bucket->buf.str.val + bin, desired); data->strm.avail_in = desired; status = BZ2_bzDecompress(&(data->strm)); @@ -148,7 +155,8 @@ static void php_bz2_decompress_dtor(php_stream_filter *thisfilter TSRMLS_DC) static php_stream_filter_ops php_bz2_decompress_ops = { php_bz2_decompress_filter, php_bz2_decompress_dtor, - "bzip2.decompress" + "bzip2.decompress", + PSFO_FLAG_ACCEPTS_STRING | PSFO_FLAG_OUTPUTS_STRING }; /* }}} */ @@ -181,14 +189,21 @@ static php_stream_filter_status_t php_bz2_compress_filter( while (buckets_in->head) { size_t bin = 0, desired; - bucket = php_stream_bucket_make_writeable(buckets_in->head TSRMLS_CC); + bucket = buckets_in->head; + + if (bucket->is_unicode) { + /* compression not allowed for unicode data */ + return PSFS_ERR_FATAL; + } + + bucket = php_stream_bucket_make_writeable(bucket TSRMLS_CC); - while (bin < bucket->buflen) { - desired = bucket->buflen - bin; + while (bin < bucket->buf.str.len) { + desired = bucket->buf.str.len - bin; if (desired > data->inbuf_len) { desired = data->inbuf_len; } - memcpy(data->strm.next_in, bucket->buf + bin, desired); + memcpy(data->strm.next_in, bucket->buf.str.val + bin, desired); data->strm.avail_in = desired; status = BZ2_bzCompress(&(data->strm), flags & PSFS_FLAG_FLUSH_CLOSE ? BZ_FINISH : (flags & PSFS_FLAG_FLUSH_INC ? BZ_FLUSH : BZ_RUN)); @@ -254,7 +269,8 @@ static void php_bz2_compress_dtor(php_stream_filter *thisfilter TSRMLS_DC) static php_stream_filter_ops php_bz2_compress_ops = { php_bz2_compress_filter, php_bz2_compress_dtor, - "bzip2.compress" + "bzip2.compress", + PSFO_FLAG_ACCEPTS_STRING | PSFO_FLAG_OUTPUTS_STRING }; /* }}} */ diff --git a/ext/dom/php_dom.c b/ext/dom/php_dom.c index e007198573..b7fc1724cc 100644 --- a/ext/dom/php_dom.c +++ b/ext/dom/php_dom.c @@ -402,7 +402,7 @@ zend_object_value dom_objects_store_clone_obj(zval *zobject TSRMLS_DC) obj = &EG(objects_store).object_buckets[handle].bucket.obj; if (obj->clone == NULL) { - php_error(E_ERROR, "Trying to clone an uncloneable object of class %s", Z_OBJCE_P(zobject)->name); + php_error(E_ERROR, "Trying to clone an uncloneable object of class %v", Z_OBJCE_P(zobject)->name); } obj->clone(obj->object, &new_object TSRMLS_CC); @@ -420,7 +420,7 @@ zend_object_value dom_objects_store_clone_obj(zval *zobject TSRMLS_DC) zend_object_value dom_objects_ze1_clone_obj(zval *zobject TSRMLS_DC) { - php_error(E_ERROR, "Cannot clone object of class %s due to 'zend.ze1_compatibility_mode'", Z_OBJCE_P(zobject)->name); + php_error(E_ERROR, "Cannot clone object of class %v due to 'zend.ze1_compatibility_mode'", Z_OBJCE_P(zobject)->name); /* Return zobject->value.obj just to satisfy compiler */ return zobject->value.obj; } diff --git a/ext/dom/php_dom.h b/ext/dom/php_dom.h index 9fa2f1770e..210d2cfe1b 100644 --- a/ext/dom/php_dom.h +++ b/ext/dom/php_dom.h @@ -119,7 +119,7 @@ entry = zend_register_internal_class_ex(&ce, parent_ce, NULL TSRMLS_CC); #define DOM_GET_OBJ(__ptr, __id, __prtype, __intern) { \ __intern = (dom_object *)zend_object_store_get_object(__id TSRMLS_CC); \ if (__intern->ptr == NULL || !(__ptr = (__prtype)((php_libxml_node_ptr *)__intern->ptr)->node)) { \ - php_error_docref(NULL TSRMLS_CC, E_WARNING, "Couldn't fetch %s", __intern->std.ce->name);\ + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Couldn't fetch %v", __intern->std.ce->name);\ RETURN_NULL();\ } \ } diff --git a/ext/iconv/iconv.c b/ext/iconv/iconv.c index 92e6a2e0df..6ee5309c6b 100644 --- a/ext/iconv/iconv.c +++ b/ext/iconv/iconv.c @@ -2279,6 +2279,8 @@ PHP_FUNCTION(iconv_get_encoding) } /* }}} */ +#ifdef SMG_0 /* UTODO: Needs updating for unicode */ + /* {{{ iconv stream filter */ typedef struct _php_iconv_stream_filter { iconv_t cd; @@ -2666,6 +2668,18 @@ static php_iconv_err_t php_iconv_stream_filter_unregister_factory(TSRMLS_D) return PHP_ICONV_ERR_SUCCESS; } /* }}} */ +#else /* Make dummy bypasses for the register/unregister loop */ +static php_iconv_err_t php_iconv_stream_filter_register_factory(TSRMLS_D) +{ + return PHP_ICONV_ERR_SUCCESS; +} +static php_iconv_err_t php_iconv_stream_filter_unregister_factory(TSRMLS_D) +{ + return PHP_ICONV_ERR_SUCCESS; +} +#endif /* unicode bypass */ + + /* }}} */ #endif diff --git a/ext/mysql/php_mysql.c b/ext/mysql/php_mysql.c index 06970a09cb..a24758c9fe 100644 --- a/ext/mysql/php_mysql.c +++ b/ext/mysql/php_mysql.c @@ -2047,7 +2047,7 @@ static void php_mysql_fetch_hash(INTERNAL_FUNCTION_PARAMETERS, int result_type, fcc.object_pp = &return_value; if (zend_call_function(&fci, &fcc TSRMLS_CC) == FAILURE) { - zend_throw_exception_ex(zend_exception_get_default(), 0 TSRMLS_CC, "Could not execute %s::%s()", ce->name, ce->constructor->common.function_name); + zend_throw_exception_ex(zend_exception_get_default(), 0 TSRMLS_CC, "Could not execute %v::%v()", ce->name, ce->constructor->common.function_name); } else { if (retval_ptr) { zval_ptr_dtor(&retval_ptr); @@ -2057,7 +2057,7 @@ static void php_mysql_fetch_hash(INTERNAL_FUNCTION_PARAMETERS, int result_type, efree(fci.params); } } else if (ctor_params) { - zend_throw_exception_ex(zend_exception_get_default(), 0 TSRMLS_CC, "Class %s does not have a constructor hence you cannot use ctor_params", ce->name); + zend_throw_exception_ex(zend_exception_get_default(), 0 TSRMLS_CC, "Class %v does not have a constructor hence you cannot use ctor_params", ce->name); } } } diff --git a/ext/simplexml/simplexml.c b/ext/simplexml/simplexml.c index deae3573dd..4382016c81 100644 --- a/ext/simplexml/simplexml.c +++ b/ext/simplexml/simplexml.c @@ -1128,7 +1128,7 @@ static zend_object_handlers sxe_ze1_object_handlers = { static zend_object_value sxe_object_ze1_clone(zval *zobject TSRMLS_DC) { - php_error(E_ERROR, "Cannot clone object of class %s due to 'zend.ze1_compatibility_mode'", Z_OBJCE_P(zobject)->name); + php_error(E_ERROR, "Cannot clone object of class %v due to 'zend.ze1_compatibility_mode'", Z_OBJCE_P(zobject)->name); /* Return zobject->value.obj just to satisfy compiler */ return zobject->value.obj; } diff --git a/ext/spl/spl_array.c b/ext/spl/spl_array.c index 7cf96cb1d0..2c24030e9e 100755 --- a/ext/spl/spl_array.c +++ b/ext/spl/spl_array.c @@ -320,8 +320,10 @@ static zval **spl_array_get_dimension_ptr_ptr(int check_inherited, zval *object, switch(Z_TYPE_P(offset)) { case IS_STRING: - if (zend_symtable_find(spl_array_get_hash_table(intern, 0 TSRMLS_CC), Z_STRVAL_P(offset), Z_STRLEN_P(offset)+1, (void **) &retval) == FAILURE) { - zend_error(E_NOTICE, "Undefined index: %s", Z_STRVAL_P(offset)); + case IS_BINARY: + case IS_UNICODE: + if (zend_u_symtable_find(spl_array_get_hash_table(intern, 0 TSRMLS_CC), Z_TYPE_P(offset), Z_UNIVAL_P(offset), Z_UNILEN_P(offset)+1, (void **) &retval) == FAILURE) { + zend_error(E_NOTICE, "Undefined index: %R", Z_TYPE_P(offset), Z_STRVAL_P(offset)); return &EG(uninitialized_zval_ptr); } else { return retval; @@ -396,8 +398,10 @@ static void spl_array_write_dimension_ex(int check_inherited, zval *object, zval } switch(Z_TYPE_P(offset)) { case IS_STRING: + case IS_BINARY: + case IS_UNICODE: value->refcount++; - zend_symtable_update(spl_array_get_hash_table(intern, 0 TSRMLS_CC), Z_STRVAL_P(offset), Z_STRLEN_P(offset)+1, (void**)&value, sizeof(void*), NULL); + zend_u_symtable_update(spl_array_get_hash_table(intern, 0 TSRMLS_CC), Z_TYPE_P(offset), Z_UNIVAL_P(offset), Z_UNILEN_P(offset)+1, (void**)&value, sizeof(void*), NULL); return; case IS_DOUBLE: case IS_RESOURCE: @@ -439,12 +443,12 @@ static void spl_array_unset_dimension_ex(int check_inherited, zval *object, zval switch(Z_TYPE_P(offset)) { case IS_STRING: if (spl_array_get_hash_table(intern, 0 TSRMLS_CC) == &EG(symbol_table)) { - if (zend_delete_global_variable(Z_STRVAL_P(offset), Z_STRLEN_P(offset) TSRMLS_CC)) { + if (zend_u_delete_global_variable(Z_TYPE_P(offset), Z_UNIVAL_P(offset), Z_UNILEN_P(offset) TSRMLS_CC)) { zend_error(E_NOTICE,"Undefined index: %s", Z_STRVAL_P(offset)); } } else { - if (zend_symtable_del(spl_array_get_hash_table(intern, 0 TSRMLS_CC), Z_STRVAL_P(offset), Z_STRLEN_P(offset)+1) == FAILURE) { - zend_error(E_NOTICE,"Undefined index: %s", Z_STRVAL_P(offset)); + if (zend_u_symtable_del(spl_array_get_hash_table(intern, 0 TSRMLS_CC), Z_TYPE_P(offset), Z_UNIVAL_P(offset), Z_UNILEN_P(offset)+1) == FAILURE) { + zend_error(E_NOTICE,"Undefined index: %R", Z_TYPE_P(offset), Z_UNIVAL_P(offset)); } } break; @@ -491,7 +495,9 @@ static int spl_array_has_dimension_ex(int check_inherited, zval *object, zval *o switch(Z_TYPE_P(offset)) { case IS_STRING: - return zend_symtable_exists(spl_array_get_hash_table(intern, 0 TSRMLS_CC), Z_STRVAL_P(offset), Z_STRLEN_P(offset)+1); + case IS_BINARY: + case IS_UNICODE: + return zend_u_symtable_exists(spl_array_get_hash_table(intern, 0 TSRMLS_CC), Z_TYPE_P(offset), Z_UNIVAL_P(offset), Z_UNILEN_P(offset)+1); case IS_DOUBLE: case IS_RESOURCE: case IS_BOOL: @@ -562,7 +568,7 @@ void spl_array_iterator_append(zval *object, zval *append_value TSRMLS_DC) /* {{ } if (Z_TYPE_P(intern->array) == IS_OBJECT) { - php_error_docref(NULL TSRMLS_CC, E_ERROR, "Cannot append properties to objects, use %s::offsetSet() instead", Z_OBJCE_P(object)->name); + php_error_docref(NULL TSRMLS_CC, E_ERROR, "Cannot append properties to objects, use %v::offsetSet() instead", Z_OBJCE_P(object)->name); } spl_array_write_dimension(object, NULL, append_value TSRMLS_CC); @@ -686,8 +692,10 @@ static int spl_array_skip_protected(spl_array_object *intern TSRMLS_DC) /* {{{ * if (Z_TYPE_P(intern->array) == IS_OBJECT) { do { - if (zend_hash_get_current_key_ex(aht, &string_key, &string_length, &num_key, 0, &intern->pos) == HASH_KEY_IS_STRING) { - if (!string_length || string_key[0]) { + if (zend_hash_get_current_key_ex(aht, &string_key, &string_length, &num_key, 0, &intern->pos) == UG(unicode)?HASH_KEY_IS_UNICODE:HASH_KEY_IS_STRING) { + if (!string_length || + ((UG(unicode) && ((UChar*)string_key)[0]) || + (!UG(unicode) && string_key[0]))) { return SUCCESS; } } else { @@ -1139,6 +1147,12 @@ SPL_METHOD(Array, key) case HASH_KEY_IS_STRING: RETVAL_STRINGL(string_key, string_length - 1, 0); break; + case HASH_KEY_IS_BINARY: + RETVAL_BINARYL(string_key, string_length - 1, 0); + break; + case HASH_KEY_IS_UNICODE: + RETVAL_UNICODEL((UChar*)string_key, string_length - 1, 0); + break; case HASH_KEY_IS_LONG: RETVAL_LONG(num_key); break; diff --git a/ext/spl/spl_functions.c b/ext/spl/spl_functions.c index cffdb89cd6..82a6bfb8b9 100755 --- a/ext/spl/spl_functions.c +++ b/ext/spl/spl_functions.c @@ -113,7 +113,7 @@ void spl_register_property( zend_class_entry * class_entry, char *prop_name, zva void spl_add_class_name(zval *list, zend_class_entry * pce, int allow, int ce_flags TSRMLS_DC) { if (!allow || (allow > 0 && pce->ce_flags & ce_flags) || (allow < 0 && !(pce->ce_flags & ce_flags))) { - size_t len = strlen(pce->name); + size_t len = pce->name_length; zval *tmp; if (zend_hash_find(Z_ARRVAL_P(list), pce->name, len+1, (void*)&tmp) == FAILURE) { diff --git a/ext/spl/spl_iterators.c b/ext/spl/spl_iterators.c index 10607de947..130acfcf89 100755 --- a/ext/spl/spl_iterators.c +++ b/ext/spl/spl_iterators.c @@ -1182,6 +1182,7 @@ ZEND_END_ARG_INFO(); static zend_function_entry spl_funcs_RecursiveFilterIterator[] = { SPL_ME(RecursiveFilterIterator, __construct, arginfo_parent_it___construct, ZEND_ACC_PUBLIC) + SPL_MA(ParentIterator, accept, RecursiveFilterIterator, hasChildren, NULL, ZEND_ACC_PUBLIC) SPL_ME(RecursiveFilterIterator, hasChildren, NULL, ZEND_ACC_PUBLIC) SPL_ME(RecursiveFilterIterator, getChildren, NULL, ZEND_ACC_PUBLIC) {NULL, NULL, NULL} @@ -1495,7 +1496,7 @@ SPL_METHOD(CachingIterator, __toString) intern = (spl_dual_it_object*)zend_object_store_get_object(getThis() TSRMLS_CC); if (!(intern->u.caching.flags & CIT_CALL_TOSTRING)) { - zend_throw_exception_ex(spl_ce_BadMethodCallException, 0 TSRMLS_CC, "%s does not fetch string value (see CachingIterator::__construct)", Z_OBJCE_P(getThis())->name); + zend_throw_exception_ex(spl_ce_BadMethodCallException, 0 TSRMLS_CC, "%v does not fetch string value (see CachingIterator::__construct)", Z_OBJCE_P(getThis())->name); } if (intern->u.caching.zstr) { RETURN_STRINGL(Z_STRVAL_P(intern->u.caching.zstr), Z_STRLEN_P(intern->u.caching.zstr), 1); diff --git a/ext/sqlite/sqlite.c b/ext/sqlite/sqlite.c index e3c58ccae4..e164634123 100644 --- a/ext/sqlite/sqlite.c +++ b/ext/sqlite/sqlite.c @@ -2082,7 +2082,7 @@ PHP_FUNCTION(sqlite_fetch_object) fcc.object_pp = &return_value; if (zend_call_function(&fci, &fcc TSRMLS_CC) == FAILURE) { - zend_throw_exception_ex(sqlite_ce_exception, 0 TSRMLS_CC, "Could not execute %s::%s()", class_name, ce->constructor->common.function_name); + zend_throw_exception_ex(sqlite_ce_exception, 0 TSRMLS_CC, "Could not execute %s::%v()", class_name, ce->constructor->common.function_name); } else { if (retval_ptr) { zval_ptr_dtor(&retval_ptr); diff --git a/ext/standard/array.c b/ext/standard/array.c index e34c915e2f..adf690b213 100644 --- a/ext/standard/array.c +++ b/ext/standard/array.c @@ -190,18 +190,26 @@ static int array_key_compare(const void *a, const void *b TSRMLS_DC) if (f->nKeyLength == 0) { Z_TYPE(first) = IS_LONG; Z_LVAL(first) = f->h; + } else if (f->key.type == IS_UNICODE) { + Z_TYPE(first) = IS_UNICODE; + Z_USTRVAL(first) = f->key.u.unicode; + Z_USTRLEN(first) = f->nKeyLength-1; } else { - Z_TYPE(first) = IS_STRING; - Z_STRVAL(first) = f->arKey; + Z_TYPE(first) = f->key.type; + Z_STRVAL(first) = f->key.u.string; Z_STRLEN(first) = f->nKeyLength-1; } if (s->nKeyLength == 0) { Z_TYPE(second) = IS_LONG; Z_LVAL(second) = s->h; + } else if (s->key.type == IS_UNICODE) { + Z_TYPE(second) = IS_UNICODE; + Z_USTRVAL(second) = s->key.u.unicode; + Z_USTRLEN(second) = s->nKeyLength-1; } else { - Z_TYPE(second) = IS_STRING; - Z_STRVAL(second) = s->arKey; + Z_TYPE(second) = s->key.type; + Z_STRVAL(second) = s->key.u.string; Z_STRLEN(second) = s->nKeyLength-1; } @@ -724,22 +732,30 @@ static int array_user_key_compare(const void *a, const void *b TSRMLS_DC) f = *((Bucket **) a); s = *((Bucket **) b); - if (f->nKeyLength) { - Z_STRVAL(key1) = estrndup(f->arKey, f->nKeyLength-1); - Z_STRLEN(key1) = f->nKeyLength-1; - Z_TYPE(key1) = IS_STRING; - } else { + if (f->nKeyLength == 0) { Z_LVAL(key1) = f->h; Z_TYPE(key1) = IS_LONG; - } - if (s->nKeyLength) { - Z_STRVAL(key2) = estrndup(s->arKey, s->nKeyLength-1); - Z_STRLEN(key2) = s->nKeyLength-1; - Z_TYPE(key2) = IS_STRING; + } else if (f->key.type == IS_UNICODE) { + Z_USTRVAL(key1) = eustrndup(f->key.u.unicode, f->nKeyLength-1); + Z_USTRLEN(key1) = f->nKeyLength-1; + Z_TYPE(key1) = IS_UNICODE; } else { - Z_LVAL(key2) = s->h; - Z_TYPE(key2) = IS_LONG; + Z_STRVAL(key1) = estrndup(f->key.u.string, f->nKeyLength-1); + Z_STRLEN(key1) = f->nKeyLength-1; + Z_TYPE(key1) = f->key.type; } + if (s->nKeyLength == 0) { + Z_LVAL(key2) = s->h; + Z_TYPE(key2) = IS_LONG; + } else if (s->key.type == IS_UNICODE) { + Z_USTRVAL(key2) = eustrndup(s->key.u.unicode, s->nKeyLength-1); + Z_USTRLEN(key2) = s->nKeyLength-1; + Z_TYPE(key2) = IS_UNICODE; + } else { + Z_STRVAL(key2) = estrndup(s->key.u.string, s->nKeyLength-1); + Z_STRLEN(key2) = s->nKeyLength-1; + Z_TYPE(key2) = s->key.type; + } status = call_user_function(EG(function_table), NULL, *BG(user_compare_func_name), &retval, 2, args TSRMLS_CC); @@ -942,6 +958,12 @@ PHP_FUNCTION(key) case HASH_KEY_IS_STRING: RETVAL_STRINGL(string_key, string_length - 1, 1); break; + case HASH_KEY_IS_BINARY: + RETVAL_BINARYL(string_key, string_length - 1, 1); + break; + case HASH_KEY_IS_UNICODE: + RETVAL_UNICODEL(string_key, string_length - 1, 1); + break; case HASH_KEY_IS_LONG: RETVAL_LONG(num_key); break; @@ -1085,11 +1107,20 @@ static int php_array_walk(HashTable *target_hash, zval **userdata, int recursive MAKE_STD_ZVAL(key); /* Set up the key */ - if (zend_hash_get_current_key_ex(target_hash, &string_key, &string_key_len, &num_key, 0, &pos) == HASH_KEY_IS_LONG) { - Z_TYPE_P(key) = IS_LONG; - Z_LVAL_P(key) = num_key; - } else { - ZVAL_STRINGL(key, string_key, string_key_len-1, 1); + switch (zend_hash_get_current_key_ex(target_hash, &string_key, &string_key_len, &num_key, 0, &pos)) { + case HASH_KEY_IS_LONG: + Z_TYPE_P(key) = IS_LONG; + Z_LVAL_P(key) = num_key; + break; + case HASH_KEY_IS_STRING: + ZVAL_STRINGL(key, string_key, string_key_len-1, 1); + break; + case HASH_KEY_IS_BINARY: + ZVAL_BINARYL(key, string_key, string_key_len-1, 1); + break; + case HASH_KEY_IS_UNICODE: + ZVAL_UNICODEL(key, string_key, string_key_len-1, 1); + break; } fci.size = sizeof(fci); @@ -1158,7 +1189,8 @@ PHP_FUNCTION(array_walk) RETURN_FALSE; } if (Z_TYPE_PP(BG(array_walk_func_name)) != IS_ARRAY && - Z_TYPE_PP(BG(array_walk_func_name)) != IS_STRING) { + Z_TYPE_PP(BG(array_walk_func_name)) != IS_STRING && + Z_TYPE_PP(BG(array_walk_func_name)) != IS_UNICODE) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Wrong syntax for function name"); BG(array_walk_func_name) = old_walk_func_name; RETURN_FALSE; @@ -1193,7 +1225,9 @@ PHP_FUNCTION(array_walk_recursive) BG(array_walk_func_name) = old_walk_func_name; RETURN_FALSE; } - if (Z_TYPE_PP(BG(array_walk_func_name)) != IS_ARRAY && Z_TYPE_PP(BG(array_walk_func_name)) != IS_STRING) { + if (Z_TYPE_PP(BG(array_walk_func_name)) != IS_ARRAY && + Z_TYPE_PP(BG(array_walk_func_name)) != IS_STRING && + Z_TYPE_PP(BG(array_walk_func_name)) != IS_UNICODE) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Wrong syntax for function name"); BG(array_walk_func_name) = old_walk_func_name; RETURN_FALSE; @@ -1254,6 +1288,12 @@ static void php_search_array(INTERNAL_FUNCTION_PARAMETERS, int behavior) case HASH_KEY_IS_STRING: RETURN_STRINGL(string_key, str_key_len-1, 1); break; + case HASH_KEY_IS_BINARY: + RETURN_BINARYL(string_key, str_key_len-1, 1); + break; + case HASH_KEY_IS_UNICODE: + RETURN_UNICODEL(string_key, str_key_len-1, 1); + break; case HASH_KEY_IS_LONG: RETURN_LONG(num_key); break; @@ -1376,9 +1416,15 @@ PHP_FUNCTION(extract) key_type = zend_hash_get_current_key_ex(Z_ARRVAL_PP(var_array), &var_name, &var_name_len, &num_key, 0, &pos); var_exists = 0; - if (key_type == HASH_KEY_IS_STRING) { + if (key_type == HASH_KEY_IS_STRING || + key_type == HASH_KEY_IS_UNICODE) { + if (key_type == HASH_KEY_IS_STRING) { + key_type = IS_STRING; + } else { + key_type = IS_UNICODE; + } var_name_len--; - var_exists = zend_hash_exists(EG(active_symbol_table), var_name, var_name_len + 1); + var_exists = zend_u_hash_exists(EG(active_symbol_table), key_type, var_name, var_name_len + 1); } else if (extract_type == EXTR_PREFIX_ALL || extract_type == EXTR_PREFIX_INVALID) { smart_str_appendl(&final_name, Z_STRVAL_PP(prefix), Z_STRLEN_PP(prefix)); smart_str_appendc(&final_name, '_'); @@ -1484,17 +1530,19 @@ static void php_compact_var(HashTable *eg_active_symbol_table, zval *return_valu { zval **value_ptr, *value, *data; - if (Z_TYPE_P(entry) == IS_STRING) { - if (zend_hash_find(eg_active_symbol_table, Z_STRVAL_P(entry), - Z_STRLEN_P(entry)+1, (void **)&value_ptr) != FAILURE) { + if (Z_TYPE_P(entry) == IS_STRING || + Z_TYPE_P(entry) == IS_BINARY || + Z_TYPE_P(entry) == IS_UNICODE) { + if (zend_u_hash_find(eg_active_symbol_table, Z_TYPE_P(entry), Z_UNIVAL_P(entry), + Z_UNILEN_P(entry)+1, (void **)&value_ptr) != FAILURE) { value = *value_ptr; ALLOC_ZVAL(data); *data = *value; zval_copy_ctor(data); INIT_PZVAL(data); - zend_hash_update(Z_ARRVAL_P(return_value), Z_STRVAL_P(entry), - Z_STRLEN_P(entry)+1, &data, sizeof(zval *), NULL); + zend_u_hash_update(Z_ARRVAL_P(return_value), Z_TYPE_P(entry), Z_UNIVAL_P(entry), + Z_UNILEN_P(entry)+1, &data, sizeof(zval *), NULL); } } else if (Z_TYPE_P(entry) == IS_ARRAY) { @@ -1548,6 +1596,7 @@ PHP_FUNCTION(array_fill) switch (Z_TYPE_PP(start_key)) { case IS_STRING: + case IS_UNICODE: case IS_LONG: case IS_DOUBLE: /* allocate an array for return */ @@ -1839,10 +1888,11 @@ HashTable* php_splice(HashTable *in_hash, int offset, int length, entry->refcount++; /* Update output hash depending on key type */ - if (p->nKeyLength) - zend_hash_update(out_hash, p->arKey, p->nKeyLength, &entry, sizeof(zval *), NULL); - else + if (p->nKeyLength == 0) { zend_hash_next_index_insert(out_hash, &entry, sizeof(zval *), NULL); + } else { + zend_u_hash_update(out_hash, p->key.type, &p->key.u, p->nKeyLength, &entry, sizeof(zval *), NULL); + } } /* If hash for removed entries exists, go until offset+length and copy the entries to it */ @@ -1850,10 +1900,11 @@ HashTable* php_splice(HashTable *in_hash, int offset, int length, for ( ; pos<offset+length && p; pos++, p=p->pListNext) { entry = *((zval **)p->pData); entry->refcount++; - if (p->nKeyLength) - zend_hash_update(*removed, p->arKey, p->nKeyLength, &entry, sizeof(zval *), NULL); - else + if (p->nKeyLength == 0) { zend_hash_next_index_insert(*removed, &entry, sizeof(zval *), NULL); + } else { + zend_u_hash_update(*removed, p->key.type, &p->key.u, p->nKeyLength, &entry, sizeof(zval *), NULL); + } } } else /* otherwise just skip those entries */ for ( ; pos<offset+length && p; pos++, p=p->pListNext); @@ -1882,10 +1933,11 @@ HashTable* php_splice(HashTable *in_hash, int offset, int length, for ( ; p ; p=p->pListNext) { entry = *((zval **)p->pData); entry->refcount++; - if (p->nKeyLength) - zend_hash_update(out_hash, p->arKey, p->nKeyLength, &entry, sizeof(zval *), NULL); - else + if (p->nKeyLength == 0) { zend_hash_next_index_insert(out_hash, &entry, sizeof(zval *), NULL); + } else { + zend_u_hash_update(out_hash, p->key.type, &p->key.u, p->nKeyLength, &entry, sizeof(zval *), NULL); + } } zend_hash_internal_pointer_reset(out_hash); @@ -1948,6 +2000,7 @@ static void _phpi_pop(INTERNAL_FUNCTION_PARAMETERS, int off_the_end) char *key = NULL; int key_len = 0; ulong index; + zend_uchar key_type; /* Get the arguments and do error-checking */ if (ZEND_NUM_ARGS() != 1 || zend_get_parameters_ex(1, &stack) == FAILURE) { @@ -1972,9 +2025,12 @@ static void _phpi_pop(INTERNAL_FUNCTION_PARAMETERS, int off_the_end) RETVAL_ZVAL(*val, 1, 0); /* Delete the first or last value */ - zend_hash_get_current_key_ex(Z_ARRVAL_PP(stack), &key, &key_len, &index, 0, NULL); + key_type = zend_hash_get_current_key_ex(Z_ARRVAL_PP(stack), &key, &key_len, &index, 0, NULL); if (key && Z_ARRVAL_PP(stack) == &EG(symbol_table)) { - zend_delete_global_variable(key, key_len-1 TSRMLS_CC); + if (key_type == HASH_KEY_IS_UNICODE) key_type = IS_UNICODE; + else if (key_type == HASH_KEY_IS_STRING) key_type = IS_STRING; + else key_type = IS_BINARY; + zend_u_delete_global_variable(key_type, key, key_len-1 TSRMLS_CC); } else { zend_hash_del_key_or_index(Z_ARRVAL_PP(stack), key, key_len, index, (key) ? HASH_DEL_KEY : HASH_DEL_INDEX); } @@ -2235,12 +2291,21 @@ PHP_FUNCTION(array_slice) /* Copy elements from input array to the one that's returned */ while (pos < offset_val+length_val && zend_hash_get_current_data_ex(Z_ARRVAL_PP(input), (void **)&entry, &hpos) == SUCCESS) { + zend_uchar utype; (*entry)->refcount++; switch (zend_hash_get_current_key_ex(Z_ARRVAL_PP(input), &string_key, &string_key_len, &num_key, 0, &hpos)) { case HASH_KEY_IS_STRING: - zend_hash_update(Z_ARRVAL_P(return_value), string_key, string_key_len, + utype = IS_STRING; + goto ukey; + case HASH_KEY_IS_BINARY: + utype = IS_BINARY; + goto ukey; + case HASH_KEY_IS_UNICODE: + utype = IS_UNICODE; +ukey: + zend_u_hash_update(Z_ARRVAL_P(return_value), utype, string_key, string_key_len, entry, sizeof(zval *), NULL); break; @@ -2270,10 +2335,20 @@ PHPAPI int php_array_merge(HashTable *dest, HashTable *src, int recursive TSRMLS zend_hash_internal_pointer_reset_ex(src, &pos); while (zend_hash_get_current_data_ex(src, (void **)&src_entry, &pos) == SUCCESS) { + zend_uchar utype; + switch (zend_hash_get_current_key_ex(src, &string_key, &string_key_len, &num_key, 0, &pos)) { case HASH_KEY_IS_STRING: + utype = IS_STRING; + goto ukey; + case HASH_KEY_IS_BINARY: + utype = IS_BINARY; + goto ukey; + case HASH_KEY_IS_UNICODE: + utype = IS_UNICODE; +ukey: if (recursive && - zend_hash_find(dest, string_key, string_key_len, (void **)&dest_entry) == SUCCESS) { + zend_u_hash_find(dest, utype, string_key, string_key_len, (void **)&dest_entry) == SUCCESS) { if (*src_entry == *dest_entry && ((*dest_entry)->refcount % 2)) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "recursion detected"); return 0; @@ -2289,7 +2364,7 @@ PHPAPI int php_array_merge(HashTable *dest, HashTable *src, int recursive TSRMLS } else { (*src_entry)->refcount++; - zend_hash_update(dest, string_key, strlen(string_key)+1, + zend_u_hash_update(dest, utype, string_key, string_key_len+1, src_entry, sizeof(zval *), NULL); } break; @@ -2419,9 +2494,14 @@ PHP_FUNCTION(array_keys) switch (zend_hash_get_current_key_ex(Z_ARRVAL_PP(input), &string_key, &string_key_len, &num_key, 1, &pos)) { case HASH_KEY_IS_STRING: - Z_TYPE_P(new_val) = IS_STRING; - Z_STRVAL_P(new_val) = string_key; - Z_STRLEN_P(new_val) = string_key_len-1; + ZVAL_STRINGL(new_val, string_key, string_key_len-1, 0); + goto ukey; + case HASH_KEY_IS_BINARY: + ZVAL_BINARYL(new_val, string_key, string_key_len-1, 0); + goto ukey; + case HASH_KEY_IS_UNICODE: + ZVAL_UNICODEL(new_val, string_key, string_key_len-1, 0); +ukey: zend_hash_next_index_insert(Z_ARRVAL_P(return_value), &new_val, sizeof(zval *), NULL); break; @@ -2515,9 +2595,12 @@ PHP_FUNCTION(array_count_values) } else { Z_LVAL_PP(tmp)++; } - } else if (Z_TYPE_PP(entry) == IS_STRING) { + } else if (Z_TYPE_PP(entry) == IS_STRING || + Z_TYPE_PP(entry) == IS_BINARY || + Z_TYPE_PP(entry) == IS_UNICODE) { /* make sure our array does not end up with numeric string keys */ - if (is_numeric_string(Z_STRVAL_PP(entry), Z_STRLEN_PP(entry), NULL, NULL, 0) == IS_LONG) { + if ((Z_TYPE_PP(entry) == IS_STRING && is_numeric_string(Z_STRVAL_PP(entry), Z_STRLEN_PP(entry), NULL, NULL, 0) == IS_LONG) || + (Z_TYPE_PP(entry) == IS_UNICODE && is_numeric_unicode(Z_STRVAL_PP(entry), Z_STRLEN_PP(entry), NULL, NULL, 0) == IS_LONG)) { zval tmp_entry; tmp_entry = **entry; @@ -2542,12 +2625,12 @@ PHP_FUNCTION(array_count_values) continue; } - if (zend_hash_find(Z_ARRVAL_P(return_value), Z_STRVAL_PP(entry), Z_STRLEN_PP(entry)+1, (void**)&tmp) == FAILURE) { + if (zend_u_hash_find(Z_ARRVAL_P(return_value), Z_TYPE_PP(entry), Z_UNIVAL_PP(entry), Z_UNILEN_PP(entry)+1, (void**)&tmp) == FAILURE) { zval *data; MAKE_STD_ZVAL(data); Z_TYPE_P(data) = IS_LONG; Z_LVAL_P(data) = 1; - zend_hash_update(Z_ARRVAL_P(return_value), Z_STRVAL_PP(entry), Z_STRLEN_PP(entry) + 1, &data, sizeof(data), NULL); + zend_u_hash_update(Z_ARRVAL_P(return_value), Z_TYPE_PP(entry), Z_UNIVAL_PP(entry), Z_UNILEN_PP(entry) + 1, &data, sizeof(data), NULL); } else { Z_LVAL_PP(tmp)++; } @@ -2594,11 +2677,21 @@ PHP_FUNCTION(array_reverse) zend_hash_internal_pointer_end_ex(Z_ARRVAL_PP(input), &pos); while (zend_hash_get_current_data_ex(Z_ARRVAL_PP(input), (void **)&entry, &pos) == SUCCESS) { + zend_uchar utype; + (*entry)->refcount++; switch (zend_hash_get_current_key_ex(Z_ARRVAL_PP(input), &string_key, &string_key_len, &num_key, 0, &pos)) { case HASH_KEY_IS_STRING: - zend_hash_update(Z_ARRVAL_P(return_value), string_key, string_key_len, entry, sizeof(zval *), NULL); + utype = IS_STRING; + goto ukey; + case HASH_KEY_IS_BINARY: + utype = IS_BINARY; + goto ukey; + case HASH_KEY_IS_UNICODE: + utype = IS_UNICODE; +ukey: + zend_u_hash_update(Z_ARRVAL_P(return_value), utype, string_key, string_key_len, entry, sizeof(zval *), NULL); break; case HASH_KEY_IS_LONG: @@ -2715,9 +2808,13 @@ PHP_FUNCTION(array_flip) MAKE_STD_ZVAL(data); switch (zend_hash_get_current_key_ex(target_hash, &string_key, &str_key_len, &num_key, 1, &pos)) { case HASH_KEY_IS_STRING: - Z_STRVAL_P(data) = string_key; - Z_STRLEN_P(data) = str_key_len-1; - Z_TYPE_P(data) = IS_STRING; + ZVAL_STRINGL(data, string_key, str_key_len-1, 0); + break; + case HASH_KEY_IS_BINARY: + ZVAL_BINARYL(data, string_key, str_key_len-1, 0); + break; + case HASH_KEY_IS_UNICODE: + ZVAL_UNICODEL(data, string_key, str_key_len-1, 0); break; case HASH_KEY_IS_LONG: Z_TYPE_P(data) = IS_LONG; @@ -2727,8 +2824,10 @@ PHP_FUNCTION(array_flip) if (Z_TYPE_PP(entry) == IS_LONG) { zend_hash_index_update(Z_ARRVAL_P(return_value), Z_LVAL_PP(entry), &data, sizeof(data), NULL); - } else if (Z_TYPE_PP(entry) == IS_STRING) { - zend_symtable_update(Z_ARRVAL_P(return_value), Z_STRVAL_PP(entry), Z_STRLEN_PP(entry) + 1, &data, sizeof(data), NULL); + } else if (Z_TYPE_PP(entry) == IS_STRING || + Z_TYPE_PP(entry) == IS_BINARY || + Z_TYPE_PP(entry) == IS_UNICODE) { + zend_u_symtable_update(Z_ARRVAL_P(return_value), Z_TYPE_PP(entry), Z_UNIVAL_PP(entry), Z_UNILEN_PP(entry) + 1, &data, sizeof(data), NULL); } else { zval_ptr_dtor(&data); /* will free also zval structure */ php_error_docref(NULL TSRMLS_CC, E_WARNING, "Can only flip STRING and INTEGER values!"); @@ -2739,6 +2838,7 @@ PHP_FUNCTION(array_flip) } /* }}} */ + /* {{{ proto array array_change_key_case(array input [, int case=CASE_LOWER]) Retuns an array with all string keys lowercased [or uppercased] */ PHP_FUNCTION(array_change_key_case) @@ -2783,7 +2883,18 @@ PHP_FUNCTION(array_change_key_case) php_strtoupper(new_key, str_key_len - 1); else php_strtolower(new_key, str_key_len - 1); - zend_hash_update(Z_ARRVAL_P(return_value), new_key, str_key_len, entry, sizeof(entry), NULL); + zend_u_hash_update(Z_ARRVAL_P(return_value), IS_STRING, new_key, str_key_len, entry, sizeof(entry), NULL); + efree(new_key); + break; + case HASH_KEY_IS_UNICODE: + new_key=eustrndup(string_key,str_key_len - 1); + str_key_len--; + if (change_to_upper) + new_key = php_u_strtoupper(&new_key, &str_key_len, UG(default_locale)); + else + new_key = php_u_strtolower(&new_key, &str_key_len, UG(default_locale)); + str_key_len++; + zend_u_hash_update(Z_ARRVAL_P(return_value), IS_UNICODE, new_key, str_key_len, entry, sizeof(entry), NULL); efree(new_key); break; } @@ -2848,14 +2959,14 @@ PHP_FUNCTION(array_unique) } else { p = cmpdata->b; } - if (p->nKeyLength) { + if (p->nKeyLength == 0) { + zend_hash_index_del(Z_ARRVAL_P(return_value), p->h); + } else { if (Z_ARRVAL_P(return_value) == &EG(symbol_table)) { - zend_delete_global_variable(p->arKey, p->nKeyLength-1 TSRMLS_CC); + zend_u_delete_global_variable(p->key.type, &p->key.u, p->nKeyLength-1 TSRMLS_CC); } else { - zend_hash_del(Z_ARRVAL_P(return_value), p->arKey, p->nKeyLength); + zend_u_hash_del(Z_ARRVAL_P(return_value), p->key.type, &p->key.u, p->nKeyLength); } - } else { - zend_hash_index_del(Z_ARRVAL_P(return_value), p->h); } } } @@ -3109,10 +3220,10 @@ static void php_array_intersect(INTERNAL_FUNCTION_PARAMETERS, int behavior, int if (!p) { goto out; } - if (p->nKeyLength) { - zend_hash_del(Z_ARRVAL_P(return_value), p->arKey, p->nKeyLength); - } else { + if (p->nKeyLength == 0) { zend_hash_index_del(Z_ARRVAL_P(return_value), p->h); + } else { + zend_u_hash_del(Z_ARRVAL_P(return_value), p->key.type, &p->key.u, p->nKeyLength); } } } @@ -3125,10 +3236,10 @@ static void php_array_intersect(INTERNAL_FUNCTION_PARAMETERS, int behavior, int /* with value < value of ptrs[i] */ for (;;) { p = *ptrs[0]; - if (p->nKeyLength) { - zend_hash_del(Z_ARRVAL_P(return_value), p->arKey, p->nKeyLength); - } else { + if (p->nKeyLength == 0) { zend_hash_index_del(Z_ARRVAL_P(return_value), p->h); + } else { + zend_u_hash_del(Z_ARRVAL_P(return_value), p->key.type, &p->key.u, p->nKeyLength); } if (!*++ptrs[0]) { goto out; @@ -3508,10 +3619,10 @@ static void php_array_diff(INTERNAL_FUNCTION_PARAMETERS, int behavior, int data_ /* delete all entries with value as ptrs[0] */ for (;;) { p = *ptrs[0]; - if (p->nKeyLength) { - zend_hash_del(Z_ARRVAL_P(return_value), p->arKey, p->nKeyLength); - } else { + if (p->nKeyLength == 0) { zend_hash_index_del(Z_ARRVAL_P(return_value), p->h); + } else { + zend_u_hash_del(Z_ARRVAL_P(return_value), p->key.type, &p->key.u, p->nKeyLength); } if (!*++ptrs[0]) { goto out; @@ -3902,6 +4013,10 @@ PHP_FUNCTION(array_rand) if (Z_TYPE_P(return_value) != IS_ARRAY) { if (key_type == HASH_KEY_IS_STRING) { RETURN_STRINGL(string_key, string_key_len-1, 1); + } else if (key_type == HASH_KEY_IS_BINARY) { + RETURN_BINARYL(string_key, string_key_len-1, 1); + } else if (key_type == HASH_KEY_IS_UNICODE) { + RETURN_UNICODEL(string_key, string_key_len-1, 1); } else { RETURN_LONG(num_key); } @@ -3909,6 +4024,10 @@ PHP_FUNCTION(array_rand) /* Append the result to the return value. */ if (key_type == HASH_KEY_IS_STRING) add_next_index_stringl(return_value, string_key, string_key_len-1, 1); + else if (key_type == HASH_KEY_IS_BINARY) + add_next_index_binaryl(return_value, string_key, string_key_len-1, 1); + else if (key_type == HASH_KEY_IS_UNICODE) + add_next_index_unicodel(return_value, string_key, string_key_len-1, 1); else add_next_index_long(return_value, num_key); } @@ -4154,6 +4273,7 @@ PHP_FUNCTION(array_filter) for (zend_hash_internal_pointer_reset_ex(Z_ARRVAL_PP(input), &pos); zend_hash_get_current_data_ex(Z_ARRVAL_PP(input), (void **)&operand, &pos) == SUCCESS; zend_hash_move_forward_ex(Z_ARRVAL_PP(input), &pos)) { + zend_uchar utype; if (callback) { zend_fcall_info fci; @@ -4188,7 +4308,15 @@ PHP_FUNCTION(array_filter) zval_add_ref(operand); switch (zend_hash_get_current_key_ex(Z_ARRVAL_PP(input), &string_key, &string_key_len, &num_key, 0, &pos)) { case HASH_KEY_IS_STRING: - zend_hash_update(Z_ARRVAL_P(return_value), string_key, string_key_len, operand, sizeof(zval *), NULL); + utype = IS_STRING; + goto ukey; + case HASH_KEY_IS_BINARY: + utype = IS_BINARY; + goto ukey; + case HASH_KEY_IS_UNICODE: + utype = IS_UNICODE; +ukey: + zend_u_hash_update(Z_ARRVAL_P(return_value), utype, string_key, string_key_len, operand, sizeof(zval *), NULL); break; case HASH_KEY_IS_LONG: @@ -4346,10 +4474,22 @@ PHP_FUNCTION(array_map) if (ZEND_NUM_ARGS() > 2) { add_next_index_zval(return_value, result); } else { - if (key_type == HASH_KEY_IS_STRING) { - add_assoc_zval_ex(return_value, str_key, str_key_len, result); - } else { - add_index_zval(return_value, num_key, result); + zend_uchar utype; + + switch (key_type) { + case HASH_KEY_IS_STRING: + utype = IS_STRING; + goto ukey; + case HASH_KEY_IS_BINARY: + utype = IS_BINARY; + goto ukey; + case HASH_KEY_IS_UNICODE: + utype = IS_UNICODE; +ukey: + add_u_assoc_zval_ex(return_value, utype, str_key, str_key_len, result); + break; + default: + add_index_zval(return_value, num_key, result); } } } @@ -4382,7 +4522,9 @@ PHP_FUNCTION(array_key_exists) switch (Z_TYPE_PP(key)) { case IS_STRING: - if (zend_symtable_exists(HASH_OF(*array), Z_STRVAL_PP(key), Z_STRLEN_PP(key)+1)) { + case IS_UNICODE: + case IS_BINARY: + if (zend_u_symtable_exists(HASH_OF(*array), Z_TYPE_PP(key), Z_UNIVAL_PP(key), Z_UNILEN_PP(key)+1)) { RETURN_TRUE; } RETURN_FALSE; @@ -4444,12 +4586,25 @@ PHP_FUNCTION(array_chunk) zval_add_ref(entry); if (preserve_keys) { + zend_uchar utype; + key_type = zend_hash_get_current_key_ex(Z_ARRVAL_P(input), &str_key, &str_key_len, &num_key, 0, &pos); - if (key_type == HASH_KEY_IS_STRING) { - add_assoc_zval_ex(chunk, str_key, str_key_len, *entry); - } else { - add_index_zval(chunk, num_key, *entry); + switch (key_type) { + case HASH_KEY_IS_STRING: + utype = IS_STRING; + goto ukey; + case HASH_KEY_IS_BINARY: + utype = IS_BINARY; + goto ukey; + case HASH_KEY_IS_UNICODE: + utype = IS_UNICODE; +ukey: + add_u_assoc_zval_ex(chunk, utype, str_key, str_key_len, *entry); + break; + default: + add_index_zval(chunk, num_key, *entry); + break; } } else { add_next_index_zval(chunk, *entry); diff --git a/ext/standard/assert.c b/ext/standard/assert.c index b8e42ed6df..7100c5c347 100644 --- a/ext/standard/assert.c +++ b/ext/standard/assert.c @@ -137,11 +137,21 @@ PHP_FUNCTION(assert) WRONG_PARAM_COUNT; } - if (Z_TYPE_PP(assertion) == IS_STRING) { + if (Z_TYPE_PP(assertion) == IS_STRING || Z_TYPE_PP(assertion) == IS_UNICODE) { zval retval; + zval tmp; int old_error_reporting = 0; /* shut up gcc! */ - - myeval = Z_STRVAL_PP(assertion); + int free_tmp = 0; + + if (Z_TYPE_PP(assertion) == IS_UNICODE) { + tmp = **assertion; + zval_copy_ctor(&tmp); + convert_to_string(&tmp); + myeval = Z_STRVAL(tmp); + free_tmp = 1; + } else { + myeval = Z_STRVAL_PP(assertion); + } if (ASSERTG(quiet_eval)) { old_error_reporting = EG(error_reporting); @@ -154,6 +164,9 @@ PHP_FUNCTION(assert) php_error_docref(NULL TSRMLS_CC, E_ERROR, "Failure evaluating code:\n%s", myeval); /* php_error_docref() does not return in this case. */ } + if (free_tmp) { + zval_dtor(&tmp); + } efree(compiled_string_description); if (ASSERTG(quiet_eval)) { @@ -184,7 +197,11 @@ PHP_FUNCTION(assert) ZVAL_STRING(args[0], SAFE_STRING(filename), 1); ZVAL_LONG (args[1], lineno); - ZVAL_STRING(args[2], SAFE_STRING(myeval), 1); + if (Z_TYPE_PP(assertion) == IS_UNICODE) { + ZVAL_UNICODEL(args[2], Z_USTRVAL_PP(assertion), Z_USTRLEN_PP(assertion), 1); + } else { + ZVAL_STRING(args[2], SAFE_STRING(myeval), 1); + } MAKE_STD_ZVAL(retval); ZVAL_FALSE(retval); diff --git a/ext/standard/basic_functions.c b/ext/standard/basic_functions.c index 8ebc94371e..966177b710 100644 --- a/ext/standard/basic_functions.c +++ b/ext/standard/basic_functions.c @@ -457,6 +457,7 @@ function_entry basic_functions[] = { PHP_FE(unserialize, NULL) PHP_FE(var_dump, NULL) + PHP_FE(var_inspect, NULL) PHP_FE(var_export, NULL) PHP_FE(debug_zval_dump, NULL) PHP_FE(print_r, NULL) @@ -2039,7 +2040,9 @@ PHP_FUNCTION(call_user_func) RETURN_FALSE; } - if (Z_TYPE_PP(params[0]) != IS_STRING && Z_TYPE_PP(params[0]) != IS_ARRAY) { + if (Z_TYPE_PP(params[0]) != IS_STRING && + Z_TYPE_PP(params[0]) != IS_UNICODE && + Z_TYPE_PP(params[0]) != IS_ARRAY) { SEPARATE_ZVAL(params[0]); convert_to_string_ex(params[0]); } @@ -2094,7 +2097,9 @@ PHP_FUNCTION(call_user_func_array) SEPARATE_ZVAL(params); convert_to_array_ex(params); - if (Z_TYPE_PP(func) != IS_STRING && Z_TYPE_PP(func) != IS_ARRAY) { + if (Z_TYPE_PP(func) != IS_STRING && + Z_TYPE_PP(func) != IS_UNICODE && + Z_TYPE_PP(func) != IS_ARRAY) { SEPARATE_ZVAL(func); convert_to_string_ex(func); } @@ -2157,7 +2162,9 @@ PHP_FUNCTION(call_user_method) efree(params); RETURN_FALSE; } - if (Z_TYPE_PP(params[1]) != IS_OBJECT && Z_TYPE_PP(params[1]) != IS_STRING) { + if (Z_TYPE_PP(params[1]) != IS_OBJECT && + Z_TYPE_PP(params[1]) != IS_STRING && + Z_TYPE_PP(params[1]) != IS_UNICODE) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Second argument is not an object or class name"); efree(params); RETURN_FALSE; @@ -2189,7 +2196,9 @@ PHP_FUNCTION(call_user_method_array) WRONG_PARAM_COUNT; } - if (Z_TYPE_PP(obj) != IS_OBJECT && Z_TYPE_PP(obj) != IS_STRING) { + if (Z_TYPE_PP(obj) != IS_OBJECT && + Z_TYPE_PP(obj) != IS_STRING && + Z_TYPE_PP(obj) != IS_UNICODE) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Second argument is not an object or class name"); RETURN_FALSE; } @@ -2290,7 +2299,7 @@ static void user_tick_function_call(user_tick_function_entry *tick_fe TSRMLS_DC) && zend_hash_index_find(Z_ARRVAL_P(function), 1, (void **) &method) == SUCCESS && Z_TYPE_PP(obj) == IS_OBJECT && Z_TYPE_PP(method) == IS_STRING ) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to call %s::%s() - function does not exist", Z_OBJCE_PP(obj)->name, Z_STRVAL_PP(method)); + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to call %v::%R() - function does not exist", Z_OBJCE_PP(obj)->name, Z_TYPE_PP(method), Z_UNIVAL_PP(method)); } else { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to call tick function"); } @@ -2550,7 +2559,9 @@ static int php_ini_get_option(zend_ini_entry *ini_entry, int num_args, va_list a return 0; } - if (hash_key->nKeyLength == 0 || hash_key->arKey[0] != 0) { + if (hash_key->nKeyLength == 0 || + hash_key->type != IS_STRING || + hash_key->u.string[0] != 0) { MAKE_STD_ZVAL(option); array_init(option); @@ -3230,7 +3241,7 @@ static int copy_request_variable(void *pDest, int num_args, va_list args, zend_h new_key = (char *) emalloc(new_key_len); memcpy(new_key, prefix, prefix_len); - memcpy(new_key+prefix_len, hash_key->arKey, hash_key->nKeyLength); + memcpy(new_key+prefix_len, hash_key->u.string, hash_key->nKeyLength); zend_delete_global_variable(new_key, new_key_len-1 TSRMLS_CC); ZEND_SET_SYMBOL_WITH_LENGTH(&EG(symbol_table), new_key, new_key_len, *var, (*var)->refcount+1, 0); diff --git a/ext/standard/credits.c b/ext/standard/credits.c index bdae216b78..143593d7df 100644 --- a/ext/standard/credits.c +++ b/ext/standard/credits.c @@ -62,6 +62,14 @@ PHPAPI void php_print_credits(int flag TSRMLS_DC) CREDIT_LINE("Streams Abstraction Layer", "Wez Furlong, Sara Golemon"); CREDIT_LINE("PHP Data Objects Layer", "Wez Furlong, Marcus Boerger, Sterling Hughes, George Schlossnagle"); php_info_print_table_end(); + + /* Unicode support */ + php_info_print_table_start(); + php_info_print_table_colspan_header(2, "Unicode Support"); + php_info_print_table_header(2, "Contribution", "Authors"); + CREDIT_LINE("Design and Architecture", "Andrei Zmievski"); + CREDIT_LINE("Zend Engine implementation", "Andrei Zmievski, Dmitry Stogov"); + php_info_print_table_end(); } if (flag & PHP_CREDITS_SAPI) { diff --git a/ext/standard/file.c b/ext/standard/file.c index d5db7323fb..009c9e6bd5 100644 --- a/ext/standard/file.c +++ b/ext/standard/file.c @@ -283,7 +283,9 @@ PHP_MINIT_FUNCTION(file) REGISTER_LONG_CONSTANT("FILE_SKIP_EMPTY_LINES", PHP_FILE_SKIP_EMPTY_LINES, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("FILE_APPEND", PHP_FILE_APPEND, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("FILE_NO_DEFAULT_CONTEXT", PHP_FILE_NO_DEFAULT_CONTEXT, CONST_CS | CONST_PERSISTENT); - + REGISTER_LONG_CONSTANT("FILE_TEXT", PHP_FILE_TEXT, CONST_CS | CONST_PERSISTENT); + REGISTER_LONG_CONSTANT("FILE_BINARY", PHP_FILE_BINARY, CONST_CS | CONST_PERSISTENT); + #ifdef HAVE_FNMATCH REGISTER_LONG_CONSTANT("FNM_NOESCAPE", FNM_NOESCAPE, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("FNM_PATHNAME", FNM_PATHNAME, CONST_CS | CONST_PERSISTENT); @@ -506,6 +508,7 @@ PHP_FUNCTION(get_meta_tags) /* {{{ proto string file_get_contents(string filename [, bool use_include_path [, resource context [, long offset [, long maxlen]]]]) Read the entire file into a string */ +/* UTODO: Accept unicode contents -- Maybe? Perhaps a binary fetch leaving the script to icu_ucnv_toUnicode() on its own is best? */ PHP_FUNCTION(file_get_contents) { char *filename; @@ -571,6 +574,7 @@ PHP_FUNCTION(file_put_contents) long flags = 0; zval *zcontext = NULL; php_stream_context *context = NULL; + char mode[3] = { 'w', 0, 0 }; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sz/|lr!", &filename, &filename_len, &data, &flags, &zcontext) == FAILURE) { @@ -579,7 +583,15 @@ PHP_FUNCTION(file_put_contents) context = php_stream_context_from_zval(zcontext, flags & PHP_FILE_NO_DEFAULT_CONTEXT); - stream = php_stream_open_wrapper_ex(filename, (flags & PHP_FILE_APPEND) ? "ab" : "wb", + if (flags & PHP_FILE_APPEND) { + mode[0] = 'a'; + } + if (flags & PHP_FILE_BINARY) { + mode[1] = 'b'; + } else if (flags & PHP_FILE_TEXT) { + mode[1] = 't'; + } + stream = php_stream_open_wrapper_ex(filename, mode, ((flags & PHP_FILE_USE_INCLUDE_PATH) ? USE_PATH : 0) | ENFORCE_SAFE_MODE | REPORT_ERRORS, NULL, context); if (stream == NULL) { RETURN_FALSE; @@ -599,23 +611,6 @@ PHP_FUNCTION(file_put_contents) break; } - case IS_NULL: - case IS_LONG: - case IS_DOUBLE: - case IS_BOOL: - case IS_CONSTANT: - convert_to_string_ex(&data); - - case IS_STRING: - if (Z_STRLEN_P(data)) { - numbytes = php_stream_write(stream, Z_STRVAL_P(data), Z_STRLEN_P(data)); - if (numbytes != Z_STRLEN_P(data)) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "Only %d of %d bytes written, possibly out of free disk space", numbytes, Z_STRLEN_P(data)); - numbytes = -1; - } - } - break; - case IS_ARRAY: if (zend_hash_num_elements(Z_ARRVAL_P(data))) { int bytes_written; @@ -624,31 +619,88 @@ PHP_FUNCTION(file_put_contents) zend_hash_internal_pointer_reset_ex(Z_ARRVAL_P(data), &pos); while (zend_hash_get_current_data_ex(Z_ARRVAL_P(data), (void **) &tmp, &pos) == SUCCESS) { - if ((*tmp)->type != IS_STRING) { - SEPARATE_ZVAL(tmp); - convert_to_string(*tmp); - } - if (Z_STRLEN_PP(tmp)) { - numbytes += Z_STRLEN_PP(tmp); - bytes_written = php_stream_write(stream, Z_STRVAL_PP(tmp), Z_STRLEN_PP(tmp)); - if (bytes_written < 0 || bytes_written != Z_STRLEN_PP(tmp)) { - if (bytes_written < 0) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to write %d bytes to %s", Z_STRLEN_PP(tmp), filename); - } else { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "Only %d of %d bytes written, possibly out of free disk space", bytes_written, Z_STRLEN_PP(tmp)); - } + if (Z_TYPE_PP(tmp) == IS_UNICODE) { + int wrote_bytes = php_stream_u_write(stream, Z_USTRVAL_PP(tmp), Z_USTRLEN_PP(tmp)); + if (wrote_bytes < 0) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to write %d characters to %s", Z_USTRLEN_PP(tmp), filename); + numbytes = -1; + break; + } else if (wrote_bytes != UBYTES(Z_USTRLEN_PP(tmp))) { + int32_t ustrlen = u_countChar32(Z_USTRVAL_PP(tmp), Z_USTRLEN_PP(tmp)); + int32_t numchars = u_countChar32(Z_USTRVAL_PP(tmp), wrote_bytes / UBYTES(1)); + + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Only %d of %d characters written, possibly out of free disk space", numchars, ustrlen); numbytes = -1; break; } + numbytes += wrote_bytes; + } else { /* non-unicode */ + int free_val = 0; + zval strval = **tmp; + + if (Z_TYPE(strval) != IS_STRING) { + zval_copy_ctor(&strval); + convert_to_string(&strval); + free_val = 1; + } + if (Z_STRLEN(strval)) { + numbytes += Z_STRLEN(strval); + bytes_written = php_stream_write(stream, Z_STRVAL(strval), Z_STRLEN(strval)); + if (bytes_written < 0 || bytes_written != Z_STRLEN(strval)) { + if (bytes_written < 0) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to write %d bytes to %s", Z_STRLEN(strval), filename); + } else { + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Only %d of %d bytes written, possibly out of free disk space", bytes_written, Z_STRLEN(strval)); + } + numbytes = -1; + break; + } + } + if (free_val) { + zval_dtor(&strval); + } } zend_hash_move_forward_ex(Z_ARRVAL_P(data), &pos); } } break; + case IS_OBJECT: + /* TODO */ + php_error_docref(NULL TSRMLS_CC, E_WARNING, "2nd parameter must be non-object (for now)"); + numbytes = -1; + break; + case IS_UNICODE: + if (Z_USTRLEN_P(data)) { + numbytes = php_stream_u_write(stream, Z_USTRVAL_P(data), Z_USTRLEN_P(data)); + if (numbytes < 0) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed to write %d characters to %s", Z_USTRLEN_P(data), filename); + numbytes = -1; + } else if (numbytes != UBYTES(Z_USTRLEN_P(data))) { + int32_t ustrlen = u_countChar32(Z_USTRVAL_P(data), Z_USTRLEN_P(data)); + int32_t numchars = u_countChar32(Z_USTRVAL_P(data), numbytes / UBYTES(1)); + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Only %d of %d characters written, possibly out of free disk space", numchars, ustrlen); + numbytes = -1; + } + } + break; + case IS_NULL: + case IS_LONG: + case IS_DOUBLE: + case IS_BOOL: + case IS_CONSTANT: + case IS_STRING: default: - php_error_docref(NULL TSRMLS_CC, E_WARNING, "The 2nd parameter should be either a string or an array"); - numbytes = -1; + if (Z_TYPE_P(data) != IS_STRING) { + convert_to_string_ex(&data); + } + if (Z_STRLEN_P(data)) { + numbytes = php_stream_write(stream, Z_STRVAL_P(data), Z_STRLEN_P(data)); + if (numbytes != Z_STRLEN_P(data)) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Only %d of %d bytes written, possibly out of free disk space", numbytes, Z_STRLEN_P(data)); + numbytes = -1; + } + } break; } @@ -667,6 +719,7 @@ PHP_FUNCTION(file_put_contents) #define PHP_FILE_BUF_SIZE 80 +/* UTODO: Accept unicode contents */ PHP_FUNCTION(file) { char *filename; @@ -838,7 +891,6 @@ PHP_NAMED_FUNCTION(php_if_fopen) context = php_stream_context_from_zval(zcontext, 0); stream = php_stream_open_wrapper_ex(filename, mode, (use_include_path ? USE_PATH : 0) | ENFORCE_SAFE_MODE | REPORT_ERRORS, NULL, context); - if (stream == NULL) { RETURN_FALSE; } @@ -997,57 +1049,42 @@ PHPAPI PHP_FUNCTION(feof) Get a line from file pointer */ PHPAPI PHP_FUNCTION(fgets) { - zval **arg1, **arg2; - int len = 1024; - char *buf = NULL; - int argc = ZEND_NUM_ARGS(); - size_t line_len = 0; php_stream *stream; + zval *zstream; + int argc = ZEND_NUM_ARGS(); + long length = -1; + UChar *buf = NULL; + int32_t num_chars = -1, num_bytes = -1; + int is_unicode; - if (argc<1 || argc>2 || zend_get_parameters_ex(argc, &arg1, &arg2) == FAILURE) { - WRONG_PARAM_COUNT; + if (zend_parse_parameters(argc TSRMLS_CC, "r|l", &zstream, &length) == FAILURE) { + RETURN_NULL(); } - PHP_STREAM_TO_ZVAL(stream, arg1); + php_stream_from_zval(stream, &zstream); - if (argc == 1) { - /* ask streams to give us a buffer of an appropriate size */ - buf = php_stream_get_line(stream, NULL, 0, &line_len); - if (buf == NULL) { - goto exit_failed; - } - } else if (argc > 1) { - convert_to_long_ex(arg2); - len = Z_LVAL_PP(arg2); - - if (len <= 0) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "Length parameter must be greater than 0"); - RETURN_FALSE; - } + if (length > 0) { + /* Don't try to short circuit this by just using num_chars in parse_parameters, long doesn't always mean 32-bit */ + num_chars = length; + } - buf = ecalloc(len + 1, sizeof(char)); - if (php_stream_get_line(stream, buf, len, &line_len) == NULL) { - goto exit_failed; - } + if ((buf = php_stream_u_get_line(stream, NULL, &num_bytes, &num_chars, &is_unicode)) == NULL) { + RETURN_FALSE; } - - if (PG(magic_quotes_runtime)) { - Z_STRVAL_P(return_value) = php_addslashes(buf, line_len, &Z_STRLEN_P(return_value), 1 TSRMLS_CC); - Z_TYPE_P(return_value) = IS_STRING; + + if (is_unicode) { + /* UTODO: magic_quotes_runtime */ + RETURN_UNICODEL(buf, num_chars, 0); } else { - ZVAL_STRINGL(return_value, buf, line_len, 0); - /* resize buffer if it's much larger than the result. - * Only needed if the user requested a buffer size. */ - if (argc > 1 && Z_STRLEN_P(return_value) < len / 2) { - Z_STRVAL_P(return_value) = erealloc(buf, line_len + 1); - } - } - return; + if (PG(magic_quotes_runtime)) { + int len; + char *str; -exit_failed: - RETVAL_FALSE; - if (buf) { - efree(buf); + str = php_addslashes((char*)buf, num_bytes, &len, 1 TSRMLS_CC); + RETURN_STRINGL(str, len, 0); + } else { + RETURN_STRINGL((char*)buf, num_bytes, 0); + } } } /* }}} */ @@ -1057,9 +1094,10 @@ exit_failed: PHPAPI PHP_FUNCTION(fgetc) { zval **arg1; - char buf[2]; - int result; + char buf[2 * sizeof(UChar)]; + int is_unicode; php_stream *stream; + int32_t num_bytes = UBYTES(2), num_chars = 1; if (ZEND_NUM_ARGS() != 1 || zend_get_parameters_ex(1, &arg1) == FAILURE) { WRONG_PARAM_COUNT; @@ -1067,21 +1105,25 @@ PHPAPI PHP_FUNCTION(fgetc) PHP_STREAM_TO_ZVAL(stream, arg1); - result = php_stream_getc(stream); - - if (result == EOF) { + if (!php_stream_u_read(stream, buf, &num_bytes, &num_chars, &is_unicode)) { RETVAL_FALSE; } else { - buf[0] = result; - buf[1] = '\0'; - - RETURN_STRINGL(buf, 1, 1); + if (is_unicode) { + UChar *ubuf = buf; + int32_t num_u16 = num_bytes >> 1; + ubuf[num_u16] = 0; + RETURN_UNICODEL(ubuf, num_u16, 0); + } else { + buf[1] = 0; + RETURN_STRINGL(buf, 1, 0); + } } } /* }}} */ /* {{{ proto string fgetss(resource fp [, int length, string allowable_tags]) Get a line from file pointer and strip HTML tags */ +/* UTODO: Accept unicode contents */ PHPAPI PHP_FUNCTION(fgetss) { zval **fd, **bytes = NULL, **allow=NULL; @@ -1150,6 +1192,7 @@ PHPAPI PHP_FUNCTION(fgetss) /* {{{ proto mixed fscanf(resource stream, string format [, string ...]) Implements a mostly ANSI compatible fscanf() */ +/* UTODO: Accept unicode contents */ PHP_FUNCTION(fscanf) { int result; @@ -1213,50 +1256,60 @@ PHP_FUNCTION(fscanf) Binary-safe file write */ PHPAPI PHP_FUNCTION(fwrite) { - zval **arg1, **arg2, **arg3=NULL; - int ret; - int num_bytes; - char *buffer = NULL; + int ret, argc = ZEND_NUM_ARGS(); + long write_len = -1; php_stream *stream; + zval *zstream, *zstring; - switch (ZEND_NUM_ARGS()) { - case 2: - if (zend_get_parameters_ex(2, &arg1, &arg2)==FAILURE) { - RETURN_FALSE; - } - convert_to_string_ex(arg2); - num_bytes = Z_STRLEN_PP(arg2); - break; - - case 3: - if (zend_get_parameters_ex(3, &arg1, &arg2, &arg3)==FAILURE) { - RETURN_FALSE; - } - convert_to_string_ex(arg2); - convert_to_long_ex(arg3); - num_bytes = MAX(0, MIN(Z_LVAL_PP(arg3), Z_STRLEN_PP(arg2))); - break; - - default: - WRONG_PARAM_COUNT; - /* NOTREACHED */ - break; + if (zend_parse_parameters(argc TSRMLS_CC, "rz|l", &zstream, &zstring, &write_len) == FAILURE) { + RETURN_NULL(); } - if (!num_bytes) { + if (!write_len) { RETURN_LONG(0); } - PHP_STREAM_TO_ZVAL(stream, arg1); + php_stream_from_zval(stream, &zstream); - if (PG(magic_quotes_runtime)) { - buffer = estrndup(Z_STRVAL_PP(arg2), num_bytes); - php_stripslashes(buffer, &num_bytes TSRMLS_CC); - } + if (Z_TYPE_P(zstring) == IS_UNICODE) { + if (write_len >= 0) { + /* Convert code units to data points */ + int32_t write_uchars = 0; - ret = php_stream_write(stream, buffer ? buffer : Z_STRVAL_PP(arg2), num_bytes); - if (buffer) { - efree(buffer); + U16_FWD_N(Z_USTRVAL_P(zstring), write_uchars, Z_USTRLEN_P(zstring), write_len); + write_len = write_uchars; + } + + if (write_len < 0 || write_len > Z_USTRLEN_P(zstring)) { + write_len = Z_USTRLEN_P(zstring); + } + + /* UTODO Handle magic_quotes_runtime for unicode strings */ + + ret = php_stream_u_write(stream, Z_USTRVAL_P(zstring), write_len); + + /* Convert data points back to code units */ + if (ret > 0) { + ret = u_countChar32(Z_USTRVAL_P(zstring), ret); + } + } else { + char *buffer = NULL; + int num_bytes; + + convert_to_string(zstring); + if (write_len < 0 || write_len > Z_STRLEN_P(zstring)) { + write_len = Z_STRLEN_P(zstring); + } + + num_bytes = write_len; + if (argc < 3 && PG(magic_quotes_runtime)) { + buffer = estrndup(Z_STRVAL_P(zstring), num_bytes); + php_stripslashes(buffer, &num_bytes TSRMLS_CC); + } + ret = php_stream_write(stream, buffer ? buffer : Z_STRVAL_P(zstring), num_bytes); + if (buffer) { + efree(buffer); + } } RETURN_LONG(ret); @@ -1417,6 +1470,7 @@ PHP_FUNCTION(rmdir) /* {{{ proto int readfile(string filename [, bool use_include_path[, resource context]]) Output a file or a URL */ +/* UTODO: Accept unicode contents */ PHP_FUNCTION(readfile) { char *filename; @@ -1472,6 +1526,7 @@ PHP_FUNCTION(umask) /* {{{ proto int fpassthru(resource fp) Output all remaining data from a file pointer */ +/* UTODO: Accept unicode contents */ PHPAPI PHP_FUNCTION(fpassthru) { zval **arg1; @@ -1781,34 +1836,46 @@ safe_to_copy: Binary-safe file read */ PHPAPI PHP_FUNCTION(fread) { - zval **arg1, **arg2; - int len; + zval *zstream; + char *buf; + long len; php_stream *stream; - - if (ZEND_NUM_ARGS() != 2 || zend_get_parameters_ex(2, &arg1, &arg2) == FAILURE) { - WRONG_PARAM_COUNT; + int is_unicode; + int32_t num_bytes, num_chars; + + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "rl", &zstream, &len) == FAILURE) { + RETURN_NULL(); } - PHP_STREAM_TO_ZVAL(stream, arg1); + php_stream_from_zval(stream, &zstream); - convert_to_long_ex(arg2); - len = Z_LVAL_PP(arg2); if (len <= 0) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Length parameter must be greater than 0"); RETURN_FALSE; } - Z_STRVAL_P(return_value) = emalloc(len + 1); - Z_STRLEN_P(return_value) = php_stream_read(stream, Z_STRVAL_P(return_value), len); + num_chars = len; + num_bytes = UBYTES(len); + buf = emalloc(num_bytes + UBYTES(1)); - /* needed because recv/read/gzread doesnt put a null at the end*/ - Z_STRVAL_P(return_value)[Z_STRLEN_P(return_value)] = 0; + if (!php_stream_u_read(stream, buf, &num_bytes, &num_chars, &is_unicode)) { + efree(buf); + RETURN_FALSE; + } + + if (is_unicode) { + /* UTODO - magic_quotes_runtime */ - if (PG(magic_quotes_runtime)) { - Z_STRVAL_P(return_value) = php_addslashes(Z_STRVAL_P(return_value), - Z_STRLEN_P(return_value), &Z_STRLEN_P(return_value), 1 TSRMLS_CC); + buf[num_bytes] = 0; + buf[num_bytes + 1] = 0; + RETURN_UNICODEL(buf, num_bytes >> 1, 0); + } else { + buf[num_bytes] = 0; + if (PG(magic_quotes_runtime)) { + buf = php_addslashes(buf, num_bytes, &num_bytes, 1 TSRMLS_CC); + } + RETURN_STRINGL(buf, num_bytes, 0); } - Z_TYPE_P(return_value) = IS_STRING; } /* }}} */ @@ -1853,6 +1920,7 @@ quit_loop: /* {{{ proto int fputcsv(resource fp, array fields [, string delimiter [, string enclosure]]) Format line as CSV and write to file pointer */ +/* UTODO: Output unicode contents */ PHP_FUNCTION(fputcsv) { char delimiter = ','; /* allow this to be set as parameter */ @@ -1953,6 +2021,7 @@ PHP_FUNCTION(fputcsv) /* {{{ proto array fgetcsv(resource fp [,int length [, string delimiter [, string enclosure]]]) Get line from file pointer and parse for CSV fields */ +/* UTODO: Accept unicode contents */ PHP_FUNCTION(fgetcsv) { char *temp, *tptr, *bptr, *line_end, *limit; diff --git a/ext/standard/file.h b/ext/standard/file.h index 2f145696c5..226f61ed4b 100644 --- a/ext/standard/file.h +++ b/ext/standard/file.h @@ -82,6 +82,10 @@ PHPAPI int php_mkdir(char *dir, long mode TSRMLS_DC); #define PHP_FILE_APPEND 8 #define PHP_FILE_NO_DEFAULT_CONTEXT 16 +/* Specified as explicit values so that file_put_contents() can override context param default mode */ +#define PHP_FILE_TEXT 32 +#define PHP_FILE_BINARY 64 + typedef enum _php_meta_tags_token { TOK_EOF = 0, TOK_OPENTAG, diff --git a/ext/standard/filters.c b/ext/standard/filters.c index adad37cf61..bc38389033 100644 --- a/ext/standard/filters.c +++ b/ext/standard/filters.c @@ -44,11 +44,14 @@ static php_stream_filter_status_t strfilter_rot13_filter( while (buckets_in->head) { bucket = php_stream_bucket_make_writeable(buckets_in->head TSRMLS_CC); - - php_strtr(bucket->buf, bucket->buflen, rot13_from, rot13_to, 52); - consumed += bucket->buflen; - + + if (bucket->is_unicode) { + /* rot13 is silly enough, don't apply it to unicode data */ + return PSFS_ERR_FATAL; + } + php_strtr(bucket->buf.str.val, bucket->buf.str.len, rot13_from, rot13_to, 52); php_stream_bucket_append(buckets_out, bucket TSRMLS_CC); + consumed += bucket->buf.str.len; } if (bytes_consumed) { @@ -61,7 +64,8 @@ static php_stream_filter_status_t strfilter_rot13_filter( static php_stream_filter_ops strfilter_rot13_ops = { strfilter_rot13_filter, NULL, - "string.rot13" + "string.rot13", + PSFO_FLAG_ACCEPTS_STRING | PSFO_FLAG_OUTPUTS_STRING }; static php_stream_filter *strfilter_rot13_create(const char *filtername, zval *filterparams, int persistent TSRMLS_DC) @@ -91,18 +95,48 @@ static php_stream_filter_status_t strfilter_toupper_filter( size_t consumed = 0; while (buckets_in->head) { - bucket = php_stream_bucket_make_writeable(buckets_in->head TSRMLS_CC); - - php_strtr(bucket->buf, bucket->buflen, lowercase, uppercase, 26); - consumed += bucket->buflen; - + bucket = buckets_in->head; + if (bucket->is_unicode) { + UErrorCode errCode = U_ZERO_ERROR; + int32_t outbuflen = bucket->buf.ustr.len; + int is_persistent = php_stream_is_persistent(stream); + UChar *outbuf = peumalloc(outbuflen + 1, is_persistent); + + php_stream_bucket_unlink(bucket TSRMLS_CC); + while (1) { + if (!outbuf) { + php_stream_bucket_delref(bucket TSRMLS_CC); + return PSFS_ERR_FATAL; + } + u_strToUpper(outbuf, outbuflen, bucket->buf.ustr.val, bucket->buf.ustr.len, NULL, &errCode); + if (errCode != U_BUFFER_OVERFLOW_ERROR) { + break; + } + outbuflen += 4; + outbuf = peurealloc(outbuf, outbuflen + 1, is_persistent); + consumed += UBYTES(bucket->buf.ustr.len); + } + if (U_FAILURE(errCode)) { + pefree(outbuf, is_persistent); + php_stream_bucket_delref(bucket TSRMLS_CC); + return PSFS_ERR_FATAL; + } + php_stream_bucket_delref(bucket TSRMLS_CC); + + outbuf[outbuflen] = 0; + bucket = php_stream_bucket_new_unicode(stream, outbuf, outbuflen, 1, is_persistent TSRMLS_CC); + } else { + bucket = php_stream_bucket_make_writeable(buckets_in->head TSRMLS_CC); + php_strtr(bucket->buf.str.val, bucket->buf.str.len, lowercase, uppercase, 26); + consumed += bucket->buf.str.len; + } php_stream_bucket_append(buckets_out, bucket TSRMLS_CC); } if (bytes_consumed) { *bytes_consumed = consumed; } - + return PSFS_PASS_ON; } @@ -119,31 +153,63 @@ static php_stream_filter_status_t strfilter_tolower_filter( size_t consumed = 0; while (buckets_in->head) { - bucket = php_stream_bucket_make_writeable(buckets_in->head TSRMLS_CC); - - php_strtr(bucket->buf, bucket->buflen, uppercase, lowercase, 26); - consumed += bucket->buflen; - + bucket = buckets_in->head; + if (bucket->is_unicode) { + UErrorCode errCode = U_ZERO_ERROR; + int32_t outbuflen = bucket->buf.ustr.len; + int is_persistent = php_stream_is_persistent(stream); + UChar *outbuf = peumalloc(outbuflen + 1, is_persistent); + + php_stream_bucket_unlink(bucket TSRMLS_CC); + while (1) { + if (!outbuf) { + php_stream_bucket_delref(bucket TSRMLS_CC); + return PSFS_ERR_FATAL; + } + u_strToLower(outbuf, outbuflen, bucket->buf.ustr.val, bucket->buf.ustr.len, NULL, &errCode); + if (errCode != U_BUFFER_OVERFLOW_ERROR) { + break; + } + outbuflen += 4; + outbuf = peurealloc(outbuf, outbuflen + 1, is_persistent); + consumed += UBYTES(bucket->buf.ustr.len); + } + if (U_FAILURE(errCode)) { + pefree(outbuf, is_persistent); + php_stream_bucket_delref(bucket TSRMLS_CC); + return PSFS_ERR_FATAL; + } + php_stream_bucket_delref(bucket TSRMLS_CC); + + outbuf[outbuflen] = 0; + bucket = php_stream_bucket_new_unicode(stream, outbuf, outbuflen, 1, is_persistent TSRMLS_CC); + } else { + bucket = php_stream_bucket_make_writeable(buckets_in->head TSRMLS_CC); + php_strtr(bucket->buf.str.val, bucket->buf.str.len, uppercase, lowercase, 26); + consumed += bucket->buf.str.len; + } php_stream_bucket_append(buckets_out, bucket TSRMLS_CC); } if (bytes_consumed) { *bytes_consumed = consumed; } - + return PSFS_PASS_ON; } static php_stream_filter_ops strfilter_toupper_ops = { strfilter_toupper_filter, NULL, - "string.toupper" + "string.toupper", + PSFO_FLAG_OUTPUTS_SAME }; static php_stream_filter_ops strfilter_tolower_ops = { strfilter_tolower_filter, NULL, - "string.tolower" + "string.tolower", + PSFO_FLAG_OUTPUTS_SAME }; static php_stream_filter *strfilter_toupper_create(const char *filtername, zval *filterparams, int persistent TSRMLS_DC) @@ -165,6 +231,8 @@ static php_stream_filter_factory strfilter_tolower_factory = { }; /* }}} */ +/* UTODO: Extend to handle unicode data */ + /* {{{ strip_tags filter implementation */ typedef struct _php_strip_tags_filter { const char *allowed_tags; @@ -211,10 +279,15 @@ static php_stream_filter_status_t strfilter_strip_tags_filter( php_strip_tags_filter *inst = (php_strip_tags_filter *) thisfilter->abstract; while (buckets_in->head) { + if (bucket->is_unicode) { + /* Uh oh! */ + return PSFS_ERR_FATAL; + } + bucket = php_stream_bucket_make_writeable(buckets_in->head TSRMLS_CC); - consumed = bucket->buflen; + consumed = bucket->buf.str.len; - bucket->buflen = php_strip_tags(bucket->buf, bucket->buflen, &(inst->state), (char *)inst->allowed_tags, inst->allowed_tags_len); + bucket->buf.str.len = php_strip_tags(bucket->buf.str.val, bucket->buf.str.len, &(inst->state), (char *)inst->allowed_tags, inst->allowed_tags_len); php_stream_bucket_append(buckets_out, bucket TSRMLS_CC); } @@ -238,7 +311,8 @@ static void strfilter_strip_tags_dtor(php_stream_filter *thisfilter TSRMLS_DC) static php_stream_filter_ops strfilter_strip_tags_ops = { strfilter_strip_tags_filter, strfilter_strip_tags_dtor, - "string.strip_tags" + "string.strip_tags", + PSFO_FLAG_ACCEPTS_STRING | PSFO_FLAG_OUTPUTS_STRING }; static php_stream_filter *strfilter_strip_tags_create(const char *filtername, zval *filterparams, int persistent TSRMLS_DC) @@ -1698,11 +1772,15 @@ static php_stream_filter_status_t strfilter_convert_filter( while (buckets_in->head != NULL) { bucket = buckets_in->head; + if (bucket->is_unicode) { + /* Not a unicode capable filter */ + return PSFS_ERR_FATAL; + } php_stream_bucket_unlink(bucket TSRMLS_CC); if (strfilter_convert_append_bucket(inst, stream, thisfilter, - buckets_out, bucket->buf, bucket->buflen, &consumed, + buckets_out, bucket->buf.str.val, bucket->buf.str.len, &consumed, php_stream_is_persistent(stream) TSRMLS_CC) != SUCCESS) { goto out_failure; } @@ -1742,7 +1820,8 @@ static void strfilter_convert_dtor(php_stream_filter *thisfilter TSRMLS_DC) static php_stream_filter_ops strfilter_convert_ops = { strfilter_convert_filter, strfilter_convert_dtor, - "convert.*" + "convert.*", + PSFO_FLAG_ACCEPTS_STRING | PSFO_FLAG_OUTPUTS_STRING }; static php_stream_filter *strfilter_convert_create(const char *filtername, zval *filterparams, int persistent TSRMLS_DC) diff --git a/ext/standard/info.c b/ext/standard/info.c index c1d8d92d27..ab36df5691 100644 --- a/ext/standard/info.c +++ b/ext/standard/info.c @@ -48,6 +48,8 @@ ZEND_EXTERN_MODULE_GLOBALS(mbstring) ZEND_EXTERN_MODULE_GLOBALS(iconv) #endif +#include <unicode/uversion.h> + #define SECTION(name) if (!sapi_module.phpinfo_as_text) { \ PUTS("<h2>" name "</h2>\n"); \ } else { \ @@ -475,6 +477,11 @@ PHPAPI void php_print_info(int flag TSRMLS_DC) php_info_print_table_row(2, "Zend Memory Manager", "disabled" ); #endif + { + char buf[1024]; + snprintf(buf, sizeof(buf), "Based on%s. ICU Version %s.", U_COPYRIGHT_STRING, U_ICU_VERSION); + php_info_print_table_row(2, "Unicode Support", buf); + } #if HAVE_IPV6 php_info_print_table_row(2, "IPv6 Support", "enabled" ); #else diff --git a/ext/standard/php_string.h b/ext/standard/php_string.h index e2a1fa284b..22b447ec17 100644 --- a/ext/standard/php_string.h +++ b/ext/standard/php_string.h @@ -117,6 +117,8 @@ PHPAPI struct lconv *localeconv_r(struct lconv *out); PHPAPI char *php_strtoupper(char *s, size_t len); PHPAPI char *php_strtolower(char *s, size_t len); +PHPAPI UChar *php_u_strtoupper(UChar **s, int32_t *len, const char *locale); +PHPAPI UChar *php_u_strtolower(UChar **s, int32_t *len, const char *locale); PHPAPI char *php_strtr(char *str, int len, char *str_from, char *str_to, int trlen); PHPAPI char *php_addslashes(char *str, int length, int *new_length, int freeit TSRMLS_DC); PHPAPI char *php_addslashes_ex(char *str, int length, int *new_length, int freeit, int ignore_sybase TSRMLS_DC); @@ -135,7 +137,7 @@ PHPAPI size_t php_strip_tags(char *rbuf, int len, int *state, char *allow, int a PHPAPI int php_char_to_str_ex(char *str, uint len, char from, char *to, int to_len, pval *result, int case_sensitivity, int *replace_count); PHPAPI int php_char_to_str(char *str, uint len, char from, char *to, int to_len, pval *result); PHPAPI void php_implode(zval *delim, zval *arr, zval *return_value); -PHPAPI void php_explode(zval *delim, zval *str, zval *return_value, int limit); +PHPAPI void php_explode(char *delim, uint delim_len, char *str, uint str_len, zend_uchar str_type, zval *return_value, int limit); PHPAPI size_t php_strspn(char *s1, char *s2, char *s1_end, char *s2_end); PHPAPI size_t php_strcspn(char *s1, char *s2, char *s1_end, char *s2_end); diff --git a/ext/standard/php_var.h b/ext/standard/php_var.h index d3bbcc9e8d..a65cb32c97 100644 --- a/ext/standard/php_var.h +++ b/ext/standard/php_var.h @@ -24,6 +24,7 @@ #include "ext/standard/php_smart_str_public.h" PHP_FUNCTION(var_dump); +PHP_FUNCTION(var_inspect); PHP_FUNCTION(var_export); PHP_FUNCTION(debug_zval_dump); PHP_FUNCTION(serialize); @@ -32,9 +33,9 @@ PHP_FUNCTION(unserialize); PHP_FUNCTION(memory_get_usage); #endif -PHPAPI void php_var_dump(zval **struc, int level TSRMLS_DC); +PHPAPI void php_var_dump(zval **struc, int level, int verbose TSRMLS_DC); PHPAPI void php_var_export(zval **struc, int level TSRMLS_DC); -PHPAPI void php_debug_zval_dump(zval **struc, int level TSRMLS_DC); +PHPAPI void php_debug_zval_dump(zval **struc, int level, int verbose TSRMLS_DC); /* typdef HashTable php_serialize_data_t; */ #define php_serialize_data_t HashTable diff --git a/ext/standard/streamsfuncs.c b/ext/standard/streamsfuncs.c index 3ac73b30a7..9b5f5ab0e0 100644 --- a/ext/standard/streamsfuncs.c +++ b/ext/standard/streamsfuncs.c @@ -476,22 +476,33 @@ PHP_FUNCTION(stream_get_meta_data) add_assoc_string(return_value, "mode", stream->mode, 1); -#if 0 /* TODO: needs updating for new filter API */ - if (stream->filterhead) { + if (stream->readfilters.head) { php_stream_filter *filter; MAKE_STD_ZVAL(newval); array_init(newval); - for (filter = stream->filterhead; filter != NULL; filter = filter->next) { + for (filter = stream->readfilters.head; filter != NULL; filter = filter->next) { add_next_index_string(newval, (char *)filter->fops->label, 1); } - add_assoc_zval(return_value, "filters", newval); + add_assoc_zval(return_value, "read_filters", newval); + } + + if (stream->writefilters.head) { + php_stream_filter *filter; + + MAKE_STD_ZVAL(newval); + array_init(newval); + + for (filter = stream->writefilters.head; filter != NULL; filter = filter->next) { + add_next_index_string(newval, (char *)filter->fops->label, 1); + } + + add_assoc_zval(return_value, "write_filters", newval); } -#endif - add_assoc_long(return_value, "unread_bytes", stream->writepos - stream->readpos); + add_assoc_long(return_value, "unread_bytes", stream->readbuf_avail); add_assoc_bool(return_value, "seekable", (stream->ops->seek) && (stream->flags & PHP_STREAM_FLAG_NO_SEEK) == 0); if (stream->orig_path) { @@ -668,7 +679,7 @@ static int stream_array_emulate_read_fd_set(zval *stream_array TSRMLS_DC) if (stream == NULL) { continue; } - if ((stream->writepos - stream->readpos) > 0) { + if ((stream->readbuf_avail) > 0) { /* allow readable non-descriptor based streams to participate in stream_select. * Non-descriptor streams will only "work" if they have previously buffered the * data. Not ideal, but better than nothing. @@ -835,7 +846,7 @@ static void user_space_stream_notifier_dtor(php_stream_notifier *notifier) } } -static int parse_context_options(php_stream_context *context, zval *options) +static int parse_context_options(php_stream_context *context, zval *options TSRMLS_DC) { HashPosition pos, opos; zval **wval, **oval; @@ -846,20 +857,37 @@ static int parse_context_options(php_stream_context *context, zval *options) zend_hash_internal_pointer_reset_ex(Z_ARRVAL_P(options), &pos); while (SUCCESS == zend_hash_get_current_data_ex(Z_ARRVAL_P(options), (void**)&wval, &pos)) { - if (HASH_KEY_IS_STRING == zend_hash_get_current_key_ex(Z_ARRVAL_P(options), &wkey, &wkey_len, &num_key, 0, &pos) + int wtype = zend_hash_get_current_key_ex(Z_ARRVAL_P(options), &wkey, &wkey_len, &num_key, 0, &pos); + if (((HASH_KEY_IS_STRING == wtype) || (HASH_KEY_IS_UNICODE == wtype)) && Z_TYPE_PP(wval) == IS_ARRAY) { + if (HASH_KEY_IS_UNICODE == wtype) { + /* fold to string */ + UErrorCode errCode = 0; + + zend_convert_from_unicode(ZEND_U_CONVERTER(UG(runtime_encoding_conv)), &wkey, &wkey_len, (UChar*)wkey, wkey_len, &errCode); + } zend_hash_internal_pointer_reset_ex(Z_ARRVAL_PP(wval), &opos); while (SUCCESS == zend_hash_get_current_data_ex(Z_ARRVAL_PP(wval), (void**)&oval, &opos)) { - - if (HASH_KEY_IS_STRING == zend_hash_get_current_key_ex(Z_ARRVAL_PP(wval), &okey, &okey_len, &num_key, 0, &opos)) { + int otype = zend_hash_get_current_key_ex(Z_ARRVAL_PP(wval), &okey, &okey_len, &num_key, 0, &opos); + if (HASH_KEY_IS_UNICODE == otype) { + /* fold to string */ + UErrorCode errCode = 0; + + zend_convert_from_unicode(ZEND_U_CONVERTER(UG(runtime_encoding_conv)), &okey, &okey_len, (UChar*)okey, okey_len, &errCode); + php_stream_context_set_option(context, wkey, okey, *oval); + efree(okey); + } + if (HASH_KEY_IS_STRING == otype) { php_stream_context_set_option(context, wkey, okey, *oval); } zend_hash_move_forward_ex(Z_ARRVAL_PP(wval), &opos); } - + if (wtype == HASH_KEY_IS_UNICODE) { + efree(wkey); + } } else { - zend_error(E_WARNING, "options should have the form [\"wrappername\"][\"optionname\"] = $value"); + php_error_docref(NULL TSRMLS_CC, E_WARNING, "options should have the form [\"wrappername\"][\"optionname\"] = $value"); } zend_hash_move_forward_ex(Z_ARRVAL_P(options), &pos); } @@ -867,12 +895,24 @@ static int parse_context_options(php_stream_context *context, zval *options) return ret; } -static int parse_context_params(php_stream_context *context, zval *params) +static int parse_context_params(php_stream_context *context, zval *params TSRMLS_DC) { int ret = SUCCESS; zval **tmp; - - if (SUCCESS == zend_hash_find(Z_ARRVAL_P(params), "notification", sizeof("notification"), (void**)&tmp)) { + U_STRING_DECL(u_notification, "notification", 12); + U_STRING_DECL(u_options, "options", 7); + U_STRING_DECL(u_input_encoding, "input_encoding", 14); + U_STRING_DECL(u_output_encoding, "output_encoding", 15); + U_STRING_DECL(u_default_mode, "default_mode", 12); + + U_STRING_INIT(u_notification, "notification", 12); + U_STRING_INIT(u_options, "options", 7); + U_STRING_INIT(u_input_encoding, "input_encoding", 14); + U_STRING_INIT(u_output_encoding, "output_encoding", 15); + U_STRING_INIT(u_default_mode, "default_mode", 12); + + if (SUCCESS == zend_hash_find(Z_ARRVAL_P(params), "notification", sizeof("notification"), (void**)&tmp) || + SUCCESS == zend_u_hash_find(Z_ARRVAL_P(params), IS_UNICODE, u_notification, sizeof("notification"), (void**)&tmp)) { if (context->notifier) { php_stream_notification_free(context->notifier); @@ -885,10 +925,43 @@ static int parse_context_params(php_stream_context *context, zval *params) ZVAL_ADDREF(*tmp); context->notifier->dtor = user_space_stream_notifier_dtor; } - if (SUCCESS == zend_hash_find(Z_ARRVAL_P(params), "options", sizeof("options"), (void**)&tmp)) { - parse_context_options(context, *tmp); + if (SUCCESS == zend_hash_find(Z_ARRVAL_P(params), "options", sizeof("options"), (void**)&tmp) || + SUCCESS == zend_u_hash_find(Z_ARRVAL_P(params), IS_UNICODE, u_options, sizeof("options"), (void**)&tmp)) { + parse_context_options(context, *tmp TSRMLS_CC); + } + if (SUCCESS == zend_hash_find(Z_ARRVAL_P(params), "input_encoding", sizeof("input_encoding"), (void**)&tmp) || + SUCCESS == zend_u_hash_find(Z_ARRVAL_P(params), IS_UNICODE, u_input_encoding, sizeof("input_encoding"), (void**)&tmp)) { + zval strval = **tmp; + + if (context->input_encoding) { + efree(context->input_encoding); + } + + zval_copy_ctor(&strval); + convert_to_string(&strval); + context->input_encoding = Z_STRVAL(strval); + } + if (SUCCESS == zend_hash_find(Z_ARRVAL_P(params), "output_encoding", sizeof("output_encoding"), (void**)&tmp) || + SUCCESS == zend_u_hash_find(Z_ARRVAL_P(params), IS_UNICODE, u_output_encoding, sizeof("output_encoding"), (void**)&tmp)) { + zval strval = **tmp; + + if (context->output_encoding) { + efree(context->output_encoding); + } + + zval_copy_ctor(&strval); + convert_to_string(&strval); + context->output_encoding = Z_STRVAL(strval); + } + if (SUCCESS == zend_hash_find(Z_ARRVAL_P(params), "default_mode", sizeof("default_mode"), (void**)&tmp) || + SUCCESS == zend_u_hash_find(Z_ARRVAL_P(params), IS_UNICODE, u_default_mode, sizeof("default_mode"), (void**)&tmp)) { + zval longval = **tmp; + + zval_copy_ctor(&longval); + convert_to_long(&longval); + context->default_mode = Z_LVAL(longval); + zval_dtor(&longval); } - return ret; } @@ -969,7 +1042,7 @@ PHP_FUNCTION(stream_context_set_option) if (options) { /* handle the array syntax */ - RETVAL_BOOL(parse_context_options(context, options) == SUCCESS); + RETVAL_BOOL(parse_context_options(context, options TSRMLS_CC) == SUCCESS); } else { php_stream_context_set_option(context, wrappername, optionname, zvalue); RETVAL_TRUE; @@ -994,7 +1067,7 @@ PHP_FUNCTION(stream_context_set_params) RETURN_FALSE; } - RETVAL_BOOL(parse_context_params(context, params) == SUCCESS); + RETVAL_BOOL(parse_context_params(context, params TSRMLS_CC) == SUCCESS); } /* }}} */ @@ -1015,7 +1088,7 @@ PHP_FUNCTION(stream_context_get_default) context = FG(default_context); if (params) { - parse_context_options(context, params); + parse_context_options(context, params TSRMLS_CC); } php_stream_context_to_zval(context, return_value); @@ -1036,7 +1109,7 @@ PHP_FUNCTION(stream_context_create) context = php_stream_context_alloc(); if (params) { - parse_context_options(context, params); + parse_context_options(context, params TSRMLS_CC); } php_stream_context_to_zval(context, return_value); @@ -1081,11 +1154,14 @@ static void apply_filter_to_stream(int append, INTERNAL_FUNCTION_PARAMETERS) RETURN_FALSE; } - if (append) { + if (append) { php_stream_filter_append(&stream->readfilters, filter); } else { php_stream_filter_prepend(&stream->readfilters, filter); } + if (FAILURE == php_stream_filter_check_chain(&stream->readfilters)) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Readfilter chain unstable -- unresolvable unicode/string conversion conflict"); + } } if (read_write & PHP_STREAM_FILTER_WRITE) { @@ -1099,6 +1175,9 @@ static void apply_filter_to_stream(int append, INTERNAL_FUNCTION_PARAMETERS) } else { php_stream_filter_prepend(&stream->writefilters, filter); } + if (FAILURE == php_stream_filter_check_chain(&stream->writefilters)) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Writefilter chain unstable -- unresolvable unicode/string conversion conflict"); + } } if (filter) { @@ -1150,6 +1229,10 @@ PHP_FUNCTION(stream_filter_remove) php_error_docref(NULL TSRMLS_CC, E_WARNING, "Could not invalidate filter, not removing"); RETURN_FALSE; } else { + if (FAILURE == php_stream_filter_check_chain(filter->chain)) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Filterchain unstable -- unresolvable unicode/string conversion conflict"); + } + php_stream_filter_remove(filter, 1 TSRMLS_CC); RETURN_TRUE; } @@ -1158,6 +1241,7 @@ PHP_FUNCTION(stream_filter_remove) /* {{{ proto string stream_get_line(resource stream, int maxlen [, string ending]) Read up to maxlen bytes from a stream or until the ending string is found */ +/* UTODO */ PHP_FUNCTION(stream_get_line) { char *str = NULL; diff --git a/ext/standard/string.c b/ext/standard/string.c index 002b469476..cc15d8898b 100644 --- a/ext/standard/string.c +++ b/ext/standard/string.c @@ -53,6 +53,8 @@ #include "TSRM.h" #endif +#include "unicode/uchar.h" + #define STR_PAD_LEFT 0 #define STR_PAD_RIGHT 1 #define STR_PAD_BOTH 2 @@ -578,26 +580,183 @@ PHPAPI char *php_trim(char *c, int len, char *what, int what_len, zval *return_v } /* }}} */ +/* {{{ php_expand_u_trim_range() + * Expands possible ranges of the form 'a..b' in input charlist, + * where a < b in code-point order + */ +static int php_expand_u_trim_range(UChar **range, int32_t *range_len) +{ + UChar32 *codepts, *tmp, *input, *end, c; + int32_t len, tmp_len, idx; + UErrorCode err; + int expanded = 0; + int result = SUCCESS; + + /* First, convert UTF-16 to UTF-32 */ + len = *range_len; + codepts = (UChar32 *)emalloc((len+1)*sizeof(UChar32)); + err = U_ZERO_ERROR; + u_strToUTF32((UChar32 *)codepts, len+1, &len, *range, len, &err); + + /* Expand ranges, if any - taken from php_charmask() */ + tmp_len = len; + tmp = (UChar32 *)emalloc((tmp_len+1)*sizeof(UChar32)); + input = codepts; + for ( idx = 0, end = input+len ; input < end ; input++ ) { + c = input[0]; + if ( (input+3 < end) && input[1] == '.' && input[2] == '.' && input[3] >= c ) { + tmp_len += (input[3] - c + 1); + tmp = (UChar32 *)erealloc(tmp, tmp_len*sizeof(UChar)); + for ( ; c <= input[3] ; c++ ) { + if ( U_IS_UNICODE_CHAR(c) ) tmp[idx++] = c; + } + input += 3; + expanded++; + } else if ( (input+1 < end) && input[0] == '.' && input[1] == '.' ) { + /* Error, try to be as helpful as possible: + (a range ending/starting with '.' won't be captured here) */ + if ( end-len >= input ) { /* There is no 'left' char */ + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid '..'-range, no character to the left of '..'"); + result = FAILURE; + continue; + } + if ( input+2 >= end ) { /* There is no 'right' char */ + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid '..'-range, no character to the right of '..'"); + result = FAILURE; + continue; + } + if ( input[-1] > input[2] ) { /* Wrong order */ + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid '..'-range, '..'-range needs to be incrementing"); + result = FAILURE; + continue; + } + /* FIXME: Better error (a..b..c is the only left possibility?) */ + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid '..'-range"); + result = FAILURE; + continue; + } else { + tmp[idx++] = c; + } + } + + /* If any ranges were expanded, convert the expanded results back to UTF-16 */ + if ( expanded > 0 ) { + len = tmp_len; + *range = (UChar *)erealloc(*range, (len+1)*sizeof(UChar)); + err = U_ZERO_ERROR; + u_strFromUTF32(*range, len+1, &len, tmp, tmp_len, &err); + if ( U_FAILURE(err) == U_BUFFER_OVERFLOW_ERROR ) { + err = U_ZERO_ERROR; + *range = (UChar *)erealloc(*range, (len+1)*sizeof(UChar)); + u_strFromUTF32(*range, len+1, NULL, tmp, tmp_len, &err); + if ( U_FAILURE(err) ) { /* Internal ICU error */ + result = FAILURE; + } + } + *range_len = len; + } + + efree(tmp); + efree(codepts); + + return result; +} +/* }}} */ + +/* {{{ php_u_trim() + * Unicode capable version of php_trim() + */ +static UChar *php_u_trim(UChar *c, int32_t len, UChar *what, int32_t what_len, zval *return_value, int mode TSRMLS_DC) +{ + int32_t i,j; + UChar ch,wh; + int32_t start = 0, end = len; + + if ( what ) { + php_expand_u_trim_range(&what, &what_len); + } + + if ( mode & 1 ) { + for ( i = 0 ; i < end ; ) { + U16_NEXT(c, i, end, ch); + if ( what ) { + for ( j = 0 ; j < what_len ; ) { + U16_NEXT(what, j, what_len, wh); + if ( wh == ch ) break; + } + if ( wh != ch ) break; + } else { + if ( u_isWhitespace(ch) == FALSE ) break; + } + } + if ( i < end ) { + U16_BACK_1(c, 0, i); /* U16_NEXT() post-increments 'i' */ + } + start = i; + } + if ( mode & 2 ) { + for ( i = end ; i > start ; ) { + U16_PREV(c, 0, i, ch); + if ( what ) { + for ( j = 0 ; j < what_len ; ) { + U16_NEXT(what, j, what_len, wh); + if ( wh == ch ) break; + } + if ( wh != ch ) break; + } else { + if ( u_isWhitespace(ch) == FALSE ) break; + } + } + end = i; + } + + if ( start < len ) { + if ( return_value ) { + RETVAL_UNICODEL(c+start, end-start+1, 1); + } else { + return eustrndup(c+start, end-start+1); + } + } else { /* Trimmed the whole string */ + if ( return_value ) { + RETURN_EMPTY_UNICODE(); + } else { + return (USTR_MAKE("")); + } + } + +} +/* }}} */ + /* {{{ php_do_trim * Base for trim(), rtrim() and ltrim() functions. */ static void php_do_trim(INTERNAL_FUNCTION_PARAMETERS, int mode) { - zval **str; - zval **what = NULL; - int argc = ZEND_NUM_ARGS(); - - if (argc < 1 || argc > 2 || zend_get_parameters_ex(argc, &str, &what) == FAILURE) { - WRONG_PARAM_COUNT; + void *str; + int32_t str_len; + zend_uchar str_type; + void *what; + int32_t what_len; + zend_uchar what_type; + int argc = ZEND_NUM_ARGS(); + + if ( zend_parse_parameters(argc TSRMLS_CC, "T|T", &str, &str_len, &str_type, + &what, &what_len, &what_type) == FAILURE ) { + return; } - convert_to_string_ex(str); - - if (argc > 1) { - convert_to_string_ex(what); - php_trim(Z_STRVAL_PP(str), Z_STRLEN_PP(str), Z_STRVAL_PP(what), Z_STRLEN_PP(what), return_value, mode TSRMLS_CC); + if ( argc > 1 ) { + if ( str_type == IS_UNICODE ) { + php_u_trim(str, str_len, what, what_len, return_value, mode TSRMLS_CC); + } else { + php_trim(str, str_len, what, what_len, return_value, mode TSRMLS_CC); + } } else { - php_trim(Z_STRVAL_PP(str), Z_STRLEN_PP(str), NULL, 0, return_value, mode TSRMLS_CC); + if ( str_type == IS_UNICODE ) { + php_u_trim(str, str_len, NULL, 0, return_value, mode TSRMLS_CC); + } else { + php_trim(str, str_len, NULL, 0, return_value, mode TSRMLS_CC); + } } } /* }}} */ @@ -760,45 +919,56 @@ PHP_FUNCTION(wordwrap) /* {{{ php_explode */ -PHPAPI void php_explode(zval *delim, zval *str, zval *return_value, int limit) +PHPAPI void php_explode(char *delim, uint delim_len, char *str, uint str_len, zend_uchar str_type, zval *return_value, int limit) { char *p1, *p2, *endp; - endp = Z_STRVAL_P(str) + Z_STRLEN_P(str); - - p1 = Z_STRVAL_P(str); - p2 = php_memnstr(Z_STRVAL_P(str), Z_STRVAL_P(delim), Z_STRLEN_P(delim), endp); + endp = str + str_len; + p1 = str; + p2 = php_memnstr(str, delim, delim_len, endp); - if (p2 == NULL) { - add_next_index_stringl(return_value, p1, Z_STRLEN_P(str), 1); + if ( p2 == NULL ) { + if ( str_type == IS_BINARY ) { + add_next_index_binaryl(return_value, p1, str_len, 1); + } else { + add_next_index_stringl(return_value, p1, str_len, 1); + } } else { do { - add_next_index_stringl(return_value, p1, p2 - p1, 1); - p1 = p2 + Z_STRLEN_P(delim); - } while ((p2 = php_memnstr(p1, Z_STRVAL_P(delim), Z_STRLEN_P(delim), endp)) != NULL && - (limit == -1 || --limit > 1)); + if ( str_type == IS_BINARY ) { + add_next_index_binaryl(return_value, p1, p2-p1, 1); + } else { + add_next_index_stringl(return_value, p1, p2-p1, 1); + } + p1 = p2 + delim_len; + } while ( (p2 = php_memnstr(p1, delim, delim_len, endp)) != NULL && + (limit == -1 || --limit > 1) ); - if (p1 <= endp) - add_next_index_stringl(return_value, p1, endp-p1, 1); + if ( p1 <= endp ) { + if ( str_type == IS_BINARY ) { + add_next_index_binaryl(return_value, p1, endp-p1, 1); + } else { + add_next_index_stringl(return_value, p1, endp-p1, 1); + } + } } } /* }}} */ /* {{{ php_explode_negative_limit */ -PHPAPI void php_explode_negative_limit(zval *delim, zval *str, zval *return_value, int limit) +PHPAPI void php_explode_negative_limit(char *delim, uint delim_len, char *str, uint str_len, zend_uchar str_type, zval *return_value, int limit) { #define EXPLODE_ALLOC_STEP 50 char *p1, *p2, *endp; int allocated = EXPLODE_ALLOC_STEP, found = 0, i = 0, to_return = 0; char **positions = safe_emalloc(allocated, sizeof(char *), 0); - - endp = Z_STRVAL_P(str) + Z_STRLEN_P(str); - p1 = Z_STRVAL_P(str); - p2 = php_memnstr(Z_STRVAL_P(str), Z_STRVAL_P(delim), Z_STRLEN_P(delim), endp); + endp = str + str_len; + p1 = str; + p2 = php_memnstr(str, delim, delim_len, endp); - if (p2 == NULL) { + if ( p2 == NULL ) { /* do nothing since limit <= -1, thus if only one chunk - 1 + (limit) <= 0 by doing nothing we return empty array @@ -806,20 +976,23 @@ PHPAPI void php_explode_negative_limit(zval *delim, zval *str, zval *return_valu } else { positions[found++] = p1; do { - if (found >= allocated) { + if ( found >= allocated ) { allocated = found + EXPLODE_ALLOC_STEP;/* make sure we have enough memory */ positions = erealloc(positions, allocated*sizeof(char *)); } - positions[found++] = p1 = p2 + Z_STRLEN_P(delim); - } while ((p2 = php_memnstr(p1, Z_STRVAL_P(delim), Z_STRLEN_P(delim), endp)) != NULL); + positions[found++] = p1 = p2 + delim_len; + } while ( (p2 = php_memnstr(p1, delim, delim_len, endp)) != NULL ); to_return = limit + found; /* limit is at least -1 therefore no need of bounds checking : i will be always less than found */ - for (i = 0;i < to_return;i++) { /* this checks also for to_return > 0 */ - add_next_index_stringl(return_value, positions[i], - (positions[i+1] - Z_STRLEN_P(delim)) - positions[i], - 1 - ); + for ( i = 0 ; i < to_return ; i++ ) { /* this checks also for to_return > 0 */ + if ( str_type == IS_BINARY ) { + add_next_index_binaryl(return_value, positions[i], + (positions[i+1]-delim_len) - positions[i], 1); + } else { + add_next_index_stringl(return_value, positions[i], + (positions[i+1]-delim_len) - positions[i], 1); + } } } efree(positions); @@ -827,45 +1000,132 @@ PHPAPI void php_explode_negative_limit(zval *delim, zval *str, zval *return_valu } /* }}} */ +/* {{{ php_u_explode + * Unicode capable version of php_explode() + */ +static void php_u_explode(UChar *delim, uint delim_len, UChar *str, uint str_len, zval *return_value, int limit) +{ + UChar *p1, *p2, *endp; + + endp = str + str_len; + p1 = str; + p2 = zend_u_memnstr(str, delim, delim_len, endp); + + if ( p2 == NULL ) { + add_next_index_unicodel(return_value, p1, str_len, 1); + } else { + do { + add_next_index_unicodel(return_value, p1, p2-p1, 1); + p1 = (UChar *)p2 + delim_len; + } while ((p2 = zend_u_memnstr(p1, delim, delim_len, endp)) != NULL && + (limit == -1 || --limit > 1) ); + + if ( p1 <= endp ) { + add_next_index_unicodel(return_value, p1, endp-p1, 1); + } + } +} +/* }}} */ + +/* {{{ php_u_explode_negative_limit + * Unicode capable version of php_explode_negative_limit() + */ +static void php_u_explode_negative_limit(UChar *delim, uint delim_len, UChar *str, uint str_len, zval *return_value, int limit) +{ +#define EXPLODE_ALLOC_STEP 50 + UChar *p1, *p2, *endp; + int allocated = EXPLODE_ALLOC_STEP, found = 0, i = 0, to_return = 0; + UChar **positions = safe_emalloc(allocated, sizeof(UChar *), 0); + + endp = str + str_len; + p1 = str; + p2 = zend_u_memnstr(str, delim, delim_len, endp); + + if ( p2 == NULL ) { + /* + do nothing since limit <= -1, thus if only one chunk - 1 + (limit) <= 0 + by doing nothing we return empty array + */ + } else { + positions[found++] = p1; + do { + if ( found >= allocated ) { + allocated = found + EXPLODE_ALLOC_STEP;/* make sure we have enough memory */ + positions = erealloc(positions, allocated*sizeof(UChar *)); + } + positions[found++] = p1 = p2 + delim_len; + } while ( (p2 = zend_u_memnstr(p1, delim, delim_len, endp)) != NULL ); + + to_return = limit + found; + /* limit is at least -1 therefore no need of bounds checking : i will be always less than found */ + for ( i = 0 ; i < to_return ; i++ ) { /* this checks also for to_return > 0 */ + add_next_index_unicodel(return_value, positions[i], + (positions[i+1]-delim_len) - positions[i], 1); + } + } + efree(positions); +#undef EXPLODE_ALLOC_STEP +} +/* }}} */ /* {{{ proto array explode(string separator, string str [, int limit]) Splits a string on string separator and return array of components. If limit is positive only limit number of components is returned. If limit is negative all components except the last abs(limit) are returned. */ PHP_FUNCTION(explode) { - zval **str, **delim, **zlimit = NULL; - int limit = -1; - int argc = ZEND_NUM_ARGS(); + void *str, *delim; + int32_t str_len, delim_len; + zend_uchar str_type, delim_type; + int limit = -1; + int argc = ZEND_NUM_ARGS(); - if (argc < 2 || argc > 3 || zend_get_parameters_ex(argc, &delim, &str, &zlimit) == FAILURE) { + if ( argc < 2 || argc > 3 ) { WRONG_PARAM_COUNT; } - convert_to_string_ex(str); - convert_to_string_ex(delim); - if (argc > 2) { - convert_to_long_ex(zlimit); - limit = Z_LVAL_PP(zlimit); + if ( zend_parse_parameters(argc TSRMLS_CC, "TT|l", &delim, &delim_len, &delim_type, + &str, &str_len, &str_type, &limit) == FAILURE) { + return; } - if (! Z_STRLEN_PP(delim)) { + if ( delim_len == 0 ) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter."); RETURN_FALSE; } array_init(return_value); - if (! Z_STRLEN_PP(str)) { - add_next_index_stringl(return_value, "", sizeof("") - 1, 1); + if ( str_len == 0 ) { + if ( str_type == IS_UNICODE ) { + add_next_index_unicodel(return_value, USTR_MAKE(""), sizeof("")-1, 1); + } else if ( str_type == IS_BINARY ) { + add_next_index_binaryl(return_value, "", sizeof("")-1, 1); + } else { + add_next_index_stringl(return_value, "", sizeof("")-1, 1); + } return; } if (limit == 0 || limit == 1) { - add_index_stringl(return_value, 0, Z_STRVAL_PP(str), Z_STRLEN_PP(str), 1); + if ( str_type == IS_UNICODE ) { + add_index_unicodel(return_value, 0, (UChar *)str, str_len, 1); + } else if ( str_type == IS_BINARY ) { + add_index_binaryl(return_value, 0, (char *)str, str_len, 1); + } else { + add_index_stringl(return_value, 0, (char *)str, str_len, 1); + } } else if (limit < 0 && argc == 3) { - php_explode_negative_limit(*delim, *str, return_value, limit); + if ( str_type == IS_UNICODE ) { + php_u_explode_negative_limit((UChar *)delim, delim_len, (UChar *)str, str_len, return_value, limit); + } else { + php_explode_negative_limit((char *)delim, delim_len, (char *)str, str_len, str_type, return_value, limit); + } } else { - php_explode(*delim, *str, return_value, limit); + if ( str_type == IS_UNICODE ) { + php_u_explode((UChar *)delim, delim_len, (UChar *)str, str_len, return_value, limit); + } else { + php_explode((char *)delim, delim_len, (char *)str, str_len, str_type, return_value, limit); + } } } /* }}} */ @@ -1067,6 +1327,37 @@ PHPAPI char *php_strtoupper(char *s, size_t len) } /* }}} */ +/* {{{ php_u_strtoupper + */ +PHPAPI UChar* php_u_strtoupper(UChar **s, int32_t *len, const char* locale) +{ + UChar *dest = NULL; + int32_t dest_len; + UErrorCode status; + + dest_len = *len; + while (1) { + status = U_ZERO_ERROR; + dest = eurealloc(dest, dest_len+1); + dest_len = u_strToUpper(dest, dest_len, *s, *len, locale, &status); + if (status != U_BUFFER_OVERFLOW_ERROR) { + break; + } + } + + if (U_SUCCESS(status)) { + efree(*s); + dest[dest_len] = 0; + *s = dest; + *len = dest_len; + } else { + efree(dest); + } + + return *s; +} +/* }}} */ + /* {{{ proto string strtoupper(string str) Makes a string uppercase */ PHP_FUNCTION(strtoupper) @@ -1076,10 +1367,50 @@ PHP_FUNCTION(strtoupper) if (ZEND_NUM_ARGS() != 1 || zend_get_parameters_ex(1, &arg)) { WRONG_PARAM_COUNT; } - convert_to_string_ex(arg); + if (Z_TYPE_PP(arg) != IS_STRING && Z_TYPE_PP(arg) != IS_UNICODE) { + if (UG(unicode)) { + convert_to_unicode_ex(arg); + } else { + convert_to_string_ex(arg); + } + } RETVAL_ZVAL(*arg, 1, 0); - php_strtoupper(Z_STRVAL_P(return_value), Z_STRLEN_P(return_value)); + if (Z_TYPE_P(return_value) == IS_UNICODE) { + php_u_strtoupper(&Z_USTRVAL_P(return_value), &Z_USTRLEN_P(return_value), UG(default_locale)); + } else { + php_strtoupper(Z_STRVAL_P(return_value), Z_STRLEN_P(return_value)); + } +} +/* }}} */ + +/* {{{ php_u_strtolower + */ +PHPAPI UChar *php_u_strtolower(UChar **s, int32_t *len, const char* locale) +{ + UChar *dest = NULL; + int32_t dest_len; + UErrorCode status = U_ZERO_ERROR; + + dest_len = *len; + while (1) { + status = U_ZERO_ERROR; + dest = eurealloc(dest, dest_len+1); + dest_len = u_strToLower(dest, dest_len, *s, *len, locale, &status); + if (status != U_BUFFER_OVERFLOW_ERROR) { + break; + } + } + + if (U_SUCCESS(status)) { + efree(*s); + dest[dest_len] = 0; + *s = dest; + *len = dest_len; + } else { + efree(dest); + } + return *s; } /* }}} */ @@ -1105,15 +1436,24 @@ PHPAPI char *php_strtolower(char *s, size_t len) PHP_FUNCTION(strtolower) { zval **str; - char *ret; if (ZEND_NUM_ARGS() != 1 || zend_get_parameters_ex(1, &str)) { WRONG_PARAM_COUNT; } - convert_to_string_ex(str); + if (Z_TYPE_PP(str) != IS_STRING && Z_TYPE_PP(str) != IS_UNICODE) { + if (UG(unicode)) { + convert_to_unicode_ex(str); + } else { + convert_to_string_ex(str); + } + } RETVAL_ZVAL(*str, 1, 0); - ret = php_strtolower(Z_STRVAL_P(return_value), Z_STRLEN_P(return_value)); + if (Z_TYPE_P(return_value) == IS_UNICODE) { + php_u_strtolower(&Z_USTRVAL_P(return_value), &Z_USTRLEN_P(return_value), UG(default_locale)); + } else { + php_strtolower(Z_STRVAL_P(return_value), Z_STRLEN_P(return_value)); + } } /* }}} */ @@ -1499,49 +1839,113 @@ PHP_FUNCTION(stristr) Finds first occurrence of a string within another */ PHP_FUNCTION(strstr) { - char *haystack; - long haystack_len; - zval *needle; + void *haystack; + int32_t haystack_len; + zend_uchar haystack_type; + zval **needle; + void *found = NULL; + char needle_char[2]; + UChar u_needle_char[3]; + int32_t n_len = 0; + size_t found_offset; zend_bool part = 0; - char *found = NULL; - char needle_char[2]; - long found_offset; - if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sz|b", &haystack, &haystack_len, &needle, &part) == FAILURE) { + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "tZ|b", &haystack, &haystack_len, &haystack_type, &needle, &part) == FAILURE) { return; } - if (Z_TYPE_P(needle) == IS_STRING) { - if (!Z_STRLEN_P(needle)) { + if (Z_TYPE_PP(needle) == IS_STRING || Z_TYPE_PP(needle) == IS_UNICODE || Z_TYPE_PP(needle) == IS_BINARY) { + if (!Z_STRLEN_PP(needle)) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter."); RETURN_FALSE; } - found = php_memnstr(haystack, - Z_STRVAL_P(needle), - Z_STRLEN_P(needle), - haystack + haystack_len); + /* haystack type determines the needle type */ + if (haystack_type == IS_UNICODE) { + convert_to_unicode_ex(needle); + found = zend_u_memnstr((UChar*)haystack, + Z_USTRVAL_PP(needle), + Z_USTRLEN_PP(needle), + (UChar*)haystack + haystack_len); + } else { + convert_to_string_ex(needle); + found = php_memnstr((char*)haystack, + Z_STRVAL_PP(needle), + Z_STRLEN_PP(needle), + (char*)haystack + haystack_len); + } } else { - convert_to_long_ex(&needle); - needle_char[0] = (char) Z_LVAL_P(needle); - needle_char[1] = 0; + convert_to_long_ex(needle); + if (haystack_type == IS_UNICODE) { + if (Z_LVAL_PP(needle) < 0 || Z_LVAL_PP(needle) > 0x10FFFF) { + php_error(E_WARNING, "Needle argument codepoint value out of range (0 - 0x10FFFF)"); + RETURN_FALSE; + } + /* supplementary codepoint values may require 2 UChar's */ + if (U_IS_BMP(Z_LVAL_PP(needle))) { + u_needle_char[n_len++] = (UChar) Z_LVAL_PP(needle); + u_needle_char[n_len] = 0; + } else { + u_needle_char[n_len++] = (UChar) U16_LEAD(Z_LVAL_PP(needle)); + u_needle_char[n_len++] = (UChar) U16_TRAIL(Z_LVAL_PP(needle)); + u_needle_char[n_len] = 0; + } - found = php_memnstr(haystack, - needle_char, - 1, - haystack + haystack_len); + found = zend_u_memnstr((UChar*)haystack, + u_needle_char, + n_len, + (UChar*)haystack + haystack_len); + } else { + needle_char[0] = (char) Z_LVAL_PP(needle); + needle_char[1] = 0; + + found = php_memnstr((char*)haystack, + needle_char, + 1, + (char*)haystack + haystack_len); + } } if (found) { - found_offset = found - haystack; - if (part) { - char *ret; - ret = emalloc(found_offset + 1); - strncpy(ret, haystack, found_offset); - ret[found_offset] = '\0'; - RETURN_STRINGL(ret , found_offset, 0); - } else { - RETURN_STRINGL(found, haystack_len - found_offset, 1); + switch (haystack_type) { + case IS_UNICODE: + found_offset = (UChar*)found - (UChar*)haystack; + if (part) { + char *ret; + ret = eumalloc(found_offset + 1); + u_strncpy(ret, haystack, found_offset); + ret[found_offset] = '\0'; + RETURN_UNICODEL(ret , found_offset, 0); + } else { + RETURN_UNICODEL(found, haystack_len - found_offset, 1); + } + break; + + case IS_STRING: + found_offset = (char *)found - (char *)haystack; + if (part) { + char *ret; + ret = emalloc(found_offset + 1); + strncpy(ret, haystack, found_offset); + ret[found_offset] = '\0'; + RETURN_STRINGL(ret , found_offset, 0); + } else { + RETURN_STRINGL(found, haystack_len - found_offset, 1); + } + break; + + case IS_BINARY: + found_offset = (char *)found - (char *)haystack; + if (part) { + char *ret; + ret = emalloc(found_offset + 1); + strncpy(ret, haystack, found_offset); + ret[found_offset] = '\0'; + RETURN_BINARYL(ret , found_offset, 0); + } else { + RETURN_BINARYL(found, haystack_len - found_offset, 1); + } + break; } } else { RETURN_FALSE; @@ -1553,54 +1957,101 @@ PHP_FUNCTION(strstr) An alias for strstr */ /* }}} */ -/* {{{ proto int strpos(string haystack, string needle [, int offset]) +/* {{{ proto int strpos(text haystack, mixed needle [, int offset]) Finds position of first occurrence of a string within another */ PHP_FUNCTION(strpos) { - zval **haystack, **needle, **z_offset; - char *found = NULL; - char needle_char[2]; + void *haystack; + int32_t haystack_len; + zend_uchar haystack_type; + zval **needle; int offset = 0; - int argc = ZEND_NUM_ARGS(); - - if (argc < 2 || argc > 3 || zend_get_parameters_ex(argc, &haystack, &needle, &z_offset) == FAILURE) { - WRONG_PARAM_COUNT; - } - convert_to_string_ex(haystack); + void *found = NULL; + char needle_char[2]; + UChar u_needle_char[3]; + int32_t n_len = 0; - if (argc > 2) { - convert_to_long_ex(z_offset); - offset = Z_LVAL_PP(z_offset); + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "tZ|l", &haystack, + &haystack_len, &haystack_type, &needle, &offset) == FAILURE) { + return; } - if (offset < 0 || offset > Z_STRLEN_PP(haystack)) { + /* + * Unicode note: it's okay to not convert offset to codepoint offset here. + * We'll just do a rough check that the offset does not exceed length in + * code units, and leave the rest to zend_u_memnstr(). + */ + if (offset < 0 || offset > haystack_len) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset not contained in string."); RETURN_FALSE; } - if (Z_TYPE_PP(needle) == IS_STRING) { + if (Z_TYPE_PP(needle) == IS_STRING || Z_TYPE_PP(needle) == IS_UNICODE || Z_TYPE_PP(needle) == IS_BINARY) { if (!Z_STRLEN_PP(needle)) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter."); RETURN_FALSE; } - found = php_memnstr(Z_STRVAL_PP(haystack) + offset, - Z_STRVAL_PP(needle), - Z_STRLEN_PP(needle), - Z_STRVAL_PP(haystack) + Z_STRLEN_PP(haystack)); + /* haystack type determines the needle type */ + if (haystack_type == IS_UNICODE) { + int32_t cp_offset = 0; + convert_to_unicode_ex(needle); + /* locate the codepoint at the specified offset */ + U16_FWD_N((UChar*)haystack, cp_offset, haystack_len, offset); + found = zend_u_memnstr((UChar*)haystack + cp_offset, + Z_USTRVAL_PP(needle), + Z_USTRLEN_PP(needle), + (UChar*)haystack + haystack_len); + } else { + convert_to_string_ex(needle); + found = php_memnstr((char*)haystack + offset, + Z_STRVAL_PP(needle), + Z_STRLEN_PP(needle), + (char*)haystack + haystack_len); + } } else { convert_to_long_ex(needle); - needle_char[0] = (char) Z_LVAL_PP(needle); - needle_char[1] = 0; + if (haystack_type == IS_UNICODE) { + int32_t cp_offset = 0; + if (Z_LVAL_PP(needle) < 0 || Z_LVAL_PP(needle) > 0x10FFFF) { + php_error(E_WARNING, "Needle argument codepoint value out of range (0 - 0x10FFFF)"); + RETURN_FALSE; + } + /* supplementary codepoint values may require 2 UChar's */ + if (U_IS_BMP(Z_LVAL_PP(needle))) { + u_needle_char[n_len++] = (UChar) Z_LVAL_PP(needle); + u_needle_char[n_len] = 0; + } else { + u_needle_char[n_len++] = (UChar) U16_LEAD(Z_LVAL_PP(needle)); + u_needle_char[n_len++] = (UChar) U16_TRAIL(Z_LVAL_PP(needle)); + u_needle_char[n_len] = 0; + } - found = php_memnstr(Z_STRVAL_PP(haystack) + offset, - needle_char, - 1, - Z_STRVAL_PP(haystack) + Z_STRLEN_PP(haystack)); + /* locate the codepoint at the specified offset */ + U16_FWD_N((UChar*)haystack, cp_offset, haystack_len, offset); + found = zend_u_memnstr((UChar*)haystack + cp_offset, + u_needle_char, + n_len, + (UChar*)haystack + haystack_len); + } else { + needle_char[0] = (char) Z_LVAL_PP(needle); + needle_char[1] = 0; + + found = php_memnstr((char*)haystack + offset, + needle_char, + 1, + (char*)haystack + haystack_len); + } } if (found) { - RETURN_LONG(found - Z_STRVAL_PP(haystack)); + if (haystack_type == IS_UNICODE) { + /* simple subtraction will not suffice, since there may be + supplementary codepoints */ + RETURN_LONG(u_countChar32(haystack, ((char *)found - (char *)haystack)/sizeof(UChar))); + } else { + RETURN_LONG((char *)found - (char *)haystack); + } } else { RETURN_FALSE; } @@ -1955,32 +2406,31 @@ PHP_FUNCTION(chunk_split) Returns part of a string */ PHP_FUNCTION(substr) { - zval **str, **from, **len; - int l; + void *str; + int32_t str_len, cp_len; + zend_uchar str_type; + int l = -1; int f; - int argc = ZEND_NUM_ARGS(); - if (argc < 2 || argc > 3 || zend_get_parameters_ex(argc, &str, &from, &len) == FAILURE) { - WRONG_PARAM_COUNT; + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "tl|l", &str, &str_len, &str_type, &f, &l) == FAILURE) { + return; } - convert_to_string_ex(str); - convert_to_long_ex(from); - - if (argc > 2) { - convert_to_long_ex(len); - l = Z_LVAL_PP(len); + if (str_type == IS_UNICODE) { + cp_len = u_countChar32(str, str_len); } else { - l = Z_STRLEN_PP(str); + cp_len = str_len; } - - f = Z_LVAL_PP(from); + if (ZEND_NUM_ARGS() == 2) { + l = cp_len; + } + /* if "from" position is negative, count start position from the end * of the string */ if (f < 0) { - f = Z_STRLEN_PP(str) + f; + f = cp_len + f; if (f < 0) { f = 0; } @@ -1990,21 +2440,29 @@ PHP_FUNCTION(substr) * needed to stop that many chars from the end of the string */ if (l < 0) { - l = (Z_STRLEN_PP(str) - f) + l; + l = (cp_len - f) + l; if (l < 0) { l = 0; } } - if (f >= Z_STRLEN_PP(str)) { + if (f >= cp_len) { RETURN_FALSE; } - if (((unsigned) f + (unsigned) l) > Z_STRLEN_PP(str)) { - l = Z_STRLEN_PP(str) - f; + if (((unsigned) f + (unsigned) l) > cp_len) { + l = cp_len - f; } - RETURN_STRINGL(Z_STRVAL_PP(str) + f, l, 1); + if (str_type == IS_UNICODE) { + int32_t start = 0, end = 0; + U16_FWD_N((UChar*)str, end, str_len, f); + start = end; + U16_FWD_N((UChar*)str, end, str_len, l); + RETURN_UNICODEL((UChar*)str + start, end-start, 1); + } else { + RETURN_STRINGL((char*)str + f, l, 1); + } } /* }}} */ @@ -4209,61 +4667,88 @@ reg_char: Returns the input string repeat mult times */ PHP_FUNCTION(str_repeat) { - zval **input_str; /* Input string */ - zval **mult; /* Multiplier */ - char *result; /* Resulting string */ - int result_len; /* Length of the resulting string */ - - if (ZEND_NUM_ARGS() != 2 || zend_get_parameters_ex(2, &input_str, &mult) == FAILURE) { - WRONG_PARAM_COUNT; + void *input_str; /* Input string */ + int32_t input_str_len; + int32_t input_str_chars; + zend_uchar input_str_type; + long mult; /* Multiplier */ + void *result; /* Resulting string */ + int32_t result_len; /* Length of the resulting string, in bytes */ + int32_t result_chars; /* Chars/UChars in resulting string */ + + if ( zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "tl", &input_str, + &input_str_chars, &input_str_type, &mult) == FAILURE ) { + return; } - - /* Make sure we're dealing with proper types */ - convert_to_string_ex(input_str); - convert_to_long_ex(mult); - - if (Z_LVAL_PP(mult) < 0) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "Second argument has to be greater than or equal to 0."); + + if ( mult < 0 ) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Second argument has to be greater than or equal to 0"); return; } - /* Don't waste our time if it's empty */ - if (Z_STRLEN_PP(input_str) == 0) - RETURN_STRINGL("", 0, 1); - - /* ... or if the multiplier is zero */ - if (Z_LVAL_PP(mult) == 0) - RETURN_STRINGL("", 0, 1); - + /* Don't waste our time if input is empty or if the multiplier is zero */ + if ( input_str_chars == 0 || mult == 0 ) { + if ( input_str_type == IS_UNICODE ) { + RETURN_UNICODEL(USTR_MAKE(""), 0, 0); + } else if ( input_str_type == IS_STRING ) { + RETURN_STRINGL("", 0, 1); + } else { + RETURN_BINARYL("", 0, 1); + } + } + /* Initialize the result string */ - result_len = Z_STRLEN_PP(input_str) * Z_LVAL_PP(mult); - if (result_len < 1 || result_len > 2147483647) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "You may not create strings longer then 2147483647 bytes"); - RETURN_FALSE; + result_chars = (input_str_chars * mult) + 1; + if ( input_str_type == IS_UNICODE ) { + input_str_len = UBYTES(input_str_chars); + result_len = UBYTES(result_chars); + if ( result_chars < 1 || result_chars > (2147483647/UBYTES(1)) ) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, "You may not create strings longer then %ld characters", 2147483647/UBYTES(1)); + RETURN_FALSE; + } + } else { + input_str_len = input_str_chars; + result_len = result_chars; + if ( result_chars < 1 || result_chars > 2147483647 ) { + if ( input_str_type == IS_STRING ) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, "You may not create strings longer then 2147483647 characters"); + } else { + php_error_docref(NULL TSRMLS_CC, E_WARNING, "You may not create strings longer then 2147483647 bytes"); + } + RETURN_FALSE; + } } - result = (char *)emalloc(result_len + 1); + result = emalloc(result_len); /* Heavy optimization for situations where input string is 1 byte long */ - if (Z_STRLEN_PP(input_str) == 1) { - memset(result, *(Z_STRVAL_PP(input_str)), Z_LVAL_PP(mult)); + if ( input_str_len == 1 ) { + memset(result, *((char *)input_str), mult); } else { char *s, *e, *ee; int l=0; - memcpy(result, Z_STRVAL_PP(input_str), Z_STRLEN_PP(input_str)); + memcpy(result, input_str, input_str_len); s = result; - e = result + Z_STRLEN_PP(input_str); + e = result + input_str_len; ee = result + result_len; - - while (e<ee) { + + while ( e < ee ) { l = (e-s) < (ee-e) ? (e-s) : (ee-e); memmove(e, s, l); e += l; } } - - result[result_len] = '\0'; - RETURN_STRINGL(result, result_len, 0); + if ( input_str_type == IS_UNICODE ) { + *(((UChar *)result)+result_chars-1) = 0; + RETURN_UNICODEL((UChar *)result, result_chars, 0); + } else { + *(((char *)result)+result_chars-1) = '\0'; + if ( input_str_type == IS_BINARY ) { + RETURN_BINARYL((char *)result, result_chars, 0); + } else { + RETURN_STRINGL((char *)result, result_chars, 0); + } + } } /* }}} */ diff --git a/ext/standard/tests/file/stream_get_line.phpt b/ext/standard/tests/file/stream_get_line.phpt index 2c11f00eed..51261f7043 100644 --- a/ext/standard/tests/file/stream_get_line.phpt +++ b/ext/standard/tests/file/stream_get_line.phpt @@ -2,6 +2,7 @@ Crash inside stream_get_line(), when length=0 --FILE-- <?php +die("Temporary unavailable in unicode PHP. Remove this line."); $path = dirname(__FILE__) . '/test.html'; file_put_contents($path, "foo<br>bar<br>foo"); diff --git a/ext/standard/type.c b/ext/standard/type.c index 0b5fb7d661..c0e4d15cf6 100644 --- a/ext/standard/type.c +++ b/ext/standard/type.c @@ -52,6 +52,14 @@ PHP_FUNCTION(gettype) RETVAL_STRING("string", 1); break; + case IS_BINARY: + RETVAL_STRING("binary", 1); + break; + + case IS_UNICODE: + RETVAL_STRING("unicode", 1); + break; + case IS_ARRAY: RETVAL_STRING("array", 1); break; @@ -348,6 +356,8 @@ PHP_FUNCTION(is_scalar) case IS_DOUBLE: case IS_LONG: case IS_STRING: + case IS_BINARY: + case IS_UNICODE: RETURN_TRUE; break; diff --git a/ext/standard/user_filters.c b/ext/standard/user_filters.c index 9dd786cdef..c8daad1a25 100644 --- a/ext/standard/user_filters.c +++ b/ext/standard/user_filters.c @@ -143,7 +143,7 @@ php_stream_filter_status_t userfilter_filter( php_stream_filter *thisfilter, php_stream_bucket_brigade *buckets_in, php_stream_bucket_brigade *buckets_out, - size_t *bytes_consumed, + size_t *consumed, int flags TSRMLS_DC) { @@ -176,11 +176,7 @@ php_stream_filter_status_t userfilter_filter( args[1] = &zout; ALLOC_INIT_ZVAL(zconsumed); - if (bytes_consumed) { - ZVAL_LONG(zconsumed, *bytes_consumed); - } else { - ZVAL_NULL(zconsumed); - } + ZVAL_NULL(zconsumed); args[2] = &zconsumed; ALLOC_INIT_ZVAL(zclosing); @@ -196,15 +192,15 @@ php_stream_filter_status_t userfilter_filter( if (call_result == SUCCESS && retval != NULL) { convert_to_long(retval); + if (consumed) { + convert_to_long(zconsumed); + *consumed = Z_LVAL_P(zconsumed); + } ret = Z_LVAL_P(retval); } else if (call_result == FAILURE) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "failed to call filter function"); } - if (bytes_consumed) { - *bytes_consumed = Z_LVAL_P(zconsumed); - } - if (retval) zval_ptr_dtor(&retval); zval_ptr_dtor(&zclosing); @@ -218,7 +214,8 @@ php_stream_filter_status_t userfilter_filter( static php_stream_filter_ops userfilter_ops = { userfilter_filter, userfilter_dtor, - "user-filter" + "user-filter", + PSFO_FLAG_OUTPUTS_SAME }; static php_stream_filter *user_filter_factory_create(const char *filtername, @@ -376,8 +373,17 @@ PHP_FUNCTION(stream_bucket_make_writeable) add_property_zval(return_value, "bucket", zbucket); /* add_property_zval increments the refcount which is unwanted here */ zval_ptr_dtor(&zbucket); - add_property_stringl(return_value, "data", bucket->buf, bucket->buflen, 1); - add_property_long(return_value, "datalen", bucket->buflen); + if (bucket->is_unicode) { + zval *unicode_data; + + ALLOC_INIT_ZVAL(unicode_data); + ZVAL_UNICODEL(unicode_data, bucket->buf.ustr.val, bucket->buf.ustr.len, 1); + add_property_zval(return_value, "data", unicode_data); + add_property_long(return_value, "datalen", bucket->buf.str.len); + } else { + add_property_stringl(return_value, "data", bucket->buf.str.val, bucket->buf.str.len, 1); + add_property_long(return_value, "datalen", bucket->buf.str.len); + } } } /* }}} */ @@ -402,15 +408,40 @@ static void php_stream_bucket_attach(int append, INTERNAL_FUNCTION_PARAMETERS) ZEND_FETCH_RESOURCE(brigade, php_stream_bucket_brigade *, &zbrigade, -1, PHP_STREAM_BRIGADE_RES_NAME, le_bucket_brigade); ZEND_FETCH_RESOURCE(bucket, php_stream_bucket *, pzbucket, -1, PHP_STREAM_BUCKET_RES_NAME, le_bucket); - if (SUCCESS == zend_hash_find(Z_OBJPROP_P(zobject), "data", 5, (void**)&pzdata) && (*pzdata)->type == IS_STRING) { + if (SUCCESS == zend_hash_find(Z_OBJPROP_P(zobject), "data", 5, (void**)&pzdata)) { if (!bucket->own_buf) { bucket = php_stream_bucket_make_writeable(bucket TSRMLS_CC); } - if (bucket->buflen != Z_STRLEN_PP(pzdata)) { - bucket->buf = perealloc(bucket->buf, Z_STRLEN_PP(pzdata), bucket->is_persistent); - bucket->buflen = Z_STRLEN_PP(pzdata); + if (Z_TYPE_PP(pzdata) == IS_UNICODE) { + if (!bucket->is_unicode) { + pefree(bucket->buf.str.val, bucket->is_persistent); + bucket->buf.ustr.len = Z_USTRLEN_PP(pzdata); + bucket->buf.ustr.val = safe_pemalloc(sizeof(UChar), bucket->buf.ustr.len, 0, bucket->is_persistent); + bucket->is_unicode = 1; + } + if (bucket->buf.ustr.len < Z_USTRLEN_PP(pzdata)) { + pefree(bucket->buf.ustr.val, bucket->is_persistent); + bucket->buf.ustr.len = Z_USTRLEN_PP(pzdata); + bucket->buf.ustr.val = safe_pemalloc(sizeof(UChar), bucket->buf.ustr.len, 0, bucket->is_persistent); + } + bucket->buf.ustr.len = Z_USTRLEN_PP(pzdata); + memcpy(bucket->buf.ustr.val, Z_USTRVAL_PP(pzdata), bucket->buf.ustr.len * sizeof(UChar)); + } else { /* string -- or at least string expressable */ + SEPARATE_ZVAL_IF_NOT_REF(pzdata); + convert_to_string_ex(pzdata); + if (bucket->is_unicode) { + pefree(bucket->buf.ustr.val, bucket->is_persistent); + bucket->buf.str.len = Z_STRLEN_PP(pzdata); + bucket->buf.str.val = pemalloc(bucket->buf.str.len, bucket->is_persistent); + bucket->is_unicode = 0; + } + if (bucket->buf.str.len < Z_STRLEN_PP(pzdata)) { + bucket->buf.str.len = Z_STRLEN_PP(pzdata); + bucket->buf.str.val = perealloc(bucket->buf.str.val, bucket->buf.str.len, bucket->is_persistent); + } + bucket->buf.str.len = Z_STRLEN_PP(pzdata); + memcpy(bucket->buf.str.val, Z_STRVAL_PP(pzdata), bucket->buf.str.len); } - memcpy(bucket->buf, Z_STRVAL_PP(pzdata), bucket->buflen); } if (append) { @@ -443,33 +474,35 @@ PHP_FUNCTION(stream_bucket_new) { zval *zstream, *zbucket; php_stream *stream; - char *buffer; + zval *buffer; char *pbuffer; - int buffer_len; php_stream_bucket *bucket; - if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "zs", &zstream, &buffer, &buffer_len) == FAILURE) { + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "zz", &zstream, &buffer) == FAILURE) { RETURN_FALSE; } php_stream_from_zval(stream, &zstream); - if (!(pbuffer = pemalloc(buffer_len, php_stream_is_persistent(stream)))) { - RETURN_FALSE; - } - - memcpy(pbuffer, buffer, buffer_len); - - bucket = php_stream_bucket_new(stream, pbuffer, buffer_len, 1, php_stream_is_persistent(stream) TSRMLS_CC); + object_init(return_value); + if (Z_TYPE_P(buffer) == IS_UNICODE) { + bucket = php_stream_bucket_new_unicode(stream, Z_USTRVAL_P(buffer), Z_USTRLEN_P(buffer), 0, php_stream_is_persistent(stream) TSRMLS_CC); + ZVAL_ADDREF(buffer); + add_property_zval(return_value, "data", buffer); + add_property_long(return_value, "datalen", Z_USTRLEN_P(buffer)); + } else { + convert_to_string(buffer); + bucket = php_stream_bucket_new(stream, Z_STRVAL_P(buffer), Z_STRLEN_P(buffer), 0, php_stream_is_persistent(stream) TSRMLS_CC); + + add_property_zval(return_value, "data", buffer); + add_property_long(return_value, "datalen", Z_STRLEN_P(buffer)); + } ALLOC_INIT_ZVAL(zbucket); ZEND_REGISTER_RESOURCE(zbucket, bucket, le_bucket); - object_init(return_value); add_property_zval(return_value, "bucket", zbucket); /* add_property_zval increments the refcount which is unwanted here */ zval_ptr_dtor(&zbucket); - add_property_stringl(return_value, "data", bucket->buf, bucket->buflen, 1); - add_property_long(return_value, "datalen", bucket->buflen); } /* }}} */ diff --git a/ext/standard/var.c b/ext/standard/var.c index 6f50ddf70b..09778e61f8 100644 --- a/ext/standard/var.c +++ b/ext/standard/var.c @@ -41,27 +41,98 @@ /* }}} */ /* {{{ php_var_dump */ +/* temporary, for debugging */ +static void php_var_dump_unicode(UChar *ustr, int32_t length, int verbose TSRMLS_DC) +{ + UChar32 c; + int32_t i; + UErrorCode status = U_ZERO_ERROR; + int32_t clen; + char *out = NULL; + + if (length == 0) { + php_printf("\"\""); + return; + } + + clen = length * ucnv_getMaxCharSize(ZEND_U_CONVERTER(UG(output_encoding_conv))) + 1; + while (1) { + status = U_ZERO_ERROR; + out = erealloc(out, clen+1); + clen = ucnv_fromUChars(ZEND_U_CONVERTER(UG(output_encoding_conv)), out, clen+1, ustr, length, &status); + if (status != U_BUFFER_OVERFLOW_ERROR) { + break; + } + } + if(U_FAILURE(status) || status==U_STRING_NOT_TERMINATED_WARNING) { + php_printf("problem converting string from Unicode: %s\n", u_errorName(status)); + efree(out); + return; + } + + if (verbose) { + php_printf("\"%s\" {", out); + + /* output the code points (not code units) */ + if(length>=0) { + /* s is not NUL-terminated */ + for(i=0; i<length; /* U16_NEXT post-increments */) { + U16_NEXT(ustr, i, length, c); + php_printf(" %04x", c); + } + } else { + /* s is NUL-terminated */ + for(i=0; /* condition in loop body */; /* U16_NEXT post-increments */) { + U16_NEXT(ustr, i, length, c); + if(c==0) { + break; + } + php_printf(" %04x", c); + } + } + php_printf(" }"); + } else { + php_printf("\"%s\"", out); + } + efree(out); +} + static int php_array_element_dump(zval **zv, int num_args, va_list args, zend_hash_key *hash_key) { int level; + int verbose; TSRMLS_FETCH(); level = va_arg(args, int); + verbose = va_arg(args, int); if (hash_key->nKeyLength==0) { /* numeric key */ php_printf("%*c[%ld]=>\n", level + 1, ' ', hash_key->h); } else { /* string key */ - if (va_arg(args, int) && hash_key->arKey[0] == '\0') { + if (va_arg(args, int) && + ((hash_key->type == IS_STRING && hash_key->u.string[0] == 0) || + (hash_key->type == IS_UNICODE && hash_key->u.unicode[0] == '\0'))) { /* XXX: perhaps when we are inside the class we should permit access to * private & protected values */ return 0; } - php_printf("%*c[\"", level + 1, ' '); - PHPWRITE(hash_key->arKey, hash_key->nKeyLength - 1); - php_printf("\"]=>\n"); + php_printf("%*c[", level + 1, ' '); + if (hash_key->type == IS_STRING) { + php_printf("\""); + PHPWRITE(hash_key->u.string, hash_key->nKeyLength - 1); + php_printf("\""); + } else if (hash_key->type == IS_BINARY) { + php_printf("b\""); + PHPWRITE(hash_key->u.string, hash_key->nKeyLength - 1); + php_printf("\""); + } else if (hash_key->type == IS_UNICODE) { + php_printf("u"); + php_var_dump_unicode(hash_key->u.unicode, hash_key->nKeyLength-1, verbose TSRMLS_CC); + } + php_printf("]=>\n"); } - php_var_dump(zv, level + 2 TSRMLS_CC); + php_var_dump(zv, level + 2, 0 TSRMLS_CC); return 0; } @@ -69,35 +140,53 @@ static int php_object_property_dump(zval **zv, int num_args, va_list args, zend_ { int level; char *prop_name, *class_name; + int verbose; TSRMLS_FETCH(); level = va_arg(args, int); + verbose = va_arg(args, int); if (hash_key->nKeyLength ==0 ) { /* numeric key */ php_printf("%*c[%ld]=>\n", level + 1, ' ', hash_key->h); } else { /* string key */ - zend_unmangle_property_name(hash_key->arKey, &class_name, &prop_name); + zend_u_unmangle_property_name(hash_key->type, hash_key->u.string, &class_name, &prop_name); + php_printf("%*c[", level + 1, ' '); + if (class_name) { - php_printf("%*c[\"%s", level + 1, ' ', prop_name); + if (hash_key->type == IS_STRING) { + php_printf("\""); + PHPWRITE(prop_name, strlen(prop_name)); + php_printf("\""); + } else if (hash_key->type == IS_UNICODE) { + php_printf("u"); + php_var_dump_unicode((UChar*)prop_name, u_strlen((UChar*)prop_name), verbose TSRMLS_CC); + } if (class_name[0]=='*') { ZEND_PUTS(":protected"); } else { ZEND_PUTS(":private"); } } else { - php_printf("%*c[\"%s", level + 1, ' ', hash_key->arKey); -#ifdef ANDREY_0 + if (hash_key->type == IS_STRING) { + php_printf("\""); + PHPWRITE(hash_key->u.string, hash_key->nKeyLength - 1); + php_printf("\""); + } else if (hash_key->type == IS_UNICODE) { + php_printf("u"); + php_var_dump_unicode(hash_key->u.unicode, hash_key->nKeyLength-1, verbose TSRMLS_CC); + } ZEND_PUTS(":public"); -#endif } +#ifdef ANDREY_0 +#endif ZEND_PUTS("\"]=>\n"); } - php_var_dump(zv, level + 2 TSRMLS_CC); + php_var_dump(zv, level + 2, verbose TSRMLS_CC); return 0; } -PHPAPI void php_var_dump(zval **struc, int level TSRMLS_DC) +PHPAPI void php_var_dump(zval **struc, int level, int verbose TSRMLS_DC) { HashTable *myht = NULL; char *class_name; @@ -126,6 +215,17 @@ PHPAPI void php_var_dump(zval **struc, int level TSRMLS_DC) PHPWRITE(Z_STRVAL_PP(struc), Z_STRLEN_PP(struc)); PUTS("\"\n"); break; + case IS_BINARY: + php_printf("%sbinary(%d) \"", COMMON, Z_STRLEN_PP(struc)); + PHPWRITE(Z_STRVAL_PP(struc), Z_STRLEN_PP(struc)); + PUTS("\"\n"); + break; + case IS_UNICODE: + /* temporary, for debugging */ + php_printf("%sunicode(%d) ", COMMON, u_countChar32((*struc)->value.ustr.val, (*struc)->value.ustr.len)); + php_var_dump_unicode((*struc)->value.ustr.val, (*struc)->value.ustr.len, verbose TSRMLS_CC); + PUTS("\n"); + break; case IS_ARRAY: myht = Z_ARRVAL_PP(struc); if (myht->nApplyCount > 1) { @@ -143,12 +243,12 @@ PHPAPI void php_var_dump(zval **struc, int level TSRMLS_DC) } Z_OBJ_HANDLER(**struc, get_class_name)(*struc, &class_name, &class_name_len, 0 TSRMLS_CC); - php_printf("%sobject(%s)#%d (%d) {\n", COMMON, class_name, Z_OBJ_HANDLE_PP(struc), myht ? zend_hash_num_elements(myht) : 0); + php_printf("%sobject(%v)#%d (%d) {\n", COMMON, class_name, Z_OBJ_HANDLE_PP(struc), myht ? zend_hash_num_elements(myht) : 0); efree(class_name); php_element_dump_func = php_object_property_dump; head_done: if (myht) { - zend_hash_apply_with_arguments(myht, (apply_func_args_t) php_element_dump_func, 1, level, (Z_TYPE_PP(struc) == IS_ARRAY ? 0 : 1)); + zend_hash_apply_with_arguments(myht, (apply_func_args_t) php_element_dump_func, 3, level, verbose, (Z_TYPE_PP(struc) == IS_ARRAY ? 0 : 1)); } if (level > 1) { php_printf("%*c", level-1, ' '); @@ -189,7 +289,31 @@ PHP_FUNCTION(var_dump) } for (i=0; i<argc; i++) - php_var_dump(args[i], 1 TSRMLS_CC); + php_var_dump(args[i], 1, 0 TSRMLS_CC); + + efree(args); +} +/* }}} */ + + +/* {{{ proto void var_inspect(mixed var) + Dumps a string representation of variable to output (verbose form) */ +PHP_FUNCTION(var_inspect) +{ + zval ***args; + int argc; + int i; + + argc = ZEND_NUM_ARGS(); + + args = (zval ***)safe_emalloc(argc, sizeof(zval **), 0); + if (ZEND_NUM_ARGS() == 0 || zend_get_parameters_array_ex(argc, args) == FAILURE) { + efree(args); + WRONG_PARAM_COUNT; + } + + for (i=0; i<argc; i++) + php_var_dump(args[i], 1, 1 TSRMLS_CC); efree(args); } @@ -210,18 +334,31 @@ static int zval_array_element_dump(zval **zv, int num_args, va_list args, zend_h /* XXX: perphaps when we are inside the class we should permit access to * private & protected values */ - if (va_arg(args, int) && hash_key->arKey[0] == '\0') { + if (va_arg(args, int) && + ((hash_key->type == IS_STRING && hash_key->u.string[0] == 0) || + (hash_key->type == IS_UNICODE && hash_key->u.unicode[0] == '\0'))) { return 0; } - php_printf("%*c[\"", level + 1, ' '); - PHPWRITE(hash_key->arKey, hash_key->nKeyLength - 1); - php_printf("\"]=>\n"); + php_printf("%*c[", level + 1, ' '); + if (hash_key->type == IS_STRING) { + php_printf("\""); + PHPWRITE(hash_key->u.string, hash_key->nKeyLength - 1); + php_printf("\""); + } else if (hash_key->type == IS_BINARY) { + php_printf("b\""); + PHPWRITE(hash_key->u.string, hash_key->nKeyLength - 1); + php_printf("\""); + } else if (hash_key->type == IS_UNICODE) { + php_printf("u"); + php_var_dump_unicode(hash_key->u.unicode, hash_key->nKeyLength-1, 1 TSRMLS_CC); + } + php_printf("]=>\n"); } - php_debug_zval_dump(zv, level + 2 TSRMLS_CC); + php_debug_zval_dump(zv, level + 2, 1 TSRMLS_CC); return 0; } -PHPAPI void php_debug_zval_dump(zval **struc, int level TSRMLS_DC) +PHPAPI void php_debug_zval_dump(zval **struc, int level, int verbose TSRMLS_DC) { HashTable *myht = NULL; char *class_name; @@ -250,6 +387,17 @@ PHPAPI void php_debug_zval_dump(zval **struc, int level TSRMLS_DC) PHPWRITE(Z_STRVAL_PP(struc), Z_STRLEN_PP(struc)); php_printf("\" refcount(%u)\n", Z_REFCOUNT_PP(struc)); break; + case IS_BINARY: + php_printf("%sbinary(%d) \"", COMMON, Z_STRLEN_PP(struc)); + PHPWRITE(Z_STRVAL_PP(struc), Z_STRLEN_PP(struc)); + php_printf("\" refcount(%u)\n", Z_REFCOUNT_PP(struc)); + break; + case IS_UNICODE: + /* temporary, for debugging */ + php_printf("%sunicode(%d) ", COMMON, u_countChar32((*struc)->value.ustr.val, (*struc)->value.ustr.len)); + php_var_dump_unicode((*struc)->value.ustr.val, (*struc)->value.ustr.len, verbose TSRMLS_CC); + php_printf("\" refcount(%u)\n", Z_REFCOUNT_PP(struc)); + break; case IS_ARRAY: myht = Z_ARRVAL_PP(struc); if (myht->nApplyCount > 1) { @@ -266,7 +414,7 @@ PHPAPI void php_debug_zval_dump(zval **struc, int level TSRMLS_DC) } ce = Z_OBJCE(**struc); Z_OBJ_HANDLER(**struc, get_class_name)(*struc, &class_name, &class_name_len, 0 TSRMLS_CC); - php_printf("%sobject(%s)#%d (%d) refcount(%u){\n", COMMON, class_name, Z_OBJ_HANDLE_PP(struc), myht ? zend_hash_num_elements(myht) : 0, Z_REFCOUNT_PP(struc)); + php_printf("%sobject(%v)#%d (%d) refcount(%u){\n", COMMON, class_name, Z_OBJ_HANDLE_PP(struc), myht ? zend_hash_num_elements(myht) : 0, Z_REFCOUNT_PP(struc)); efree(class_name); head_done: if (myht) { @@ -309,7 +457,7 @@ PHP_FUNCTION(debug_zval_dump) } for (i=0; i<argc; i++) - php_debug_zval_dump(args[i], 1 TSRMLS_CC); + php_debug_zval_dump(args[i], 1, 1 TSRMLS_CC); efree(args); } @@ -328,13 +476,18 @@ static int php_array_element_export(zval **zv, int num_args, va_list args, zend_ if (hash_key->nKeyLength==0) { /* numeric key */ php_printf("%*c%ld => ", level + 1, ' ', hash_key->h); } else { /* string key */ - char *key; - int key_len; - key = php_addcslashes(hash_key->arKey, hash_key->nKeyLength - 1, &key_len, 0, "'\\", 2 TSRMLS_CC); php_printf("%*c'", level + 1, ' '); - PHPWRITE(key, key_len); + if (hash_key->type == IS_UNICODE) { + php_printf("%r", hash_key->u.unicode); + } else { + char *key; + int key_len; + + key = php_addcslashes(hash_key->u.string, hash_key->nKeyLength - 1, &key_len, 0, "'\\", 2 TSRMLS_CC); + PHPWRITE(key, key_len); + efree(key); + } php_printf("' => "); - efree(key); } php_var_export(zv, level + 2 TSRMLS_CC); PUTS (",\n"); @@ -351,7 +504,7 @@ static int php_object_element_export(zval **zv, int num_args, va_list args, zend if (hash_key->nKeyLength != 0) { php_printf("%*c", level + 1, ' '); - zend_unmangle_property_name(hash_key->arKey, &class_name, &prop_name); + zend_u_unmangle_property_name(hash_key->type, hash_key->u.string, &class_name, &prop_name); if (class_name) { if (class_name[0] == '*') { php_printf("protected"); @@ -361,7 +514,7 @@ static int php_object_element_export(zval **zv, int num_args, va_list args, zend } else { php_printf("public"); } - php_printf(" $%s = ", prop_name); + php_printf(" $%R = ", hash_key->type, prop_name); php_var_export(zv, level + 2 TSRMLS_CC); PUTS (";\n"); } @@ -389,6 +542,8 @@ PHPAPI void php_var_export(zval **struc, int level TSRMLS_DC) case IS_DOUBLE: php_printf("%.*G", (int) EG(precision), Z_DVAL_PP(struc)); break; + case IS_BINARY: + PUTS ("b"); case IS_STRING: tmp_str = php_addcslashes(Z_STRVAL_PP(struc), Z_STRLEN_PP(struc), &tmp_len, 0, "'\\", 2 TSRMLS_CC); PUTS ("'"); @@ -396,6 +551,14 @@ PHPAPI void php_var_export(zval **struc, int level TSRMLS_DC) PUTS ("'"); efree (tmp_str); break; + case IS_UNICODE: +/* TODO + tmp_str = php_addcslashes(Z_STRVAL_PP(struc), Z_STRLEN_PP(struc), &tmp_len, 0, "'\\", 2 TSRMLS_CC); +*/ + PUTS ("'"); + php_printf("%r", Z_USTRVAL_PP(struc)); + PUTS ("'"); + break; case IS_ARRAY: myht = Z_ARRVAL_PP(struc); if (level > 1) { @@ -414,7 +577,7 @@ PHPAPI void php_var_export(zval **struc, int level TSRMLS_DC) php_printf("\n%*c", level - 1, ' '); } Z_OBJ_HANDLER(**struc, get_class_name)(*struc, &class_name, &class_name_len, 0 TSRMLS_CC); - php_printf ("class %s {\n", class_name); + php_printf ("class %v {\n", class_name); efree(class_name); if (myht) { zend_hash_apply_with_arguments(myht, (apply_func_args_t) php_object_element_export, 1, level); @@ -516,6 +679,40 @@ static inline void php_var_serialize_string(smart_str *buf, char *str, int len) smart_str_appendl(buf, "\";", 2); } +static inline void php_var_serialize_binary(smart_str *buf, char *str, int len) +{ + smart_str_appendl(buf, "B:", 2); + smart_str_append_long(buf, len); + smart_str_appendl(buf, ":\"", 2); + smart_str_appendl(buf, str, len); + smart_str_appendl(buf, "\";", 2); +} + +static inline void php_var_serialize_ustr(smart_str *buf, UChar *ustr, int len) +{ + static const char hex[] = "0123456789abcdef"; + UChar32 c; + int32_t i; + + for(i=0; i<len; /* U16_NEXT post-increments */) { + U16_NEXT(ustr, i, len, c); + smart_str_appendl(buf, "\\u", 2); + smart_str_appendc(buf, hex[(c >> 12) & 0xf]); + smart_str_appendc(buf, hex[(c >> 8) & 0xf]); + smart_str_appendc(buf, hex[(c >> 4) & 0xf]); + smart_str_appendc(buf, hex[(c >> 0) & 0xf]); + } +} + +static inline void php_var_serialize_unicode(smart_str *buf, UChar *ustr, int len) +{ + smart_str_appendl(buf, "U:", 2); + smart_str_append_long(buf, len); + smart_str_appendl(buf, ":\"", 2); + php_var_serialize_ustr(buf, ustr, len); + smart_str_appendl(buf, "\";", 2); +} + static inline zend_bool php_var_serialize_class_name(smart_str *buf, zval **struc TSRMLS_DC) { PHP_CLASS_ATTRIBUTES; @@ -524,7 +721,11 @@ static inline zend_bool php_var_serialize_class_name(smart_str *buf, zval **stru smart_str_appendl(buf, "O:", 2); smart_str_append_long(buf, name_len); smart_str_appendl(buf, ":\"", 2); - smart_str_appendl(buf, class_name, name_len); + if (UG(unicode)) { + php_var_serialize_ustr(buf, (UChar*)class_name, name_len); + } else { + smart_str_appendl(buf, class_name, name_len); + } smart_str_appendl(buf, "\":", 2); PHP_CLEANUP_CLASS_ATTRIBUTES(); return incomplete_class; @@ -571,7 +772,7 @@ static void php_var_serialize_class(smart_str *buf, zval **struc, zval *retval_p zend_hash_get_current_data_ex(HASH_OF(retval_ptr), (void **) &name, &pos); - if (Z_TYPE_PP(name) != IS_STRING) { + if (Z_TYPE_PP(name) != (UG(unicode)?IS_UNICODE:IS_STRING)) { php_error_docref(NULL TSRMLS_CC, E_NOTICE, "__sleep should return an array only " "containing the names of instance-variables to " "serialize."); @@ -580,9 +781,13 @@ static void php_var_serialize_class(smart_str *buf, zval **struc, zval *retval_p smart_str_appendl(buf,"N;", 2); continue; } - if (zend_hash_find(Z_OBJPROP_PP(struc), Z_STRVAL_PP(name), - Z_STRLEN_PP(name) + 1, (void *) &d) == SUCCESS) { - php_var_serialize_string(buf, Z_STRVAL_PP(name), Z_STRLEN_PP(name)); + if (zend_u_hash_find(Z_OBJPROP_PP(struc), Z_TYPE_PP(name), Z_UNIVAL_PP(name), + Z_UNILEN_PP(name) + 1, (void *) &d) == SUCCESS) { + if (Z_TYPE_PP(name) == IS_UNICODE) { + php_var_serialize_unicode(buf, Z_USTRVAL_PP(name), Z_USTRLEN_PP(name)); + } else { + php_var_serialize_string(buf, Z_STRVAL_PP(name), Z_STRLEN_PP(name)); + } php_var_serialize_intern(buf, d, var_hash TSRMLS_CC); } else { zend_class_entry *ce; @@ -592,30 +797,46 @@ static void php_var_serialize_class(smart_str *buf, zval **struc, zval *retval_p int prop_name_length; do { - zend_mangle_property_name(&priv_name, &prop_name_length, ce->name, ce->name_length, + zend_u_mangle_property_name(&priv_name, &prop_name_length, Z_TYPE_PP(name), ce->name, ce->name_length, Z_STRVAL_PP(name), Z_STRLEN_PP(name), ce->type & ZEND_INTERNAL_CLASS); - if (zend_hash_find(Z_OBJPROP_PP(struc), priv_name, prop_name_length+1, (void *) &d) == SUCCESS) { - php_var_serialize_string(buf, priv_name, prop_name_length); + if (zend_u_hash_find(Z_OBJPROP_PP(struc), Z_TYPE_PP(name), priv_name, prop_name_length, (void *) &d) == SUCCESS) { + if (Z_TYPE_PP(name) == IS_UNICODE) { + php_var_serialize_unicode(buf, priv_name, prop_name_length-1); + } else { + php_var_serialize_string(buf, priv_name, prop_name_length-1); + } efree(priv_name); php_var_serialize_intern(buf, d, var_hash TSRMLS_CC); break; } efree(priv_name); - zend_mangle_property_name(&prot_name, &prop_name_length, "*", 1, + zend_u_mangle_property_name(&prot_name, &prop_name_length, Z_TYPE_PP(name), "*", 1, Z_STRVAL_PP(name), Z_STRLEN_PP(name), ce->type & ZEND_INTERNAL_CLASS); - if (zend_hash_find(Z_OBJPROP_PP(struc), prot_name, prop_name_length+1, (void *) &d) == SUCCESS) { - php_var_serialize_string(buf, prot_name, prop_name_length); + if (zend_u_hash_find(Z_OBJPROP_PP(struc), Z_TYPE_PP(name), prot_name, prop_name_length, (void *) &d) == SUCCESS) { + if (Z_TYPE_PP(name) == IS_UNICODE) { + php_var_serialize_unicode(buf, prot_name, prop_name_length-1); + } else { + php_var_serialize_string(buf, prot_name, prop_name_length-1); + } efree(prot_name); php_var_serialize_intern(buf, d, var_hash TSRMLS_CC); break; } efree(prot_name); php_error_docref(NULL TSRMLS_CC, E_NOTICE, "\"%s\" returned as member variable from __sleep() but does not exist", Z_STRVAL_PP(name)); - php_var_serialize_string(buf, Z_STRVAL_PP(name), Z_STRLEN_PP(name)); + if (Z_TYPE_PP(name) == IS_UNICODE) { + php_var_serialize_unicode(buf, Z_USTRVAL_PP(name), Z_USTRLEN_PP(name)); + } else { + php_var_serialize_string(buf, Z_STRVAL_PP(name), Z_STRLEN_PP(name)); + } php_var_serialize_intern(buf, &nvalp, var_hash TSRMLS_CC); } while (0); } else { - php_var_serialize_string(buf, Z_STRVAL_PP(name), Z_STRLEN_PP(name)); + if (Z_TYPE_PP(name) == IS_UNICODE) { + php_var_serialize_unicode(buf, Z_USTRVAL_PP(name), Z_USTRLEN_PP(name)); + } else { + php_var_serialize_string(buf, Z_STRVAL_PP(name), Z_STRLEN_PP(name)); + } php_var_serialize_intern(buf, &nvalp, var_hash TSRMLS_CC); } } @@ -675,6 +896,14 @@ static void php_var_serialize_intern(smart_str *buf, zval **struc, HashTable *va php_var_serialize_string(buf, Z_STRVAL_PP(struc), Z_STRLEN_PP(struc)); return; + case IS_BINARY: + php_var_serialize_binary(buf, Z_STRVAL_PP(struc), Z_STRLEN_PP(struc)); + return; + + case IS_UNICODE: + php_var_serialize_unicode(buf, Z_USTRVAL_PP(struc), Z_USTRLEN_PP(struc)); + return; + case IS_OBJECT: { zval *retval_ptr = NULL; zval fname; @@ -783,6 +1012,12 @@ static void php_var_serialize_intern(smart_str *buf, zval **struc, HashTable *va case HASH_KEY_IS_STRING: php_var_serialize_string(buf, key, key_len - 1); break; + case HASH_KEY_IS_BINARY: + php_var_serialize_binary(buf, key, key_len - 1); + break; + case HASH_KEY_IS_UNICODE: + php_var_serialize_unicode(buf, (UChar*)key, key_len - 1); + break; } /* we should still add element even if it's not OK, diff --git a/ext/standard/var_unserializer.c b/ext/standard/var_unserializer.c index ca20aa0411..4f152baf76 100644 --- a/ext/standard/var_unserializer.c +++ b/ext/standard/var_unserializer.c @@ -282,7 +282,7 @@ static inline int object_custom(UNSERIALIZE_PARAMETER, zend_class_entry *ce) long datalen; if(ce->unserialize == NULL) { - zend_error(E_WARNING, "Class %s has no unserializer", ce->name); + zend_error(E_WARNING, "Class %v has no unserializer", ce->name); return 0; } diff --git a/ext/standard/var_unserializer.re b/ext/standard/var_unserializer.re index 09d63d30bd..8012c406e6 100644 --- a/ext/standard/var_unserializer.re +++ b/ext/standard/var_unserializer.re @@ -286,7 +286,7 @@ static inline int object_custom(UNSERIALIZE_PARAMETER, zend_class_entry *ce) long datalen; if(ce->unserialize == NULL) { - zend_error(E_WARNING, "Class %s has no unserializer", ce->name); + zend_error(E_WARNING, "Class %v has no unserializer", ce->name); return 0; } diff --git a/ext/unicode/CREDITS b/ext/unicode/CREDITS new file mode 100644 index 0000000000..df6de5bf06 --- /dev/null +++ b/ext/unicode/CREDITS @@ -0,0 +1,2 @@ +unicode +Andrei Zmievski diff --git a/ext/unicode/EXPERIMENTAL b/ext/unicode/EXPERIMENTAL new file mode 100644 index 0000000000..6443e99646 --- /dev/null +++ b/ext/unicode/EXPERIMENTAL @@ -0,0 +1,5 @@ +this extension is experimental, +its functions may change their names +or move to extension all together +so do not rely to much on them +you have been warned! diff --git a/ext/unicode/config.m4 b/ext/unicode/config.m4 new file mode 100644 index 0000000000..139963b91a --- /dev/null +++ b/ext/unicode/config.m4 @@ -0,0 +1,13 @@ +dnl +dnl $ Id: $ +dnl + +PHP_ARG_ENABLE(unicode, whether to enable unicode functions, +[ --disable-unicode Disable Unicode API support]) + +if test "$PHP_UNICODE" != "no"; then + PHP_SUBST(UNICODE_SHARED_LIBADD) + AC_DEFINE(HAVE_UNICODE, 1, [ ]) + PHP_NEW_EXTENSION(unicode, unicode.c locale.c unicode_filter.c, $ext_shared) +fi + diff --git a/ext/unicode/config.w32 b/ext/unicode/config.w32 new file mode 100644 index 0000000000..915b84ba1f --- /dev/null +++ b/ext/unicode/config.w32 @@ -0,0 +1,9 @@ +// $ Id: $ +// vim:ft=javascript + +ARG_ENABLE('unicode' , 'ICU API extension', 'no'); +if (PHP_UNICODE) { + + EXTENSION("unicode", "unicode.c"); + AC_DEFINE('HAVE_UNICODE', 1, 'ICU API extension'); +} diff --git a/ext/unicode/locale.c b/ext/unicode/locale.c new file mode 100644 index 0000000000..ced73d0437 --- /dev/null +++ b/ext/unicode/locale.c @@ -0,0 +1,92 @@ +/* + +----------------------------------------------------------------------+ + | PHP Version 5 | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.0 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_0.txt. | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ + | Authors: Andrei Zmievski <andrei@php.net> | + +----------------------------------------------------------------------+ +*/ + +/* $ Id: $ */ + +#include "php_unicode.h" + +#if HAVE_UNICODE + +static void php_canonicalize_locale_id(char **target, int32_t *target_len, char *locale, UErrorCode *status) +{ + char *canonicalized = NULL; + int32_t canonicalized_len = 128; + + while (1) { + *status = U_ZERO_ERROR; + canonicalized = erealloc(canonicalized, canonicalized_len + 1); + canonicalized_len = uloc_canonicalize(locale, canonicalized, canonicalized_len, status); + if (*status != U_BUFFER_OVERFLOW_ERROR) { + break; + } + } + + canonicalized[canonicalized_len] = 0; + *target = canonicalized; + *target_len = canonicalized_len; +} + +PHP_FUNCTION(icu_loc_get_default) +{ + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "") == FAILURE) { + return; + } + + RETURN_STRING(UG(default_locale), 1); +} + +PHP_FUNCTION(icu_loc_set_default) +{ + char *locale; + int locale_len; + char *canonicalized = NULL; + UErrorCode status = U_ZERO_ERROR; + + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &locale, &locale_len) == FAILURE) { + return; + } + + php_canonicalize_locale_id(&canonicalized, &locale_len, locale, &status); + /* + * UTODO: is this right? canonicalization does not seem to perform locale + * validation. + */ + if (U_FAILURE(status)) { + php_error(E_WARNING, "Invalid locale: %s\n", locale); + RETURN_FALSE; + } + /* don't bother if locales are identical */ + if (!strcmp(UG(default_locale), canonicalized)) { + efree(canonicalized); + RETURN_FALSE; + } + efree(UG(default_locale)); + UG(default_locale) = canonicalized; + zend_reset_locale_deps(TSRMLS_C); + RETURN_TRUE; +} + +#endif /* HAVE_UNICODE */ + + +/* + * Local variables: + * tab-width: 4 + * c-basic-offset: 4 + * End: + * vim600: noet sw=4 ts=4 fdm=marker + * vim<600: noet sw=4 ts=4 + */ diff --git a/ext/unicode/package.xml b/ext/unicode/package.xml new file mode 100644 index 0000000000..4a0606b2c6 --- /dev/null +++ b/ext/unicode/package.xml @@ -0,0 +1,34 @@ +<?xml version="1.0" encoding="iso-8859-1"?> +<!DOCTYPE package SYSTEM "http://pear.php.net/dtd/package-1.0"> +<package> + <name>unicode</name> + <summary>ICU API extension</summary> + <license>PHP</license> + + <maintainers> + <maintainer> + <user>andrei</user> + <name>Andrei Zmievski</name> + <email>andrei@php.net</email> + <role>lead</role> + </maintainer> + </maintainers> + + <release> + <version>1.0</version> + <date>2005-04-18</date> + <state>unknown</state> + </release> + + <filelist> + <dir role="doc" name="/"> + <file role="doc">EXPERIMENTAL</file> + <file role="doc">CREDITS</file> + <file role="src">config.m4</file> + <file role="src">unicode.dsp</file> + <file role="src">config.w32</file> + <file role="src">unicode.c</file> + <file role="src">php_unicode.h</file> + </dir> + </filelist> +</package> diff --git a/ext/unicode/php_unicode.h b/ext/unicode/php_unicode.h new file mode 100644 index 0000000000..40b5f3670a --- /dev/null +++ b/ext/unicode/php_unicode.h @@ -0,0 +1,79 @@ +/* + +----------------------------------------------------------------------+ + | This source file is subject to version 3.0 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_0.txt. | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ + | Authors: Andrei Zmievski <andrei@php.net> | + +----------------------------------------------------------------------+ +*/ + +/* $ Id: $ */ + +#ifndef PHP_UNICODE_H +#define PHP_UNICODE_H + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <php.h> + +#ifdef HAVE_UNICODE + +#include <php_ini.h> +#include <SAPI.h> +#include <ext/standard/info.h> + + +extern zend_module_entry unicode_module_entry; +#define phpext_unicode_ptr &unicode_module_entry + +#ifdef PHP_WIN32 +#define PHP_UNICODE_API __declspec(dllexport) +#else +#define PHP_UNICODE_API +#endif + +PHP_MINIT_FUNCTION(unicode); +PHP_MSHUTDOWN_FUNCTION(unicode); +PHP_RINIT_FUNCTION(unicode); +PHP_RSHUTDOWN_FUNCTION(unicode); +PHP_MINFO_FUNCTION(unicode); + +#ifdef ZTS +#include "TSRM.h" +#endif + +PHP_FUNCTION(icu_loc_get_default); +PHP_FUNCTION(icu_loc_set_default); + +extern php_stream_filter_factory php_unicode_filter_factory; + +#ifdef __cplusplus +} // extern "C" +#endif + +#include <zend_unicode.h> +#include <unicode/uloc.h> +#endif /* PHP_HAVE_UNICODE */ + +#endif /* PHP_UNICODE_H */ + + +/* + * Local variables: + * tab-width: 4 + * c-basic-offset: 4 + * End: + * vim600: noet sw=4 ts=4 fdm=marker + * vim<600: noet sw=4 ts=4 + */ diff --git a/ext/unicode/unicode.c b/ext/unicode/unicode.c new file mode 100644 index 0000000000..77c102228d --- /dev/null +++ b/ext/unicode/unicode.c @@ -0,0 +1,120 @@ +/* + +----------------------------------------------------------------------+ + | PHP Version 5 | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.0 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_0.txt. | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ + | Authors: Andrei Zmievski <andrei@php.net> | + +----------------------------------------------------------------------+ +*/ + +/* $ Id: $ */ + +#include "php_unicode.h" + +#if HAVE_UNICODE + +/* {{{ unicode_functions[] */ +function_entry unicode_functions[] = { + PHP_FE(icu_loc_get_default, NULL) + PHP_FE(icu_loc_set_default, NULL) + { NULL, NULL, NULL } +}; +/* }}} */ + + +/* {{{ unicode_module_entry + */ +zend_module_entry unicode_module_entry = { + STANDARD_MODULE_HEADER, + "unicode", + unicode_functions, + PHP_MINIT(unicode), /* Replace with NULL if there is nothing to do at php startup */ + PHP_MSHUTDOWN(unicode), /* Replace with NULL if there is nothing to do at php shutdown */ + PHP_RINIT(unicode), /* Replace with NULL if there is nothing to do at request start */ + PHP_RSHUTDOWN(unicode), /* Replace with NULL if there is nothing to do at request end */ + PHP_MINFO(unicode), + "1.0", + STANDARD_MODULE_PROPERTIES +}; +/* }}} */ + +#ifdef COMPILE_DL_UNICODE +ZEND_GET_MODULE(unicode) +#endif + + +/* {{{ PHP_MINIT_FUNCTION */ +PHP_MINIT_FUNCTION(unicode) +{ + if (php_stream_filter_register_factory("unicode.*", &php_unicode_filter_factory TSRMLS_CC) == FAILURE) { + return FAILURE; + } + /* add your stuff here */ + + return SUCCESS; +} +/* }}} */ + + +/* {{{ PHP_MSHUTDOWN_FUNCTION */ +PHP_MSHUTDOWN_FUNCTION(unicode) +{ + if (php_stream_filter_unregister_factory("unicode.*" TSRMLS_CC) == FAILURE) { + return FAILURE; + } + /* add your stuff here */ + + + return SUCCESS; +} +/* }}} */ + + +/* {{{ PHP_RINIT_FUNCTION */ +PHP_RINIT_FUNCTION(unicode) +{ + return SUCCESS; +} +/* }}} */ + + +/* {{{ PHP_RSHUTDOWN_FUNCTION */ +PHP_RSHUTDOWN_FUNCTION(unicode) +{ + return SUCCESS; +} +/* }}} */ + + +/* {{{ PHP_MINFO_FUNCTION */ +PHP_MINFO_FUNCTION(unicode) +{ + php_info_print_box_start(0); + php_printf("ICU API extension\n"); + php_printf("Based on ICU library %s\n", U_COPYRIGHT_STRING); + php_printf("ICU Version %s\n", U_ICU_VERSION); + php_info_print_box_end(); + /* add your stuff here */ + +} +/* }}} */ + + +#endif /* HAVE_UNICODE */ + + +/* + * Local variables: + * tab-width: 4 + * c-basic-offset: 4 + * End: + * vim600: noet sw=4 ts=4 fdm=marker + * vim<600: noet sw=4 ts=4 + */ diff --git a/ext/unicode/unicode.dsp b/ext/unicode/unicode.dsp new file mode 100644 index 0000000000..efc8ab6eae --- /dev/null +++ b/ext/unicode/unicode.dsp @@ -0,0 +1,112 @@ +# Microsoft Developer Studio Project File - Name="unicode" - Package Owner=<4>
+# Microsoft Developer Studio Generated Build File, Format Version 6.00
+# ** DO NOT EDIT **
+
+# TARGTYPE "Win32 (x86) Dynamic-Link Library" 0x0102
+
+CFG=unicode - Win32 Debug_TS
+!MESSAGE This is not a valid makefile. To build this project using NMAKE,
+!MESSAGE use the Export Makefile command and run
+!MESSAGE
+!MESSAGE NMAKE /f "unicode.mak".
+!MESSAGE
+!MESSAGE You can specify a configuration when running NMAKE
+!MESSAGE by defining the macro CFG on the command line. For example:
+!MESSAGE
+!MESSAGE NMAKE /f "unicode.mak" CFG="unicode - Win32 Debug_TS"
+!MESSAGE
+!MESSAGE Possible choices for configuration are:
+!MESSAGE
+!MESSAGE "unicode - Win32 Release_TS" (based on "Win32 (x86) Dynamic-Link Library")
+!MESSAGE "unicode - Win32 Debug_TS" (based on "Win32 (x86) Dynamic-Link Library")
+!MESSAGE
+
+# Begin Project
+# PROP AllowPerConfigDependencies 0
+# PROP Scc_ProjName ""
+# PROP Scc_LocalPath ""
+CPP=cl.exe
+MTL=midl.exe
+RSC=rc.exe
+
+!IF "$(CFG)" == "unicode - Win32 Release_TS"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 0
+# PROP BASE Output_Dir "Release_TS"
+# PROP BASE Intermediate_Dir "Release_TS"
+# PROP BASE Target_Dir ""
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 0
+# PROP Output_Dir "Release_TS"
+# PROP Intermediate_Dir "Release_TS"
+# PROP Ignore_Export_Lib 0
+# PROP Target_Dir ""
+# ADD BASE CPP /nologo /MT /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "UNICODE_EXPORTS" /YX /FD /c
+# ADD CPP /nologo /MT /W3 /GX /O2 /I "..\.." /I "..\..\Zend" /I "..\..\TSRM" /I "..\..\main" /D "WIN32" /D "PHP_EXPORTS" /D "COMPILE_DL_UNICODE" /D ZTS=1 /D HAVE_UNICODE=1 /D ZEND_DEBUG=0 /D "NDEBUG" /D "_WINDOWS" /D "ZEND_WIN32" /D "PHP_WIN32" /YX /FD /c
+# ADD BASE MTL /nologo /D "NDEBUG" /mktyplib203 /win32
+# ADD MTL /nologo /D "NDEBUG" /mktyplib203 /win32
+# ADD BASE RSC /l 0x407 /d "NDEBUG"
+# ADD RSC /l 0x407 /d "NDEBUG"
+BSC32=bscmake.exe
+# ADD BASE BSC32 /nologo
+# ADD BSC32 /nologo
+LINK32=link.exe
+# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /dll /machine:I386
+# ADD LINK32 php4ts.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /dll /machine:I386 /out:"..\..\Release_TS\php_unicode.dll" /libpath:"..\..\Release_TS" /libpath:"..\..\Release_TS_Inline"
+
+!ELSEIF "$(CFG)" == "unicode - Win32 Debug_TS"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 1
+# PROP BASE Output_Dir "Debug_TS"
+# PROP BASE Intermediate_Dir "Debug_TS"
+# PROP BASE Target_Dir ""
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 1
+# PROP Output_Dir "Debug_TS"
+# PROP Intermediate_Dir "Debug_TS"
+# PROP Ignore_Export_Lib 0
+# PROP Target_Dir ""
+# ADD BASE CPP /nologo /MTd /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "UNICODE_EXPORTS" /YX /FD /GZ /c
+# ADD CPP /nologo /MTd /W3 /Gm /GX /ZI /Od /I "..\.." /I "..\..\Zend" /I "..\..\TSRM" /I "..\..\main" /D ZEND_DEBUG=1 /D "WIN32" /D "_DEBUG" /D "_WINDOWS" /D "PHP_EXPORTS" /D "COMPILE_DL_UNICODE" /D ZTS=1 /D "ZEND_WIN32" /D "PHP_WIN32" /D HAVE_UNICODE=1 /YX /FD /GZ /c
+# ADD BASE MTL /nologo /D "_DEBUG" /mktyplib203 /win32
+# ADD MTL /nologo /D "_DEBUG" /mktyplib203 /win32
+# ADD BASE RSC /l 0x407 /d "_DEBUG"
+# ADD RSC /l 0x407 /d "_DEBUG"
+BSC32=bscmake.exe
+# ADD BASE BSC32 /nologo
+# ADD BSC32 /nologo
+LINK32=link.exe
+# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /dll /debug /machine:I386 /pdbtype:sept
+# ADD LINK32 php4ts_debug.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /dll /debug /machine:I386 /out:"..\..\Debug_TS\php_unicode.dll" /pdbtype:sept /libpath:"..\..\Debug_TS"
+
+!ENDIF
+
+# Begin Target
+
+# Name "unicode - Win32 Release_TS"
+# Name "unicode - Win32 Debug_TS"
+
+# Begin Group "Source Files"
+
+# PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat"
+
+# Begin Source File
+
+SOURCE=.\unicode.c
+# End Source File
+
+# End Group
+
+# Begin Group "Header Files"
+
+# PROP Default_Filter "h;hpp;hxx;hm;inl"
+
+# Begin Source File
+
+SOURCE=.\php_unicode.h
+# End Source File
+# End Group
+# End Target
+# End Project
diff --git a/ext/unicode/unicode_filter.c b/ext/unicode/unicode_filter.c new file mode 100644 index 0000000000..fc0574ddc9 --- /dev/null +++ b/ext/unicode/unicode_filter.c @@ -0,0 +1,321 @@ +/* + +----------------------------------------------------------------------+ + | PHP Version 5 | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.0 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_0.txt. | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ + | Authors: Sara Golemon (pollita@php.net) | + +----------------------------------------------------------------------+ +*/ + +/* $Id$ */ + + +#include "php.h" +#include <unicode/ucnv.h> + +/* {{{ data structure */ +typedef struct _php_unicode_filter_data { + char is_persistent; + UConverter *conv; + + char to_unicode; +} php_unicode_filter_data; +/* }}} */ + +/* {{{ unicode.* filter implementation */ + +/* unicode.to.* -- Expects String -- Returns Unicode */ +static php_stream_filter_status_t php_unicode_to_string_filter( + php_stream *stream, + php_stream_filter *thisfilter, + php_stream_bucket_brigade *buckets_in, + php_stream_bucket_brigade *buckets_out, + size_t *bytes_consumed, + int flags + TSRMLS_DC) +{ + php_unicode_filter_data *data; + php_stream_filter_status_t exit_status = PSFS_FEED_ME; + size_t consumed = 0; + + if (!thisfilter || !thisfilter->abstract) { + /* Should never happen */ + return PSFS_ERR_FATAL; + } + + data = (php_unicode_filter_data *)(thisfilter->abstract); + while (buckets_in->head) { + php_stream_bucket *bucket = buckets_in->head; + UChar *src = bucket->buf.ustr.val; + + php_stream_bucket_unlink(bucket TSRMLS_CC); + if (!bucket->is_unicode) { + /* Already ASCII, can't really do anything with it */ + consumed += bucket->buf.str.len; + php_stream_bucket_append(buckets_out, bucket TSRMLS_CC); + exit_status = PSFS_PASS_ON; + continue; + } + + while (src < (bucket->buf.ustr.val + bucket->buf.ustr.len)) { + int remaining = bucket->buf.ustr.len - (src - bucket->buf.ustr.val); + char *destp, *destbuf; + int32_t destlen = UCNV_GET_MAX_BYTES_FOR_STRING(remaining, ucnv_getMaxCharSize(data->conv)); + UErrorCode errCode = U_ZERO_ERROR; + php_stream_bucket *new_bucket; + + destp = destbuf = (char *)pemalloc(destlen, data->is_persistent); + + ucnv_fromUnicode(data->conv, &destp, destbuf + destlen, (const UChar**)&src, src + remaining, NULL, FALSE, &errCode); + new_bucket = php_stream_bucket_new(stream, destbuf, destp - destbuf, 1, data->is_persistent TSRMLS_CC); + php_stream_bucket_append(buckets_out, new_bucket TSRMLS_CC); + exit_status = PSFS_PASS_ON; + } + consumed += UBYTES(bucket->buf.ustr.len); + php_stream_bucket_delref(bucket TSRMLS_CC); + } + + if (flags & PSFS_FLAG_FLUSH_CLOSE) { + UErrorCode errCode = U_ZERO_ERROR; + char d[64], *dest = d, *destp = d + 64; + /* Spit it out! */ + + ucnv_fromUnicode(data->conv, &dest, destp, NULL, NULL, NULL, TRUE, &errCode); + if (dest > d) { + php_stream_bucket *bucket = php_stream_bucket_new(stream, d, dest - d, 0, 0 TSRMLS_CC); + php_stream_bucket_append(buckets_out, bucket TSRMLS_CC); + exit_status = PSFS_PASS_ON; + } + } + + if (bytes_consumed) { + *bytes_consumed = consumed; + } + + return exit_status; +} + +/* unicode.from.* -- Expects Unicode -- Returns String */ +static php_stream_filter_status_t php_unicode_from_string_filter( + php_stream *stream, + php_stream_filter *thisfilter, + php_stream_bucket_brigade *buckets_in, + php_stream_bucket_brigade *buckets_out, + size_t *bytes_consumed, + int flags + TSRMLS_DC) +{ + php_unicode_filter_data *data; + php_stream_filter_status_t exit_status = PSFS_FEED_ME; + size_t consumed = 0; + + if (!thisfilter || !thisfilter->abstract) { + /* Should never happen */ + return PSFS_ERR_FATAL; + } + + data = (php_unicode_filter_data *)(thisfilter->abstract); + while (buckets_in->head) { + php_stream_bucket *bucket = buckets_in->head; + char *src = bucket->buf.str.val; + + php_stream_bucket_unlink(bucket TSRMLS_CC); + if (bucket->is_unicode) { + /* already in unicode, nothing to do */ + consumed += UBYTES(bucket->buf.ustr.len); + php_stream_bucket_append(buckets_out, bucket TSRMLS_CC); + exit_status = PSFS_PASS_ON; + continue; + } + + while (src < (bucket->buf.str.val + bucket->buf.str.len)) { + int remaining = bucket->buf.str.len - (src - bucket->buf.str.val); + UChar *destp, *destbuf; + int32_t destlen = UCNV_GET_MAX_BYTES_FOR_STRING(remaining, ucnv_getMaxCharSize(data->conv)); + UErrorCode errCode = U_ZERO_ERROR; + php_stream_bucket *new_bucket; + + destp = destbuf = (UChar *)pemalloc(destlen, data->is_persistent); + + ucnv_toUnicode(data->conv, &destp, destbuf + destlen, (const char**)&src, src + remaining, NULL, FALSE, &errCode); + + new_bucket = php_stream_bucket_new_unicode(stream, destbuf, destp - destbuf, 1, data->is_persistent TSRMLS_CC); + php_stream_bucket_append(buckets_out, new_bucket TSRMLS_CC); + exit_status = PSFS_PASS_ON; + } + consumed += bucket->buf.str.len; + php_stream_bucket_delref(bucket TSRMLS_CC); + } + + if (flags & PSFS_FLAG_FLUSH_CLOSE) { + UErrorCode errCode = U_ZERO_ERROR; + UChar d[64], *dest = d, *destp = d + 64; + /* Spit it out! */ + + ucnv_toUnicode(data->conv, &dest, destp, NULL, NULL, NULL, TRUE, &errCode); + if (dest > d) { + php_stream_bucket *bucket = php_stream_bucket_new_unicode(stream, d, dest - d, 0, 0 TSRMLS_CC); + php_stream_bucket_append(buckets_out, bucket TSRMLS_CC); + exit_status = PSFS_PASS_ON; + } + } + + if (bytes_consumed) { + *bytes_consumed = consumed; + } + + return exit_status; +} + +/* unicode.tidy.* -- Expects anything -- Returns whatever is preferred by subsequent filters + Can be used to "magically" fix-up bucket messes */ +static php_stream_filter_status_t php_unicode_tidy_filter( + php_stream *stream, + php_stream_filter *thisfilter, + php_stream_bucket_brigade *buckets_in, + php_stream_bucket_brigade *buckets_out, + size_t *bytes_consumed, + int flags + TSRMLS_DC) +{ + php_unicode_filter_data *data; + int prefer_unicode = php_stream_filter_output_prefer_unicode(thisfilter); + + if (!thisfilter || !thisfilter->abstract) { + /* Should never happen */ + return PSFS_ERR_FATAL; + } + + data = (php_unicode_filter_data *)(thisfilter->abstract); + + if (prefer_unicode) { + if (!data->to_unicode) { + ucnv_resetToUnicode(data->conv); + data->to_unicode = prefer_unicode; + } + return php_unicode_from_string_filter(stream, thisfilter, buckets_in, buckets_out, bytes_consumed, flags TSRMLS_CC); + } else { + if (data->to_unicode) { + ucnv_resetFromUnicode(data->conv); + data->to_unicode = prefer_unicode; + } + return php_unicode_to_string_filter(stream, thisfilter, buckets_in, buckets_out, bytes_consumed, flags TSRMLS_CC); + } +} + +static void php_unicode_filter_dtor(php_stream_filter *thisfilter TSRMLS_DC) +{ + if (thisfilter && thisfilter->abstract) { + php_unicode_filter_data *data = (php_unicode_filter_data *)thisfilter->abstract; + ucnv_close(data->conv); + pefree(data, data->is_persistent); + } +} + +static php_stream_filter_ops php_unicode_to_string_filter_ops = { + php_unicode_to_string_filter, + php_unicode_filter_dtor, + "unicode.to.*", + PSFO_FLAG_ACCEPTS_UNICODE | PSFO_FLAG_OUTPUTS_STRING +}; + +static php_stream_filter_ops php_unicode_from_string_filter_ops = { + php_unicode_from_string_filter, + php_unicode_filter_dtor, + "unicode.from.*", + PSFO_FLAG_ACCEPTS_STRING | PSFO_FLAG_OUTPUTS_UNICODE +}; + +static php_stream_filter_ops php_unicode_tidy_filter_ops = { + php_unicode_tidy_filter, + php_unicode_filter_dtor, + "unicode.tidy.*", + PSFO_FLAG_ACCEPTS_ANY | PSFO_FLAG_OUTPUTS_ANY +}; +/* }}} */ + + +/* {{{ unicode.* factory */ + +static php_stream_filter *php_unicode_filter_create(const char *filtername, zval *filterparams, int persistent TSRMLS_DC) +{ + php_unicode_filter_data *data; + const char *charset, *direction; + php_stream_filter_ops *fops; + UErrorCode ucnvError = U_ZERO_ERROR; + char to_unicode = 0; + + if (strncasecmp(filtername, "unicode.", sizeof("unicode.") - 1)) { + /* Never happens */ + return NULL; + } + + direction = filtername + sizeof("unicode.") - 1; + if (strncmp(direction, "to.", sizeof("to.") - 1) == 0) { + fops = &php_unicode_to_string_filter_ops; + charset = direction + sizeof("to.") - 1; + } else if (strncmp(direction, "from.", sizeof("from.") - 1) == 0) { + fops = &php_unicode_from_string_filter_ops; + to_unicode = 1; + charset = direction + sizeof("from.") - 1; + } else if (strncmp(direction, "tidy.", sizeof("tidy.") - 1) == 0) { + fops = &php_unicode_tidy_filter_ops; + charset = direction + sizeof("tidy.") - 1; + } else if (strcmp(direction, "tidy") == 0) { + fops = &php_unicode_tidy_filter_ops; + charset = "utf8"; + } else { + /* Shouldn't happen */ + return NULL; + } + + /* Create this filter */ + data = (php_unicode_filter_data *)pecalloc(1, sizeof(php_unicode_filter_data), persistent); + if (!data) { + php_error_docref(NULL TSRMLS_CC, E_ERROR, "Failed allocating %d bytes.", sizeof(php_unicode_filter_data)); + return NULL; + } + + data->conv = ucnv_open(charset, &ucnvError); + data->to_unicode = to_unicode; + if (!data->conv) { + char *reason = "Unknown Error"; + pefree(data, persistent); + switch (ucnvError) { + case U_MEMORY_ALLOCATION_ERROR: + reason = "unable to allocate memory"; + break; + case U_FILE_ACCESS_ERROR: + reason = "file access error"; + break; + default: + ; + } + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to open charset converter, %s", reason); + return NULL; + } + + return php_stream_filter_alloc(fops, data, persistent); +} + +php_stream_filter_factory php_unicode_filter_factory = { + php_unicode_filter_create +}; +/* }}} */ + +/* + * Local variables: + * tab-width: 4 + * c-basic-offset: 4 + * End: + * vim600: sw=4 ts=4 fdm=marker + * vim<600: sw=4 ts=4 + */ + diff --git a/ext/xml/xml.c b/ext/xml/xml.c index eb607f9537..fc0627d252 100644 --- a/ext/xml/xml.c +++ b/ext/xml/xml.c @@ -436,7 +436,7 @@ static zval *xml_call_handler(xml_parser *parser, zval *handler, zend_function * zend_hash_index_find(Z_ARRVAL_P(handler), 1, (void **) &method) == SUCCESS && Z_TYPE_PP(obj) == IS_OBJECT && Z_TYPE_PP(method) == IS_STRING) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to call handler %s::%s()", Z_OBJCE_PP(obj)->name, Z_STRVAL_PP(method)); + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to call handler %v::%R()", Z_OBJCE_PP(obj)->name, Z_TYPE_PP(method), Z_UNIVAL_PP(method)); } else php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to call handler"); } diff --git a/ext/zlib/zlib_filter.c b/ext/zlib/zlib_filter.c index c14437afb0..9bdf74e66d 100644 --- a/ext/zlib/zlib_filter.c +++ b/ext/zlib/zlib_filter.c @@ -79,13 +79,20 @@ static php_stream_filter_status_t php_zlib_inflate_filter( while (buckets_in->head) { size_t bin = 0, desired; + bucket = buckets_in->head; + + if (bucket->is_unicode) { + /* inflation not allowed for unicode data */ + return PSFS_ERR_FATAL; + } + bucket = php_stream_bucket_make_writeable(buckets_in->head TSRMLS_CC); - while (bin < bucket->buflen) { - desired = bucket->buflen - bin; + while (bin < bucket->buf.str.len) { + desired = bucket->buf.str.len - bin; if (desired > data->inbuf_len) { desired = data->inbuf_len; } - memcpy(data->strm.next_in, bucket->buf + bin, desired); + memcpy(data->strm.next_in, bucket->buf.str.val + bin, desired); data->strm.avail_in = desired; status = inflate(&(data->strm), flags & PSFS_FLAG_FLUSH_CLOSE ? Z_FINISH : Z_SYNC_FLUSH); @@ -97,7 +104,6 @@ static php_stream_filter_status_t php_zlib_inflate_filter( desired -= data->strm.avail_in; /* desired becomes what we consumed this round through */ data->strm.next_in = data->inbuf; data->strm.avail_in = 0; - consumed += desired; bin += desired; if (data->strm.avail_out < data->outbuf_len) { @@ -110,6 +116,7 @@ static php_stream_filter_status_t php_zlib_inflate_filter( exit_status = PSFS_PASS_ON; } } + consumed += bucket->buf.str.len; php_stream_bucket_delref(bucket TSRMLS_CC); } @@ -151,7 +158,8 @@ static void php_zlib_inflate_dtor(php_stream_filter *thisfilter TSRMLS_DC) static php_stream_filter_ops php_zlib_inflate_ops = { php_zlib_inflate_filter, php_zlib_inflate_dtor, - "zlib.inflate" + "zlib.inflate", + PSFO_FLAG_ACCEPTS_STRING | PSFO_FLAG_OUTPUTS_STRING }; /* }}} */ @@ -186,14 +194,21 @@ static php_stream_filter_status_t php_zlib_deflate_filter( while (buckets_in->head) { size_t bin = 0, desired; - bucket = php_stream_bucket_make_writeable(buckets_in->head TSRMLS_CC); + bucket = buckets_in->head; - while (bin < bucket->buflen) { - desired = bucket->buflen - bin; + if (bucket->is_unicode) { + /* inflation not allowed for unicode data */ + return PSFS_ERR_FATAL; + } + + bucket = php_stream_bucket_make_writeable(bucket TSRMLS_CC); + + while (bin < bucket->buf.str.len) { + desired = bucket->buf.str.len - bin; if (desired > data->inbuf_len) { desired = data->inbuf_len; } - memcpy(data->strm.next_in, bucket->buf + bin, desired); + memcpy(data->strm.next_in, bucket->buf.str.val + bin, desired); data->strm.avail_in = desired; status = deflate(&(data->strm), flags & PSFS_FLAG_FLUSH_CLOSE ? Z_FULL_FLUSH : (flags & PSFS_FLAG_FLUSH_INC ? Z_SYNC_FLUSH : Z_NO_FLUSH)); @@ -205,7 +220,6 @@ static php_stream_filter_status_t php_zlib_deflate_filter( desired -= data->strm.avail_in; /* desired becomes what we consumed this round through */ data->strm.next_in = data->inbuf; data->strm.avail_in = 0; - consumed += desired; bin += desired; if (data->strm.avail_out < data->outbuf_len) { @@ -219,6 +233,7 @@ static php_stream_filter_status_t php_zlib_deflate_filter( exit_status = PSFS_PASS_ON; } } + consumed += bucket->buf.str.len; php_stream_bucket_delref(bucket TSRMLS_CC); } @@ -242,6 +257,7 @@ static php_stream_filter_status_t php_zlib_deflate_filter( if (bytes_consumed) { *bytes_consumed = consumed; } + return exit_status; } @@ -259,7 +275,8 @@ static void php_zlib_deflate_dtor(php_stream_filter *thisfilter TSRMLS_DC) static php_stream_filter_ops php_zlib_deflate_ops = { php_zlib_deflate_filter, php_zlib_deflate_dtor, - "zlib.deflate" + "zlib.deflate", + PSFO_FLAG_ACCEPTS_STRING | PSFO_FLAG_OUTPUTS_STRING }; /* }}} */ |
