diff options
Diffstat (limited to 'main')
| -rw-r--r-- | main/SAPI.c | 22 | ||||
| -rw-r--r-- | main/SAPI.h | 1 | ||||
| -rw-r--r-- | main/main.c | 61 | ||||
| -rw-r--r-- | main/output.c | 2 | ||||
| -rw-r--r-- | main/php.h | 16 | ||||
| -rwxr-xr-x | main/php_streams.h | 30 | ||||
| -rw-r--r-- | main/php_variables.c | 299 | ||||
| -rw-r--r-- | main/php_variables.h | 2 | ||||
| -rw-r--r-- | main/rfc1867.c | 766 | ||||
| -rw-r--r-- | main/spprintf.c | 58 | ||||
| -rw-r--r-- | main/spprintf.h | 4 | ||||
| -rw-r--r-- | main/streams/cast.c | 7 | ||||
| -rw-r--r-- | main/streams/filter.c | 379 | ||||
| -rw-r--r-- | main/streams/php_stream_context.h | 3 | ||||
| -rw-r--r-- | main/streams/php_stream_filter_api.h | 54 | ||||
| -rwxr-xr-x | main/streams/streams.c | 996 |
16 files changed, 2361 insertions, 339 deletions
diff --git a/main/SAPI.c b/main/SAPI.c index a444a6b898..e12717d338 100644 --- a/main/SAPI.c +++ b/main/SAPI.c @@ -225,11 +225,32 @@ SAPI_API SAPI_POST_READER_FUNC(sapi_read_standard_form_data) } +/* {{{ sapi_update_default_charset */ +SAPI_API void sapi_update_default_charset(TSRMLS_D) +{ + if (UG(unicode)) { + const char *canonical_name = NULL; + UErrorCode status = U_ZERO_ERROR; + + canonical_name = ucnv_getName(ZEND_U_CONVERTER(UG(output_encoding_conv)), &status); + SG(default_charset) = (char *)ucnv_getStandardName(canonical_name, "MIME", &status); + } else { + SG(default_charset) = zend_ini_string("default_charset", sizeof("default_charset"), 0); + } +} +/* }}} */ + + SAPI_API char *sapi_get_default_content_type(TSRMLS_D) { char *mimetype, *charset, *content_type; mimetype = SG(default_mimetype) ? SG(default_mimetype) : SAPI_DEFAULT_MIMETYPE; + /* + * Apache SAPI may invoke this function directly, before php_request_startup() is + * called, so we need to update the default charset explicitly. + */ + sapi_update_default_charset(TSRMLS_C); charset = SG(default_charset) ? SG(default_charset) : SAPI_DEFAULT_CHARSET; if (strncasecmp(mimetype, "text/", 5) == 0 && *charset) { @@ -271,6 +292,7 @@ SAPI_API size_t sapi_apply_default_charset(char **mimetype, size_t len TSRMLS_DC { char *charset, *newtype; size_t newlen; + charset = SG(default_charset) ? SG(default_charset) : SAPI_DEFAULT_CHARSET; if (*mimetype != NULL) { diff --git a/main/SAPI.h b/main/SAPI.h index 64f403d08e..4f069116c8 100644 --- a/main/SAPI.h +++ b/main/SAPI.h @@ -197,6 +197,7 @@ SAPI_API char *sapi_getenv(char *name, size_t name_len TSRMLS_DC); SAPI_API char *sapi_get_default_content_type(TSRMLS_D); SAPI_API void sapi_get_default_content_type_header(sapi_header_struct *default_header TSRMLS_DC); SAPI_API size_t sapi_apply_default_charset(char **mimetype, size_t len TSRMLS_DC); +SAPI_API void sapi_update_default_charset(TSRMLS_D); SAPI_API void sapi_activate_headers_only(TSRMLS_D); SAPI_API int sapi_get_fd(int *fd TSRMLS_DC); diff --git a/main/main.c b/main/main.c index 71bcd366cb..8c0b9f59f2 100644 --- a/main/main.c +++ b/main/main.c @@ -205,6 +205,30 @@ static PHP_INI_MH(OnUpdateTimeout) zend_set_timeout(EG(timeout_seconds)); return SUCCESS; } + +static ZEND_INI_MH(OnUpdateOutputEncoding) +{ + if (new_value) { + if (zend_set_converter_encoding(&UG(output_encoding_conv), new_value) == FAILURE) { + zend_error(E_CORE_ERROR, "Unrecognized encoding '%s' used for %s", new_value ? new_value : "null", entry->name); + return FAILURE; + } + } else { + if (UG(output_encoding_conv)) { + ucnv_close(UG(output_encoding_conv)); + } + UG(output_encoding_conv) = NULL; + } + if (UG(output_encoding_conv)) { + zend_set_converter_error_mode(UG(output_encoding_conv), UG(from_u_error_mode)); + zend_set_converter_subst_char(UG(output_encoding_conv), UG(subst_char), UG(subst_char_len)); + if (stage == ZEND_INI_STAGE_RUNTIME) { + sapi_update_default_charset(TSRMLS_C); + } + } + + return SUCCESS; +} /* }}} */ /* Need to convert to strings and make use of: @@ -289,6 +313,7 @@ PHP_INI_BEGIN() STD_PHP_INI_ENTRY("doc_root", NULL, PHP_INI_SYSTEM, OnUpdateStringUnempty, doc_root, php_core_globals, core_globals) STD_PHP_INI_ENTRY("default_charset", SAPI_DEFAULT_CHARSET, PHP_INI_ALL, OnUpdateString, default_charset, sapi_globals_struct,sapi_globals) STD_PHP_INI_ENTRY("default_mimetype", SAPI_DEFAULT_MIMETYPE, PHP_INI_ALL, OnUpdateString, default_mimetype, sapi_globals_struct,sapi_globals) + ZEND_INI_ENTRY("unicode.output_encoding", NULL, ZEND_INI_ALL, OnUpdateOutputEncoding) STD_PHP_INI_ENTRY("error_log", NULL, PHP_INI_ALL, OnUpdateString, error_log, php_core_globals, core_globals) STD_PHP_INI_ENTRY("extension_dir", PHP_EXTENSION_DIR, PHP_INI_SYSTEM, OnUpdateStringUnempty, extension_dir, php_core_globals, core_globals) STD_PHP_INI_ENTRY("include_path", PHP_INCLUDE_PATH, PHP_INI_ALL, OnUpdateStringUnempty, include_path, php_core_globals, core_globals) @@ -432,10 +457,10 @@ PHPAPI void php_verror(const char *docref, const char *params, int type, const c int buffer_len = 0; char *space; char *class_name = get_active_class_name(&space TSRMLS_CC); - char *function; + char *function = NULL; char *origin; char *message; - int is_function = 0; + char *stage; /* get error text into buffer and escape for html if necessary */ buffer_len = vspprintf(&buffer, 0, format, args); @@ -449,23 +474,22 @@ PHPAPI void php_verror(const char *docref, const char *params, int type, const c /* which function caused the problem if any at all */ if (php_during_module_startup()) { - function = "PHP Startup"; + stage = "PHP Startup"; } else if (php_during_module_shutdown()) { - function = "PHP Shutdown"; + stage = "PHP Shutdown"; } else { function = get_active_function_name(TSRMLS_C); - if (!function || !strlen(function)) { - function = "Unknown"; - } else { - is_function = 1; + if (function && !USTR_LEN(function)) { + stage = "Unknown"; + function = NULL; } } /* if we still have memory then format the origin */ - if (is_function) { - spprintf(&origin, 0, "%s%s%s(%s)", class_name, space, function, params); + if (function) { + spprintf(&origin, 0, "%v%s%v(%s)", class_name, space, function, params); } else { - spprintf(&origin, 0, "%s", function); + spprintf(&origin, 0, "%v", stage); } /* origin and buffer available, so lets come up with the error message */ @@ -475,8 +499,8 @@ PHPAPI void php_verror(const char *docref, const char *params, int type, const c } /* no docref given but function is known (the default) */ - if (!docref && is_function) { - spprintf(&docref_buf, 0, "function.%s", function); + if (!docref && function) { + spprintf(&docref_buf, 0, "function.%v", function); while((p = strchr(docref_buf, '_')) != NULL) { *p = '-'; } @@ -487,7 +511,7 @@ PHPAPI void php_verror(const char *docref, const char *params, int type, const c * - we show erroes in html mode OR * - the user wants to see the links anyway */ - if (docref && is_function && (PG(html_errors) || strlen(PG(docref_root)))) { + if (docref && function && (PG(html_errors) || strlen(PG(docref_root)))) { if (strncmp(docref, "http://", 7)) { /* We don't have 'http://' so we use docref_root */ @@ -1061,6 +1085,8 @@ int php_request_startup(TSRMLS_D) /* We turn this off in php_execute_script() */ /* PG(during_request_startup) = 0; */ + sapi_update_default_charset(TSRMLS_C); + php_hash_environment(TSRMLS_C); zend_activate_modules(TSRMLS_C); PG(modules_activated)=1; @@ -1315,6 +1341,7 @@ int php_module_startup(sapi_module_struct *sf, zend_module_entry *additional_mod zend_utility_values zuv; int module_number=0; /* for REGISTER_INI_ENTRIES() */ char *php_os; + zend_bool orig_unicode; #ifdef ZTS zend_executor_globals *executor_globals; void ***tsrm_ls; @@ -1423,6 +1450,9 @@ int php_module_startup(sapi_module_struct *sf, zend_module_entry *additional_mod REGISTER_INI_ENTRIES(); zend_register_standard_ini_entries(TSRMLS_C); + orig_unicode = UG(unicode); + UG(unicode) = 0; + /* Disable realpath cache if safe_mode or open_basedir are set */ if (PG(safe_mode) || (PG(open_basedir) && *PG(open_basedir))) { CWDG(realpath_cache_size_limit) = 0; @@ -1507,9 +1537,8 @@ int php_module_startup(sapi_module_struct *sf, zend_module_entry *additional_mod /* start Zend extensions */ zend_startup_extensions(); -#ifdef ZTS + UG(unicode) = orig_unicode; zend_post_startup(TSRMLS_C); -#endif module_initialized = 1; sapi_deactivate(TSRMLS_C); diff --git a/main/output.c b/main/output.c index b197cb48b5..af4f6a5ed9 100644 --- a/main/output.c +++ b/main/output.c @@ -516,7 +516,7 @@ static int php_ob_init(uint initial_size, uint block_size, zval *output_handler, } } } else if (output_handler && output_handler->type == IS_OBJECT) { - php_error_docref(NULL TSRMLS_CC, E_ERROR, "No method name given: use ob_start(array($object,'method')) to specify instance $object and the name of a method of class %s to use as output handler", Z_OBJCE_P(output_handler)->name); + php_error_docref(NULL TSRMLS_CC, E_ERROR, "No method name given: use ob_start(array($object,'method')) to specify instance $object and the name of a method of class %v to use as output handler", Z_OBJCE_P(output_handler)->name); result = FAILURE; } else { result = php_ob_init_named(initial_size, block_size, OB_DEFAULT_HANDLER_NAME, NULL, chunk_size, erase TSRMLS_CC); diff --git a/main/php.h b/main/php.h index b40585fd9a..1ad05e46ce 100644 --- a/main/php.h +++ b/main/php.h @@ -270,10 +270,9 @@ ssize_t pread(int, void *, size_t, off64_t); BEGIN_EXTERN_C() void phperror(char *error); PHPAPI int php_write(void *buf, uint size TSRMLS_DC); -PHPAPI int php_printf(const char *format, ...) PHP_ATTRIBUTE_FORMAT(printf, 1, - 2); +PHPAPI int php_printf(const char *format, ...); PHPAPI void php_log_err(char *log_message TSRMLS_DC); -int Debug(char *format, ...) PHP_ATTRIBUTE_FORMAT(printf, 1, 2); +int Debug(char *format, ...); int cfgparse(void); END_EXTERN_C() @@ -289,7 +288,7 @@ BEGIN_EXTERN_C() PHPAPI void php_set_error_handling(error_handling_t error_handling, zend_class_entry *exception_class TSRMLS_DC); #define php_std_error_handling() php_set_error_handling(EH_NORMAL, NULL TSRMLS_CC) -PHPAPI void php_verror(const char *docref, const char *params, int type, const char *format, va_list args TSRMLS_DC) PHP_ATTRIBUTE_FORMAT(printf, 4, 0); +PHPAPI void php_verror(const char *docref, const char *params, int type, const char *format, va_list args TSRMLS_DC); #ifdef ZTS #define PHP_ATTR_FMT_OFFSET 1 @@ -298,12 +297,9 @@ PHPAPI void php_verror(const char *docref, const char *params, int type, const c #endif /* PHPAPI void php_error(int type, const char *format, ...); */ -PHPAPI void php_error_docref0(const char *docref TSRMLS_DC, int type, const char *format, ...) - PHP_ATTRIBUTE_FORMAT(printf, PHP_ATTR_FMT_OFFSET + 3, PHP_ATTR_FMT_OFFSET + 4); -PHPAPI void php_error_docref1(const char *docref TSRMLS_DC, const char *param1, int type, const char *format, ...) - PHP_ATTRIBUTE_FORMAT(printf, PHP_ATTR_FMT_OFFSET + 4, PHP_ATTR_FMT_OFFSET + 5); -PHPAPI void php_error_docref2(const char *docref TSRMLS_DC, const char *param1, const char *param2, int type, const char *format, ...) - PHP_ATTRIBUTE_FORMAT(printf, PHP_ATTR_FMT_OFFSET + 5, PHP_ATTR_FMT_OFFSET + 6); +PHPAPI void php_error_docref0(const char *docref TSRMLS_DC, int type, const char *format, ...); +PHPAPI void php_error_docref1(const char *docref TSRMLS_DC, const char *param1, int type, const char *format, ...); +PHPAPI void php_error_docref2(const char *docref TSRMLS_DC, const char *param1, const char *param2, int type, const char *format, ...); END_EXTERN_C() #define php_error_docref php_error_docref0 diff --git a/main/php_streams.h b/main/php_streams.h index 30d254dfcf..1b1df1d1d0 100755 --- a/main/php_streams.h +++ b/main/php_streams.h @@ -208,10 +208,9 @@ struct _php_stream { /* buffer */ off_t position; /* of underlying stream */ - unsigned char *readbuf; - size_t readbuflen; - off_t readpos; - off_t writepos; + + php_stream_bucket_brigade readbuf; + off_t readbuf_ofs, readbuf_avail; /* how much data to read when filling buffer */ size_t chunk_size; @@ -275,13 +274,26 @@ PHPAPI int _php_stream_seek(php_stream *stream, off_t offset, int whence TSRMLS_ PHPAPI off_t _php_stream_tell(php_stream *stream TSRMLS_DC); #define php_stream_tell(stream) _php_stream_tell((stream) TSRMLS_CC) +/* Convert using runtime_encoding if necessary -- return string */ PHPAPI size_t _php_stream_read(php_stream *stream, char *buf, size_t count TSRMLS_DC); #define php_stream_read(stream, buf, count) _php_stream_read((stream), (buf), (count) TSRMLS_CC) +/* Convert using runtime_encoding if necessary -- return unicode */ +PHPAPI size_t _php_stream_read_unicode(php_stream *stream, UChar *buf, int32_t size TSRMLS_DC); +#define php_stream_read_unicode(stream, buf, size) _php_stream_read_unicode((stream), (buf), (size) TSRMLS_CC) + +/* Read count data points (char or UChar) until buffered type changes -- return single type without conversion */ +PHPAPI void *_php_stream_u_read(php_stream *stream, void *buf, int32_t *pnum_bytes, int32_t *pnum_chars, int *pis_unicode TSRMLS_DC); +#define php_stream_u_read(stream, buf, pnum_bytes, pnum_chars, pis_unicode) \ + _php_stream_u_read((stream), (buf), (pnum_bytes), (pnum_chars), (pis_unicode) TSRMLS_CC) + PHPAPI size_t _php_stream_write(php_stream *stream, const char *buf, size_t count TSRMLS_DC); #define php_stream_write_string(stream, str) _php_stream_write(stream, str, strlen(str) TSRMLS_CC) #define php_stream_write(stream, buf, count) _php_stream_write(stream, (buf), (count) TSRMLS_CC) +PHPAPI size_t _php_stream_u_write(php_stream *stream, const UChar *buf, int32_t count TSRMLS_DC); +#define php_stream_u_write(stream, buf, count) _php_stream_u_write((stream), (buf), (count) TSRMLS_CC) + PHPAPI size_t _php_stream_printf(php_stream *stream TSRMLS_DC, const char *fmt, ...); /* php_stream_printf macro & function require TSRMLS_CC */ #define php_stream_printf _php_stream_printf @@ -298,16 +310,24 @@ PHPAPI int _php_stream_putc(php_stream *stream, int c TSRMLS_DC); PHPAPI int _php_stream_flush(php_stream *stream, int closing TSRMLS_DC); #define php_stream_flush(stream) _php_stream_flush((stream), 0 TSRMLS_CC) +PHPAPI void _php_stream_flush_readbuf(php_stream *stream TSRMLS_DC); +#define php_stream_flush_readbuf(stream) _php_stream_flush_readbuf((stream) TSRMLS_CC) + PHPAPI char *_php_stream_get_line(php_stream *stream, char *buf, size_t maxlen, size_t *returned_len TSRMLS_DC); #define php_stream_gets(stream, buf, maxlen) _php_stream_get_line((stream), (buf), (maxlen), NULL TSRMLS_CC) - #define php_stream_get_line(stream, buf, maxlen, retlen) _php_stream_get_line((stream), (buf), (maxlen), (retlen) TSRMLS_CC) PHPAPI char *php_stream_get_record(php_stream *stream, size_t maxlen, size_t *returned_len, char *delim, size_t delim_len TSRMLS_DC); +PHPAPI UChar *_php_stream_u_get_line(php_stream *stream, UChar *buf, int32_t *pmax_u16, int32_t *pmax_chars, int *pis_unicode TSRMLS_DC); +#define php_stream_u_get_line(stream, buf, max_u16, max_chars, is_unicode) _php_stream_u_get_line((stream), (buf), (max_u16), (max_chars), (is_unicode) TSRMLS_CC) + /* CAREFUL! this is equivalent to puts NOT fputs! */ PHPAPI int _php_stream_puts(php_stream *stream, char *buf TSRMLS_DC); #define php_stream_puts(stream, buf) _php_stream_puts((stream), (buf) TSRMLS_CC) +PHPAPI int _php_stream_will_read_unicode(php_stream *stream TSRMLS_DC); +#define php_stream_will_read_unicode(stream) _php_stream_will_read_unicode((stream) TSRMLS_CC); + PHPAPI int _php_stream_stat(php_stream *stream, php_stream_statbuf *ssb TSRMLS_DC); #define php_stream_stat(stream, ssb) _php_stream_stat((stream), (ssb) TSRMLS_CC) diff --git a/main/php_variables.c b/main/php_variables.c index 507f0526ba..3562dd4d24 100644 --- a/main/php_variables.c +++ b/main/php_variables.c @@ -57,6 +57,25 @@ PHPAPI void php_register_variable_safe(char *var, char *strval, int str_len, zva php_register_variable_ex(var, &new_entry, track_vars_array TSRMLS_CC); } +PHPAPI void php_u_register_variable_safe(UChar *var, UChar *strval, int32_t str_len, zval *track_vars_array TSRMLS_DC) +{ + zval new_entry; + assert(strval != NULL); + + /* Prepare value */ + Z_USTRLEN(new_entry) = str_len; + if (PG(magic_quotes_gpc)) { + /* UTODO implement php_u_addslashes() */ + //Z_USTRVAL(new_entry) = php_addslashes(strval, Z_USTRLEN(new_entry), &Z_USTRLEN(new_entry), 0 TSRMLS_CC); + Z_USTRVAL(new_entry) = eustrndup(strval, Z_USTRLEN(new_entry)); + } else { + Z_USTRVAL(new_entry) = eustrndup(strval, Z_USTRLEN(new_entry)); + } + Z_TYPE(new_entry) = IS_UNICODE; + + php_u_register_variable_ex(var, &new_entry, track_vars_array TSRMLS_CC); +} + PHPAPI void php_register_variable_ex(char *var, zval *val, zval *track_vars_array TSRMLS_DC) { char *p = NULL; @@ -212,31 +231,197 @@ plain_var: } } +PHPAPI void php_u_register_variable_ex(UChar *var, zval *val, pval *track_vars_array TSRMLS_DC) +{ + UChar *p = NULL; + UChar *ip; /* index pointer */ + UChar *index; + int32_t var_len, index_len; + zval *gpc_element, **gpc_element_p; + zend_bool is_array; + HashTable *symtable1=NULL; + + assert(var != NULL); + + if (track_vars_array) { + symtable1 = Z_ARRVAL_P(track_vars_array); + } else if (PG(register_globals)) { + symtable1 = EG(active_symbol_table); + } + if (!symtable1) { + /* Nothing to do */ + zval_dtor(val); + return; + } + + /* + * Prepare variable name + */ + ip = u_strchr(var, 0x5b /*'['*/); + if (ip) { + is_array = 1; + *ip = 0; + } else { + is_array = 0; + } + /* ignore leading spaces in the variable name */ + while (*var && *var==0x20 /*' '*/) { + var++; + } + var_len = u_strlen(var); + if (var_len==0) { /* empty variable name, or variable name with a space in it */ + zval_dtor(val); + return; + } + /* ensure that we don't have spaces or dots in the variable name (not binary safe) */ + for (p=var; *p; p++) { + switch(*p) { + case 0x20: /*' '*/ + case 0x2e: /*'.'*/ + *p=0x5f; /*'_'*/ + break; + } + } + + index = var; + index_len = var_len; + + while (1) { + if (is_array) { + UChar *escaped_index = NULL, *index_s; + int32_t new_idx_len = 0; + + ip++; + index_s = ip; + if (u_isspace(*ip)) { + ip++; + } + if (*ip==0x5d /*']'*/) { + index_s = NULL; + } else { + ip = u_strchr(ip, 0x5d /*']'*/); + if (!ip) { + /* PHP variables cannot contain '[' in their names, so we replace the character with a '_' */ + *(index_s - 1) = 0x5f; /*'_'*/ + + index_len = var_len = 0; + if (index) { + index_len = var_len = u_strlen(index); + } + goto plain_var; + return; + } + *ip = 0; + new_idx_len = u_strlen(index_s); + } + + if (!index) { + MAKE_STD_ZVAL(gpc_element); + array_init(gpc_element); + zend_hash_next_index_insert(symtable1, &gpc_element, sizeof(zval *), (void **) &gpc_element_p); + } else { + if (PG(magic_quotes_gpc) && (index!=var)) { + /* UTODO fix for magic_quotes_gpc case */ + /* no need to addslashes() the index if it's the main variable name */ + //escaped_index = php_addslashes(index, index_len, &index_len, 0 TSRMLS_CC); + escaped_index = index; + } else { + escaped_index = index; + } + if (zend_u_symtable_find(symtable1, IS_UNICODE, escaped_index, index_len+1, (void **) &gpc_element_p)==FAILURE + || Z_TYPE_PP(gpc_element_p) != IS_ARRAY) { + MAKE_STD_ZVAL(gpc_element); + array_init(gpc_element); + zend_u_symtable_update(symtable1, IS_UNICODE, escaped_index, index_len+1, &gpc_element, sizeof(zval *), (void **) &gpc_element_p); + } + if (index!=escaped_index) { + efree(escaped_index); + } + } + symtable1 = Z_ARRVAL_PP(gpc_element_p); + /* ip pointed to the '[' character, now obtain the key */ + index = index_s; + index_len = new_idx_len; + + ip++; + if (*ip==0x5b /*'['*/) { + is_array = 1; + *ip = 0; + } else { + is_array = 0; + } + } else { +plain_var: + MAKE_STD_ZVAL(gpc_element); + gpc_element->value = val->value; + Z_TYPE_P(gpc_element) = Z_TYPE_P(val); + if (!index) { + zend_hash_next_index_insert(symtable1, &gpc_element, sizeof(zval *), (void **) &gpc_element_p); + } else { + /* UTODO fix for php_addslashes case */ + //char *escaped_index = php_addslashes(index, index_len, &index_len, 0 TSRMLS_CC); + UChar *escaped_index = index; + zend_u_symtable_update(symtable1, IS_UNICODE, escaped_index, index_len+1, &gpc_element, sizeof(zval *), (void **) &gpc_element_p); + //efree(escaped_index); + } + break; + } + } +} + SAPI_API SAPI_POST_HANDLER_FUNC(php_std_post_handler) { char *var, *val; char *strtok_buf = NULL; zval *array_ptr = (zval *) arg; + UConverter *input_conv = UG(http_input_encoding_conv); if (SG(request_info).post_data == NULL) { return; } + if (!input_conv) { + input_conv = ZEND_U_CONVERTER(UG(output_encoding_conv)); + } + var = php_strtok_r(SG(request_info).post_data, "&", &strtok_buf); while (var) { val = strchr(var, '='); if (val) { /* have a value */ - unsigned int val_len, new_val_len; - - *val++ = '\0'; - php_url_decode(var, strlen(var)); - val_len = php_url_decode(val, strlen(val)); - val = estrndup(val, val_len); - if (sapi_module.input_filter(PARSE_POST, var, &val, val_len, &new_val_len TSRMLS_CC)) { - php_register_variable_safe(var, val, new_val_len, array_ptr TSRMLS_CC); + if (UG(unicode)) { + UChar *u_var, *u_val; + int32_t u_var_len, u_val_len; + int32_t var_len; + int32_t val_len; + UErrorCode status1 = U_ZERO_ERROR, status2 = U_ZERO_ERROR; + + *val++ = '\0'; + var_len = strlen(var); + php_url_decode(var, var_len); + val_len = php_url_decode(val, strlen(val)); + zend_convert_to_unicode(input_conv, &u_var, &u_var_len, var, var_len, &status1); + zend_convert_to_unicode(input_conv, &u_val, &u_val_len, val, val_len, &status2); + if (U_SUCCESS(status1) && U_SUCCESS(status2)) { + /* UTODO add input filtering */ + php_u_register_variable_safe(u_var, u_val, u_val_len, array_ptr TSRMLS_CC); + } else { + /* UTODO set a user-accessible flag to indicate that conversion failed? */ + } + efree(u_var); + efree(u_val); + } else { + unsigned int val_len, new_val_len; + + *val++ = '\0'; + php_url_decode(var, strlen(var)); + val_len = php_url_decode(val, strlen(val)); + val = estrndup(val, val_len); + if (sapi_module.input_filter(PARSE_POST, var, &val, val_len, &new_val_len TSRMLS_CC)) { + php_register_variable_safe(var, val, new_val_len, array_ptr TSRMLS_CC); + } + efree(val); } - efree(val); } var = php_strtok_r(NULL, "&", &strtok_buf); } @@ -256,6 +441,7 @@ SAPI_API SAPI_TREAT_DATA_FUNC(php_default_treat_data) zval *array_ptr; int free_buffer = 0; char *strtok_buf = NULL; + UConverter *input_conv = UG(http_input_encoding_conv); switch (arg) { case PARSE_POST: @@ -330,34 +516,91 @@ SAPI_API SAPI_TREAT_DATA_FUNC(php_default_treat_data) break; } + if (!input_conv) { + input_conv = ZEND_U_CONVERTER(UG(output_encoding_conv)); + } + var = php_strtok_r(res, separator, &strtok_buf); while (var) { - val = strchr(var, '='); - if (val) { /* have a value */ - int val_len; - unsigned int new_val_len; + int32_t var_len; + val = strchr(var, '='); + if (val) { *val++ = '\0'; - php_url_decode(var, strlen(var)); - val_len = php_url_decode(val, strlen(val)); - val = estrndup(val, val_len); - if (sapi_module.input_filter(arg, var, &val, val_len, &new_val_len TSRMLS_CC)) { - php_register_variable_safe(var, val, new_val_len, array_ptr TSRMLS_CC); + } + var_len = strlen(var); + php_url_decode(var, var_len); + + if (UG(unicode)) { + UChar *u_var, *u_val; + int32_t u_var_len, u_val_len; + UErrorCode status = U_ZERO_ERROR; + + zend_convert_to_unicode(input_conv, &u_var, &u_var_len, var, var_len, &status); + if (U_FAILURE(status)) { + /* UTODO set a user-accessible flag to indicate that conversion failed? */ + efree(u_var); + goto next_var; + } + + if (val) { /* have a value */ + int val_len; + unsigned int new_val_len; + + val_len = php_url_decode(val, strlen(val)); + zend_convert_to_unicode(input_conv, &u_val, &u_val_len, val, val_len, &status); + if (U_FAILURE(status)) { + /* UTODO set a user-accessible flag to indicate that conversion failed? */ + efree(u_var); + efree(u_val); + goto next_var; + } + php_u_register_variable_safe(u_var, u_val, u_val_len, array_ptr TSRMLS_CC); + /* UTODO need to make input_filter Unicode aware */ + /* + if (sapi_module.input_filter(arg, var, &val, val_len, &new_val_len TSRMLS_CC)) { + php_register_variable_safe(var, val, new_val_len, array_ptr TSRMLS_CC); + } + */ + efree(u_var); + efree(u_val); + } else { + u_val_len = 0; + u_val = USTR_MAKE(""); + php_u_register_variable_safe(u_var, u_val, u_val_len, array_ptr TSRMLS_CC); + /* + if (sapi_module.input_filter(arg, var, &val, val_len, &new_val_len TSRMLS_CC)) { + php_register_variable_safe(var, val, new_val_len, array_ptr TSRMLS_CC); + } + */ + efree(u_val); } - efree(val); } else { - int val_len; - unsigned int new_val_len; - - php_url_decode(var, strlen(var)); - val_len = 0; - val = estrndup("", val_len); - if (sapi_module.input_filter(arg, var, &val, val_len, &new_val_len TSRMLS_CC)) { - php_register_variable_safe(var, val, new_val_len, array_ptr TSRMLS_CC); + if (val) { /* have a value */ + int val_len; + unsigned int new_val_len; + + *val++ = '\0'; + val_len = php_url_decode(val, strlen(val)); + val = estrndup(val, val_len); + if (sapi_module.input_filter(arg, var, &val, val_len, &new_val_len TSRMLS_CC)) { + php_register_variable_safe(var, val, new_val_len, array_ptr TSRMLS_CC); + } + efree(val); + } else { + int val_len; + unsigned int new_val_len; + + val_len = 0; + val = estrndup("", val_len); + if (sapi_module.input_filter(arg, var, &val, val_len, &new_val_len TSRMLS_CC)) { + php_register_variable_safe(var, val, new_val_len, array_ptr TSRMLS_CC); + } + efree(val); } - efree(val); } +next_var: var = php_strtok_r(NULL, separator, &strtok_buf); } diff --git a/main/php_variables.h b/main/php_variables.h index d76fb7ad3e..232b00c5ba 100644 --- a/main/php_variables.h +++ b/main/php_variables.h @@ -38,6 +38,8 @@ PHPAPI void php_register_variable(char *var, char *val, pval *track_vars_array T /* binary-safe version */ PHPAPI void php_register_variable_safe(char *var, char *val, int val_len, pval *track_vars_array TSRMLS_DC); PHPAPI void php_register_variable_ex(char *var, zval *val, pval *track_vars_array TSRMLS_DC); +PHPAPI void php_u_register_variable_safe(UChar *var, UChar *strval, int32_t str_len, zval *track_vars_array TSRMLS_DC); +PHPAPI void php_u_register_variable_ex(UChar *var, zval *val, pval *track_vars_array TSRMLS_DC); int php_hash_environment(TSRMLS_D); END_EXTERN_C() diff --git a/main/rfc1867.c b/main/rfc1867.c index 6bc69ca04e..a0cb6e9998 100644 --- a/main/rfc1867.c +++ b/main/rfc1867.c @@ -203,6 +203,67 @@ static void normalize_protected_variable(char *varname TSRMLS_DC) } +static void normalize_u_protected_variable(UChar *varname TSRMLS_DC) +{ + UChar *s=varname, *index=NULL, *indexend=NULL, *p; + + /* overjump leading space */ + while (*s == 0x20 /*' '*/) { + s++; + } + + /* and remove it */ + if (s != varname) { + u_memmove(varname, s, u_strlen(s)+1); + } + + for (p=varname; *p && *p != 0x5b /*'['*/; p++) { + switch(*p) { + case 0x20: /*' '*/ + case 0x2e: /*'.'*/ + *p=0x5f; /*'_'*/ + break; + } + } + + /* find index */ + index = u_strchr(varname, 0x5b /*'['*/); + if (index) { + index++; + s=index; + } else { + return; + } + + /* done? */ + while (index) { + + while (*index == 0x20 /*' '*/ || + *index == 0x0d /*'\r'*/ || + *index == 0x0a /*'\n'*/ || + *index == 0x09 /*'\t'*/) { + index++; + } + indexend = u_strchr(index, 0x5d /*']'*/); + indexend = indexend ? indexend + 1 : index + u_strlen(index); + + if (s != index) { + u_memmove(s, index, u_strlen(index)+1); + s += indexend-index; + } else { + s = indexend; + } + + if (*s == 0x5b /*'['*/) { + s++; + index = s; + } else { + index = NULL; + } + } + *s++ = 0; +} + static void add_protected_variable(char *varname TSRMLS_DC) { int dummy=1; @@ -219,6 +280,13 @@ static zend_bool is_protected_variable(char *varname TSRMLS_DC) } +static zend_bool is_u_protected_variable(UChar *varname TSRMLS_DC) +{ + normalize_u_protected_variable(varname TSRMLS_CC); + return zend_u_hash_exists(&PG(rfc1867_protected_variables), IS_UNICODE, varname, u_strlen(varname)+1); +} + + static void safe_php_register_variable(char *var, char *strval, zval *track_vars_array, zend_bool override_protection TSRMLS_DC) { if (override_protection || !is_protected_variable(var TSRMLS_CC)) { @@ -235,6 +303,14 @@ static void safe_php_register_variable_ex(char *var, zval *val, zval *track_vars } +static void safe_u_php_register_variable(UChar *var, UChar *str_val, int32_t str_len, zval *track_vars_array, zend_bool override_protection TSRMLS_DC) +{ + if (override_protection || !is_u_protected_variable(var TSRMLS_CC)) { + php_u_register_variable_safe(var, str_val, str_len, track_vars_array TSRMLS_CC); + } +} + + static void register_http_post_files_variable(char *strvar, char *val, zval *http_post_files, zend_bool override_protection TSRMLS_DC) { int register_globals = PG(register_globals); @@ -270,6 +346,32 @@ void destroy_uploaded_files_hash(TSRMLS_D) } +static inline UChar *php_ap_to_unicode(char *in, int32_t in_len, int32_t *out_len TSRMLS_DC) +{ + UErrorCode status = U_ZERO_ERROR; + UChar *buf; + int32_t buf_len = 0; + UConverter *input_conv = UG(http_input_encoding_conv); + + if (!input_conv) { + input_conv = ZEND_U_CONVERTER(UG(output_encoding_conv)); + } + + input_conv = ZEND_U_CONVERTER(UG(output_encoding_conv)); + zend_convert_to_unicode(input_conv, &buf, &buf_len, in, in_len, &status); + if (U_SUCCESS(status)) { + if (out_len) + *out_len = buf_len; + return buf; + } else { + efree(buf); + if (out_len) + *out_len = 0; + return NULL; + } +} + + /* * Following code is based on apache_multipart_buffer.c from libapreq-0.33 package. * @@ -306,6 +408,7 @@ typedef struct { static int fill_buffer(multipart_buffer *self TSRMLS_DC) { int bytes_to_read, total_read = 0, actual_read = 0; + static zend_bool done = 0; /* shift the existing data if necessary */ if (self->bytes_in_buffer > 0 && self->buf_begin != self->buffer) { @@ -331,6 +434,10 @@ static int fill_buffer(multipart_buffer *self TSRMLS_DC) total_read += actual_read; bytes_to_read -= actual_read; } else { + if (!done) { + fprintf(stderr, "\n###################\n%s\n#################\n", self->buffer); + done = 1; + } break; } } @@ -544,6 +651,45 @@ static char *php_mime_get_hdr_value(zend_llist header, char *key) } +static UChar *php_u_ap_getword(UChar **line, UChar stop TSRMLS_DC) +{ + UChar *pos = *line, quote; + UChar *res; + + while (*pos && *pos != stop) { + + if ((quote = *pos) == '"' || quote == '\'') { + ++pos; + while (*pos && *pos != quote) { + if (*pos == '\\' && pos[1] && pos[1] == quote) { + pos += 2; + } else { + ++pos; + } + } + if (*pos) { + ++pos; + } + } else ++pos; + + } + if (*pos == '\0') { + res = eustrdup(*line); + *line += u_strlen(*line); + return res; + } + + res = eustrndup(*line, pos - *line); + + while (*pos == stop) { + ++pos; + } + + *line = pos; + return res; +} + + static char *php_ap_getword(char **line, char stop) { char *pos = *line, quote; @@ -583,6 +729,25 @@ static char *php_ap_getword(char **line, char stop) } +static UChar *substring_u_conf(UChar *start, int32_t len, UChar quote TSRMLS_DC) +{ + UChar *result = eumalloc(len + 2); + UChar *resp = result; + int32_t i; + + for (i = 0; i < len; ++i) { + if (start[i] == '\\' && (start[i + 1] == '\\' || (quote && start[i + 1] == quote))) { + *resp++ = start[++i]; + } else { + *resp++ = start[i]; + } + } + + *resp++ = 0; + return result; +} + + static char *substring_conf(char *start, int len, char quote TSRMLS_DC) { char *result = emalloc(len + 2); @@ -614,6 +779,61 @@ static char *substring_conf(char *start, int len, char quote TSRMLS_DC) } +static UChar *php_u_ap_getword_conf(UChar **line TSRMLS_DC) +{ + UChar *str = *line, *strend, *res, quote; + + while (*str && u_isspace(*str)) { + ++str; + } + + if (!*str) { + *line = str; + return USTR_MAKE(""); + } + + if ((quote = *str) == '"' || quote == '\'') { + strend = str + 1; +look_for_quote: + while (*strend && *strend != quote) { + if (*strend == '\\' && strend[1] && strend[1] == quote) { + strend += 2; + } else { + ++strend; + } + } + if (*strend && *strend == quote) { + UChar p = *(strend + 1); + if (p != '\r' && p != '\n' && p != '\0') { + strend++; + goto look_for_quote; + } + } + + res = substring_u_conf(str + 1, strend - str - 1, quote TSRMLS_CC); + + if (*strend == quote) { + ++strend; + } + + } else { + + strend = str; + while (*strend && !u_isspace(*strend)) { + ++strend; + } + res = substring_u_conf(str, strend - str, 0 TSRMLS_CC); + } + + while (*strend && u_isspace(*strend)) { + ++strend; + } + + *line = strend; + return res; +} + + static char *php_ap_getword_conf(char **line TSRMLS_DC) { char *str = *line, *strend, *res, quote; @@ -769,13 +989,515 @@ static char *multipart_buffer_read_body(multipart_buffer *self TSRMLS_DC) return out; } +static SAPI_POST_HANDLER_FUNC(rfc1867_post_handler_unicode) +{ + char *boundary, *s=NULL, *boundary_end = NULL, *start_arr=NULL, *array_index=NULL; + char *temp_filename=NULL, *lbuf=NULL, *abuf=NULL; + int boundary_len=0, total_bytes=0, cancel_upload=0, is_arr_upload=0, array_len=0; + int max_file_size=0, skip_upload=0, anonindex=0, is_anonymous; + zval *http_post_files=NULL; HashTable *uploaded_files=NULL; +#if HAVE_MBSTRING && !defined(COMPILE_DL_MBSTRING) + int str_len = 0, num_vars = 0, num_vars_max = 2*10, *len_list = NULL; + char **val_list = NULL; +#endif + zend_bool magic_quotes_gpc; + multipart_buffer *mbuff; + zval *array_ptr = (zval *) arg; + FILE *fp; + zend_llist header; + UConverter *input_conv = UG(http_input_encoding_conv); + U_STRING_DECL(name_key, "name", 4); + U_STRING_DECL(filename_key, "filename", 8); + U_STRING_DECL(maxfilesize_key, "MAX_FILE_SIZE", 13); + static zend_bool did_string_init = FALSE; -/* - * The combined READER/HANDLER - * - */ + if (SG(request_info).content_length > SG(post_max_size)) { + sapi_module.sapi_error(E_WARNING, "POST Content-Length of %ld bytes exceeds the limit of %ld bytes", SG(request_info).content_length, SG(post_max_size)); + return; + } -SAPI_API SAPI_POST_HANDLER_FUNC(rfc1867_post_handler) + /* Get the boundary */ + boundary = strstr(content_type_dup, "boundary"); + if (!boundary || !(boundary=strchr(boundary, '='))) { + sapi_module.sapi_error(E_WARNING, "Missing boundary in multipart/form-data POST data"); + return; + } + + boundary++; + boundary_len = strlen(boundary); + + if (boundary[0] == '"') { + boundary++; + boundary_end = strchr(boundary, '"'); + if (!boundary_end) { + sapi_module.sapi_error(E_WARNING, "Invalid boundary in multipart/form-data POST data"); + return; + } + } else { + /* search for the end of the boundary */ + boundary_end = strchr(boundary, ','); + } + if (boundary_end) { + boundary_end[0] = '\0'; + boundary_len = boundary_end-boundary; + } + + /* Initialize the buffer */ + if (!(mbuff = multipart_buffer_new(boundary, boundary_len))) { + sapi_module.sapi_error(E_WARNING, "Unable to initialize the input buffer"); + return; + } + + /* Initialize $_FILES[] */ + zend_u_hash_init(&PG(rfc1867_protected_variables), 5, NULL, NULL, 0, 1); + + ALLOC_HASHTABLE(uploaded_files); + zend_u_hash_init(uploaded_files, 5, NULL, (dtor_func_t) free_estring, 0, 1); + SG(rfc1867_uploaded_files) = uploaded_files; + + ALLOC_ZVAL(http_post_files); + array_init(http_post_files); + INIT_PZVAL(http_post_files); + PG(http_globals)[TRACK_VARS_FILES] = http_post_files; + +#if HAVE_MBSTRING && !defined(COMPILE_DL_MBSTRING) + if (php_mb_encoding_translation(TSRMLS_C)) { + val_list = (char **)ecalloc(num_vars_max+2, sizeof(char *)); + len_list = (int *)ecalloc(num_vars_max+2, sizeof(int)); + } +#endif + zend_llist_init(&header, sizeof(mime_header_entry), (llist_dtor_func_t) php_free_hdr_entry, 0); + + if (!did_string_init) { + U_STRING_INIT(name_key, "name", 4); + U_STRING_INIT(filename_key, "filename", 8); + U_STRING_INIT(maxfilesize_key, "MAX_FILE_SIZE", 13); + did_string_init = TRUE; + } + + if (!input_conv) { + input_conv = ZEND_U_CONVERTER(UG(output_encoding_conv)); + } + + while (!multipart_buffer_eof(mbuff TSRMLS_CC)) + { + char buff[FILLUNIT]; + char *cd=NULL, *tmp=NULL; + int blen=0, wlen=0; + UChar *param = NULL, *filename = NULL; + int32_t param_len; + + zend_llist_clean(&header); + + if (!multipart_buffer_headers(mbuff, &header TSRMLS_CC)) { + SAFE_RETURN; + } + + if ((cd = php_mime_get_hdr_value(header, "Content-Disposition"))) { + UChar *pair = NULL; + UChar *ucd = NULL, *ucd_start = NULL; + int end=0; + + while (isspace(*cd)) { + ++cd; + } + + ucd_start = php_ap_to_unicode(cd, strlen(cd), NULL TSRMLS_CC); + if (!ucd) { + /* UTODO error condition */ + } + ucd = ucd_start; + + while (*ucd && (pair = php_u_ap_getword(&ucd, ';' TSRMLS_CC))) + { + UChar *key=NULL, *word = pair; + + while (u_isspace(*ucd)) { + ++ucd; + } + + if (u_strchr(pair, '=')) { + key = php_u_ap_getword(&pair, '=' TSRMLS_CC); + + if (!u_strcasecmp(key, name_key, 0)) { + if (param) { + efree(param); + } + param = php_u_ap_getword_conf(&pair TSRMLS_CC); + } else if (!u_strcasecmp(key, filename_key, 0)) { + if (filename) { + efree(filename); + } + filename = php_u_ap_getword_conf(&pair TSRMLS_CC); + } + } + if (key) { + efree(key); + } + efree(word); + } + + efree(ucd_start); + + /* Normal form variable, safe to read all data into memory */ + if (!filename && param) { + UChar *u_val; + int32_t u_val_len; + UErrorCode status = U_ZERO_ERROR; + + char *value = multipart_buffer_read_body(mbuff TSRMLS_CC); + unsigned int new_val_len; /* Dummy variable */ + + if (value) { + /* UTODO use 'charset' parameter for conversion */ + zend_convert_to_unicode(input_conv, &u_val, &u_val_len, value, strlen(value), &status); + if (U_FAILURE(status)) { + /* UTODO set a user-accessible flag to indicate that conversion failed? */ + goto var_done; + } + } else { + u_val = USTR_MAKE(""); + } + + /* UTODO use input filtering */ + //if (sapi_module.input_filter(PARSE_POST, param, &value, strlen(value), &new_val_len TSRMLS_CC)) { + safe_u_php_register_variable(param, u_val, u_val_len, array_ptr, 0 TSRMLS_CC); + //} + if (!u_strcasecmp(param, maxfilesize_key, 0)) { + max_file_size = zend_u_strtol(u_val, NULL, 10); + } + +var_done: + efree(param); + efree(value); + efree(u_val); + continue; + } + + /* If file_uploads=off, skip the file part */ + if (!PG(file_uploads)) { + skip_upload = 1; + } + + /* Return with an error if the posted data is garbled */ + if (!param && !filename) { + sapi_module.sapi_error(E_WARNING, "File Upload Mime headers garbled"); + SAFE_RETURN; + } + + if (!param) { + is_anonymous = 1; + param = eumalloc(MAX_SIZE_ANONNAME); + u_snprintf(param, MAX_SIZE_ANONNAME, "%u", anonindex++); + } else { + is_anonymous = 0; + } + param_len = u_strlen(param); + + /* New Rule: never repair potential malicious user input */ + if (!skip_upload) { + UChar32 c = 0; + int32_t ic; + long l = 0; + + for (ic = 0; ic < param_len; ) { + U16_NEXT(param, ic, param_len, c); + if (c == 0x5b /*'['*/) { + l++; + } else if (c == 0x5d /*']'*/) { + l--; + U16_NEXT(param, ic, param_len, c); + if (ic < param_len && c != 0x5b /*'['*/) { + skip_upload = 1; + break; + } else { + /* decrement index so that the same character is retrieved again */ + ic--; + } + } + if (l < 0) { + skip_upload = 1; + break; + } + } + } + + total_bytes = cancel_upload = 0; + + if (!skip_upload) { + /* Handle file */ + fp = php_open_temporary_file(PG(upload_tmp_dir), "php", &temp_filename TSRMLS_CC); + if (!fp) { + sapi_module.sapi_error(E_WARNING, "File upload error - unable to create a temporary file"); + cancel_upload = UPLOAD_ERROR_E; + } + } + if (skip_upload) { + efree(param); + efree(filename); + continue; + } + + if(u_strlen(filename) == 0) { +#if DEBUG_FILE_UPLOAD + sapi_module.sapi_error(E_NOTICE, "No file uploaded"); +#endif + cancel_upload = UPLOAD_ERROR_D; + } + + end = 0; + while (!cancel_upload && (blen = multipart_buffer_read(mbuff, buff, sizeof(buff), &end TSRMLS_CC))) + { + if (PG(upload_max_filesize) > 0 && total_bytes > PG(upload_max_filesize)) { +#if DEBUG_FILE_UPLOAD + sapi_module.sapi_error(E_NOTICE, "upload_max_filesize of %ld bytes exceeded - file [%s=%s] not saved", PG(upload_max_filesize), param, filename); +#endif + cancel_upload = UPLOAD_ERROR_A; + } else if (max_file_size && (total_bytes > max_file_size)) { +#if DEBUG_FILE_UPLOAD + sapi_module.sapi_error(E_NOTICE, "MAX_FILE_SIZE of %ld bytes exceeded - file [%s=%s] not saved", max_file_size, param, filename); +#endif + cancel_upload = UPLOAD_ERROR_B; + } else if (blen > 0) { + wlen = fwrite(buff, 1, blen, fp); + + if (wlen < blen) { +#if DEBUG_FILE_UPLOAD + sapi_module.sapi_error(E_NOTICE, "Only %d bytes were written, expected to write %d", wlen, blen); +#endif + cancel_upload = UPLOAD_ERROR_C; + } else { + total_bytes += wlen; + } + } + } + if (fp) { /* may not be initialized if file could not be created */ + fclose(fp); + } + if (!cancel_upload && !end) { +#if DEBUG_FILE_UPLOAD + sapi_module.sapi_error(E_NOTICE, "Missing mime boundary at the end of the data for file %s", strlen(filename) > 0 ? filename : ""); +#endif + cancel_upload = UPLOAD_ERROR_C; + } +#if DEBUG_FILE_UPLOAD + if(strlen(filename) > 0 && total_bytes == 0 && !cancel_upload) { + sapi_module.sapi_error(E_WARNING, "Uploaded file size 0 - file [%s=%s] not saved", param, filename); + cancel_upload = 5; + } +#endif + + if (cancel_upload) { + if (temp_filename) { + if (cancel_upload != UPLOAD_ERROR_E) { /* file creation failed */ + unlink(temp_filename); + } + efree(temp_filename); + } + temp_filename=""; + } else { + zend_hash_add(SG(rfc1867_uploaded_files), temp_filename, strlen(temp_filename) + 1, &temp_filename, sizeof(char *), NULL); + } + + /* is_arr_upload is true when name of file upload field + * ends in [.*] + * start_arr is set to point to 1st [ + */ + is_arr_upload = (start_arr = strchr(param,'[')) && (param[strlen(param)-1] == ']'); + + if (is_arr_upload) { + array_len = strlen(start_arr); + if (array_index) { + efree(array_index); + } + array_index = estrndup(start_arr+1, array_len-2); + } + + /* Add $foo_name */ + if (lbuf) { + efree(lbuf); + } + lbuf = (char *) emalloc(strlen(param) + MAX_SIZE_OF_INDEX + 1); + + if (is_arr_upload) { + if (abuf) efree(abuf); + abuf = estrndup(param, strlen(param)-array_len); + sprintf(lbuf, "%s_name[%s]", abuf, array_index); + } else { + sprintf(lbuf, "%s_name", param); + } + +#if HAVE_MBSTRING && !defined(COMPILE_DL_MBSTRING) + if (php_mb_encoding_translation(TSRMLS_C)) { + if (num_vars>=num_vars_max){ + php_mb_gpc_realloc_buffer(&val_list, &len_list, &num_vars_max, + 1 TSRMLS_CC); + } + val_list[num_vars] = filename; + len_list[num_vars] = strlen(filename); + num_vars++; + if(php_mb_gpc_encoding_detector(val_list, len_list, num_vars, NULL TSRMLS_CC) == SUCCESS) { + str_len = strlen(filename); + php_mb_gpc_encoding_converter(&filename, &str_len, 1, NULL, NULL TSRMLS_CC); + } + s = php_mb_strrchr(filename, '\\' TSRMLS_CC); + if ((tmp = php_mb_strrchr(filename, '/' TSRMLS_CC)) > s) { + s = tmp; + } + num_vars--; + goto filedone; + } +#endif + /* The \ check should technically be needed for win32 systems only where + * it is a valid path separator. However, IE in all it's wisdom always sends + * the full path of the file on the user's filesystem, which means that unless + * the user does basename() they get a bogus file name. Until IE's user base drops + * to nill or problem is fixed this code must remain enabled for all systems. + */ + s = strrchr(filename, '\\'); + if ((tmp = strrchr(filename, '/')) > s) { + s = tmp; + } +#ifdef PHP_WIN32 + if (PG(magic_quotes_gpc)) { + s = s ? s : filename; + tmp = strrchr(s, '\''); + s = tmp > s ? tmp : s; + tmp = strrchr(s, '"'); + s = tmp > s ? tmp : s; + } +#endif + +#if HAVE_MBSTRING && !defined(COMPILE_DL_MBSTRING) +filedone: +#endif + + if (!is_anonymous) { + if (s && s > filename) { + safe_php_register_variable(lbuf, s+1, NULL, 0 TSRMLS_CC); + } else { + safe_php_register_variable(lbuf, filename, NULL, 0 TSRMLS_CC); + } + } + + /* Add $foo[name] */ + if (is_arr_upload) { + sprintf(lbuf, "%s[name][%s]", abuf, array_index); + } else { + sprintf(lbuf, "%s[name]", param); + } + if (s && s > filename) { + register_http_post_files_variable(lbuf, s+1, http_post_files, 0 TSRMLS_CC); + } else { + register_http_post_files_variable(lbuf, filename, http_post_files, 0 TSRMLS_CC); + } + efree(filename); + s = NULL; + + /* Possible Content-Type: */ + if (cancel_upload || !(cd = php_mime_get_hdr_value(header, "Content-Type"))) { + cd = ""; + } else { + /* fix for Opera 6.01 */ + s = strchr(cd, ';'); + if (s != NULL) { + *s = '\0'; + } + } + + /* Add $foo_type */ + if (is_arr_upload) { + sprintf(lbuf, "%s_type[%s]", abuf, array_index); + } else { + sprintf(lbuf, "%s_type", param); + } + if (!is_anonymous) { + safe_php_register_variable(lbuf, cd, NULL, 0 TSRMLS_CC); + } + + /* Add $foo[type] */ + if (is_arr_upload) { + sprintf(lbuf, "%s[type][%s]", abuf, array_index); + } else { + sprintf(lbuf, "%s[type]", param); + } + register_http_post_files_variable(lbuf, cd, http_post_files, 0 TSRMLS_CC); + + /* Restore Content-Type Header */ + if (s != NULL) { + *s = ';'; + } + s = ""; + + /* Initialize variables */ + add_protected_variable(param TSRMLS_CC); + + magic_quotes_gpc = PG(magic_quotes_gpc); + PG(magic_quotes_gpc) = 0; + /* if param is of form xxx[.*] this will cut it to xxx */ + if (!is_anonymous) { + safe_php_register_variable(param, temp_filename, NULL, 1 TSRMLS_CC); + } + + /* Add $foo[tmp_name] */ + if (is_arr_upload) { + sprintf(lbuf, "%s[tmp_name][%s]", abuf, array_index); + } else { + sprintf(lbuf, "%s[tmp_name]", param); + } + add_protected_variable(lbuf TSRMLS_CC); + register_http_post_files_variable(lbuf, temp_filename, http_post_files, 1 TSRMLS_CC); + + PG(magic_quotes_gpc) = magic_quotes_gpc; + + { + zval file_size, error_type; + + error_type.value.lval = cancel_upload; + error_type.type = IS_LONG; + + /* Add $foo[error] */ + if (cancel_upload) { + file_size.value.lval = 0; + file_size.type = IS_LONG; + } else { + file_size.value.lval = total_bytes; + file_size.type = IS_LONG; + } + + if (is_arr_upload) { + sprintf(lbuf, "%s[error][%s]", abuf, array_index); + } else { + sprintf(lbuf, "%s[error]", param); + } + register_http_post_files_variable_ex(lbuf, &error_type, http_post_files, 0 TSRMLS_CC); + + /* Add $foo_size */ + if (is_arr_upload) { + sprintf(lbuf, "%s_size[%s]", abuf, array_index); + } else { + sprintf(lbuf, "%s_size", param); + } + if (!is_anonymous) { + safe_php_register_variable_ex(lbuf, &file_size, NULL, 0 TSRMLS_CC); + } + + /* Add $foo[size] */ + if (is_arr_upload) { + sprintf(lbuf, "%s[size][%s]", abuf, array_index); + } else { + sprintf(lbuf, "%s[size]", param); + } + register_http_post_files_variable_ex(lbuf, &file_size, http_post_files, 0 TSRMLS_CC); + } + efree(param); + } + } + + SAFE_RETURN; +} + +static SAPI_POST_HANDLER_FUNC(rfc1867_post_handler_legacy) { char *boundary, *s=NULL, *boundary_end = NULL, *start_arr=NULL, *array_index=NULL; char *temp_filename=NULL, *lbuf=NULL, *abuf=NULL; @@ -879,7 +1601,7 @@ SAPI_API SAPI_POST_HANDLER_FUNC(rfc1867_post_handler) if (strchr(pair, '=')) { key = php_ap_getword(&pair, '='); - + if (!strcasecmp(key, "name")) { if (param) { efree(param); @@ -939,7 +1661,7 @@ SAPI_API SAPI_POST_HANDLER_FUNC(rfc1867_post_handler) sapi_module.sapi_error(E_WARNING, "File Upload Mime headers garbled"); SAFE_RETURN; } - + if (!param) { is_anonymous = 1; param = emalloc(MAX_SIZE_ANONNAME); @@ -947,12 +1669,12 @@ SAPI_API SAPI_POST_HANDLER_FUNC(rfc1867_post_handler) } else { is_anonymous = 0; } - + /* New Rule: never repair potential malicious user input */ if (!skip_upload) { char *tmp = param; long c = 0; - + while (*tmp) { if (*tmp == '[') { c++; @@ -1061,13 +1783,13 @@ SAPI_API SAPI_POST_HANDLER_FUNC(rfc1867_post_handler) } array_index = estrndup(start_arr+1, array_len-2); } - + /* Add $foo_name */ if (lbuf) { efree(lbuf); } lbuf = (char *) emalloc(strlen(param) + MAX_SIZE_OF_INDEX + 1); - + if (is_arr_upload) { if (abuf) efree(abuf); abuf = estrndup(param, strlen(param)-array_len); @@ -1120,7 +1842,7 @@ SAPI_API SAPI_POST_HANDLER_FUNC(rfc1867_post_handler) #if HAVE_MBSTRING && !defined(COMPILE_DL_MBSTRING) filedone: #endif - + if (!is_anonymous) { if (s && s > filename) { safe_php_register_variable(lbuf, s+1, NULL, 0 TSRMLS_CC); @@ -1142,7 +1864,7 @@ filedone: } efree(filename); s = NULL; - + /* Possible Content-Type: */ if (cancel_upload || !(cd = php_mime_get_hdr_value(header, "Content-Type"))) { cd = ""; @@ -1187,7 +1909,7 @@ filedone: if (!is_anonymous) { safe_php_register_variable(param, temp_filename, NULL, 1 TSRMLS_CC); } - + /* Add $foo[tmp_name] */ if (is_arr_upload) { sprintf(lbuf, "%s[tmp_name][%s]", abuf, array_index); @@ -1213,7 +1935,7 @@ filedone: file_size.value.lval = total_bytes; file_size.type = IS_LONG; } - + if (is_arr_upload) { sprintf(lbuf, "%s[error][%s]", abuf, array_index); } else { @@ -1247,6 +1969,20 @@ filedone: } /* + * The combined READER/HANDLER + * + */ + +SAPI_API SAPI_POST_HANDLER_FUNC(rfc1867_post_handler) +{ + if (UG(unicode)) { + rfc1867_post_handler_unicode(content_type_dup, arg TSRMLS_CC); + } else { + rfc1867_post_handler_legacy(content_type_dup, arg TSRMLS_CC); + } +} + +/* * Local variables: * tab-width: 4 * c-basic-offset: 4 diff --git a/main/spprintf.c b/main/spprintf.c index d671aa89d9..0af4257cad 100644 --- a/main/spprintf.c +++ b/main/spprintf.c @@ -178,8 +178,10 @@ static void xbuf_format_converter(smart_str *xbuf, const char *fmt, va_list ap) { register char *s = NULL; + register UChar *u = NULL; char *q; int s_len; + int32_t u_len; register int min_width = 0; int precision = 0; @@ -195,6 +197,7 @@ static void xbuf_format_converter(smart_str *xbuf, const char *fmt, va_list ap) char num_buf[NUM_BUF_SIZE]; char char_buf[2]; /* for printing %% and %<unknown> */ + zend_bool free_s; /* free string if allocated here */ /* * Flag variables @@ -207,6 +210,8 @@ static void xbuf_format_converter(smart_str *xbuf, const char *fmt, va_list ap) boolean_e adjust_width; bool_int is_negative; + TSRMLS_FETCH(); + while (*fmt) { if (*fmt != '%') { INS_CHAR(xbuf, *fmt); @@ -218,6 +223,7 @@ static void xbuf_format_converter(smart_str *xbuf, const char *fmt, va_list ap) alternate_form = print_sign = print_blank = NO; pad_char = ' '; prefix_char = NUL; + free_s = 0; fmt++; @@ -511,8 +517,58 @@ static void xbuf_format_converter(smart_str *xbuf, const char *fmt, va_list ap) } break; + case 'v': + if (UG(unicode)) { + goto fmt_unicode; + } else { + goto fmt_string; + } + break; + + case 'R': + { + int type = va_arg(ap, int); + if (type != IS_UNICODE) { + if (alternate_form) { + va_arg(ap, UConverter *); + } + goto fmt_string; + } + } + /* break omitted */ + + case 'r': +fmt_unicode: + { + UConverter *conv = ZEND_U_CONVERTER(UG(output_encoding_conv)); + UErrorCode status = U_ZERO_ERROR; + char *res = NULL; + + if (alternate_form) { + conv = va_arg(ap, UConverter *); + } + + u = va_arg(ap, UChar *); + if (u == NULL) { + s = S_NULL; + s_len = S_NULL_LEN; + break; + } + + u_len = u_strlen(u); + zend_convert_from_unicode(conv, &res, &s_len, u, u_len, &status); + if (U_FAILURE(status)) { + php_error(E_WARNING, "Could not convert Unicode to printable form in s[np]printf call"); + return; + } + s = res; + free_s = 1; + pad_char = ' '; + break; + } case 's': +fmt_string: s = va_arg(ap, char *); if (s != NULL) { s_len = strlen(s); @@ -525,7 +581,6 @@ static void xbuf_format_converter(smart_str *xbuf, const char *fmt, va_list ap) pad_char = ' '; break; - case 'f': case 'e': case 'E': @@ -705,6 +760,7 @@ fmt_error: * Print the string s. */ INS_STRING(xbuf, s, s_len); + if (free_s) efree(s); if (adjust_width && adjust == LEFT && min_width > s_len) PAD(xbuf, min_width - s_len, pad_char); diff --git a/main/spprintf.h b/main/spprintf.h index 8762451d32..d578331670 100644 --- a/main/spprintf.h +++ b/main/spprintf.h @@ -37,9 +37,9 @@ There is also snprintf: See difference explained in snprintf.h #include "snprintf.h" BEGIN_EXTERN_C() -PHPAPI int spprintf( char **pbuf, size_t max_len, const char *format, ...) PHP_ATTRIBUTE_FORMAT(printf, 3, 4); +PHPAPI int spprintf( char **pbuf, size_t max_len, const char *format, ...); -PHPAPI int vspprintf(char **pbuf, size_t max_len, const char *format, va_list ap) PHP_ATTRIBUTE_FORMAT(printf, 3, 0); +PHPAPI int vspprintf(char **pbuf, size_t max_len, const char *format, va_list ap); END_EXTERN_C() #endif /* SNPRINTF_H */ diff --git a/main/streams/cast.c b/main/streams/cast.c index 1bdb78baa1..6297b6269e 100644 --- a/main/streams/cast.c +++ b/main/streams/cast.c @@ -150,7 +150,8 @@ PHPAPI int _php_stream_cast(php_stream *stream, int castas, void **ret, int show off_t dummy; stream->ops->seek(stream, stream->position, SEEK_SET, &dummy TSRMLS_CC); - stream->readpos = stream->writepos = 0; + + php_stream_flush_readbuf(stream); } } @@ -258,7 +259,7 @@ PHPAPI int _php_stream_cast(php_stream *stream, int castas, void **ret, int show exit_success: - if ((stream->writepos - stream->readpos) > 0 && + if ((stream->readbuf_avail) > 0 && stream->fclose_stdiocast != PHP_STREAM_FCLOSE_FOPENCOOKIE && (flags & PHP_STREAM_CAST_INTERNAL) == 0) { /* the data we have buffered will be lost to the third party library that @@ -267,7 +268,7 @@ exit_success: php_error_docref(NULL TSRMLS_CC, E_WARNING, "%ld bytes of buffered data lost during stream conversion!", - (long)(stream->writepos - stream->readpos)); + stream->readbuf_avail); } if (castas == PHP_STREAM_AS_STDIO && ret) diff --git a/main/streams/filter.c b/main/streams/filter.c index 6cb785f717..c45a1ac0fe 100644 --- a/main/streams/filter.c +++ b/main/streams/filter.c @@ -85,21 +85,59 @@ PHPAPI php_stream_bucket *php_stream_bucket_new(php_stream *stream, char *buf, s if (is_persistent && !buf_persistent) { /* all data in a persistent bucket must also be persistent */ - bucket->buf = pemalloc(buflen, 1); + bucket->buf.str.val = pemalloc(buflen, 1); - if (bucket->buf == NULL) { + if (bucket->buf.str.val == NULL) { pefree(bucket, 1); return NULL; } - memcpy(bucket->buf, buf, buflen); - bucket->buflen = buflen; + memcpy(bucket->buf.str.val, buf, buflen); + bucket->buf.str.len = buflen; bucket->own_buf = 1; } else { - bucket->buf = buf; - bucket->buflen = buflen; + bucket->buf.str.val = buf; + bucket->buf.str.len = buflen; bucket->own_buf = own_buf; } + bucket->is_unicode = 0; + bucket->is_persistent = is_persistent; + bucket->refcount = 1; + + return bucket; +} + +PHPAPI php_stream_bucket *php_stream_bucket_new_unicode(php_stream *stream, UChar *buf, int32_t buflen, int own_buf, int buf_persistent TSRMLS_DC) +{ + int is_persistent = php_stream_is_persistent(stream); + php_stream_bucket *bucket; + + bucket = (php_stream_bucket*)pemalloc(sizeof(php_stream_bucket), is_persistent); + + if (bucket == NULL) { + return NULL; + } + + bucket->next = bucket->prev = NULL; + + if (is_persistent && !buf_persistent) { + /* all data in a persistent bucket must also be persistent */ + bucket->buf.ustr.val = safe_pemalloc(sizeof(UChar), buflen, 0, 1); + + if (bucket->buf.ustr.val == NULL) { + pefree(bucket, 1); + return NULL; + } + + memcpy(bucket->buf.ustr.val, buf, buflen); + bucket->buf.ustr.len = buflen; + bucket->own_buf = 1; + } else { + bucket->buf.ustr.val = buf; + bucket->buf.ustr.len = buflen; + bucket->own_buf = own_buf; + } + bucket->is_unicode = 1; bucket->is_persistent = is_persistent; bucket->refcount = 1; @@ -126,8 +164,13 @@ PHPAPI php_stream_bucket *php_stream_bucket_make_writeable(php_stream_bucket *bu retval = (php_stream_bucket*)pemalloc(sizeof(php_stream_bucket), bucket->is_persistent); memcpy(retval, bucket, sizeof(*retval)); - retval->buf = pemalloc(retval->buflen, retval->is_persistent); - memcpy(retval->buf, bucket->buf, retval->buflen); + if (bucket->is_unicode) { + retval->buf.ustr.val = safe_pemalloc(sizeof(UChar), retval->buf.ustr.len, 0, retval->is_persistent); + memcpy(retval->buf.ustr.val, bucket->buf.ustr.val, retval->buf.ustr.len * sizeof(UChar)); + } else { + retval->buf.str.val = pemalloc(retval->buf.str.len, retval->is_persistent); + memcpy(retval->buf.str.val, bucket->buf.str.val, retval->buf.str.len); + } retval->refcount = 1; retval->own_buf = 1; @@ -146,32 +189,58 @@ PHPAPI int php_stream_bucket_split(php_stream_bucket *in, php_stream_bucket **le goto exit_fail; } - (*left)->buf = pemalloc(length, in->is_persistent); - (*left)->buflen = length; - memcpy((*left)->buf, in->buf, length); + if (in->is_unicode) { + (*left)->buf.ustr.val = safe_pemalloc(sizeof(UChar), length, 0, in->is_persistent); + (*left)->buf.ustr.len = length; + memcpy((*left)->buf.str.val, in->buf.str.val, length * sizeof(UChar)); + + (*right)->buf.ustr.len = in->buf.ustr.len - length; + (*right)->buf.ustr.val = pemalloc((*right)->buf.ustr.len, in->is_persistent); + memcpy((*right)->buf.ustr.val, in->buf.ustr.val + (length * sizeof(UChar)), (*right)->buf.str.len * sizeof(UChar)); + } else { + (*left)->buf.str.val = pemalloc(length, in->is_persistent); + (*left)->buf.str.len = length; + memcpy((*left)->buf.str.val, in->buf.str.val, length); + + (*right)->buf.str.len = in->buf.str.len - length; + (*right)->buf.str.val = pemalloc((*right)->buf.str.len, in->is_persistent); + memcpy((*right)->buf.str.val, in->buf.str.val + length, (*right)->buf.str.len); + } + (*left)->refcount = 1; (*left)->own_buf = 1; (*left)->is_persistent = in->is_persistent; - - (*right)->buflen = in->buflen - length; - (*right)->buf = pemalloc((*right)->buflen, in->is_persistent); - memcpy((*right)->buf, in->buf + length, (*right)->buflen); + (*left)->is_unicode = in->is_unicode; + (*right)->refcount = 1; (*right)->own_buf = 1; (*right)->is_persistent = in->is_persistent; + (*right)->is_unicode = in->is_unicode; return SUCCESS; exit_fail: if (*right) { - if ((*right)->buf) { - pefree((*right)->buf, in->is_persistent); + if ((*right)->is_unicode) { + if ((*right)->buf.ustr.val) { + pefree((*right)->buf.ustr.val, in->is_persistent); + } + } else { + if ((*right)->buf.str.val) { + pefree((*right)->buf.str.val, in->is_persistent); + } } pefree(*right, in->is_persistent); } if (*left) { - if ((*left)->buf) { - pefree((*left)->buf, in->is_persistent); + if ((*left)->is_unicode) { + if ((*left)->buf.ustr.val) { + pefree((*left)->buf.ustr.val, in->is_persistent); + } + } else { + if ((*left)->buf.str.val) { + pefree((*left)->buf.str.val, in->is_persistent); + } } pefree(*left, in->is_persistent); } @@ -182,7 +251,7 @@ PHPAPI void php_stream_bucket_delref(php_stream_bucket *bucket TSRMLS_DC) { if (--bucket->refcount == 0) { if (bucket->own_buf) { - pefree(bucket->buf, bucket->is_persistent); + pefree(bucket->is_unicode ? bucket->buf.ustr.val : bucket->buf.str.val, bucket->is_persistent); } pefree(bucket, bucket->is_persistent); } @@ -335,27 +404,30 @@ PHPAPI void _php_stream_filter_append(php_stream_filter_chain *chain, php_stream chain->tail = filter; filter->chain = chain; - if (&(stream->readfilters) == chain && (stream->writepos - stream->readpos) > 0) { + if (&(stream->readfilters) == chain && (stream->readbuf_avail) > 0) { /* Let's going ahead and wind anything in the buffer through this filter */ - php_stream_bucket_brigade brig_in = { NULL, NULL }, brig_out = { NULL, NULL }; - php_stream_bucket_brigade *brig_inp = &brig_in, *brig_outp = &brig_out; + php_stream_bucket_brigade brig_out = { NULL, NULL }; + php_stream_bucket_brigade *brig_outp = &brig_out; php_stream_filter_status_t status; php_stream_bucket *bucket; - size_t consumed = 0; - bucket = php_stream_bucket_new(stream, stream->readbuf + stream->readpos, stream->writepos - stream->readpos, 0, 0 TSRMLS_CC); - php_stream_bucket_append(brig_inp, bucket TSRMLS_CC); - status = filter->fops->filter(stream, filter, brig_inp, brig_outp, &consumed, PSFS_FLAG_NORMAL TSRMLS_CC); - - if (stream->readpos + consumed > stream->writepos || consumed < 0) { - /* No behaving filter should cause this. */ - status = PSFS_ERR_FATAL; + if (stream->readbuf_ofs) { + /* Mask readbuf_ofs from filter */ + bucket = stream->readbuf.head; + if (bucket->is_unicode) { + bucket->buf.ustr.len -= stream->readbuf_ofs; + memmove(bucket->buf.ustr.val, bucket->buf.ustr.val + (stream->readbuf_ofs * sizeof(UChar)), bucket->buf.ustr.len * sizeof(UChar)); + } else { + bucket->buf.str.len -= stream->readbuf_ofs; + memmove(bucket->buf.str.val, bucket->buf.str.val + stream->readbuf_ofs, bucket->buf.str.len); + } } + status = filter->fops->filter(stream, filter, &stream->readbuf, brig_outp, NULL, PSFS_FLAG_NORMAL TSRMLS_CC); + switch (status) { case PSFS_ERR_FATAL: - /* If this first cycle simply fails then there's something wrong with the filter. - Pull the filter off the chain and leave the read buffer alone. */ + /* filter is fundamentally broken, invalidate readbuf and strip the filter */ if (chain->head == filter) { chain->head = NULL; chain->tail = NULL; @@ -363,46 +435,87 @@ PHPAPI void _php_stream_filter_append(php_stream_filter_chain *chain, php_stream filter->prev->next = NULL; chain->tail = filter->prev; } - php_stream_bucket_unlink(bucket TSRMLS_CC); - php_stream_bucket_delref(bucket TSRMLS_CC); - php_error_docref(NULL TSRMLS_CC, E_WARNING, "Filter failed to process pre-buffered data. Not adding to filterchain."); + php_stream_flush_readbuf(stream); + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Filter failed to process pre-buffered data"); + /* Passthru -- Anything successfully filtered can go back on the readbuf */ + case PSFS_PASS_ON: + stream->readbuf_ofs = stream->readbuf_avail = 0; + + /* Merge brig_out */ + while((bucket = brig_out.head)) { + php_stream_bucket_unlink(bucket TSRMLS_CC); + php_stream_bucket_append(&stream->readbuf, bucket TSRMLS_CC); + } + + /* Count available bytes */ + for(bucket = stream->readbuf.head; bucket; bucket = bucket->next) { + stream->readbuf_avail += bucket->is_unicode ? bucket->buf.ustr.len : bucket->buf.str.len; + } break; case PSFS_FEED_ME: /* We don't actually need data yet, leave this filter in a feed me state until data is needed. Reset stream's internal read buffer since the filter is "holding" it. */ - stream->readpos = 0; - stream->writepos = 0; + stream->readbuf.head = stream->readbuf.tail = NULL; + stream->readbuf_avail = stream->readbuf_ofs = 0; break; - case PSFS_PASS_ON: - /* Put any filtered data onto the readbuffer stack. - Previously read data has been at least partially consumed. */ - stream->readpos += consumed; - - if (stream->writepos == stream->readpos) { - /* Entirely consumed */ - stream->writepos = 0; - stream->readpos = 0; - } + } + } +} - while (brig_outp->head) { - bucket = brig_outp->head; - /* Grow buffer to hold this bucket if need be. - TODO: See warning in main/stream/streams.c::php_stream_fill_read_buffer */ - if (stream->readbuflen - stream->writepos < bucket->buflen) { - stream->readbuflen += bucket->buflen; - stream->readbuf = perealloc(stream->readbuf, stream->readbuflen, stream->is_persistent); - } - memcpy(stream->readbuf + stream->writepos, bucket->buf, bucket->buflen); - stream->writepos += bucket->buflen; +PHPAPI int _php_stream_filter_check_chain(php_stream_filter_chain *chain TSRMLS_DC) +{ + php_stream_filter *filter; + long last_output = PSFO_FLAG_OUTPUTS_ANY; - php_stream_bucket_unlink(bucket TSRMLS_CC); - php_stream_bucket_delref(bucket TSRMLS_CC); - } - break; + for(filter = chain->head; filter; filter = filter->next) { + if ((((filter->fops->flags & PSFO_FLAG_ACCEPT_MASK) << PSFO_FLAG_ACCEPT_SHIFT) & last_output) == 0) { + /* Nothing which the last filter outputs is accepted by this filter */ + return FAILURE; + } + if (filter->fops->flags & PSFO_FLAG_OUTPUTS_SAME) { + continue; + } + if (filter->fops->flags & PSFO_FLAG_OUTPUTS_OPPOSITE) { + last_output = ((last_output & PSFO_FLAG_OUTPUTS_STRING) ? PSFO_FLAG_OUTPUTS_UNICODE : 0) | + ((last_output & PSFO_FLAG_OUTPUTS_UNICODE) ? PSFO_FLAG_OUTPUTS_STRING : 0); + continue; + } + last_output = filter->fops->flags & PSFO_FLAG_OUTPUTS_ANY; + } + + return SUCCESS; +} + +PHPAPI int _php_stream_filter_output_prefer_unicode(php_stream_filter *filter TSRMLS_DC) +{ + php_stream_filter_chain *chain = filter->chain; + php_stream_filter *f; + int inverted = 0; + int preferred = (chain = &chain->stream->readfilters ? 1 : 0); + + for (f = filter->next; f ; f = f->next) { + if ((f->fops->flags & PSFO_FLAG_ACCEPTS_STRING) == 0) { + return inverted ^= 1; + } + if ((f->fops->flags & PSFO_FLAG_ACCEPTS_UNICODE) == 0) { + return inverted; + } + if (((f->fops->flags & PSFO_FLAG_OUTPUTS_SAME) == 0) && + ((f->fops->flags & PSFO_FLAG_OUTPUTS_OPPOSITE) == 0)) { + /* Input type for next filter won't effect output -- Might as well go for unicode */ + return inverted ^ 1; + } + if (f->fops->flags & PSFO_FLAG_OUTPUTS_SAME) { + continue; + } + if (f->fops->flags & PSFO_FLAG_OUTPUTS_OPPOSITE) { + inverted ^= 1; + continue; } } + return preferred ^ inverted; } PHPAPI int _php_stream_filter_flush(php_stream_filter *filter, int finish TSRMLS_DC) @@ -449,7 +562,7 @@ PHPAPI int _php_stream_filter_flush(php_stream_filter *filter, int finish TSRMLS Do something with it */ for(bucket = inp->head; bucket; bucket = bucket->next) { - flushed_size += bucket->buflen; + flushed_size += bucket->is_unicode ? bucket->buf.ustr.len : bucket->buf.str.len; } if (flushed_size == 0) { @@ -458,27 +571,33 @@ PHPAPI int _php_stream_filter_flush(php_stream_filter *filter, int finish TSRMLS } if (chain == &(stream->readfilters)) { - /* Dump any newly flushed data to the read buffer */ - if (stream->readpos > 0) { - /* Back the buffer up */ - memcpy(stream->readbuf, stream->readbuf + stream->readpos, stream->writepos - stream->readpos); - stream->readpos = 0; - stream->writepos -= stream->readpos; - } - if (flushed_size > (stream->readbuflen - stream->writepos)) { - /* Grow the buffer */ - stream->readbuf = perealloc(stream->readbuf, stream->writepos + flushed_size + stream->chunk_size, stream->is_persistent); - } - while ((bucket = inp->head)) { - memcpy(stream->readbuf + stream->writepos, bucket->buf, bucket->buflen); - stream->writepos += bucket->buflen; - php_stream_bucket_unlink(bucket TSRMLS_CC); - php_stream_bucket_delref(bucket TSRMLS_CC); + if (stream->readbuf.head) { + /* Merge inp with readbuf */ + for(bucket = inp->head; bucket; bucket = bucket->next) { + php_stream_bucket_unlink(bucket TSRMLS_CC); + php_stream_bucket_append(&stream->readbuf, bucket TSRMLS_CC); + stream->readbuf_avail += bucket->is_unicode ? bucket->buf.ustr.len : bucket->buf.str.len; + } + } else { + /* Just plop it in */ + stream->readbuf = *inp; + stream->readbuf_avail = flushed_size; + stream->readbuf_ofs = 0; } } else if (chain == &(stream->writefilters)) { /* Send flushed data to the stream */ while ((bucket = inp->head)) { - stream->ops->write(stream, bucket->buf, bucket->buflen TSRMLS_CC); + char *data; + int datalen; + + if (bucket->is_unicode) { + data = bucket->buf.ustr.val; + datalen = bucket->buf.ustr.len * sizeof(UChar); + } else { + data = bucket->buf.str.val; + datalen = bucket->buf.str.len; + } + stream->ops->write(stream, data, datalen TSRMLS_CC); php_stream_bucket_unlink(bucket TSRMLS_CC); php_stream_bucket_delref(bucket TSRMLS_CC); } @@ -511,6 +630,106 @@ PHPAPI php_stream_filter *php_stream_filter_remove(php_stream_filter *filter, in return filter; } +PHPAPI int php_stream_bucket_tounicode(php_stream *stream, php_stream_bucket **pbucket, off_t *offset TSRMLS_DC) +{ + int is_persistent = php_stream_is_persistent(stream); + php_stream_bucket *bucket = *pbucket, *prior = bucket->prev, *next = bucket->next; + php_stream_bucket_brigade *brigade = bucket->brigade; + UErrorCode status = U_ZERO_ERROR; + UChar *val; + int32_t len; + + if (bucket->is_unicode) { + return SUCCESS; + } + + zend_convert_to_unicode(ZEND_U_CONVERTER(UG(runtime_encoding_conv)), &val, &len, bucket->buf.str.val, bucket->buf.str.len, &status); + + if (U_FAILURE(status)) { + efree(val); + return FAILURE; + } + + php_stream_bucket_unlink(bucket TSRMLS_CC); + php_stream_bucket_delref(bucket TSRMLS_CC); + + /* convert from unicode defaults to non-persistent */ + bucket = php_stream_bucket_new_unicode(stream, val, len, 1, is_persistent TSRMLS_CC); + if (is_persistent) { + efree(val); + } + + bucket->brigade = brigade; + bucket->prev = prior; + bucket->next = next; + + if (prior) { + prior->next = bucket; + } else if (brigade) { + brigade->head = bucket; + } + + if (next) { + next->prev = bucket; + } else if (brigade) { + brigade->tail = bucket; + } + + *pbucket = bucket; + + return SUCCESS; +} + +PHPAPI int php_stream_bucket_tostring(php_stream *stream, php_stream_bucket **pbucket, off_t *offset TSRMLS_DC) +{ + int is_persistent = php_stream_is_persistent(stream); + php_stream_bucket *bucket = *pbucket, *prior = bucket->prev, *next = bucket->next; + php_stream_bucket_brigade *brigade = bucket->brigade; + UErrorCode status = U_ZERO_ERROR; + char *val; + int len; + + if (!bucket->is_unicode) { + return SUCCESS; + } + + zend_convert_from_unicode(ZEND_U_CONVERTER(UG(runtime_encoding_conv)), &val, &len, bucket->buf.ustr.val, bucket->buf.ustr.len, &status); + + if (U_FAILURE(status)) { + efree(val); + return FAILURE; + } + + php_stream_bucket_unlink(bucket TSRMLS_CC); + php_stream_bucket_delref(bucket TSRMLS_CC); + + /* convert from unicode defaults to non-persistent */ + bucket = php_stream_bucket_new(stream, val, len, 1, is_persistent TSRMLS_CC); + if (is_persistent) { + efree(val); + } + + bucket->brigade = brigade; + bucket->prev = prior; + bucket->next = next; + + if (prior) { + prior->next = bucket; + } else if (brigade) { + brigade->head = bucket; + } + + if (next) { + next->prev = bucket; + } else if (brigade) { + brigade->tail = bucket; + } + + *pbucket = bucket; + + return SUCCESS; +} + /* * Local variables: * tab-width: 4 diff --git a/main/streams/php_stream_context.h b/main/streams/php_stream_context.h index 7263aa4511..bfd6d0df1a 100644 --- a/main/streams/php_stream_context.h +++ b/main/streams/php_stream_context.h @@ -52,6 +52,9 @@ struct _php_stream_notifier { struct _php_stream_context { php_stream_notifier *notifier; + char *output_encoding; /* unicode->string character set */ + char *input_encoding; /* string->unicode character set */ + int default_mode; /* default fopen mode -- PHP_FILE_BINARY vs. PHP_FILE_TEXT -- potentially support other fpc() flags later */ zval *options; /* hash keyed by wrapper family or specific wrapper */ zval *links; /* hash keyed by hostent for connection pooling */ int rsrc_id; /* used for auto-cleanup */ diff --git a/main/streams/php_stream_filter_api.h b/main/streams/php_stream_filter_api.h index 114ad93957..52bdc91653 100644 --- a/main/streams/php_stream_filter_api.h +++ b/main/streams/php_stream_filter_api.h @@ -45,12 +45,22 @@ struct _php_stream_bucket { php_stream_bucket *next, *prev; php_stream_bucket_brigade *brigade; - char *buf; - size_t buflen; + union { + struct { + char *val; + size_t len; + } str; + struct { + UChar *val; + int32_t len; + } ustr; + } buf; + /* if non-zero, buf should be pefreed when the bucket is destroyed */ - int own_buf; - int is_persistent; - + char own_buf; + char is_persistent; + char is_unicode; + /* destroy this struct when refcount falls to zero */ int refcount; }; @@ -68,6 +78,7 @@ typedef enum { /* Buckets API. */ BEGIN_EXTERN_C() PHPAPI php_stream_bucket *php_stream_bucket_new(php_stream *stream, char *buf, size_t buflen, int own_buf, int buf_persistent TSRMLS_DC); +PHPAPI php_stream_bucket *php_stream_bucket_new_unicode(php_stream *stream, UChar *buf, int32_t buflen, int own_buf, int buf_persistent TSRMLS_DC); PHPAPI int php_stream_bucket_split(php_stream_bucket *in, php_stream_bucket **left, php_stream_bucket **right, size_t length TSRMLS_DC); PHPAPI void php_stream_bucket_delref(php_stream_bucket *bucket TSRMLS_DC); #define php_stream_bucket_addref(bucket) (bucket)->refcount++ @@ -75,11 +86,30 @@ PHPAPI void php_stream_bucket_prepend(php_stream_bucket_brigade *brigade, php_st PHPAPI void php_stream_bucket_append(php_stream_bucket_brigade *brigade, php_stream_bucket *bucket TSRMLS_DC); PHPAPI void php_stream_bucket_unlink(php_stream_bucket *bucket TSRMLS_DC); PHPAPI php_stream_bucket *php_stream_bucket_make_writeable(php_stream_bucket *bucket TSRMLS_DC); +PHPAPI int php_stream_bucket_tounicode(php_stream *stream, php_stream_bucket **pbucket, off_t *offset TSRMLS_DC); +PHPAPI int php_stream_bucket_tostring(php_stream *stream, php_stream_bucket **pbucket, off_t *offset TSRMLS_DC); END_EXTERN_C() -#define PSFS_FLAG_NORMAL 0 /* regular read/write */ -#define PSFS_FLAG_FLUSH_INC 1 /* an incremental flush */ -#define PSFS_FLAG_FLUSH_CLOSE 2 /* final flush prior to closing */ +#define PSFS_FLAG_NORMAL 0 /* regular read/write */ +#define PSFS_FLAG_FLUSH_INC 1 /* an incremental flush */ +#define PSFS_FLAG_FLUSH_CLOSE 2 /* final flush prior to closing */ + +#define PSFO_FLAG_ACCEPTS_STRING (1<<0) /* can process non-unicode buckets */ +#define PSFO_FLAG_ACCEPTS_UNICODE (1<<1) /* can process unicode buckets */ +#define PSFO_FLAG_ACCEPTS_ANY (PSFO_FLAG_ACCEPTS_STRING | PSFO_FLAG_ACCEPTS_UNICODE) + +#define PSFO_FLAG_OUTPUTS_STRING (1<<2) /* can produce non-unicode buckets */ +#define PSFO_FLAG_OUTPUTS_UNICODE (1<<3) /* can produce unicode buckets */ +#define PSFO_FLAG_OUTPUTS_ANY (PSFO_FLAG_OUTPUTS_STRING | PSFO_FLAG_OUTPUTS_UNICODE) + +/* produces buckets of the same type as provided */ +#define PSFO_FLAG_OUTPUTS_SAME ((1<<4) | PSFO_FLAG_ACCEPTS_ANY | PSFO_FLAG_OUTPUTS_ANY) + +/* produces buckets of the opposite type as provided */ +#define PSFO_FLAG_OUTPUTS_OPPOSITE ((1<<5) | PSFO_FLAG_ACCEPTS_ANY | PSFO_FLAG_OUTPUTS_ANY) + +#define PSFO_FLAG_ACCEPT_MASK PSFO_FLAG_ACCEPTS_ANY +#define PSFO_FLAG_ACCEPT_SHIFT 2 /* For comparing filter to filter bucket passing compatability */ typedef struct _php_stream_filter_ops { @@ -95,7 +125,8 @@ typedef struct _php_stream_filter_ops { void (*dtor)(php_stream_filter *thisfilter TSRMLS_DC); const char *label; - + + int flags; } php_stream_filter_ops; typedef struct _php_stream_filter_chain { @@ -120,12 +151,15 @@ struct _php_stream_filter { /* filters are auto_registered when they're applied */ int rsrc_id; + int flags; }; /* stack filter onto a stream */ BEGIN_EXTERN_C() PHPAPI void _php_stream_filter_prepend(php_stream_filter_chain *chain, php_stream_filter *filter TSRMLS_DC); PHPAPI void _php_stream_filter_append(php_stream_filter_chain *chain, php_stream_filter *filter TSRMLS_DC); +PHPAPI int _php_stream_filter_check_chain(php_stream_filter_chain *chain TSRMLS_DC); +PHPAPI int _php_stream_filter_output_prefer_unicode(php_stream_filter *filter TSRMLS_DC); PHPAPI int _php_stream_filter_flush(php_stream_filter *filter, int finish TSRMLS_DC); PHPAPI php_stream_filter *php_stream_filter_remove(php_stream_filter *filter, int call_dtor TSRMLS_DC); PHPAPI void php_stream_filter_free(php_stream_filter *filter TSRMLS_DC); @@ -136,6 +170,8 @@ END_EXTERN_C() #define php_stream_filter_prepend(chain, filter) _php_stream_filter_prepend((chain), (filter) TSRMLS_CC) #define php_stream_filter_append(chain, filter) _php_stream_filter_append((chain), (filter) TSRMLS_CC) #define php_stream_filter_flush(filter, finish) _php_stream_filter_flush((filter), (finish) TSRMLS_CC) +#define php_stream_filter_check_chain(chain) _php_stream_filter_check_chain((chain) TSRMLS_CC) +#define php_stream_filter_output_prefer_unicode(filter) _php_stream_filter_output_prefer_unicode((filter) TSRMLS_CC) #define php_stream_is_filtered(stream) ((stream)->readfilters.head || (stream)->writefilters.head) diff --git a/main/streams/streams.c b/main/streams/streams.c index d3ede8fc44..fd69a4b36f 100755 --- a/main/streams/streams.c +++ b/main/streams/streams.c @@ -368,9 +368,11 @@ fprintf(stderr, "stream_free: %s:%p[%s] preserve_handle=%d release_cast=%d remov stream->wrapperdata = NULL; } - if (stream->readbuf) { - pefree(stream->readbuf, stream->is_persistent); - stream->readbuf = NULL; + while (stream->readbuf.head) { + php_stream_bucket *bucket = stream->readbuf.head; + + php_stream_bucket_unlink(bucket TSRMLS_CC); + php_stream_bucket_delref(bucket TSRMLS_CC); } if (stream->is_persistent && (close_options & PHP_STREAM_FREE_PERSISTENT)) { @@ -422,8 +424,6 @@ fprintf(stderr, "stream_free: %s:%p[%s] preserve_handle=%d release_cast=%d remov static void php_stream_fill_read_buffer(php_stream *stream, size_t size TSRMLS_DC) { - /* allocate/fill the buffer */ - if (stream->readfilters.head) { char *chunk_buf; int err_flag = 0; @@ -433,7 +433,7 @@ static void php_stream_fill_read_buffer(php_stream *stream, size_t size TSRMLS_D /* allocate a buffer for reading chunks */ chunk_buf = emalloc(stream->chunk_size); - while (!err_flag && (stream->writepos - stream->readpos < (off_t)size)) { + while (!err_flag && (stream->readbuf_avail < (off_t)size)) { size_t justread = 0; int flags; php_stream_bucket *bucket; @@ -475,22 +475,38 @@ static void php_stream_fill_read_buffer(php_stream *stream, size_t size TSRMLS_D /* we get here when the last filter in the chain has data to pass on. * in this situation, we are passing the brig_in brigade into the * stream read buffer */ - while (brig_inp->head) { - bucket = brig_inp->head; - /* grow buffer to hold this bucket - * TODO: this can fail for persistent streams */ - if (stream->readbuflen - stream->writepos < bucket->buflen) { - stream->readbuflen += bucket->buflen; - stream->readbuf = perealloc(stream->readbuf, stream->readbuflen, - stream->is_persistent); - } - memcpy(stream->readbuf + stream->writepos, bucket->buf, bucket->buflen); - stream->writepos += bucket->buflen; - + while ((bucket = brig_inp->head)) { + php_stream_bucket *tail = stream->readbuf.tail; php_stream_bucket_unlink(bucket TSRMLS_CC); - php_stream_bucket_delref(bucket TSRMLS_CC); + if (bucket->is_unicode && + U16_IS_SURROGATE(*bucket->buf.ustr.val) && + !U16_IS_SURROGATE_LEAD(*bucket->buf.ustr.val) && + tail && tail->is_unicode && + U16_IS_SURROGATE(tail->buf.ustr.val[tail->buf.ustr.len - 1]) && + U16_IS_SURROGATE_LEAD(tail->buf.ustr.val[tail->buf.ustr.len - 1])) { + /* Surrogate pair got split between buckets -- Unlikely */ + UChar *tmp; + + tmp = peumalloc(bucket->buf.ustr.len + 1, bucket->is_persistent); + *tmp = stream->readbuf.tail->buf.ustr.val[--tail->buf.ustr.len]; + memmove(tmp + UBYTES(1), bucket->buf.ustr.val, UBYTES(bucket->buf.ustr.len)); + pefree(bucket->buf.ustr.val, bucket->is_persistent); + bucket->buf.ustr.val = tmp; + + if (tail->buf.ustr.len <= 0) { + /* Tail was only a one UChar bucket */ + php_stream_bucket_unlink(tail TSRMLS_CC); + php_stream_bucket_delref(tail TSRMLS_CC); + } else if (tail == stream->readbuf.head && (tail->buf.ustr.len <= stream->readbuf_ofs)) { + /* Tail was head and last char was only unused portion */ + php_stream_bucket_unlink(tail TSRMLS_CC); + php_stream_bucket_delref(tail TSRMLS_CC); + stream->readbuf_ofs = 0; + } + } + php_stream_bucket_append(&stream->readbuf, bucket TSRMLS_CC); + stream->readbuf_avail += bucket->is_unicode ? bucket->buf.ustr.len : bucket->buf.str.len; } - break; case PSFS_FEED_ME: @@ -520,30 +536,22 @@ static void php_stream_fill_read_buffer(php_stream *stream, size_t size TSRMLS_D } else { /* is there enough data in the buffer ? */ - if (stream->writepos - stream->readpos < (off_t)size) { + if (stream->readbuf_avail < (off_t)size) { + char *chunk_buf; size_t justread = 0; + int is_persistent = php_stream_is_persistent(stream); - /* reduce buffer memory consumption if possible, to avoid a realloc */ - if (stream->readbuf && stream->readbuflen - stream->writepos < stream->chunk_size) { - memmove(stream->readbuf, stream->readbuf + stream->readpos, stream->readbuflen - stream->readpos); - stream->writepos -= stream->readpos; - stream->readpos = 0; - } - - /* grow the buffer if required - * TODO: this can fail for persistent streams */ - if (stream->readbuflen - stream->writepos < stream->chunk_size) { - stream->readbuflen += stream->chunk_size; - stream->readbuf = perealloc(stream->readbuf, stream->readbuflen, - stream->is_persistent); - } + chunk_buf = pemalloc(stream->chunk_size, is_persistent); + justread = stream->ops->read(stream, chunk_buf, stream->chunk_size TSRMLS_CC); - justread = stream->ops->read(stream, stream->readbuf + stream->writepos, - stream->readbuflen - stream->writepos - TSRMLS_CC); + if (justread == (size_t)-1 || justread == 0) { + pefree(chunk_buf, is_persistent); + } else { + php_stream_bucket *bucket; - if (justread != (size_t)-1) { - stream->writepos += justread; + bucket = php_stream_bucket_new(stream, chunk_buf, justread, 1, is_persistent TSRMLS_CC); + php_stream_bucket_append(&stream->readbuf, bucket TSRMLS_CC); + stream->readbuf_avail += justread; } } } @@ -551,23 +559,32 @@ static void php_stream_fill_read_buffer(php_stream *stream, size_t size TSRMLS_D PHPAPI size_t _php_stream_read(php_stream *stream, char *buf, size_t size TSRMLS_DC) { + php_stream_bucket *bucket; size_t toread = 0, didread = 0; while (size > 0) { - /* take from the read buffer first. * It is possible that a buffered stream was switched to non-buffered, so we * drain the remainder of the buffer before using the "raw" read mode for * the excess */ - if (stream->writepos > stream->readpos) { - toread = stream->writepos - stream->readpos; + while (size > 0 && (bucket = stream->readbuf.head)) { + if (bucket->is_unicode) { + /* This is an string read func, convert to string first */ + php_stream_bucket_tostring(stream, &bucket, &stream->readbuf_ofs TSRMLS_CC); + } + toread = bucket->buf.str.len - stream->readbuf_ofs; if (toread > size) { toread = size; } - - memcpy(buf, stream->readbuf + stream->readpos, toread); - stream->readpos += toread; + memcpy(buf, bucket->buf.str.val + stream->readbuf_ofs, toread); + stream->readbuf_ofs += toread; + stream->readbuf_avail -= toread; + if (stream->readbuf_ofs >= bucket->buf.str.len) { + php_stream_bucket_unlink(bucket TSRMLS_CC); + php_stream_bucket_delref(bucket TSRMLS_CC); + stream->readbuf_ofs = 0; + } size -= toread; buf += toread; didread += toread; @@ -578,32 +595,90 @@ PHPAPI size_t _php_stream_read(php_stream *stream, char *buf, size_t size TSRMLS break; } + /* just break anyway, to avoid greedy read */ + if (didread > 0 && (stream->wrapper != &php_plain_files_wrapper)) { + break; + } + if (!stream->readfilters.head && (stream->flags & PHP_STREAM_FLAG_NO_BUFFER || stream->chunk_size == 1)) { toread = stream->ops->read(stream, buf, size TSRMLS_CC); - } else { - php_stream_fill_read_buffer(stream, size TSRMLS_CC); + if (toread <= 0) { + break; + } + buf += toread; + size -= toread; + didread += toread; + continue; + } + + php_stream_fill_read_buffer(stream, size TSRMLS_CC); + if (stream->readbuf_avail <= 0) { + /* EOF, or temporary end of data (for non-blocking mode). */ + break; + } + } + + if (didread > 0) { + stream->position += didread; + } + return didread; +} - toread = stream->writepos - stream->readpos; +PHPAPI size_t _php_stream_read_unicode(php_stream *stream, UChar *buf, int32_t size TSRMLS_DC) +{ + php_stream_bucket *bucket; + size_t toread = 0, didread = 0; + + while (size > 0) { + /* take from the read buffer first. + * It is possible that a buffered stream was switched to non-buffered, so we + * drain the remainder of the buffer before using the "raw" read mode for + * the excess */ + + while (size > 0 && (bucket = stream->readbuf.head)) { + UChar lastchar = 0; + + if (!bucket->is_unicode) { + /* This is a unicode read func, convert to unicode first */ + php_stream_bucket_tounicode(stream, &bucket, &stream->readbuf_ofs TSRMLS_CC); + } + toread = bucket->buf.ustr.len - stream->readbuf_ofs; if (toread > size) { toread = size; } - - if (toread > 0) { - memcpy(buf, stream->readbuf + stream->readpos, toread); - stream->readpos += toread; + lastchar = *(bucket->buf.ustr.val + stream->readbuf_ofs + toread - 1); + if (U16_IS_SURROGATE(lastchar) && U16_IS_SURROGATE_LEAD(lastchar)) { + toread--; + /* The only time we should encounter a split surrogate is when the buffer size is truncating the data + In this case, reduce size along with toread to avoid getting stuck */ + size--; + } + memcpy(buf, bucket->buf.ustr.val + stream->readbuf_ofs, toread * sizeof(UChar)); + stream->readbuf_ofs += toread; + stream->readbuf_avail -= toread; + if (stream->readbuf_ofs >= bucket->buf.ustr.len) { + php_stream_bucket_unlink(bucket TSRMLS_CC); + php_stream_bucket_delref(bucket TSRMLS_CC); + stream->readbuf_ofs = 0; } - } - if (toread > 0) { - didread += toread; - buf += toread; size -= toread; - } else { - /* EOF, or temporary end of data (for non-blocking mode). */ + buf += toread; + didread += toread; + } + + /* ignore eof here; the underlying state might have changed */ + if (size == 0) { break; } /* just break anyway, to avoid greedy read */ - if (stream->wrapper != &php_plain_files_wrapper) { + if (didread > 0 && (stream->wrapper != &php_plain_files_wrapper)) { + break; + } + + php_stream_fill_read_buffer(stream, size * sizeof(UChar) TSRMLS_CC); + if (stream->readbuf_avail <= 0) { + /* EOF, or temporary end of data (for non-blocking mode). */ break; } } @@ -615,10 +690,182 @@ PHPAPI size_t _php_stream_read(php_stream *stream, char *buf, size_t size TSRMLS return didread; } +/* buf mabe NULL (in which case it will be allocated) + num_bytes and num_chars must be initialized upon entry to maximum for each (-1 for no maximum) + num_bytes/num_chars will be set on exit to actual contents of buf + Will return unicode/string type dependent on the first character unit in the read buf + Will return as many characters as possible (and permitted by max lengths) without changing unicode/string type + Will not split surrogate pairs */ +PHPAPI void *_php_stream_u_read(php_stream *stream, void *buf, int32_t *pnum_bytes, int32_t *pnum_chars, int *pis_unicode TSRMLS_DC) +{ + int grow_mode = 0; + int32_t num_bytes = 0, num_chars = 0; + int32_t max_bytes = *pnum_bytes, max_chars = *pnum_chars; + int32_t buflen = buf ? max_bytes : 2048; + int32_t bufpos = 0; + int is_unicode; + php_stream_bucket *bucket; + + /* It's possible that we have a readbuf, but that it's only half of a surrogate pair */ + if (!stream->readbuf.head || + (stream->readbuf.head == stream->readbuf.tail && stream->readbuf.head->is_unicode && + (stream->readbuf.head->buf.ustr.len - stream->readbuf_ofs) == 1 && + U16_IS_SURROGATE(stream->readbuf.head->buf.ustr.val[stream->readbuf.head->buf.ustr.len-1]))) { + php_stream_fill_read_buffer(stream, max_bytes ? max_bytes : (max_chars ? max_chars : stream->chunk_size) TSRMLS_CC); + } + + + if (!stream->readbuf.head || + (stream->readbuf.head == stream->readbuf.tail && stream->readbuf.head->is_unicode && + (stream->readbuf.head->buf.ustr.len - stream->readbuf_ofs) == 1 && + U16_IS_SURROGATE(stream->readbuf.head->buf.ustr.val[stream->readbuf.head->buf.ustr.len-1]))) { + /* Nothing to return */ + *pnum_bytes = 0; + *pnum_chars = 0; + *pis_unicode = 0; + return NULL; + } + + + if (!buf) { + grow_mode = 1; + buf = emalloc(buflen); + } + + is_unicode = stream->readbuf.head->is_unicode; + if (is_unicode) { + /* normalize byte boundary */ + if (max_bytes >= 0 && (max_bytes % sizeof(UChar))) { + max_bytes -= (max_bytes % sizeof(UChar)); + } + if (max_bytes >= 0 && max_bytes < UBYTES(max_chars)) { + /* max_bytes needs to be at least twice max_chars when both are provided */ + max_chars = (max_bytes / sizeof(UChar)); + } + } else { + if (max_chars < 0 && max_bytes >= 0) { + max_chars = max_bytes; + } else if (max_chars >= 0 && grow_mode) { + max_bytes = max_chars; + } + } + + for (;;) { + if (buflen - bufpos < 1024 && max_bytes >= 0 && max_bytes > buflen) { + buflen += 1024; + if (buflen > max_bytes) { + buflen = max_bytes; + } + buf = erealloc(buf, buflen); + } + + if ((bucket = stream->readbuf.head)) { + if ((bucket->is_unicode && !is_unicode) || + (!bucket->is_unicode && is_unicode)) { + /* data type swap, exit now */ + break; + } + if (bucket->is_unicode) { + UChar *s = bucket->buf.ustr.val + stream->readbuf_ofs, *p; + int bytes_in_buf, chars_in_buf; + int32_t ofs = 0; + + chars_in_buf = u_countChar32(s, bucket->buf.ustr.len - stream->readbuf_ofs); + + if (chars_in_buf > max_chars && max_chars >= 0) { + chars_in_buf = max_chars; + } + /* u_countChar32 tells us that we won't overrun anyway */ + U16_FWD_N_UNSAFE(s, ofs, chars_in_buf); + p = s + ofs; + bytes_in_buf = UBYTES(ofs); + if (bytes_in_buf > (max_bytes - num_bytes)) { + bytes_in_buf = max_bytes - num_bytes; + bytes_in_buf -= bytes_in_buf & 1; /* normalize */ + p = s + (bytes_in_buf >> 1); + if (p > s && U16_IS_SURROGATE(p[-1]) && U16_IS_SURROGATE_LEAD(p[-1])) { + /* Don't split surrogate pairs */ + p--; + bytes_in_buf -= UBYTES(1); + } + if (bytes_in_buf <= 0) { + /* No room to copy data (surrogate pair) */ + break; + } + chars_in_buf = u_countChar32(s, p - s); + } + memcpy((char *)buf + num_bytes, s, bytes_in_buf); + num_bytes += bytes_in_buf; + num_chars += chars_in_buf; + stream->readbuf_ofs += p - s; + stream->readbuf_avail -= p - s; + if (stream->readbuf_ofs >= bucket->buf.ustr.len) { + php_stream_bucket_unlink(bucket TSRMLS_CC); + php_stream_bucket_delref(bucket TSRMLS_CC); + stream->readbuf_ofs = 0; + } else if (stream->readbuf_ofs == (bucket->buf.ustr.len - 1) && + U16_IS_SURROGATE(bucket->buf.ustr.val[bucket->buf.ustr.len - 1]) && + bucket->next && bucket->next->is_unicode) { + /* Only one char left in the bucket, avoid already split surrogates getting "stuck" -- Should never happen thanks to fill_read_buffer */ + php_stream_bucket *next_bucket = bucket->next; + + bucket->buf.ustr.val = peurealloc(bucket->buf.ustr.val, next_bucket->buf.ustr.len + 1, bucket->is_persistent); + bucket->buf.ustr.val[0] = bucket->buf.ustr.val[bucket->buf.ustr.len - 1]; + memcpy(bucket->buf.ustr.val + 1, next_bucket->buf.ustr.val, UBYTES(next_bucket->buf.ustr.len)); + php_stream_bucket_unlink(next_bucket TSRMLS_CC); + php_stream_bucket_delref(next_bucket TSRMLS_CC); + stream->readbuf_ofs = 0; + } else { + /* Reached max limits */ + break; + } + } else { + int want = (max_chars < 0 || max_chars >= buflen) ? (buflen - num_bytes) : (max_chars - num_chars); + int avail = bucket->buf.str.len - stream->readbuf_ofs; + + if (max_bytes >= 0 && want > max_bytes) { + want = max_bytes; + } + + if (want > avail) { + want = avail; + } + + memcpy((char *)buf + num_bytes, bucket->buf.str.val + stream->readbuf_ofs, want); + stream->readbuf_ofs += want; + stream->readbuf_avail -= want; + num_bytes += want; + num_chars += want; + if (stream->readbuf_ofs >= bucket->buf.str.len) { + php_stream_bucket_unlink(bucket TSRMLS_CC); + php_stream_bucket_delref(bucket TSRMLS_CC); + stream->readbuf_ofs = 0; + } else { + /* Reached max limit */ + break; + } + } + } else { + /* No more data */ + break; + } + } + /* Successful exit */ + *pnum_bytes = num_bytes; + *pnum_chars = num_chars; + *pis_unicode = is_unicode; + + if (num_chars == 0 && grow_mode) { + efree(buf); + buf = NULL; + } + return buf; +} + PHPAPI int _php_stream_eof(php_stream *stream TSRMLS_DC) { /* if there is data in the buffer, it's not EOF */ - if (stream->writepos - stream->readpos > 0) { + if (stream->readbuf_avail > 0) { return 0; } @@ -684,6 +931,8 @@ PHPAPI int _php_stream_stat(php_stream *stream, php_stream_statbuf *ssb TSRMLS_D return (stream->ops->stat)(stream, ssb TSRMLS_CC); } +/* buf != NULL Still used by file() in ext/standard/file.c + buf == NULL semantics no longer supported */ PHPAPI char *php_stream_locate_eol(php_stream *stream, char *buf, size_t buf_len TSRMLS_DC) { size_t avail; @@ -691,8 +940,7 @@ PHPAPI char *php_stream_locate_eol(php_stream *stream, char *buf, size_t buf_len char *readptr; if (!buf) { - readptr = stream->readbuf + stream->readpos; - avail = stream->writepos - stream->readpos; + return NULL; } else { readptr = buf; avail = buf_len; @@ -725,123 +973,366 @@ PHPAPI char *php_stream_locate_eol(php_stream *stream, char *buf, size_t buf_len /* If buf == NULL, the buffer will be allocated automatically and will be of an * appropriate length to hold the line, regardless of the line length, memory - * permitting */ + * permitting -- returned string will be up to (maxlen-1), last byte holding terminating NULL */ PHPAPI char *_php_stream_get_line(php_stream *stream, char *buf, size_t maxlen, size_t *returned_len TSRMLS_DC) { - size_t avail = 0; - size_t current_buf_size = 0; + php_stream_bucket *bucket; size_t total_copied = 0; - int grow_mode = 0; - char *bufstart = buf; + int growmode = 0; - if (buf == NULL) { - grow_mode = 1; - } else if (maxlen == 0) { - return NULL; + if (!buf) { + maxlen = stream->chunk_size + 1; + buf = emalloc(maxlen); + growmode = 1; } - /* - * If the underlying stream operations block when no new data is readable, - * we need to take extra precautions. - * - * If there is buffered data available, we check for a EOL. If it exists, - * we pass the data immediately back to the caller. This saves a call - * to the read implementation and will not block where blocking - * is not necessary at all. - * - * If the stream buffer contains more data than the caller requested, - * we can also avoid that costly step and simply return that data. - */ + /* Leave room for NULL */ + maxlen--; - for (;;) { - avail = stream->writepos - stream->readpos; + for(;;) { + /* Fill buf with buffered data + until no space is left in the buffer + or EOL is found */ + char lastchar = 0; - if (avail > 0) { - size_t cpysz = 0; - char *readptr; + /* consumed readbuf if possible */ + while ((bucket = stream->readbuf.head)) { char *eol; - int done = 0; + size_t tocopy; + size_t wanted = maxlen - total_copied; + int bucket_consumed = 0; - readptr = stream->readbuf + stream->readpos; - eol = php_stream_locate_eol(stream, NULL, 0 TSRMLS_CC); + if (bucket->is_unicode) { + /* This is a string read func, convert to string first */ + php_stream_bucket_tostring(stream, &bucket, &stream->readbuf_ofs TSRMLS_CC); + } - if (eol) { - cpysz = eol - readptr + 1; - done = 1; + if (stream->flags & PHP_STREAM_FLAG_DETECT_EOL && lastchar == '\r') { + /* Line ending was actually found in the last char of the last bucket + Since it was \r it could have been MAC or DOS */ + stream->flags ^= PHP_STREAM_FLAG_DETECT_EOL; + if (bucket->buf.str.val[stream->readbuf_ofs] == '\n') { + /* First byte here is a \n, put them together and you get DOS line endings */ + stream->readbuf_ofs++; + stream->readbuf_avail--; + buf[total_copied++] = '\n'; + /* unlikely -- It'd mean a one byte bucket -- possible though */ + if (stream->readbuf_ofs >= bucket->buf.str.len) { + stream->readbuf_ofs = 0; + php_stream_bucket_unlink(bucket TSRMLS_CC); + php_stream_bucket_delref(bucket TSRMLS_CC); + } + } else { + /* Seeing no \n in the first char of this bucket, we know it was MAC */ + stream->flags |= PHP_STREAM_FLAG_EOL_MAC; + } + goto exit_getline; + } else if (stream->flags & PHP_STREAM_FLAG_DETECT_EOL) { + char *cr, *lf; + lf = memchr(bucket->buf.str.val + stream->readbuf_ofs, '\n', bucket->buf.str.len - stream->readbuf_ofs); + cr = memchr(bucket->buf.str.val + stream->readbuf_ofs, '\r', bucket->buf.str.len - stream->readbuf_ofs); + eol = (cr && (!lf || cr < (lf - 1))) ? cr : lf; + } else if (stream->flags & PHP_STREAM_FLAG_EOL_MAC) { + eol = memchr(bucket->buf.str.val + stream->readbuf_ofs, '\r', bucket->buf.str.len - stream->readbuf_ofs); } else { - cpysz = avail; + eol = memchr(bucket->buf.str.val + stream->readbuf_ofs, '\n', bucket->buf.str.len - stream->readbuf_ofs); } - if (grow_mode) { - /* allow room for a NUL. If this realloc is really a realloc - * (ie: second time around), we get an extra byte. In most - * cases, with the default chunk size of 8K, we will only - * incur that overhead once. When people have lines longer - * than 8K, we waste 1 byte per additional 8K or so. - * That seems acceptable to me, to avoid making this code - * hard to follow */ - bufstart = erealloc(bufstart, current_buf_size + cpysz + 1); - current_buf_size += cpysz + 1; - buf = bufstart + total_copied; - } else { - if (cpysz >= maxlen - 1) { - cpysz = maxlen - 1; - done = 1; + /* No \r or \n found in bucket -- grab it all */ + if (!eol) { + eol = bucket->buf.str.val + bucket->buf.str.len - 1; + } + tocopy = eol - (bucket->buf.str.val + stream->readbuf_ofs) + 1; + + /* maxlen exceeded */ + if (tocopy > wanted && growmode) { + if (tocopy - wanted > stream->chunk_size) { + maxlen += tocopy - wanted; + } else { + maxlen += stream->chunk_size; } + buf = erealloc(buf, maxlen + 1); + wanted = maxlen - total_copied; } - memcpy(buf, readptr, cpysz); + if (tocopy > wanted) { + tocopy = wanted; + } - stream->position += cpysz; - stream->readpos += cpysz; - buf += cpysz; - maxlen -= cpysz; - total_copied += cpysz; + memcpy(buf + total_copied, bucket->buf.str.val + stream->readbuf_ofs, tocopy); + total_copied += tocopy; + stream->readbuf_ofs += tocopy; + stream->readbuf_avail -= tocopy; + lastchar = buf[total_copied-1]; - if (done) { - break; + if (stream->readbuf_ofs >= bucket->buf.str.len) { + stream->readbuf_ofs = 0; + php_stream_bucket_unlink(bucket TSRMLS_CC); + php_stream_bucket_delref(bucket TSRMLS_CC); + bucket_consumed = 1; } - } else if (stream->eof) { - break; - } else { - /* XXX: Should be fine to always read chunk_size */ - size_t toread; - - if (grow_mode) { - toread = stream->chunk_size; - } else { - toread = maxlen - 1; - if (toread > stream->chunk_size) { - toread = stream->chunk_size; - } + + if (total_copied >= maxlen) { + goto exit_getline; } - php_stream_fill_read_buffer(stream, toread TSRMLS_CC); + if (stream->flags & PHP_STREAM_FLAG_DETECT_EOL && + bucket_consumed && lastchar == '\r') { + /* Could be MAC, could be DOS... + Need to check the first char of the next bucket to be sure */ + continue; + } - if (stream->writepos - stream->readpos == 0) { - break; + if (lastchar == '\r' || lastchar == '\n') { + stream->flags ^= PHP_STREAM_FLAG_DETECT_EOL; + if (lastchar == '\r') { + /* if there were a \n in this bucket after the \r, we would be looking at it */ + stream->flags |= PHP_STREAM_FLAG_EOL_MAC; + } + goto exit_getline; } } - } - if (total_copied == 0) { - if (grow_mode) { - assert(bufstart == NULL); + if (stream->eof) { + if (total_copied == 0) { + if (growmode) { + efree(buf); + } + return NULL; + } + goto exit_getline; } - return NULL; + + if (maxlen - total_copied) { + size_t bufneeded = maxlen - total_copied; + + if (growmode) { + bufneeded = stream->chunk_size; + } + php_stream_fill_read_buffer(stream, bufneeded TSRMLS_CC); + } + } - buf[0] = '\0'; + exit_getline: + if (returned_len) { *returned_len = total_copied; } + buf[total_copied] = 0; + stream->position += total_copied; + + return buf; +} + +/* If buf == NULL, the buffer will be allocated automatically and will be of an + * appropriate length to hold the line, regardless of the line length, memory + * permitting -- returned string will be up to (maxlen-1), last byte holding terminating NULL */ +PHPAPI UChar *_php_stream_u_get_line(php_stream *stream, UChar *buf, int32_t *pmax_bytes, int32_t *pmax_chars, int *pis_unicode TSRMLS_DC) +{ + php_stream_bucket *bucket; + int32_t num_bytes = 0, num_chars = 0; + int32_t max_bytes = *pmax_bytes, max_chars = *pmax_chars; + int growmode = 0, is_unicode; + + while (!stream->readbuf.head) { + /* Nothing buffered, get an idea of the data type by polling */ + int32_t fillsize = (max_chars > 0) ? max_chars : ((max_bytes > 0) ? max_bytes : stream->chunk_size); + + php_stream_fill_read_buffer(stream, fillsize TSRMLS_CC); + if (!stream->readbuf.head) { + *pmax_bytes = 0; + *pmax_chars = 0; + *pis_unicode = 0; + return NULL; + } + } + + *pis_unicode = is_unicode = stream->readbuf.head->is_unicode; + + if (!is_unicode) { + /* Wrap normal get_line() */ + int returned_len; + char *retbuf = php_stream_get_line(stream, (char*)buf, max_chars, &returned_len); + + *pmax_chars = returned_len; + *pmax_bytes = returned_len; + return (UChar*)retbuf; + } + + /* Now act like php_stream_u_read(), but stopping at 000A, 000D, or 000D 000A */ + + if (!buf) { + max_bytes = UBYTES(257); + buf = emalloc(max_bytes); + growmode = 1; + } + + /* Leave room for NULL */ + max_bytes -= UBYTES(1); + + for(;;) { + /* Fill buf with buffered data + until no space is left in the buffer + or EOL is found */ + UChar lastchar = 0; + + /* consumed readbuf if possible */ + while ((bucket = stream->readbuf.head)) { + UChar *eol, *s; + int32_t want_chars = max_chars - num_chars; + int32_t want_bytes = max_bytes - num_bytes; + int32_t count_chars; + int32_t count_bytes; + int bucket_consumed = 0; + + if (!bucket->is_unicode) { + /* Done with unicode data, bail as though EOL was reached (even though it wasn't) */ + goto exit_ugetline; + } + + if (stream->flags & PHP_STREAM_FLAG_DETECT_EOL && lastchar == '\r') { + /* Line ending was actually found in the last char of the last bucket + Since it was \r it could have been MAC or DOS */ + stream->flags ^= PHP_STREAM_FLAG_DETECT_EOL; + if (bucket->buf.ustr.val[stream->readbuf_ofs] == '\n') { + /* First byte here is a \n, put them together and you get DOS line endings */ + stream->readbuf_ofs++; + stream->readbuf_avail--; + buf[num_bytes >> 1] = '\n'; /* Can't use num_chars here, surrogate pairs will foul it up */ + num_bytes += UBYTES(1); + num_chars++; + /* unlikely -- It'd mean a one UChar bucket -- possible though */ + if (stream->readbuf_ofs >= bucket->buf.ustr.len) { + stream->readbuf_ofs = 0; + php_stream_bucket_unlink(bucket TSRMLS_CC); + php_stream_bucket_delref(bucket TSRMLS_CC); + } + } else { + /* Seeing no \n in the first char of this bucket, we know it was MAC */ + stream->flags |= PHP_STREAM_FLAG_EOL_MAC; + } + goto exit_ugetline; + } else if (stream->flags & PHP_STREAM_FLAG_DETECT_EOL) { + UChar *cr, *lf; + lf = u_memchr(bucket->buf.ustr.val + stream->readbuf_ofs, '\n', bucket->buf.ustr.len - stream->readbuf_ofs); + cr = u_memchr(bucket->buf.ustr.val + stream->readbuf_ofs, '\r', bucket->buf.ustr.len - stream->readbuf_ofs); + eol = (cr && (!lf || cr < (lf - 1))) ? cr : lf; + } else if (stream->flags & PHP_STREAM_FLAG_EOL_MAC) { + eol = u_memchr(bucket->buf.ustr.val + stream->readbuf_ofs, '\r', bucket->buf.ustr.len - stream->readbuf_ofs); + } else { + eol = u_memchr(bucket->buf.ustr.val + stream->readbuf_ofs, '\n', bucket->buf.ustr.len - stream->readbuf_ofs); + } + + /* No \r or \n found in bucket -- grab it all */ + if (!eol) { + eol = bucket->buf.ustr.val + bucket->buf.ustr.len - 1; + } + s = bucket->buf.ustr.val + stream->readbuf_ofs; + + count_bytes = UBYTES(eol - s + 1); + if (count_bytes > want_bytes && growmode) { + max_bytes = num_bytes + count_bytes + UBYTES(256); + want_bytes = max_bytes - num_bytes; + buf = erealloc(buf, max_bytes + UBYTES(1)); + } else if (count_bytes > want_bytes) { + count_bytes = want_bytes; + } + if (U16_IS_SURROGATE(s[(count_bytes >> 1) - 1]) && + U16_IS_SURROGATE_LEAD(s[(count_bytes >> 1) - 1])) { + count_bytes -= UBYTES(1); + } + if (count_bytes <= 0) { + /* Not enough space in buffer, just break out */ + goto exit_ugetline; + } + count_chars = u_countChar32(s, count_bytes >> 1); + + if (max_chars >= 0 && count_chars > want_chars) { + count_chars = want_chars; + count_bytes = 0; + U16_FWD_N_UNSAFE(s, count_bytes, count_chars); + count_bytes <<= 1; /* translate U16 to bytes */ + } + + memcpy(buf + num_bytes, s, count_bytes); + num_bytes += count_bytes; + num_chars += count_chars; + stream->readbuf_ofs += count_bytes >> 1; + stream->readbuf_avail -= count_bytes >> 1; + + lastchar = buf[(num_bytes >> 1) - 1]; + + if (stream->readbuf_ofs >= bucket->buf.ustr.len) { + stream->readbuf_ofs = 0; + php_stream_bucket_unlink(bucket TSRMLS_CC); + php_stream_bucket_delref(bucket TSRMLS_CC); + bucket_consumed = 1; + } + + if ((max_bytes >= 0 && num_bytes >= max_bytes) || + (max_chars >= 0 && num_chars >= max_chars)) { + goto exit_ugetline; + } + + if (stream->flags & PHP_STREAM_FLAG_DETECT_EOL && + bucket_consumed && lastchar == '\r') { + /* Could be MAC, could be DOS... + Need to check the first char of the next bucket to be sure */ + continue; + } + + if (lastchar == '\r' || lastchar == '\n') { + stream->flags ^= PHP_STREAM_FLAG_DETECT_EOL; + if (lastchar == '\r') { + /* if there were a \n in this bucket after the \r, we would be looking at it */ + stream->flags |= PHP_STREAM_FLAG_EOL_MAC; + } + goto exit_ugetline; + } + } + + if (stream->eof) { + if (num_bytes == 0) { + if (growmode) { + efree(buf); + } + buf = NULL; + } + goto exit_ugetline; + } + + if (max_bytes - num_bytes) { + int32_t want_bytes = max_bytes - num_bytes; + + if (growmode) { + want_bytes = stream->chunk_size; + } + php_stream_fill_read_buffer(stream, want_bytes TSRMLS_CC); + } + + } + + exit_ugetline: - return bufstart; + *pmax_chars = num_chars; + *pmax_bytes = num_bytes; + *pis_unicode = is_unicode; + if (buf) { + buf[num_bytes >> 1] = 0; + } + stream->position += num_bytes; + + return buf; } PHPAPI char *php_stream_get_record(php_stream *stream, size_t maxlen, size_t *returned_len, char *delim, size_t delim_len TSRMLS_DC) { + /* UTODO: Needs desperate rewriting for unicode conversion */ + return NULL; + +#ifdef SMG_0 char *e, *buf; size_t toread; int skip = 0; @@ -852,15 +1343,15 @@ PHPAPI char *php_stream_get_record(php_stream *stream, size_t maxlen, size_t *re toread = maxlen; } else { if (delim_len == 1) { - e = memchr(stream->readbuf + stream->readpos, *delim, stream->writepos - stream->readpos); + e = memchr(stream->readbuf, *delim, stream->readbuf_len); } else { - e = php_memnstr(stream->readbuf + stream->readpos, delim, delim_len, (stream->readbuf + stream->writepos)); + e = php_memnstr(stream->readbuf, delim, delim_len, (stream->readbuf + stream->readbuflen)); } if (!e) { toread = maxlen; } else { - toread = e - (char *) stream->readbuf - stream->readpos; + toread = e - (char *) stream->readbuf; skip = 1; } } @@ -883,6 +1374,18 @@ PHPAPI char *php_stream_get_record(php_stream *stream, size_t maxlen, size_t *re efree(buf); return NULL; } +#endif +} + +PHPAPI void _php_stream_flush_readbuf(php_stream *stream TSRMLS_DC) +{ + php_stream_bucket *bucket; + + while ((bucket = stream->readbuf.head)) { + php_stream_bucket_unlink(bucket TSRMLS_CC); + php_stream_bucket_delref(bucket TSRMLS_CC); + } + stream->readbuf_ofs = stream->readbuf_avail = 0; } /* Writes a buffer directly to a stream, using multiple of the chunk size */ @@ -893,8 +1396,11 @@ static size_t _php_stream_write_buffer(php_stream *stream, const char *buf, size /* if we have a seekable stream we need to ensure that data is written at the * current stream->position. This means invalidating the read buffer and then * performing a low-level seek */ +/* UTODO: FIX this if (stream->ops->seek && (stream->flags & PHP_STREAM_FLAG_NO_SEEK) == 0 && stream->readpos != stream->writepos) { - stream->readpos = stream->writepos = 0; +*/ + if (stream->ops->seek && (stream->flags & PHP_STREAM_FLAG_NO_SEEK) == 0) { + php_stream_flush_readbuf(stream); stream->ops->seek(stream, stream->position, SEEK_SET, &stream->position TSRMLS_CC); } @@ -931,7 +1437,7 @@ static size_t _php_stream_write_buffer(php_stream *stream, const char *buf, size * This may trigger a real write to the stream. * Returns the number of bytes consumed from buf by the first filter in the chain. * */ -static size_t _php_stream_write_filtered(php_stream *stream, const char *buf, size_t count, int flags TSRMLS_DC) +static size_t _php_stream_write_filtered(php_stream *stream, const char *buf, size_t count, int flags, int is_unicode TSRMLS_DC) { size_t consumed = 0; php_stream_bucket *bucket; @@ -941,16 +1447,18 @@ static size_t _php_stream_write_filtered(php_stream *stream, const char *buf, si php_stream_filter *filter; if (buf) { - bucket = php_stream_bucket_new(stream, (char *)buf, count, 0, 0 TSRMLS_CC); - php_stream_bucket_append(&brig_in, bucket TSRMLS_CC); + if (is_unicode) { + bucket = php_stream_bucket_new_unicode(stream, (UChar *)buf, count, 0, 0 TSRMLS_CC); + } else { + bucket = php_stream_bucket_new(stream, (char *)buf, count, 0, 0 TSRMLS_CC); + } + php_stream_bucket_append(brig_inp, bucket TSRMLS_CC); } for (filter = stream->writefilters.head; filter; filter = filter->next) { /* for our return value, we are interested in the number of bytes consumed from * the first filter in the chain */ - status = filter->fops->filter(stream, filter, brig_inp, brig_outp, - filter == stream->writefilters.head ? &consumed : NULL, flags TSRMLS_CC); - + status = filter->fops->filter(stream, filter, brig_inp, brig_outp, (filter == stream->writefilters.head) ? &consumed : NULL, flags TSRMLS_CC); if (status != PSFS_PASS_ON) { break; } @@ -969,7 +1477,11 @@ static size_t _php_stream_write_filtered(php_stream *stream, const char *buf, si * underlying stream */ while (brig_inp->head) { bucket = brig_inp->head; - _php_stream_write_buffer(stream, bucket->buf, bucket->buflen TSRMLS_CC); + if (bucket->is_unicode) { + _php_stream_write_buffer(stream, (char *)bucket->buf.ustr.val, UBYTES(bucket->buf.ustr.len) TSRMLS_CC); + } else { + _php_stream_write_buffer(stream, bucket->buf.str.val, bucket->buf.str.len TSRMLS_CC); + } /* Potential error situation - eg: no space on device. Perhaps we should keep this brigade * hanging around and try to write it later. * At the moment, we just drop it on the floor @@ -992,12 +1504,53 @@ static size_t _php_stream_write_filtered(php_stream *stream, const char *buf, si return consumed; } +PHPAPI int _php_stream_will_read_unicode(php_stream *stream TSRMLS_DC) +{ + php_stream_filter *filter; + int inverted = 0; + + if (stream->readbuf.head) { + /* If there are buckets available, what do they hold */ + return stream->readbuf.head->is_unicode; + } + + if (!stream->readfilters.head) { + /* Not filtered == reads as string */ + return 0; + } + + for(filter = stream->readfilters.tail; filter; filter = filter->prev) { + if (filter->flags & PSFO_FLAG_OUTPUTS_SAME) { + continue; + } + if (filter->flags & PSFO_FLAG_OUTPUTS_OPPOSITE) { + inverted ^= 1; + continue; + } + if (filter->flags & PSFO_FLAG_OUTPUTS_ANY) { + /* Indeterminate */ + return -1; + } + if (filter->flags & PSFO_FLAG_OUTPUTS_STRING) { + /* If an inversion happens, it'll be unicode, otherwise string */ + return inverted; + } + if (filter->flags & PSFO_FLAG_OUTPUTS_UNICODE) { + /* If an inversion happens, it'll be string, otherwise unicode */ + return inverted ^ 1; + } + } + + /* string comes from stream so apply same logic as filter outputting string */ + return inverted; +} + PHPAPI int _php_stream_flush(php_stream *stream, int closing TSRMLS_DC) { int ret = 0; if (stream->writefilters.head) { - _php_stream_write_filtered(stream, NULL, 0, closing ? PSFS_FLAG_FLUSH_CLOSE : PSFS_FLAG_FLUSH_INC TSRMLS_CC); + _php_stream_write_filtered(stream, NULL, 0, closing ? PSFS_FLAG_FLUSH_CLOSE : PSFS_FLAG_FLUSH_INC, 0 TSRMLS_CC); } if (stream->ops->flush) { @@ -1014,12 +1567,33 @@ PHPAPI size_t _php_stream_write(php_stream *stream, const char *buf, size_t coun } if (stream->writefilters.head) { - return _php_stream_write_filtered(stream, buf, count, PSFS_FLAG_NORMAL TSRMLS_CC); + return _php_stream_write_filtered(stream, buf, count, PSFS_FLAG_NORMAL, 0 TSRMLS_CC); } else { return _php_stream_write_buffer(stream, buf, count TSRMLS_CC); } } +PHPAPI size_t _php_stream_u_write(php_stream *stream, const UChar *buf, int32_t count TSRMLS_DC) +{ + if (buf == NULL || count == 0 || stream->ops->write == NULL) { + return 0; + } + + if (stream->writefilters.head) { + return _php_stream_write_filtered(stream, (const char*)buf, count, PSFS_FLAG_NORMAL, 1 TSRMLS_CC); + } else { + int32_t ret; + + ret = _php_stream_write_buffer(stream, (const char*)buf, UBYTES(count) TSRMLS_CC); + + /* Return data points, not bytes */ + if (ret > 0) { + ret >>= 1; + } + return ret; + } +} + PHPAPI size_t _php_stream_printf(php_stream *stream TSRMLS_DC, const char *fmt, ...) { size_t count; @@ -1050,19 +1624,44 @@ PHPAPI int _php_stream_seek(php_stream *stream, off_t offset, int whence TSRMLS_ /* handle the case where we are in the buffer */ if ((stream->flags & PHP_STREAM_FLAG_NO_BUFFER) == 0) { switch(whence) { + case SEEK_SET: + if (offset < stream->position || + offset > stream->position + stream->readbuf_avail) { + break; + } + /* act like SEEK_CUR */ + whence = SEEK_CUR; + offset -= stream->position; + /* fall through */ case SEEK_CUR: - if (offset > 0 && offset < stream->writepos - stream->readpos) { - stream->readpos += offset; - stream->position += offset; - stream->eof = 0; + if (offset == 0) { + /* nothing to do */ return 0; } - break; - case SEEK_SET: - if (offset > stream->position && - offset < stream->position + stream->writepos - stream->readpos) { - stream->readpos += offset - stream->position; - stream->position = offset; + + if (offset > 0 && offset <= stream->readbuf_avail) { + php_stream_bucket *bucket; + + while (offset && (bucket = stream->readbuf.head)) { + int consume = bucket->buf.str.len - stream->readbuf_ofs; + + if (consume > offset) { + /* seeking within this bucket */ + stream->readbuf_ofs += offset; + stream->readbuf_avail -= offset; + stream->position += offset; + break; + } + + /* consume the remaining bucket */ + stream->position += consume; + stream->readbuf_ofs = 0; + stream->readbuf_avail -= consume; + offset -= consume; + + php_stream_bucket_unlink(bucket TSRMLS_CC); + php_stream_bucket_delref(bucket TSRMLS_CC); + } stream->eof = 0; return 0; } @@ -1077,7 +1676,7 @@ PHPAPI int _php_stream_seek(php_stream *stream, off_t offset, int whence TSRMLS_ if (stream->writefilters.head) { _php_stream_flush(stream, 0 TSRMLS_CC); } - + switch(whence) { case SEEK_CUR: offset = stream->position + offset; @@ -1092,7 +1691,7 @@ PHPAPI int _php_stream_seek(php_stream *stream, off_t offset, int whence TSRMLS_ } /* invalidate the buffer contents */ - stream->readpos = stream->writepos = 0; + php_stream_flush_readbuf(stream); return ret; } @@ -1748,7 +2347,8 @@ PHPAPI php_stream *_php_stream_open_wrapper_ex(char *path, char *mode, int optio char *path_to_open; int persistent = options & STREAM_OPEN_PERSISTENT; char *copy_of_path = NULL; - + int implicit_mode[16]; + int modelen = strlen(mode); if (opened_path) { *opened_path = NULL; @@ -1766,10 +2366,20 @@ PHPAPI php_stream *_php_stream_open_wrapper_ex(char *path, char *mode, int optio return NULL; } + memcpy(implicit_mode, mode, modelen); + if (context && context->default_mode && modelen < 15 && !strchr(mode, 't') && !strchr(mode, 'b')) { + if (context->default_mode & PHP_FILE_BINARY) { + implicit_mode[modelen++] = 'b'; + } else if (context->default_mode & PHP_FILE_TEXT) { + implicit_mode[modelen++] = 't'; + } + implicit_mode[modelen] = 0; + } + if (wrapper) { stream = wrapper->wops->stream_opener(wrapper, - path_to_open, mode, options ^ REPORT_ERRORS, + path_to_open, implicit_mode, options ^ REPORT_ERRORS, opened_path, context STREAMS_REL_CC TSRMLS_CC); /* if the caller asked for a persistent stream but the wrapper did not @@ -1783,6 +2393,7 @@ PHPAPI php_stream *_php_stream_open_wrapper_ex(char *path, char *mode, int optio if (stream) { stream->wrapper = wrapper; + memcpy(stream->mode, implicit_mode, modelen + 1); } } @@ -1829,6 +2440,45 @@ PHPAPI php_stream *_php_stream_open_wrapper_ex(char *path, char *mode, int optio } } + /* Output encoding on text mode streams defaults to utf8 unless specified in context parameter */ + if (stream && strchr(implicit_mode, 't') && (strchr(implicit_mode, 'w') || strchr(implicit_mode, 'a') || strchr(implicit_mode, '+'))) { + php_stream_filter *filter; + char *encoding = (context && context->output_encoding) ? context->output_encoding : "utf8"; + char *filtername; + int encoding_len = strlen(encoding); + + filtername = emalloc(encoding_len + sizeof("unicode.to.")); + memcpy(filtername, "unicode.to.", sizeof("unicode.to.") - 1); + memcpy(filtername + sizeof("unicode.to.") - 1, encoding, encoding_len + 1); + + filter = php_stream_filter_create(filtername, NULL, persistent TSRMLS_CC); + if (!filter) { + php_stream_wrapper_log_error(wrapper, options TSRMLS_CC, "Failed applying output encoding"); + } else { + php_stream_filter_append(&stream->writefilters, filter); + } + efree(filtername); + } + + if (stream && strchr(implicit_mode, 't') && (strchr(implicit_mode, 'r') || strchr(implicit_mode, '+'))) { + php_stream_filter *filter; + char *filtername; + char *encoding = (context && context->input_encoding) ? context->input_encoding : "utf8"; + int input_encoding_len = strlen(encoding); + + filtername = emalloc(input_encoding_len + sizeof("unicode.from.")); + memcpy(filtername, "unicode.from.", sizeof("unicode.from.") - 1); + memcpy(filtername + sizeof("unicode.from.") - 1, encoding, input_encoding_len + 1); + + filter = php_stream_filter_create(filtername, NULL, persistent TSRMLS_CC); + if (!filter) { + php_stream_wrapper_log_error(wrapper, options TSRMLS_CC, "Failed applying input encoding"); + } else { + php_stream_filter_append(&stream->readfilters, filter); + } + efree(filtername); + } + if (stream == NULL && (options & REPORT_ERRORS)) { php_stream_display_wrapper_errors(wrapper, path, "failed to open stream" TSRMLS_CC); } @@ -1838,6 +2488,8 @@ PHPAPI php_stream *_php_stream_open_wrapper_ex(char *path, char *mode, int optio pefree(copy_of_path, persistent); } #endif + + return stream; } /* }}} */ @@ -1867,6 +2519,12 @@ PHPAPI void php_stream_context_free(php_stream_context *context) php_stream_notification_free(context->notifier); context->notifier = NULL; } + if (context->input_encoding) { + efree(context->input_encoding); + } + if (context->output_encoding) { + efree(context->output_encoding); + } if (context->links) { zval_ptr_dtor(&context->links); context->links = NULL; |
