/* +----------------------------------------------------------------------+ | PHP Version 7 | +----------------------------------------------------------------------+ | Copyright (c) 1997-2018 The PHP Group | +----------------------------------------------------------------------+ | This source file is subject to version 3.01 of the PHP license, | | that is bundled with this package in the file LICENSE, and is | | available through the world-wide-web at the following url: | | http://www.php.net/license/3_01.txt | | If you did not receive a copy of the PHP license and are unable to | | obtain it through the world-wide-web, please send a note to | | license@php.net so we can mail you a copy immediately. | +----------------------------------------------------------------------+ | Author: Anatol Belski | +----------------------------------------------------------------------+ */ #include #include "php.h" #include "SAPI.h" #include ZEND_TLS const struct php_win32_cp *cur_cp = NULL; ZEND_TLS const struct php_win32_cp *orig_cp = NULL; ZEND_TLS const struct php_win32_cp *cur_out_cp = NULL; ZEND_TLS const struct php_win32_cp *orig_out_cp = NULL; ZEND_TLS const struct php_win32_cp *cur_in_cp = NULL; ZEND_TLS const struct php_win32_cp *orig_in_cp = NULL; #include "cp_enc_map.c" __forceinline static wchar_t *php_win32_cp_to_w_int(const char* in, size_t in_len, size_t *out_len, UINT cp, DWORD flags) {/*{{{*/ wchar_t *ret; int ret_len, tmp_len; if (!in || in_len > (size_t)INT_MAX) { SET_ERRNO_FROM_WIN32_CODE(ERROR_INVALID_PARAMETER); return NULL; } assert(in_len ? (in[in_len] == L'\0') : 1); tmp_len = !in_len ? -1 : (int)in_len + 1; ret_len = MultiByteToWideChar(cp, flags, in, tmp_len, NULL, 0); if (ret_len == 0) { SET_ERRNO_FROM_WIN32_CODE(GetLastError()); return NULL; } ret = malloc(ret_len * sizeof(wchar_t)); if (!ret) { SET_ERRNO_FROM_WIN32_CODE(ERROR_OUTOFMEMORY); return NULL; } tmp_len = MultiByteToWideChar(cp, flags, in, tmp_len, ret, ret_len); if (tmp_len == 0) { free(ret); SET_ERRNO_FROM_WIN32_CODE(GetLastError()); return NULL; } assert(ret ? tmp_len == ret_len : 1); assert(ret ? wcslen(ret) == ret_len - 1 : 1); ret[ret_len-1] = L'\0'; if (PHP_WIN32_CP_IGNORE_LEN_P != out_len) { *out_len = ret_len - 1; } return ret; }/*}}}*/ PW32CP wchar_t *php_win32_cp_conv_utf8_to_w(const char* in, size_t in_len, size_t *out_len) {/*{{{*/ return php_win32_cp_to_w_int(in, in_len, out_len, CP_UTF8, MB_ERR_INVALID_CHARS); }/*}}}*/ PW32CP wchar_t *php_win32_cp_conv_cur_to_w(const char* in, size_t in_len, size_t *out_len) {/*{{{*/ wchar_t *ret; ret = php_win32_cp_to_w_int(in, in_len, out_len, cur_cp->id, cur_cp->from_w_fl); return ret; }/*}}}*/ PW32CP wchar_t *php_win32_cp_conv_to_w(DWORD cp, DWORD flags, const char* in, size_t in_len, size_t *out_len) {/*{{{*/ return php_win32_cp_to_w_int(in, in_len, out_len, cp, flags); }/*}}}*/ #define ASCII_FAIL_RETURN() \ if (PHP_WIN32_CP_IGNORE_LEN_P != out_len) { \ *out_len = 0; \ } \ return NULL; PW32CP wchar_t *php_win32_cp_conv_ascii_to_w(const char* in, size_t in_len, size_t *out_len) {/*{{{*/ wchar_t *ret, *ret_idx; const char *idx = in, *end; assert(in && in_len ? in[in_len] == '\0' : 1); if (!in) { SET_ERRNO_FROM_WIN32_CODE(ERROR_INVALID_PARAMETER); return NULL; } else if (0 == in_len) { /* Not binary safe. */ in_len = strlen(in); } end = in + in_len; if (in_len > 15) { const char *aidx = (const char *)ZEND_SLIDE_TO_ALIGNED16(in); /* Process unaligned chunk. */ while (idx < aidx) { if (!__isascii(*idx) && '\0' != *idx) { ASCII_FAIL_RETURN() } idx++; } /* Process aligned chunk. */ while (end - idx > 15) { const __m128i block = _mm_load_si128((__m128i *)idx); if (_mm_movemask_epi8(block)) { ASCII_FAIL_RETURN() } idx += 16; } } /* Process the trailing part, or otherwise process string < 16 bytes. */ while (idx < end) { if (!__isascii(*idx) && '\0' != *idx) { ASCII_FAIL_RETURN() } idx++; } ret = malloc((in_len+1)*sizeof(wchar_t)); if (!ret) { SET_ERRNO_FROM_WIN32_CODE(ERROR_OUTOFMEMORY); return NULL; } ret_idx = ret; idx = in; /* Check and conversion could be merged. This however would be more expencive, if a non ASCII string was passed. TODO check whether the impact is acceptable. */ if (in_len > 15) { const char *aidx = (const char *)ZEND_SLIDE_TO_ALIGNED16(in); /* Process unaligned chunk. */ while (idx < aidx) { *ret_idx++ = (wchar_t)*idx++; } /* Process aligned chunk. */ if (end - idx > 15) { const __m128i mask = _mm_set1_epi32(0); while (end - idx > 15) { const __m128i block = _mm_load_si128((__m128i *)idx); { const __m128i lo = _mm_unpacklo_epi8(block, mask); _mm_storeu_si128((__m128i *)ret_idx, lo); } ret_idx += 8; { const __m128i hi = _mm_unpackhi_epi8(block, mask); _mm_storeu_si128((__m128i *)ret_idx, hi); } idx += 16; ret_idx += 8; } } } /* Process the trailing part, or otherwise process string < 16 bytes. */ while (idx < end) { *ret_idx++ = (wchar_t)*idx++; } ret[in_len] = L'\0'; assert(ret ? wcslen(ret) == in_len : 1); if (PHP_WIN32_CP_IGNORE_LEN_P != out_len) { *out_len = in_len; } return ret; }/*}}}*/ #undef ASCII_FAIL_RETURN __forceinline static char *php_win32_cp_from_w_int(const wchar_t* in, size_t in_len, size_t *out_len, UINT cp, DWORD flags) {/*{{{*/ int r; int target_len, tmp_len; char* target; if (!in || in_len > INT_MAX) { SET_ERRNO_FROM_WIN32_CODE(ERROR_INVALID_PARAMETER); return NULL; } assert(in_len ? in[in_len] == '\0' : 1); tmp_len = !in_len ? -1 : (int)in_len + 1; target_len = WideCharToMultiByte(cp, flags, in, tmp_len, NULL, 0, NULL, NULL); if (target_len == 0) { SET_ERRNO_FROM_WIN32_CODE(GetLastError()); return NULL; } target = malloc(target_len); if (target == NULL) { SET_ERRNO_FROM_WIN32_CODE(ERROR_OUTOFMEMORY); return NULL; } r = WideCharToMultiByte(cp, flags, in, tmp_len, target, target_len, NULL, NULL); if (r == 0) { free(target); SET_ERRNO_FROM_WIN32_CODE(GetLastError()); return NULL; } assert(target ? r == target_len : 1); assert(target ? strlen(target) == target_len - 1 : 1); target[target_len-1] = '\0'; if (PHP_WIN32_CP_IGNORE_LEN_P != out_len) { *out_len = target_len - 1; } return target; }/*}}}*/ PW32CP char *php_win32_cp_conv_w_to_utf8(const wchar_t* in, size_t in_len, size_t *out_len) {/*{{{*/ return php_win32_cp_from_w_int(in, in_len, out_len, CP_UTF8, WC_ERR_INVALID_CHARS); }/*}}}*/ PW32CP char *php_win32_cp_conv_w_to_cur(const wchar_t* in, size_t in_len, size_t *out_len) {/*{{{*/ char *ret; ret = php_win32_cp_from_w_int(in, in_len, out_len, cur_cp->id, cur_cp->from_w_fl); return ret; }/*}}}*/ PW32CP char *php_win32_cp_conv_from_w(DWORD cp, DWORD flags, const wchar_t* in, size_t in_len, size_t *out_len) {/*{{{*/ return php_win32_cp_from_w_int(in, in_len, out_len, cp, flags); }/*}}}*/ /* This is only usable after the startup phase*/ __forceinline static char *php_win32_cp_get_enc(void) {/*{{{*/ char *enc = NULL; const zend_encoding *zenc; if (PG(internal_encoding) && PG(internal_encoding)[0]) { enc = PG(internal_encoding); } else if (SG(default_charset) && SG(default_charset)[0] ) { enc = SG(default_charset); } else { zenc = zend_multibyte_get_internal_encoding(); if (zenc) { enc = (char *)zend_multibyte_get_encoding_name(zenc); } } return enc; }/*}}}*/ __forceinline static BOOL php_win32_cp_is_cli_sapi() {/*{{{*/ return strlen(sapi_module.name) >= sizeof("cli") - 1 && !strncmp(sapi_module.name, "cli", sizeof("cli") - 1); }/*}}}*/ PW32CP const struct php_win32_cp *php_win32_cp_get_current(void) {/*{{{*/ return cur_cp; }/*}}}*/ PW32CP const struct php_win32_cp *php_win32_cp_get_orig(void) {/*{{{*/ return orig_cp; }/*}}}*/ PW32CP const struct php_win32_cp *php_win32_cp_get_by_id(DWORD id) {/*{{{*/ size_t i; if (id < php_win32_cp_map[0].id) { switch (id) { case CP_ACP: id = GetACP(); break; case CP_OEMCP: id = GetOEMCP(); break; } } for (i = 0; i < sizeof(php_win32_cp_map)/sizeof(struct php_win32_cp); i++) { if (php_win32_cp_map[i].id == id) { return &php_win32_cp_map[i]; } } SET_ERRNO_FROM_WIN32_CODE(ERROR_NOT_FOUND); return NULL; }/*}}}*/ PW32CP const struct php_win32_cp *php_win32_cp_get_by_enc(const char *enc) {/*{{{*/ size_t enc_len = 0, i; if (!enc || !enc[0]) { return php_win32_cp_get_by_id(65001U); } enc_len = strlen(enc); for (i = 0; i < sizeof(php_win32_cp_map)/sizeof(struct php_win32_cp); i++) { const struct php_win32_cp *cp = &php_win32_cp_map[i]; if (!cp->name || !cp->name[0]) { continue; } if (0 == zend_binary_strcasecmp(enc, enc_len, cp->name, strlen(cp->name))) { return cp; } if (cp->enc) { char *start = cp->enc, *idx; idx = strpbrk(start, "|"); while (NULL != idx) { if (0 == zend_binary_strcasecmp(enc, enc_len, start, idx - start)) { return cp; } start = idx + 1; idx = strpbrk(start, "|"); } /* Last in the list, or single charset specified. */ if (0 == zend_binary_strcasecmp(enc, enc_len, start, strlen(start))) { return cp; } } } return php_win32_cp_get_by_id(GetACP()); }/*}}}*/ PW32CP const struct php_win32_cp *php_win32_cp_set_by_id(DWORD id) {/*{{{*/ const struct php_win32_cp *tmp; if (!IsValidCodePage(id)) { SET_ERRNO_FROM_WIN32_CODE(ERROR_INVALID_PARAMETER); return NULL; } tmp = php_win32_cp_get_by_id(id); if (tmp) { cur_cp = tmp; } return cur_cp; }/*}}}*/ PW32CP BOOL php_win32_cp_use_unicode(void) {/*{{{*/ return 65001 == cur_cp->id; }/*}}}*/ PW32CP wchar_t *php_win32_cp_env_any_to_w(const char* env) {/*{{{*/ wchar_t *envw = NULL, ew[32760]; char *cur = (char *)env, *prev; size_t bin_len = 0; if (!env) { SET_ERRNO_FROM_WIN32_CODE(ERROR_INVALID_PARAMETER); return NULL; } do { wchar_t *tmp; tmp = php_win32_cp_any_to_w(cur); if (tmp) { size_t tmp_len = wcslen(tmp) + 1; memmove(ew + bin_len, tmp, tmp_len * sizeof(wchar_t)); free(tmp); bin_len += tmp_len; } prev = cur; } while (NULL != (cur = strchr(prev, '\0')) && cur++ && *cur && bin_len + (cur - prev) < 32760); envw = (wchar_t *) malloc((bin_len + 3) * sizeof(wchar_t)); if (!envw) { SET_ERRNO_FROM_WIN32_CODE(ERROR_OUTOFMEMORY); return NULL; } memmove(envw, ew, bin_len * sizeof(wchar_t)); envw[bin_len] = L'\0'; envw[bin_len + 1] = L'\0'; envw[bin_len + 2] = L'\0'; return envw; }/*}}}*/ static BOOL php_win32_cp_cli_io_setup(void) {/*{{{*/ BOOL ret = TRUE; if (PG(input_encoding) && PG(input_encoding)[0]) { cur_in_cp = php_win32_cp_get_by_enc(PG(input_encoding)); if (!cur_in_cp) { cur_in_cp = cur_cp; } } else { cur_in_cp = cur_cp; } if (PG(output_encoding) && PG(output_encoding)[0]) { cur_out_cp = php_win32_cp_get_by_enc(PG(output_encoding)); if (!cur_out_cp) { cur_out_cp = cur_cp; } } else { cur_out_cp = cur_cp; } if(php_get_module_initialized()) { ret = SetConsoleCP(cur_in_cp->id) && SetConsoleOutputCP(cur_out_cp->id); } return ret; }/*}}}*/ PW32CP const struct php_win32_cp *php_win32_cp_do_setup(const char *enc) {/*{{{*/ if (!enc) { enc = php_win32_cp_get_enc(); } cur_cp = php_win32_cp_get_by_enc(enc); if (!orig_cp) { orig_cp = php_win32_cp_get_by_id(GetACP()); } if (php_win32_cp_is_cli_sapi()) { if (!orig_in_cp) { orig_in_cp = php_win32_cp_get_by_id(GetConsoleCP()); if (!orig_in_cp) { orig_in_cp = orig_cp; } } if (!orig_out_cp) { orig_out_cp = php_win32_cp_get_by_id(GetConsoleOutputCP()); if (!orig_out_cp) { orig_out_cp = orig_cp; } } php_win32_cp_cli_io_setup(); } return cur_cp; }/*}}}*/ PW32CP const struct php_win32_cp *php_win32_cp_do_update(const char *enc) {/*{{{*/ if (!enc) { enc = php_win32_cp_get_enc(); } cur_cp = php_win32_cp_get_by_enc(enc); if (php_win32_cp_is_cli_sapi()) { php_win32_cp_cli_do_setup(cur_cp->id); } return cur_cp; }/*}}}*/ PW32CP const struct php_win32_cp *php_win32_cp_shutdown(void) {/*{{{*/ return orig_cp; }/*}}}*/ /* php_win32_cp_setup() needs to have run before! */ PW32CP const struct php_win32_cp *php_win32_cp_cli_do_setup(DWORD id) {/*{{{*/ const struct php_win32_cp *cp; if (!orig_cp) { php_win32_cp_setup(); } if (id) { cp = php_win32_cp_set_by_id(id); } else { cp = cur_cp; } if (!cp) { return NULL; } if (php_win32_cp_cli_io_setup()) { return cp; } return cp; }/*}}}*/ PW32CP const struct php_win32_cp *php_win32_cp_cli_do_restore(DWORD id) {/*{{{*/ BOOL cli_io_restored = TRUE; if (orig_in_cp) { cli_io_restored = cli_io_restored && SetConsoleCP(orig_in_cp->id); } if (orig_out_cp) { cli_io_restored = cli_io_restored && SetConsoleOutputCP(orig_out_cp->id); } if (cli_io_restored && id) { return php_win32_cp_set_by_id(id); } return NULL; }/*}}}*/ /* Userspace functions, see basic_functions.* for arginfo and decls. */ /* {{{ proto bool sapi_windows_cp_set(int cp) * Set process codepage. */ PHP_FUNCTION(sapi_windows_cp_set) { zend_long id; const struct php_win32_cp *cp; if (zend_parse_parameters(ZEND_NUM_ARGS(), "l", &id) == FAILURE) { return; } if (ZEND_LONG_UINT_OVFL(id)) { php_error_docref(NULL, E_WARNING, "Argument %d is out of range", id); RETURN_FALSE; } if (php_win32_cp_is_cli_sapi()) { cp = php_win32_cp_cli_do_setup((DWORD)id); } else { cp = php_win32_cp_set_by_id((DWORD)id); } if (!cp) { php_error_docref(NULL, E_WARNING, "Failed to switch to codepage %d", id); RETURN_FALSE; } RETURN_BOOL(cp->id == id); } /* }}} */ /* {{{ proto int sapi_windows_cp_get([string kind]) * Get process codepage. */ PHP_FUNCTION(sapi_windows_cp_get) { char *kind; size_t kind_len = 0; if (zend_parse_parameters(ZEND_NUM_ARGS(), "|s", &kind, &kind_len) == FAILURE) { return; } if (kind_len == sizeof("ansi")-1 && !strncasecmp(kind, "ansi", kind_len)) { RETURN_LONG(GetACP()); } else if (kind_len == sizeof("oem")-1 && !strncasecmp(kind, "oem", kind_len)) { RETURN_LONG(GetOEMCP()); } else { const struct php_win32_cp *cp = php_win32_cp_get_current(); RETURN_LONG(cp->id); } } /* }}} */ /* {{{ proto bool sapi_windows_cp_is_utf8(void) * Indicates whether the codepage is UTF-8 compatible. */ PHP_FUNCTION(sapi_windows_cp_is_utf8) { if (zend_parse_parameters_none() == FAILURE) { return; } RETURN_BOOL(php_win32_cp_use_unicode()); } /* }}} */ /* {{{ proto string sapi_windows_cp_conv(int|string in_codepage, int|string out_codepage, string subject) * Convert string from one codepage to another. */ PHP_FUNCTION(sapi_windows_cp_conv) { char *subj, *ret; size_t subj_len, ret_len, tmpw_len; wchar_t *tmpw; const struct php_win32_cp *in_cp, *out_cp; zval *z_in_cp, *z_out_cp; if (zend_parse_parameters(ZEND_NUM_ARGS(), "zzs", &z_in_cp, &z_out_cp, &subj, &subj_len) == FAILURE) { return; } if (ZEND_SIZE_T_INT_OVFL(subj_len)) { php_error_docref(NULL, E_WARNING, "String is too long"); RETURN_NULL(); } if (IS_LONG == Z_TYPE_P(z_in_cp)) { if (ZEND_LONG_UINT_OVFL(Z_LVAL_P(z_in_cp))) { php_error_docref(NULL, E_WARNING, "Argument %d is out of range", Z_LVAL_P(z_in_cp)); RETURN_NULL(); } in_cp = php_win32_cp_get_by_id((DWORD)Z_LVAL_P(z_in_cp)); if (!in_cp) { php_error_docref(NULL, E_WARNING, "Invalid codepage %d", Z_LVAL_P(z_in_cp)); RETURN_NULL(); } } else { convert_to_string(z_in_cp); in_cp = php_win32_cp_get_by_enc(Z_STRVAL_P(z_in_cp)); if (!in_cp) { php_error_docref(NULL, E_WARNING, "Invalid charset %s", Z_STRVAL_P(z_in_cp)); RETURN_NULL(); } } if (IS_LONG == Z_TYPE_P(z_out_cp)) { if (ZEND_LONG_UINT_OVFL(Z_LVAL_P(z_out_cp))) { php_error_docref(NULL, E_WARNING, "Argument %d is out of range", Z_LVAL_P(z_out_cp)); RETURN_NULL(); } out_cp = php_win32_cp_get_by_id((DWORD)Z_LVAL_P(z_out_cp)); if (!out_cp) { php_error_docref(NULL, E_WARNING, "Invalid codepage %d", Z_LVAL_P(z_out_cp)); RETURN_NULL(); } } else { convert_to_string(z_out_cp); out_cp = php_win32_cp_get_by_enc(Z_STRVAL_P(z_out_cp)); if (!out_cp) { php_error_docref(NULL, E_WARNING, "Invalid charset %s", Z_STRVAL_P(z_out_cp)); RETURN_NULL(); } } tmpw = php_win32_cp_conv_to_w(in_cp->id, in_cp->to_w_fl, subj, subj_len, &tmpw_len); if (!tmpw) { php_error_docref(NULL, E_WARNING, "Wide char conversion failed"); RETURN_NULL(); } ret = php_win32_cp_conv_from_w(out_cp->id, out_cp->from_w_fl, tmpw, tmpw_len, &ret_len); if (!ret) { free(tmpw); php_error_docref(NULL, E_WARNING, "Wide char conversion failed"); RETURN_NULL(); } RETVAL_STRINGL(ret, ret_len); free(tmpw); free(ret); } /* }}} */ /* }}} */ /* * Local variables: * tab-width: 4 * c-basic-offset: 4 * End: * vim600: sw=4 ts=4 fdm=marker * vim<600: sw=4 ts=4 */