summaryrefslogtreecommitdiff
path: root/win32/codepage.h
blob: 8f8fbcbaa187b6fd7a9637e8f5c81620800ecc4a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
/*
   +----------------------------------------------------------------------+
   | Copyright (c) The PHP Group                                          |
   +----------------------------------------------------------------------+
   | This source file is subject to version 3.01 of the PHP license,      |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.php.net/license/3_01.txt                                  |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | license@php.net so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
   | Author: Anatol Belski <ab@php.net>                                   |
   +----------------------------------------------------------------------+
*/

#ifndef PHP_WIN32_CODEPAGE_H
#define PHP_WIN32_CODEPAGE_H

#ifdef __cplusplus
extern "C" {
#endif

/* PW32CP is the linkage decoration applied to every function declared in
   this header: __declspec(dllexport) when PHP_EXPORTS is defined,
   __declspec(dllimport) otherwise. */
#ifdef PHP_EXPORTS
# define PW32CP __declspec(dllexport)
#else
# define PW32CP __declspec(dllimport)
#endif

/* Placeholder arguments that the short-form conversion macros in this header
   (php_win32_cp_utf8_to_w(), php_win32_cp_w_to_cur(), ...) pass for the
   in_len and out_len parameters of the conversion functions.
   NOTE(review): presumably these ask the converter to determine the input
   length itself and not to report the output length - confirm against the
   implementation, which is not part of this header. */
#define PHP_WIN32_CP_IGNORE_LEN (0)
#define PHP_WIN32_CP_IGNORE_LEN_P ((size_t *)-1)

/* Description of a single code page. The lookup and setup functions declared
   in this header return pointers to const instances of this structure. */
struct php_win32_cp {
	/* Code page identifier; the key used by php_win32_cp_get_by_id(), which
	   php_win32_cp_conv_any_to_w() calls with the value of GetACP(). */
	DWORD id;
	/* Flags handed to php_win32_cp_conv_to_w() together with id when
	   converting from this code page to wide chars. */
	DWORD to_w_fl;
	/* NOTE(review): presumably the matching flags for
	   php_win32_cp_conv_from_w() - not used in this header, confirm. */
	DWORD from_w_fl;
	/* NOTE(review): looks like a per-character size for this code page; the
	   unit and exact meaning are not visible here - confirm. */
	DWORD char_size;
	/* NOTE(review): presumably the code page name - confirm. */
	char *name;
	/* NOTE(review): presumably the encoding name(s) that
	   php_win32_cp_get_by_enc() matches against - confirm. */
	char *enc;
	/* NOTE(review): presumably a human readable description - confirm. */
	char *desc;
};

/* Code page management API. Apart from php_win32_cp_use_unicode(), every
   function returns a pointer to a const struct php_win32_cp.
   NOTE(review): the implementations live outside this header; the remarks
   below are limited to what the declarations and macros themselves show. */

/* Queried by php_win32_cp_conv_any_to_w() to choose between the unicode
   conversion chain and the current code page conversion. */
PW32CP BOOL php_win32_cp_use_unicode(void);

/* Setup and update entry points taking a string argument; the argument-less
   macro forms pass NULL for it. */
PW32CP const struct php_win32_cp *php_win32_cp_do_setup(const char *);
#define php_win32_cp_setup() php_win32_cp_do_setup(NULL)
PW32CP const struct php_win32_cp *php_win32_cp_do_update(const char *);
#define php_win32_cp_update() php_win32_cp_do_update(NULL)
PW32CP const struct php_win32_cp *php_win32_cp_shutdown(void);

/* Accessors and lookups, keyed by code page id or by encoding string. */
PW32CP const struct php_win32_cp *php_win32_cp_get_current(void);
PW32CP const struct php_win32_cp *php_win32_cp_get_orig(void);
PW32CP const struct php_win32_cp *php_win32_cp_get_by_id(DWORD id);
PW32CP const struct php_win32_cp *php_win32_cp_set_by_id(DWORD id);
PW32CP const struct php_win32_cp *php_win32_cp_get_by_enc(const char *enc);

/* CLI specific setup and restore taking a DWORD argument; the macro forms
   pass 0. Note that php_win32_cp_cli_update() expands to the very same call
   as php_win32_cp_cli_setup(). */
PW32CP const struct php_win32_cp *php_win32_cp_cli_do_setup(DWORD);
#define php_win32_cp_cli_setup() php_win32_cp_cli_do_setup(0)
#define php_win32_cp_cli_update() php_win32_cp_cli_do_setup(0)
PW32CP const struct php_win32_cp *php_win32_cp_cli_do_restore(DWORD);
#define php_win32_cp_cli_restore() php_win32_cp_cli_do_restore(0)

/* Conversion API. It is binary safe and expects \0 terminated input.
   The returned buffer is \0 terminated, but the reported length does not
   count the terminating \0.

   Each php_win32_cp_conv_*() function takes the input, its length and a
   pointer receiving the output length. The short-form macros defined next to
   them take only the input and pass PHP_WIN32_CP_IGNORE_LEN and
   PHP_WIN32_CP_IGNORE_LEN_P for the two length parameters.

   NOTE(review): the returned buffers are presumably heap allocated and owned
   by the caller - PHP_WIN32_CP_FREE_ARRAY releases such results with free();
   confirm against the implementation. */

/* Multibyte to wide char: from an explicit code page with explicit flags,
   from UTF-8, from the current code page and from ASCII. */
PW32CP wchar_t *php_win32_cp_conv_to_w(DWORD in_cp, DWORD flags, const char* in, size_t in_len, size_t *out_len);
PW32CP wchar_t *php_win32_cp_conv_utf8_to_w(const char* in, size_t in_len, size_t *out_len);
#define php_win32_cp_utf8_to_w(in) php_win32_cp_conv_utf8_to_w(in, PHP_WIN32_CP_IGNORE_LEN, PHP_WIN32_CP_IGNORE_LEN_P)
PW32CP wchar_t *php_win32_cp_conv_cur_to_w(const char* in, size_t in_len, size_t *out_len);
#define php_win32_cp_cur_to_w(in) php_win32_cp_conv_cur_to_w(in, PHP_WIN32_CP_IGNORE_LEN, PHP_WIN32_CP_IGNORE_LEN_P)
PW32CP wchar_t *php_win32_cp_conv_ascii_to_w(const char* in, size_t in_len, size_t *out_len);
#define php_win32_cp_ascii_to_w(in) php_win32_cp_conv_ascii_to_w(in, PHP_WIN32_CP_IGNORE_LEN, PHP_WIN32_CP_IGNORE_LEN_P)

/* Wide char to multibyte: to an explicit code page with explicit flags,
   to UTF-8 and to the current code page. */
PW32CP char *php_win32_cp_conv_from_w(DWORD out_cp, DWORD flags, const wchar_t* in, size_t in_len, size_t *out_len);
PW32CP char *php_win32_cp_conv_w_to_utf8(const wchar_t* in, size_t in_len, size_t *out_len);
#define php_win32_cp_w_to_utf8(in) php_win32_cp_conv_w_to_utf8(in, PHP_WIN32_CP_IGNORE_LEN, PHP_WIN32_CP_IGNORE_LEN_P)
PW32CP char *php_win32_cp_conv_w_to_cur(const wchar_t* in, size_t in_len, size_t *out_len);
#define php_win32_cp_w_to_cur(in) php_win32_cp_conv_w_to_cur(in, PHP_WIN32_CP_IGNORE_LEN, PHP_WIN32_CP_IGNORE_LEN_P)

/* NOTE(review): judging by the name, converts an environment string to wide
   chars - the exact input format is not visible from this header. */
PW32CP wchar_t *php_win32_cp_env_any_to_w(const char* env);

/* Best-effort conversion of an arbitrary string to wide chars, trying the
   cheapest conversion first.

   in      - the input string
   in_len  - input length, forwarded unchanged to the converters
   out_len - receives the output length, forwarded unchanged to the converters

   Returns the converted string, or NULL if every applicable conversion
   failed. */
__forceinline static wchar_t *php_win32_cp_conv_any_to_w(const char* in, size_t in_len, size_t *out_len)
{/*{{{*/
	wchar_t *wide = NULL;

	if (!php_win32_cp_use_unicode()) {
		/* No unicode, so the current thread cp is the only candidate. */
		wide = php_win32_cp_conv_cur_to_w(in, in_len, out_len);
		return wide;
	}

	/* Pure ASCII is the fastest path and therefore tried first. It is only
		meaningful when the source is UTF-8 in general: it might happen to
		work for European encodings, but with Asian encodings it can produce
		unintended results (see "mojibake"). */
	wide = php_win32_cp_conv_ascii_to_w(in, in_len, out_len);
	if (wide) {
		return wide;
	}

	/* Not plain ASCII, treat the input as multibyte UTF-8. */
	wide = php_win32_cp_conv_utf8_to_w(in, in_len, out_len);
	if (wide) {
		return wide;
	}

	/* Last resort, kept for BC: existing scripts may carry hardcoded data
		that is not valid UTF-8, because UTF-8 settings were de facto ignored
		in older PHP versions. Interpret such data in the ANSI code page.
		This fallback can be dropped later for the sake of purity. */
	{
		const struct php_win32_cp *ansi_cp = php_win32_cp_get_by_id(GetACP());

		if (ansi_cp) {
			wide = php_win32_cp_conv_to_w(ansi_cp->id, ansi_cp->to_w_fl, in, in_len, out_len);
		}
	}

	return wide;
}/*}}}*/
/* Short form of php_win32_cp_conv_any_to_w() that takes only the input and
   passes the PHP_WIN32_CP_IGNORE_LEN placeholders for both lengths. */
#define php_win32_cp_any_to_w(in) php_win32_cp_conv_any_to_w(in, PHP_WIN32_CP_IGNORE_LEN, PHP_WIN32_CP_IGNORE_LEN_P)

/* This function converts the output of a unicode (wide char) function back
	to a string for PHP. If PHP's current charset is not compatible with
	unicode, the currently configured CP is used instead.

	It is a thin wrapper: all three arguments are forwarded unchanged to
	php_win32_cp_conv_w_to_cur() and its result is returned as is. */
__forceinline static char *php_win32_cp_conv_w_to_any(const wchar_t* in, size_t in_len, size_t *out_len)
{/*{{{*/
	return php_win32_cp_conv_w_to_cur(in, in_len, out_len);
}/*}}}*/
/* Short form taking only the input; both length arguments are replaced by the
	PHP_WIN32_CP_IGNORE_LEN placeholders. */
#define php_win32_cp_w_to_any(in) php_win32_cp_conv_w_to_any(in, PHP_WIN32_CP_IGNORE_LEN, PHP_WIN32_CP_IGNORE_LEN_P)

/* Converts an array of wide strings into a freshly malloc()ed array of
   strings, one php_win32_cp_w_to_any() call per element.

   aw      - input array of wide strings
   aw_len  - number of elements in aw (evaluated more than once)
   aa      - lvalue receiving the new char ** array
   aa_len  - lvalue receiving the element count; it is always set to aw_len

   If the array allocation fails, aa is NULL and no element is converted,
   so check aa before using it. Individual elements are NULL when their
   conversion failed.

   All arguments are parenthesized in the expansion so that expressions such
   as "n + 1" are sized and compared correctly, and the loop counter has a
   macro-private name so that it cannot capture an argument mentioning "i".
   The cast on malloc() stays because this header is also consumed as C++.
   The trailing semicolon is part of the original expansion and is kept for
   backward compatibility: call sites may be written without their own. */
#define PHP_WIN32_CP_W_TO_ANY_ARRAY(aw, aw_len, aa, aa_len) do { \
	int php_win32_cp_idx_; \
	(aa_len) = (aw_len); \
	(aa) = (char **) malloc((aw_len) * sizeof(char *)); \
	if (!(aa)) { \
		break; \
	} \
	for (php_win32_cp_idx_ = 0; php_win32_cp_idx_ < (aw_len); php_win32_cp_idx_++) { \
		(aa)[php_win32_cp_idx_] = php_win32_cp_w_to_any((aw)[php_win32_cp_idx_]); \
	} \
} while (0);


/* Releases an array of malloc()ed strings: free() is called on each of the
   a_len elements and then on the array itself.

   a      - the array; may be NULL, in which case nothing is done. This is
            the state PHP_WIN32_CP_W_TO_ANY_ARRAY leaves behind when its
            array allocation fails (count set, array NULL).
   a_len  - number of elements in a (evaluated more than once)

   NULL elements are fine, free(NULL) is a no-op. The arguments are
   parenthesized in the expansion and the loop counter has a macro-private
   name so that it cannot capture an argument mentioning "i". The trailing
   semicolon is part of the original expansion and is kept for backward
   compatibility: call sites may be written without their own. */
#define PHP_WIN32_CP_FREE_ARRAY(a, a_len) do { \
	if ((a) != NULL) { \
		int php_win32_cp_idx_; \
		for (php_win32_cp_idx_ = 0; php_win32_cp_idx_ < (a_len); php_win32_cp_idx_++) { \
			free((a)[php_win32_cp_idx_]); \
		} \
		free(a); \
	} \
} while (0);

#ifdef __cplusplus
}
#endif

#endif /* PHP_WIN32_CODEPAGE_H */