summaryrefslogtreecommitdiff
path: root/libcpp/ucnid.tab
blob: bfc084780e91849c2480f4c4d24e7c6820d87f1b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
; Table of UCNs which are valid in identifiers.
; Copyright (C) 2003-2015 Free Software Foundation, Inc.
; 
; This program is free software; you can redistribute it and/or modify it
; under the terms of the GNU General Public License as published by the
; Free Software Foundation; either version 3, or (at your option) any
; later version.
; 
; This program is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
; GNU General Public License for more details.
; 
; You should have received a copy of the GNU General Public License
; along with this program; see the file COPYING3.  If not see
; <http://www.gnu.org/licenses/>.
; 
; This file reproduces the table in ISO/IEC 9899:1999 (C99) Annex
; D, which is itself a reproduction from ISO/IEC TR 10176:1998, and
; the similar table from ISO/IEC 14882:1998 (C++98) Annex E, which is
; a reproduction of ISO/IEC PDTR 10176.  Unfortunately these tables
; are not identical.  It also reproduces the somewhat different tables
; in C11 and C++11, which are identical to each other.

[C99]

; Latin
00aa 00ba 00c0-00d6 00d8-00f6 00f8-01f5 01fa-0217 0250-02a8 1e00-1e9b
1ea0-1ef9 207f

; Greek
0386 0388-038a 038c 038e-03a1 03a3-03ce 03d0-03d6 03da 03dc 03de 03e0
03e2-03f3 1f00-1f15 1f18-1f1d 1f20-1f45 1f48-1f4d 1f50-1f57 1f59 1f5b
1f5d 1f5f-1f7d 1f80-1fb4 1fb6-1fbc 1fc2-1fc4 1fc6-1fcc 1fd0-1fd3
1fd6-1fdb 1fe0-1fec 1ff2-1ff4 1ff6-1ffc

; Cyrillic
0401-040c 040e-044f 0451-045c 045e-0481 0490-04c4 04c7-04c8 04cb-04cc
04d0-04eb 04ee-04f5 04f8-04f9

; Armenian
0531-0556 0561-0587

; Hebrew
05b0-05b9 05bb-05bd 05bf 05c1-05c2 05d0-05ea 05f0-05f2

; Arabic
0621-063a 0640-0652 0670-06b7 06ba-06be 06c0-06ce 06d0-06dc 06e5-06e8
06ea-06ed

; Devanagari
0901-0903 0905-0939 093e-094d 0950-0952 0958-0963

; Bengali
0981-0983 0985-098c 098f-0990 0993-09a8 09aa-09b0 09b2 09b6-09b9
09be-09c4 09c7-09c8 09cb-09cd 09dc-09dd 09df-09e3 09f0-09f1

; Gurmukhi
0a02 0a05-0a0a 0a0f-0a10 0a13-0a28 0a2a-0a30 0a32-0a33 0a35-0a36
0a38-0a39 0a3e-0a42 0a47-0a48 0a4b-0a4d 0a59-0a5c 0a5e 0a74

; Gujarati
0a81-0a83 0a85-0a8b 0a8d 0a8f-0a91 0a93-0aa8 0aaa-0ab0 0ab2-0ab3
0ab5-0ab9 0abd-0ac5 0ac7-0ac9 0acb-0acd 0ad0 0ae0

; Oriya
0b01-0b03 0b05-0b0c 0b0f-0b10 0b13-0b28 0b2a-0b30 0b32-0b33 0b36-0b39
0b3e-0b43 0b47-0b48 0b4b-0b4d 0b5c-0b5d 0b5f-0b61

; Tamil
0b82-0b83 0b85-0b8a 0b8e-0b90 0b92-0b95 0b99-0b9a 0b9c 0b9e-0b9f
0ba3-0ba4 0ba8-0baa 0bae-0bb5 0bb7-0bb9 0bbe-0bc2 0bc6-0bc8 0bca-0bcd

; Telugu
0c01-0c03 0c05-0c0c 0c0e-0c10 0c12-0c28 0c2a-0c33 0c35-0c39 0c3e-0c44
0c46-0c48 0c4a-0c4d 0c60-0c61

; Kannada
0c82-0c83 0c85-0c8c 0c8e-0c90 0c92-0ca8 0caa-0cb3 0cb5-0cb9 0cbe-0cc4
0cc6-0cc8 0cca-0ccd 0cde 0ce0-0ce1

; Malayalam
0d02-0d03 0d05-0d0c 0d0e-0d10 0d12-0d28 0d2a-0d39 0d3e-0d43 0d46-0d48
0d4a-0d4d 0d60-0d61

# CORRECTION: exclude 0e50-0e59 from the Thai range as it also appears
# in the Digits range below.
; Thai
0e01-0e3a 0e40-0e49 0e5a-0e5b

; Lao
0e81-0e82 0e84 0e87-0e88 0e8a 0e8d 0e94-0e97 0e99-0e9f 0ea1-0ea3 0ea5
0ea7 0eaa-0eab 0ead-0eae 0eb0-0eb9 0ebb-0ebd 0ec0-0ec4 0ec6 0ec8-0ecd
0edc-0edd

; Tibetan
0f00 0f18-0f19 0f35 0f37 0f39 0f3e-0f47 0f49-0f69 0f71-0f84 0f86-0f8b
0f90-0f95 0f97 0f99-0fad 0fb1-0fb7 0fb9

; Georgian
10a0-10c5 10d0-10f6

; Hiragana
3041-3093 309b-309c

; Katakana
30a1-30f6 30fb-30fc

; Bopomofo
3105-312c

; CJK Unified Ideographs
4e00-9fa5

; Hangul
ac00-d7a3

; Special characters
00b5 00b7 02b0-02b8 02bb 02bd-02c1 02d0-02d1 02e0-02e4 037a 0559 093d
0b3d 1fbe 203f-2040 2102 2107 210a-2113 2115 2118-211d 2124 2126 2128
212a-2131 2133-2138 2160-2182 3005-3007 3021-3029

[C99DIG]
0660-0669 06f0-06f9 0966-096f 09e6-09ef 0a66-0a6f 0ae6-0aef 0b66-0b6f
0be7-0bef 0c66-0c6f 0ce6-0cef 0d66-0d6f 0e50-0e59 0ed0-0ed9 0f20-0f33

[CXX]

; Latin
00c0-00d6 00d8-00f6 00f8-01f5 01fa-0217 0250-02a8 1e00-1e9a 1ea0-1ef9

; Greek
0384 0388-038a 038c 038e-03a1 03a3-03ce 03d0-03d6 03da 03dc 03de 03e0 
03e2-03f3 1f00-1f15 1f18-1f1d 1f20-1f45 1f48-1f4d 1f50-1f57 1f59 1f5b
1f5d 1f5f-1f7d 1f80-1fb4 1fb6-1fbc 1fc2-1fc4 1fc6-1fcc 1fd0-1fd3 
1fd6-1fdb 1fe0-1fec 1ff2-1ff4 1ff6-1ffc

; Cyrillic
0401-040d 040f-044f 0451-045c 045e-0481 0490-04c4 04c7-04c8 04cb-04cc
04d0-04eb 04ee-04f5 04f8-04f9

; Armenian
0531-0556 0561-0587

; Hebrew
05d0-05ea 05f0-05f4

; Arabic
0621-063a 0640-0652 0670-06b7 06ba-06be 06c0-06ce 06e5-06e7

; Devanagari
0905-0939 0958-0962

; Bengali
0985-098c 098f-0990 0993-09a8 09aa-09b0 09b2 09b6-09b9 09dc-09dd 
09df-09e1 09f0-09f1

; Gurmukhi
0a05-0a0a 0a0f-0a10 0a13-0a28 0a2a-0a30 0a32-0a33 0a35-0a36 0a38-0a39
0a59-0a5c 0a5e

; Gujarati
0a85-0a8b 0a8d 0a8f-0a91 0a93-0aa8 0aaa-0ab0 0ab2-0ab3 0ab5-0ab9 0ae0

; Oriya
0b05-0b0c 0b0f-0b10 0b13-0b28 0b2a-0b30 0b32-0b33 0b36-0b39 0b5c-0b5d
0b5f-0b61

; Tamil
0b85-0b8a 0b8e-0b90 0b92-0b95 0b99-0b9a 0b9c 0b9e-0b9f 0ba3-0ba4 
0ba8-0baa 0bae-0bb5 0bb7-0bb9

; Telugu
0c05-0c0c 0c0e-0c10 0c12-0c28 0c2a-0c33 0c35-0c39 0c60-0c61

; Kannada
0c85-0c8c 0c8e-0c90 0c92-0ca8 0caa-0cb3 0cb5-0cb9 0ce0-0ce1

; Malayalam
0d05-0d0c 0d0e-0d10 0d12-0d28 0d2a-0d39 0d60-0d61

; Thai
0e01-0e30 0e32-0e33 0e40-0e46 0e4f-0e5b

; Digits
0e50-0e59

; Lao
0e81-0e82 0e84 0e87-0e88 0e8a 0e8d 0e94-0e97 0e99-0e9f 0ea1-0ea3 0ea5
0ea7 0eaa-0eab 0ead-0eb0 0eb2 0eb3 0ebd 0ec0-0ec4 0ec6

; Georgian
10a0-10c5 10d0-10f6

; Hiragana
3041-3094 309b-309e

; Katakana
30a1-30fe

; Bopomofo
3105-312c

; Hangul
1100-1159 1161-11a2 11a8-11f9

; CJK Unified Ideographs
f900-fa2d fb1f-fb36 fb38-fb3c fb3e fb40-fb41 fb42-fb44 fb46-fbb1
fbd3-fd3f fd50-fd8f fd92-fdc7 fdf0-fdfb fe70-fe72 fe74 fe76-fefc
ff21-ff3a ff41-ff5a ff66-ffbe ffc2-ffc7 ffca-ffcf ffd2-ffd7
ffda-ffdc 4e00-9fa5

[C11]
; Group 1
00a8 00aa 00ad 00af 00b2-00b5 00b7-00ba 00bc-00be 00c0-00d6 00d8-00f6
00f8-00ff

; Group 2, minus characters under C11NOSTART
0100-02ff 0370-167f 1681-180d 180f-1dbf 1e00-1fff

; Group 3
200b-200d 202a-202e 203f-2040 2054 2060-206f

; Group 4, minus characters under C11NOSTART
2070-20cf 2100-218f 2460-24ff 2776-2793 2c00-2dff 2e80-2fff

; Group 5
3004-3007 3021-302f 3031-303f

; Group 6
3040-d7ff

; Group 7, minus characters under C11NOSTART
f900-fd3d fd40-fdcf fdf0-fe1f fe30-fe44 fe47-fffd

; Group 8
10000-1fffd 20000-2fffd 30000-3fffd 40000-4fffd 50000-5fffd
60000-6fffd 70000-7fffd 80000-8fffd 90000-9fffd a0000-afffd
b0000-bfffd c0000-cfffd d0000-dfffd e0000-efffd

[C11NOSTART]
; Group 1
0300-036f 1dc0-1dff 20d0-20ff fe20-fe2f