summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--distsrc/THIRD-PARTY-NOTICES44
-rw-r--r--src/mongo/SConscript7
-rw-r--r--src/mongo/db/fts/SConscript7
-rw-r--r--src/mongo/db/fts/unicode/SConscript65
-rw-r--r--src/mongo/db/fts/unicode/codepoints.h86
-rw-r--r--src/mongo/db/fts/unicode/codepoints_diacritic_map.cpp3969
-rw-r--r--src/mongo/db/fts/unicode/codepoints_test.cpp94
-rw-r--r--src/mongo/db/fts/unicode/gen_casefold_map.py76
-rw-r--r--src/mongo/db/fts/unicode/gen_delimiter_list.py80
-rw-r--r--src/mongo/db/fts/unicode/gen_diacritic_list.py63
-rw-r--r--src/mongo/db/fts/unicode/gen_diacritic_map.py105
-rw-r--r--src/mongo/db/fts/unicode/gen_helper.py39
-rw-r--r--src/mongo/db/fts/unicode/string.cpp157
-rw-r--r--src/mongo/db/fts/unicode/string.h139
-rw-r--r--src/mongo/db/fts/unicode/string_test.cpp187
-rw-r--r--src/mongo/shell/linenoise_utf8.h6
-rw-r--r--src/third_party/unicode-8.0.0/CaseFolding.txt1414
-rw-r--r--src/third_party/unicode-8.0.0/PropList.txt1525
-rw-r--r--src/third_party/unicode-8.0.0/ReadMe.txt17
19 files changed, 8076 insertions, 4 deletions
diff --git a/distsrc/THIRD-PARTY-NOTICES b/distsrc/THIRD-PARTY-NOTICES
index 040be4680fd..37349340d22 100644
--- a/distsrc/THIRD-PARTY-NOTICES
+++ b/distsrc/THIRD-PARTY-NOTICES
@@ -501,7 +501,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
-13) License Notice for SpiderMonkey
+14) License Notice for SpiderMonkey
-----------------------------------
|------------------------------------------------|------------------|---------------|
@@ -698,7 +698,7 @@ You can contact the author at :
- LZ4 source repository : http://code.google.com/p/lz4/
- LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
-14) License Notice for Intel DFP Math Library
+15) License Notice for Intel DFP Math Library
---------------------------------------------
Copyright (c) 2011, Intel Corp.
@@ -730,4 +730,44 @@ OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+16) License Notice for Unicode Data
+-----------------------------------
+
+Copyright © 1991-2015 Unicode, Inc. All rights reserved.
+Distributed under the Terms of Use in
+http://www.unicode.org/copyright.html.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of the Unicode data files and any associated documentation
+(the "Data Files") or Unicode software and any associated documentation
+(the "Software") to deal in the Data Files or Software
+without restriction, including without limitation the rights to use,
+copy, modify, merge, publish, distribute, and/or sell copies of
+the Data Files or Software, and to permit persons to whom the Data Files
+or Software are furnished to do so, provided that
+(a) this copyright and permission notice appear with all copies
+of the Data Files or Software,
+(b) this copyright and permission notice appear in associated
+documentation, and
+(c) there is clear notice in each modified Data File or in the Software
+as well as in the documentation associated with the Data File(s) or
+Software that the data or software has been modified.
+
+THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
+ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT OF THIRD PARTY RIGHTS.
+IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
+NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
+DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+PERFORMANCE OF THE DATA FILES OR SOFTWARE.
+
+Except as contained in this notice, the name of a copyright holder
+shall not be used in advertising or otherwise to promote the sale,
+use or other dealings in these Data Files or Software without prior
+written authorization of the copyright holder.
+
End
diff --git a/src/mongo/SConscript b/src/mongo/SConscript
index bd6407bbff8..f6e42077bfa 100644
--- a/src/mongo/SConscript
+++ b/src/mongo/SConscript
@@ -312,6 +312,11 @@ env.Install(
'util/options_parser/options_parser_init',
]))
+env.Library("linenoise_utf8",
+ source=[
+ "shell/linenoise_utf8.cpp",
+ ])
+
# --- sniffer ---
mongosniff_built = False
if env.TargetOSIs('osx') or env["_HAVEPCAP"]:
@@ -341,7 +346,6 @@ if not has_option('noshell') and (usev8 or usemozjs):
"shell/bench.cpp",
"shell/clientAndShell.cpp",
"shell/linenoise.cpp",
- "shell/linenoise_utf8.cpp",
"shell/mk_wcwidth.cpp",
"shell/mongo-server.cpp",
"shell/shell_utils.cpp",
@@ -355,6 +359,7 @@ if not has_option('noshell') and (usev8 or usemozjs):
'scripting/scripting',
'util/processinfo',
'util/signal_handlers',
+ 'linenoise_utf8',
'shell/mongojs',
])
diff --git a/src/mongo/db/fts/SConscript b/src/mongo/db/fts/SConscript
index 25f3b467c57..3a769baca98 100644
--- a/src/mongo/db/fts/SConscript
+++ b/src/mongo/db/fts/SConscript
@@ -2,6 +2,12 @@
Import("env")
+env.SConscript(
+ dirs=[
+ 'unicode',
+ ],
+)
+
stop_word_languages = [
'danish',
'dutch',
@@ -44,6 +50,7 @@ baseEnv.Library('base', [
'tokenizer.cpp',
], LIBDEPS=["$BUILD_DIR/mongo/base",
"$BUILD_DIR/mongo/db/common",
+ "$BUILD_DIR/mongo/db/fts/unicode/unicode",
"$BUILD_DIR/mongo/platform/platform",
"$BUILD_DIR/third_party/shim_stemmer"
])
diff --git a/src/mongo/db/fts/unicode/SConscript b/src/mongo/db/fts/unicode/SConscript
new file mode 100644
index 00000000000..dc01c5b618c
--- /dev/null
+++ b/src/mongo/db/fts/unicode/SConscript
@@ -0,0 +1,65 @@
+# -*- mode: python -*-
+
+Import("env")
+
+env.Command(
+ target="codepoints_casefold.cpp",
+ source=[
+ "gen_casefold_map.py",
+ "#/src/third_party/unicode-8.0.0/CaseFolding.txt",
+ "gen_helper.py",
+ ],
+ action="$PYTHON ${SOURCES[0]} ${SOURCES[1]} $TARGETS")
+
+env.Command(
+ target="codepoints_delimiter_list.cpp",
+ source=[
+ "gen_delimiter_list.py",
+ "#/src/third_party/unicode-8.0.0/PropList.txt",
+ "gen_helper.py",
+ ],
+ action="$PYTHON ${SOURCES[0]} ${SOURCES[1]} $TARGETS")
+
+env.Command(
+ target="codepoints_diacritic_list.cpp",
+ source=[
+ "gen_diacritic_list.py",
+ "#/src/third_party/unicode-8.0.0/PropList.txt",
+ "gen_helper.py",
+ ],
+ action="$PYTHON ${SOURCES[0]} ${SOURCES[1]} $TARGETS")
+
+env.Library(
+ target='unicode',
+ source=[
+ 'codepoints_casefold.cpp',
+ 'codepoints_delimiter_list.cpp',
+ 'codepoints_diacritic_list.cpp',
+ 'codepoints_diacritic_map.cpp',
+ 'string.cpp',
+ ],
+ LIBDEPS=[
+ '$BUILD_DIR/mongo/base',
+ '$BUILD_DIR/mongo/linenoise_utf8',
+ ]
+)
+
+env.CppUnitTest(
+ target='string_test',
+ source=[
+ 'string_test.cpp'
+ ],
+ LIBDEPS=[
+ 'unicode',
+ ]
+)
+
+env.CppUnitTest(
+ target='codepoints_test',
+ source=[
+ 'codepoints_test.cpp'
+ ],
+ LIBDEPS=[
+ 'unicode',
+ ]
+)
diff --git a/src/mongo/db/fts/unicode/codepoints.h b/src/mongo/db/fts/unicode/codepoints.h
new file mode 100644
index 00000000000..5b1e8e2b2b5
--- /dev/null
+++ b/src/mongo/db/fts/unicode/codepoints.h
@@ -0,0 +1,86 @@
+/**
+ * Copyright (C) 2015 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include <string>
+
+namespace mongo {
+namespace unicode {
+
+/**
+ * There are currently two options supported for the delimiter list. The only difference between the
+ * English and NotEnglish modes is that in English, the apostrophe is considered a delimiter, while
+ * in NotEnglish, it is not.
+ */
+enum class DelimiterListLanguage {
+ kEnglish,
+ kNotEnglish,
+};
+
+/**
+ * There are currently two case folding modes supported. The only difference between the Normal mode
+ * and the Turkish mode is that in the Turkish mode, the letter I is lowercased to ı, and the letter
+ * İ is lowercased to i. In the normal mode, the letter I is lowercased to i, and there is no
+ * mapping for İ.
+ */
+enum class CaseFoldMode {
+ kNormal,
+ kTurkish,
+};
+
+/**
+ * Returns whether or not the given codepoint is a diacritic. In NFD-normalized Unicode text,
+ * diacritics are removed by removing characters with these codepoints.
+ */
+bool codepointIsDiacritic(char32_t codepoint);
+
+/**
+ * Returns whether or not the given codepoint is considered a delimiter in the language 'lang'.
+ * Currently, there is only a difference between English and non-English languages (the apostrophe).
+ * To see which Unicode character categories were considered delimiters, see gen_delimiter_list.py.
+ */
+bool codepointIsDelimiter(char32_t codepoint, DelimiterListLanguage lang);
+
+/**
+ * Return a version of the given codepoint without any diacritics. These mappings are generated by
+ * taking all of the characters within a set of Unicode code blocks (see gen_diacritic_map.py to see
+ * which code blocks are used), decomposing them to the NFD normalization form, removing any
+ * combining marks, and renormalizing them to the NFC form. The result is a mapping from original
+ * codepoint to a codepoint with no diacritics.
+ */
+char32_t codepointRemoveDiacritics(char32_t codepoint);
+
+/**
+ * Return the lowercased version of the given codepoint, applying the special Turkish version of
+ * case folding if specified.
+ */
+char32_t codepointToLower(char32_t codepoint, CaseFoldMode mode = CaseFoldMode::kNormal);
+
+} // namespace unicode
+} // namespace mongo
diff --git a/src/mongo/db/fts/unicode/codepoints_diacritic_map.cpp b/src/mongo/db/fts/unicode/codepoints_diacritic_map.cpp
new file mode 100644
index 00000000000..c6b39d328b8
--- /dev/null
+++ b/src/mongo/db/fts/unicode/codepoints_diacritic_map.cpp
@@ -0,0 +1,3969 @@
+/**
+ * Copyright (C) 2015 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ *
+ * THIS IS A GENERATED FILE, DO NOT MODIFY.
+ */
+
+#include "mongo/db/fts/unicode/codepoints.h"
+
+namespace mongo {
+namespace unicode {
+
+char32_t codepointRemoveDiacritics(char32_t codepoint) {
+ switch (codepoint) {
+ case 0xc0:
+ return 0x41;
+ case 0xc1:
+ return 0x41;
+ case 0xc2:
+ return 0x41;
+ case 0xc3:
+ return 0x41;
+ case 0xc4:
+ return 0x41;
+ case 0xc5:
+ return 0x41;
+ case 0xc7:
+ return 0x43;
+ case 0xc8:
+ return 0x45;
+ case 0xc9:
+ return 0x45;
+ case 0xca:
+ return 0x45;
+ case 0xcb:
+ return 0x45;
+ case 0xcc:
+ return 0x49;
+ case 0xcd:
+ return 0x49;
+ case 0xce:
+ return 0x49;
+ case 0xcf:
+ return 0x49;
+ case 0xd1:
+ return 0x4e;
+ case 0xd2:
+ return 0x4f;
+ case 0xd3:
+ return 0x4f;
+ case 0xd4:
+ return 0x4f;
+ case 0xd5:
+ return 0x4f;
+ case 0xd6:
+ return 0x4f;
+ case 0xd9:
+ return 0x55;
+ case 0xda:
+ return 0x55;
+ case 0xdb:
+ return 0x55;
+ case 0xdc:
+ return 0x55;
+ case 0xdd:
+ return 0x59;
+ case 0xe0:
+ return 0x61;
+ case 0xe1:
+ return 0x61;
+ case 0xe2:
+ return 0x61;
+ case 0xe3:
+ return 0x61;
+ case 0xe4:
+ return 0x61;
+ case 0xe5:
+ return 0x61;
+ case 0xe7:
+ return 0x63;
+ case 0xe8:
+ return 0x65;
+ case 0xe9:
+ return 0x65;
+ case 0xea:
+ return 0x65;
+ case 0xeb:
+ return 0x65;
+ case 0xec:
+ return 0x69;
+ case 0xed:
+ return 0x69;
+ case 0xee:
+ return 0x69;
+ case 0xef:
+ return 0x69;
+ case 0xf1:
+ return 0x6e;
+ case 0xf2:
+ return 0x6f;
+ case 0xf3:
+ return 0x6f;
+ case 0xf4:
+ return 0x6f;
+ case 0xf5:
+ return 0x6f;
+ case 0xf6:
+ return 0x6f;
+ case 0xf9:
+ return 0x75;
+ case 0xfa:
+ return 0x75;
+ case 0xfb:
+ return 0x75;
+ case 0xfc:
+ return 0x75;
+ case 0xfd:
+ return 0x79;
+ case 0xff:
+ return 0x79;
+ case 0x100:
+ return 0x41;
+ case 0x101:
+ return 0x61;
+ case 0x102:
+ return 0x41;
+ case 0x103:
+ return 0x61;
+ case 0x104:
+ return 0x41;
+ case 0x105:
+ return 0x61;
+ case 0x106:
+ return 0x43;
+ case 0x107:
+ return 0x63;
+ case 0x108:
+ return 0x43;
+ case 0x109:
+ return 0x63;
+ case 0x10a:
+ return 0x43;
+ case 0x10b:
+ return 0x63;
+ case 0x10c:
+ return 0x43;
+ case 0x10d:
+ return 0x63;
+ case 0x10e:
+ return 0x44;
+ case 0x10f:
+ return 0x64;
+ case 0x112:
+ return 0x45;
+ case 0x113:
+ return 0x65;
+ case 0x114:
+ return 0x45;
+ case 0x115:
+ return 0x65;
+ case 0x116:
+ return 0x45;
+ case 0x117:
+ return 0x65;
+ case 0x118:
+ return 0x45;
+ case 0x119:
+ return 0x65;
+ case 0x11a:
+ return 0x45;
+ case 0x11b:
+ return 0x65;
+ case 0x11c:
+ return 0x47;
+ case 0x11d:
+ return 0x67;
+ case 0x11e:
+ return 0x47;
+ case 0x11f:
+ return 0x67;
+ case 0x120:
+ return 0x47;
+ case 0x121:
+ return 0x67;
+ case 0x122:
+ return 0x47;
+ case 0x123:
+ return 0x67;
+ case 0x124:
+ return 0x48;
+ case 0x125:
+ return 0x68;
+ case 0x128:
+ return 0x49;
+ case 0x129:
+ return 0x69;
+ case 0x12a:
+ return 0x49;
+ case 0x12b:
+ return 0x69;
+ case 0x12c:
+ return 0x49;
+ case 0x12d:
+ return 0x69;
+ case 0x12e:
+ return 0x49;
+ case 0x12f:
+ return 0x69;
+ case 0x130:
+ return 0x49;
+ case 0x134:
+ return 0x4a;
+ case 0x135:
+ return 0x6a;
+ case 0x136:
+ return 0x4b;
+ case 0x137:
+ return 0x6b;
+ case 0x139:
+ return 0x4c;
+ case 0x13a:
+ return 0x6c;
+ case 0x13b:
+ return 0x4c;
+ case 0x13c:
+ return 0x6c;
+ case 0x13d:
+ return 0x4c;
+ case 0x13e:
+ return 0x6c;
+ case 0x143:
+ return 0x4e;
+ case 0x144:
+ return 0x6e;
+ case 0x145:
+ return 0x4e;
+ case 0x146:
+ return 0x6e;
+ case 0x147:
+ return 0x4e;
+ case 0x148:
+ return 0x6e;
+ case 0x14c:
+ return 0x4f;
+ case 0x14d:
+ return 0x6f;
+ case 0x14e:
+ return 0x4f;
+ case 0x14f:
+ return 0x6f;
+ case 0x150:
+ return 0x4f;
+ case 0x151:
+ return 0x6f;
+ case 0x154:
+ return 0x52;
+ case 0x155:
+ return 0x72;
+ case 0x156:
+ return 0x52;
+ case 0x157:
+ return 0x72;
+ case 0x158:
+ return 0x52;
+ case 0x159:
+ return 0x72;
+ case 0x15a:
+ return 0x53;
+ case 0x15b:
+ return 0x73;
+ case 0x15c:
+ return 0x53;
+ case 0x15d:
+ return 0x73;
+ case 0x15e:
+ return 0x53;
+ case 0x15f:
+ return 0x73;
+ case 0x160:
+ return 0x53;
+ case 0x161:
+ return 0x73;
+ case 0x162:
+ return 0x54;
+ case 0x163:
+ return 0x74;
+ case 0x164:
+ return 0x54;
+ case 0x165:
+ return 0x74;
+ case 0x168:
+ return 0x55;
+ case 0x169:
+ return 0x75;
+ case 0x16a:
+ return 0x55;
+ case 0x16b:
+ return 0x75;
+ case 0x16c:
+ return 0x55;
+ case 0x16d:
+ return 0x75;
+ case 0x16e:
+ return 0x55;
+ case 0x16f:
+ return 0x75;
+ case 0x170:
+ return 0x55;
+ case 0x171:
+ return 0x75;
+ case 0x172:
+ return 0x55;
+ case 0x173:
+ return 0x75;
+ case 0x174:
+ return 0x57;
+ case 0x175:
+ return 0x77;
+ case 0x176:
+ return 0x59;
+ case 0x177:
+ return 0x79;
+ case 0x178:
+ return 0x59;
+ case 0x179:
+ return 0x5a;
+ case 0x17a:
+ return 0x7a;
+ case 0x17b:
+ return 0x5a;
+ case 0x17c:
+ return 0x7a;
+ case 0x17d:
+ return 0x5a;
+ case 0x17e:
+ return 0x7a;
+ case 0x1a0:
+ return 0x4f;
+ case 0x1a1:
+ return 0x6f;
+ case 0x1af:
+ return 0x55;
+ case 0x1b0:
+ return 0x75;
+ case 0x1cd:
+ return 0x41;
+ case 0x1ce:
+ return 0x61;
+ case 0x1cf:
+ return 0x49;
+ case 0x1d0:
+ return 0x69;
+ case 0x1d1:
+ return 0x4f;
+ case 0x1d2:
+ return 0x6f;
+ case 0x1d3:
+ return 0x55;
+ case 0x1d4:
+ return 0x75;
+ case 0x1d5:
+ return 0x55;
+ case 0x1d6:
+ return 0x75;
+ case 0x1d7:
+ return 0x55;
+ case 0x1d8:
+ return 0x75;
+ case 0x1d9:
+ return 0x55;
+ case 0x1da:
+ return 0x75;
+ case 0x1db:
+ return 0x55;
+ case 0x1dc:
+ return 0x75;
+ case 0x1de:
+ return 0x41;
+ case 0x1df:
+ return 0x61;
+ case 0x1e0:
+ return 0x41;
+ case 0x1e1:
+ return 0x61;
+ case 0x1e2:
+ return 0xc6;
+ case 0x1e3:
+ return 0xe6;
+ case 0x1e6:
+ return 0x47;
+ case 0x1e7:
+ return 0x67;
+ case 0x1e8:
+ return 0x4b;
+ case 0x1e9:
+ return 0x6b;
+ case 0x1ea:
+ return 0x4f;
+ case 0x1eb:
+ return 0x6f;
+ case 0x1ec:
+ return 0x4f;
+ case 0x1ed:
+ return 0x6f;
+ case 0x1ee:
+ return 0x1b7;
+ case 0x1ef:
+ return 0x292;
+ case 0x1f0:
+ return 0x6a;
+ case 0x1f4:
+ return 0x47;
+ case 0x1f5:
+ return 0x67;
+ case 0x1f8:
+ return 0x4e;
+ case 0x1f9:
+ return 0x6e;
+ case 0x1fa:
+ return 0x41;
+ case 0x1fb:
+ return 0x61;
+ case 0x1fc:
+ return 0xc6;
+ case 0x1fd:
+ return 0xe6;
+ case 0x1fe:
+ return 0xd8;
+ case 0x1ff:
+ return 0xf8;
+ case 0x200:
+ return 0x41;
+ case 0x201:
+ return 0x61;
+ case 0x202:
+ return 0x41;
+ case 0x203:
+ return 0x61;
+ case 0x204:
+ return 0x45;
+ case 0x205:
+ return 0x65;
+ case 0x206:
+ return 0x45;
+ case 0x207:
+ return 0x65;
+ case 0x208:
+ return 0x49;
+ case 0x209:
+ return 0x69;
+ case 0x20a:
+ return 0x49;
+ case 0x20b:
+ return 0x69;
+ case 0x20c:
+ return 0x4f;
+ case 0x20d:
+ return 0x6f;
+ case 0x20e:
+ return 0x4f;
+ case 0x20f:
+ return 0x6f;
+ case 0x210:
+ return 0x52;
+ case 0x211:
+ return 0x72;
+ case 0x212:
+ return 0x52;
+ case 0x213:
+ return 0x72;
+ case 0x214:
+ return 0x55;
+ case 0x215:
+ return 0x75;
+ case 0x216:
+ return 0x55;
+ case 0x217:
+ return 0x75;
+ case 0x218:
+ return 0x53;
+ case 0x219:
+ return 0x73;
+ case 0x21a:
+ return 0x54;
+ case 0x21b:
+ return 0x74;
+ case 0x21e:
+ return 0x48;
+ case 0x21f:
+ return 0x68;
+ case 0x226:
+ return 0x41;
+ case 0x227:
+ return 0x61;
+ case 0x228:
+ return 0x45;
+ case 0x229:
+ return 0x65;
+ case 0x22a:
+ return 0x4f;
+ case 0x22b:
+ return 0x6f;
+ case 0x22c:
+ return 0x4f;
+ case 0x22d:
+ return 0x6f;
+ case 0x22e:
+ return 0x4f;
+ case 0x22f:
+ return 0x6f;
+ case 0x230:
+ return 0x4f;
+ case 0x231:
+ return 0x6f;
+ case 0x232:
+ return 0x59;
+ case 0x233:
+ return 0x79;
+ case 0x37e:
+ return 0x3b;
+ case 0x386:
+ return 0x391;
+ case 0x388:
+ return 0x395;
+ case 0x389:
+ return 0x397;
+ case 0x38a:
+ return 0x399;
+ case 0x38c:
+ return 0x39f;
+ case 0x38e:
+ return 0x3a5;
+ case 0x38f:
+ return 0x3a9;
+ case 0x390:
+ return 0x3b9;
+ case 0x3aa:
+ return 0x399;
+ case 0x3ab:
+ return 0x3a5;
+ case 0x3ac:
+ return 0x3b1;
+ case 0x3ad:
+ return 0x3b5;
+ case 0x3ae:
+ return 0x3b7;
+ case 0x3af:
+ return 0x3b9;
+ case 0x3b0:
+ return 0x3c5;
+ case 0x3ca:
+ return 0x3b9;
+ case 0x3cb:
+ return 0x3c5;
+ case 0x3cc:
+ return 0x3bf;
+ case 0x3cd:
+ return 0x3c5;
+ case 0x3ce:
+ return 0x3c9;
+ case 0x3d3:
+ return 0x3d2;
+ case 0x3d4:
+ return 0x3d2;
+ case 0x400:
+ return 0x415;
+ case 0x401:
+ return 0x415;
+ case 0x403:
+ return 0x413;
+ case 0x407:
+ return 0x406;
+ case 0x40c:
+ return 0x41a;
+ case 0x40d:
+ return 0x418;
+ case 0x40e:
+ return 0x423;
+ case 0x419:
+ return 0x418;
+ case 0x439:
+ return 0x438;
+ case 0x450:
+ return 0x435;
+ case 0x451:
+ return 0x435;
+ case 0x453:
+ return 0x433;
+ case 0x457:
+ return 0x456;
+ case 0x45c:
+ return 0x43a;
+ case 0x45d:
+ return 0x438;
+ case 0x45e:
+ return 0x443;
+ case 0x476:
+ return 0x474;
+ case 0x477:
+ return 0x475;
+ case 0x4c1:
+ return 0x416;
+ case 0x4c2:
+ return 0x436;
+ case 0x4d0:
+ return 0x410;
+ case 0x4d1:
+ return 0x430;
+ case 0x4d2:
+ return 0x410;
+ case 0x4d3:
+ return 0x430;
+ case 0x4d6:
+ return 0x415;
+ case 0x4d7:
+ return 0x435;
+ case 0x4da:
+ return 0x4d8;
+ case 0x4db:
+ return 0x4d9;
+ case 0x4dc:
+ return 0x416;
+ case 0x4dd:
+ return 0x436;
+ case 0x4de:
+ return 0x417;
+ case 0x4df:
+ return 0x437;
+ case 0x4e2:
+ return 0x418;
+ case 0x4e3:
+ return 0x438;
+ case 0x4e4:
+ return 0x418;
+ case 0x4e5:
+ return 0x438;
+ case 0x4e6:
+ return 0x41e;
+ case 0x4e7:
+ return 0x43e;
+ case 0x4ea:
+ return 0x4e8;
+ case 0x4eb:
+ return 0x4e9;
+ case 0x4ec:
+ return 0x42d;
+ case 0x4ed:
+ return 0x44d;
+ case 0x4ee:
+ return 0x423;
+ case 0x4ef:
+ return 0x443;
+ case 0x4f0:
+ return 0x423;
+ case 0x4f1:
+ return 0x443;
+ case 0x4f2:
+ return 0x423;
+ case 0x4f3:
+ return 0x443;
+ case 0x4f4:
+ return 0x427;
+ case 0x4f5:
+ return 0x447;
+ case 0x4f8:
+ return 0x42b;
+ case 0x4f9:
+ return 0x44b;
+ case 0x929:
+ return 0x928;
+ case 0x931:
+ return 0x930;
+ case 0x934:
+ return 0x933;
+ case 0x958:
+ return 0x915;
+ case 0x959:
+ return 0x916;
+ case 0x95a:
+ return 0x917;
+ case 0x95b:
+ return 0x91c;
+ case 0x95c:
+ return 0x921;
+ case 0x95d:
+ return 0x922;
+ case 0x95e:
+ return 0x92b;
+ case 0x95f:
+ return 0x92f;
+ case 0x9dc:
+ return 0x9a1;
+ case 0x9dd:
+ return 0x9a2;
+ case 0x9df:
+ return 0x9af;
+ case 0xa33:
+ return 0xa32;
+ case 0xa36:
+ return 0xa38;
+ case 0xa59:
+ return 0xa16;
+ case 0xa5a:
+ return 0xa17;
+ case 0xa5b:
+ return 0xa1c;
+ case 0xa5e:
+ return 0xa2b;
+ case 0xb5c:
+ return 0xb21;
+ case 0xb5d:
+ return 0xb22;
+ case 0xdda:
+ return 0xdd9;
+ case 0xddd:
+ return 0xddc;
+ case 0x1e00:
+ return 0x41;
+ case 0x1e01:
+ return 0x61;
+ case 0x1e02:
+ return 0x42;
+ case 0x1e03:
+ return 0x62;
+ case 0x1e04:
+ return 0x42;
+ case 0x1e05:
+ return 0x62;
+ case 0x1e06:
+ return 0x42;
+ case 0x1e07:
+ return 0x62;
+ case 0x1e08:
+ return 0x43;
+ case 0x1e09:
+ return 0x63;
+ case 0x1e0a:
+ return 0x44;
+ case 0x1e0b:
+ return 0x64;
+ case 0x1e0c:
+ return 0x44;
+ case 0x1e0d:
+ return 0x64;
+ case 0x1e0e:
+ return 0x44;
+ case 0x1e0f:
+ return 0x64;
+ case 0x1e10:
+ return 0x44;
+ case 0x1e11:
+ return 0x64;
+ case 0x1e12:
+ return 0x44;
+ case 0x1e13:
+ return 0x64;
+ case 0x1e14:
+ return 0x45;
+ case 0x1e15:
+ return 0x65;
+ case 0x1e16:
+ return 0x45;
+ case 0x1e17:
+ return 0x65;
+ case 0x1e18:
+ return 0x45;
+ case 0x1e19:
+ return 0x65;
+ case 0x1e1a:
+ return 0x45;
+ case 0x1e1b:
+ return 0x65;
+ case 0x1e1c:
+ return 0x45;
+ case 0x1e1d:
+ return 0x65;
+ case 0x1e1e:
+ return 0x46;
+ case 0x1e1f:
+ return 0x66;
+ case 0x1e20:
+ return 0x47;
+ case 0x1e21:
+ return 0x67;
+ case 0x1e22:
+ return 0x48;
+ case 0x1e23:
+ return 0x68;
+ case 0x1e24:
+ return 0x48;
+ case 0x1e25:
+ return 0x68;
+ case 0x1e26:
+ return 0x48;
+ case 0x1e27:
+ return 0x68;
+ case 0x1e28:
+ return 0x48;
+ case 0x1e29:
+ return 0x68;
+ case 0x1e2a:
+ return 0x48;
+ case 0x1e2b:
+ return 0x68;
+ case 0x1e2c:
+ return 0x49;
+ case 0x1e2d:
+ return 0x69;
+ case 0x1e2e:
+ return 0x49;
+ case 0x1e2f:
+ return 0x69;
+ case 0x1e30:
+ return 0x4b;
+ case 0x1e31:
+ return 0x6b;
+ case 0x1e32:
+ return 0x4b;
+ case 0x1e33:
+ return 0x6b;
+ case 0x1e34:
+ return 0x4b;
+ case 0x1e35:
+ return 0x6b;
+ case 0x1e36:
+ return 0x4c;
+ case 0x1e37:
+ return 0x6c;
+ case 0x1e38:
+ return 0x4c;
+ case 0x1e39:
+ return 0x6c;
+ case 0x1e3a:
+ return 0x4c;
+ case 0x1e3b:
+ return 0x6c;
+ case 0x1e3c:
+ return 0x4c;
+ case 0x1e3d:
+ return 0x6c;
+ case 0x1e3e:
+ return 0x4d;
+ case 0x1e3f:
+ return 0x6d;
+ case 0x1e40:
+ return 0x4d;
+ case 0x1e41:
+ return 0x6d;
+ case 0x1e42:
+ return 0x4d;
+ case 0x1e43:
+ return 0x6d;
+ case 0x1e44:
+ return 0x4e;
+ case 0x1e45:
+ return 0x6e;
+ case 0x1e46:
+ return 0x4e;
+ case 0x1e47:
+ return 0x6e;
+ case 0x1e48:
+ return 0x4e;
+ case 0x1e49:
+ return 0x6e;
+ case 0x1e4a:
+ return 0x4e;
+ case 0x1e4b:
+ return 0x6e;
+ case 0x1e4c:
+ return 0x4f;
+ case 0x1e4d:
+ return 0x6f;
+ case 0x1e4e:
+ return 0x4f;
+ case 0x1e4f:
+ return 0x6f;
+ case 0x1e50:
+ return 0x4f;
+ case 0x1e51:
+ return 0x6f;
+ case 0x1e52:
+ return 0x4f;
+ case 0x1e53:
+ return 0x6f;
+ case 0x1e54:
+ return 0x50;
+ case 0x1e55:
+ return 0x70;
+ case 0x1e56:
+ return 0x50;
+ case 0x1e57:
+ return 0x70;
+ case 0x1e58:
+ return 0x52;
+ case 0x1e59:
+ return 0x72;
+ case 0x1e5a:
+ return 0x52;
+ case 0x1e5b:
+ return 0x72;
+ case 0x1e5c:
+ return 0x52;
+ case 0x1e5d:
+ return 0x72;
+ case 0x1e5e:
+ return 0x52;
+ case 0x1e5f:
+ return 0x72;
+ case 0x1e60:
+ return 0x53;
+ case 0x1e61:
+ return 0x73;
+ case 0x1e62:
+ return 0x53;
+ case 0x1e63:
+ return 0x73;
+ case 0x1e64:
+ return 0x53;
+ case 0x1e65:
+ return 0x73;
+ case 0x1e66:
+ return 0x53;
+ case 0x1e67:
+ return 0x73;
+ case 0x1e68:
+ return 0x53;
+ case 0x1e69:
+ return 0x73;
+ case 0x1e6a:
+ return 0x54;
+ case 0x1e6b:
+ return 0x74;
+ case 0x1e6c:
+ return 0x54;
+ case 0x1e6d:
+ return 0x74;
+ case 0x1e6e:
+ return 0x54;
+ case 0x1e6f:
+ return 0x74;
+ case 0x1e70:
+ return 0x54;
+ case 0x1e71:
+ return 0x74;
+ case 0x1e72:
+ return 0x55;
+ case 0x1e73:
+ return 0x75;
+ case 0x1e74:
+ return 0x55;
+ case 0x1e75:
+ return 0x75;
+ case 0x1e76:
+ return 0x55;
+ case 0x1e77:
+ return 0x75;
+ case 0x1e78:
+ return 0x55;
+ case 0x1e79:
+ return 0x75;
+ case 0x1e7a:
+ return 0x55;
+ case 0x1e7b:
+ return 0x75;
+ case 0x1e7c:
+ return 0x56;
+ case 0x1e7d:
+ return 0x76;
+ case 0x1e7e:
+ return 0x56;
+ case 0x1e7f:
+ return 0x76;
+ case 0x1e80:
+ return 0x57;
+ case 0x1e81:
+ return 0x77;
+ case 0x1e82:
+ return 0x57;
+ case 0x1e83:
+ return 0x77;
+ case 0x1e84:
+ return 0x57;
+ case 0x1e85:
+ return 0x77;
+ case 0x1e86:
+ return 0x57;
+ case 0x1e87:
+ return 0x77;
+ case 0x1e88:
+ return 0x57;
+ case 0x1e89:
+ return 0x77;
+ case 0x1e8a:
+ return 0x58;
+ case 0x1e8b:
+ return 0x78;
+ case 0x1e8c:
+ return 0x58;
+ case 0x1e8d:
+ return 0x78;
+ case 0x1e8e:
+ return 0x59;
+ case 0x1e8f:
+ return 0x79;
+ case 0x1e90:
+ return 0x5a;
+ case 0x1e91:
+ return 0x7a;
+ case 0x1e92:
+ return 0x5a;
+ case 0x1e93:
+ return 0x7a;
+ case 0x1e94:
+ return 0x5a;
+ case 0x1e95:
+ return 0x7a;
+ case 0x1e96:
+ return 0x68;
+ case 0x1e97:
+ return 0x74;
+ case 0x1e98:
+ return 0x77;
+ case 0x1e99:
+ return 0x79;
+ case 0x1e9b:
+ return 0x17f;
+ case 0x1ea0:
+ return 0x41;
+ case 0x1ea1:
+ return 0x61;
+ case 0x1ea2:
+ return 0x41;
+ case 0x1ea3:
+ return 0x61;
+ case 0x1ea4:
+ return 0x41;
+ case 0x1ea5:
+ return 0x61;
+ case 0x1ea6:
+ return 0x41;
+ case 0x1ea7:
+ return 0x61;
+ case 0x1ea8:
+ return 0x41;
+ case 0x1ea9:
+ return 0x61;
+ case 0x1eaa:
+ return 0x41;
+ case 0x1eab:
+ return 0x61;
+ case 0x1eac:
+ return 0x41;
+ case 0x1ead:
+ return 0x61;
+ case 0x1eae:
+ return 0x41;
+ case 0x1eaf:
+ return 0x61;
+ case 0x1eb0:
+ return 0x41;
+ case 0x1eb1:
+ return 0x61;
+ case 0x1eb2:
+ return 0x41;
+ case 0x1eb3:
+ return 0x61;
+ case 0x1eb4:
+ return 0x41;
+ case 0x1eb5:
+ return 0x61;
+ case 0x1eb6:
+ return 0x41;
+ case 0x1eb7:
+ return 0x61;
+ case 0x1eb8:
+ return 0x45;
+ case 0x1eb9:
+ return 0x65;
+ case 0x1eba:
+ return 0x45;
+ case 0x1ebb:
+ return 0x65;
+ case 0x1ebc:
+ return 0x45;
+ case 0x1ebd:
+ return 0x65;
+ case 0x1ebe:
+ return 0x45;
+ case 0x1ebf:
+ return 0x65;
+ case 0x1ec0:
+ return 0x45;
+ case 0x1ec1:
+ return 0x65;
+ case 0x1ec2:
+ return 0x45;
+ case 0x1ec3:
+ return 0x65;
+ case 0x1ec4:
+ return 0x45;
+ case 0x1ec5:
+ return 0x65;
+ case 0x1ec6:
+ return 0x45;
+ case 0x1ec7:
+ return 0x65;
+ case 0x1ec8:
+ return 0x49;
+ case 0x1ec9:
+ return 0x69;
+ case 0x1eca:
+ return 0x49;
+ case 0x1ecb:
+ return 0x69;
+ case 0x1ecc:
+ return 0x4f;
+ case 0x1ecd:
+ return 0x6f;
+ case 0x1ece:
+ return 0x4f;
+ case 0x1ecf:
+ return 0x6f;
+ case 0x1ed0:
+ return 0x4f;
+ case 0x1ed1:
+ return 0x6f;
+ case 0x1ed2:
+ return 0x4f;
+ case 0x1ed3:
+ return 0x6f;
+ case 0x1ed4:
+ return 0x4f;
+ case 0x1ed5:
+ return 0x6f;
+ case 0x1ed6:
+ return 0x4f;
+ case 0x1ed7:
+ return 0x6f;
+ case 0x1ed8:
+ return 0x4f;
+ case 0x1ed9:
+ return 0x6f;
+ case 0x1eda:
+ return 0x4f;
+ case 0x1edb:
+ return 0x6f;
+ case 0x1edc:
+ return 0x4f;
+ case 0x1edd:
+ return 0x6f;
+ case 0x1ede:
+ return 0x4f;
+ case 0x1edf:
+ return 0x6f;
+ case 0x1ee0:
+ return 0x4f;
+ case 0x1ee1:
+ return 0x6f;
+ case 0x1ee2:
+ return 0x4f;
+ case 0x1ee3:
+ return 0x6f;
+ case 0x1ee4:
+ return 0x55;
+ case 0x1ee5:
+ return 0x75;
+ case 0x1ee6:
+ return 0x55;
+ case 0x1ee7:
+ return 0x75;
+ case 0x1ee8:
+ return 0x55;
+ case 0x1ee9:
+ return 0x75;
+ case 0x1eea:
+ return 0x55;
+ case 0x1eeb:
+ return 0x75;
+ case 0x1eec:
+ return 0x55;
+ case 0x1eed:
+ return 0x75;
+ case 0x1eee:
+ return 0x55;
+ case 0x1eef:
+ return 0x75;
+ case 0x1ef0:
+ return 0x55;
+ case 0x1ef1:
+ return 0x75;
+ case 0x1ef2:
+ return 0x59;
+ case 0x1ef3:
+ return 0x79;
+ case 0x1ef4:
+ return 0x59;
+ case 0x1ef5:
+ return 0x79;
+ case 0x1ef6:
+ return 0x59;
+ case 0x1ef7:
+ return 0x79;
+ case 0x1ef8:
+ return 0x59;
+ case 0x1ef9:
+ return 0x79;
+ case 0x1f00:
+ return 0x3b1;
+ case 0x1f01:
+ return 0x3b1;
+ case 0x1f02:
+ return 0x3b1;
+ case 0x1f03:
+ return 0x3b1;
+ case 0x1f04:
+ return 0x3b1;
+ case 0x1f05:
+ return 0x3b1;
+ case 0x1f06:
+ return 0x3b1;
+ case 0x1f07:
+ return 0x3b1;
+ case 0x1f08:
+ return 0x391;
+ case 0x1f09:
+ return 0x391;
+ case 0x1f0a:
+ return 0x391;
+ case 0x1f0b:
+ return 0x391;
+ case 0x1f0c:
+ return 0x391;
+ case 0x1f0d:
+ return 0x391;
+ case 0x1f0e:
+ return 0x391;
+ case 0x1f0f:
+ return 0x391;
+ case 0x1f10:
+ return 0x3b5;
+ case 0x1f11:
+ return 0x3b5;
+ case 0x1f12:
+ return 0x3b5;
+ case 0x1f13:
+ return 0x3b5;
+ case 0x1f14:
+ return 0x3b5;
+ case 0x1f15:
+ return 0x3b5;
+ case 0x1f18:
+ return 0x395;
+ case 0x1f19:
+ return 0x395;
+ case 0x1f1a:
+ return 0x395;
+ case 0x1f1b:
+ return 0x395;
+ case 0x1f1c:
+ return 0x395;
+ case 0x1f1d:
+ return 0x395;
+ case 0x1f20:
+ return 0x3b7;
+ case 0x1f21:
+ return 0x3b7;
+ case 0x1f22:
+ return 0x3b7;
+ case 0x1f23:
+ return 0x3b7;
+ case 0x1f24:
+ return 0x3b7;
+ case 0x1f25:
+ return 0x3b7;
+ case 0x1f26:
+ return 0x3b7;
+ case 0x1f27:
+ return 0x3b7;
+ case 0x1f28:
+ return 0x397;
+ case 0x1f29:
+ return 0x397;
+ case 0x1f2a:
+ return 0x397;
+ case 0x1f2b:
+ return 0x397;
+ case 0x1f2c:
+ return 0x397;
+ case 0x1f2d:
+ return 0x397;
+ case 0x1f2e:
+ return 0x397;
+ case 0x1f2f:
+ return 0x397;
+ case 0x1f30:
+ return 0x3b9;
+ case 0x1f31:
+ return 0x3b9;
+ case 0x1f32:
+ return 0x3b9;
+ case 0x1f33:
+ return 0x3b9;
+ case 0x1f34:
+ return 0x3b9;
+ case 0x1f35:
+ return 0x3b9;
+ case 0x1f36:
+ return 0x3b9;
+ case 0x1f37:
+ return 0x3b9;
+ case 0x1f38:
+ return 0x399;
+ case 0x1f39:
+ return 0x399;
+ case 0x1f3a:
+ return 0x399;
+ case 0x1f3b:
+ return 0x399;
+ case 0x1f3c:
+ return 0x399;
+ case 0x1f3d:
+ return 0x399;
+ case 0x1f3e:
+ return 0x399;
+ case 0x1f3f:
+ return 0x399;
+ case 0x1f40:
+ return 0x3bf;
+ case 0x1f41:
+ return 0x3bf;
+ case 0x1f42:
+ return 0x3bf;
+ case 0x1f43:
+ return 0x3bf;
+ case 0x1f44:
+ return 0x3bf;
+ case 0x1f45:
+ return 0x3bf;
+ case 0x1f48:
+ return 0x39f;
+ case 0x1f49:
+ return 0x39f;
+ case 0x1f4a:
+ return 0x39f;
+ case 0x1f4b:
+ return 0x39f;
+ case 0x1f4c:
+ return 0x39f;
+ case 0x1f4d:
+ return 0x39f;
+ case 0x1f50:
+ return 0x3c5;
+ case 0x1f51:
+ return 0x3c5;
+ case 0x1f52:
+ return 0x3c5;
+ case 0x1f53:
+ return 0x3c5;
+ case 0x1f54:
+ return 0x3c5;
+ case 0x1f55:
+ return 0x3c5;
+ case 0x1f56:
+ return 0x3c5;
+ case 0x1f57:
+ return 0x3c5;
+ case 0x1f59:
+ return 0x3a5;
+ case 0x1f5b:
+ return 0x3a5;
+ case 0x1f5d:
+ return 0x3a5;
+ case 0x1f5f:
+ return 0x3a5;
+ case 0x1f60:
+ return 0x3c9;
+ case 0x1f61:
+ return 0x3c9;
+ case 0x1f62:
+ return 0x3c9;
+ case 0x1f63:
+ return 0x3c9;
+ case 0x1f64:
+ return 0x3c9;
+ case 0x1f65:
+ return 0x3c9;
+ case 0x1f66:
+ return 0x3c9;
+ case 0x1f67:
+ return 0x3c9;
+ case 0x1f68:
+ return 0x3a9;
+ case 0x1f69:
+ return 0x3a9;
+ case 0x1f6a:
+ return 0x3a9;
+ case 0x1f6b:
+ return 0x3a9;
+ case 0x1f6c:
+ return 0x3a9;
+ case 0x1f6d:
+ return 0x3a9;
+ case 0x1f6e:
+ return 0x3a9;
+ case 0x1f6f:
+ return 0x3a9;
+ case 0x1f70:
+ return 0x3b1;
+ case 0x1f71:
+ return 0x3b1;
+ case 0x1f72:
+ return 0x3b5;
+ case 0x1f73:
+ return 0x3b5;
+ case 0x1f74:
+ return 0x3b7;
+ case 0x1f75:
+ return 0x3b7;
+ case 0x1f76:
+ return 0x3b9;
+ case 0x1f77:
+ return 0x3b9;
+ case 0x1f78:
+ return 0x3bf;
+ case 0x1f79:
+ return 0x3bf;
+ case 0x1f7a:
+ return 0x3c5;
+ case 0x1f7b:
+ return 0x3c5;
+ case 0x1f7c:
+ return 0x3c9;
+ case 0x1f7d:
+ return 0x3c9;
+ case 0x1f80:
+ return 0x3b1;
+ case 0x1f81:
+ return 0x3b1;
+ case 0x1f82:
+ return 0x3b1;
+ case 0x1f83:
+ return 0x3b1;
+ case 0x1f84:
+ return 0x3b1;
+ case 0x1f85:
+ return 0x3b1;
+ case 0x1f86:
+ return 0x3b1;
+ case 0x1f87:
+ return 0x3b1;
+ case 0x1f88:
+ return 0x391;
+ case 0x1f89:
+ return 0x391;
+ case 0x1f8a:
+ return 0x391;
+ case 0x1f8b:
+ return 0x391;
+ case 0x1f8c:
+ return 0x391;
+ case 0x1f8d:
+ return 0x391;
+ case 0x1f8e:
+ return 0x391;
+ case 0x1f8f:
+ return 0x391;
+ case 0x1f90:
+ return 0x3b7;
+ case 0x1f91:
+ return 0x3b7;
+ case 0x1f92:
+ return 0x3b7;
+ case 0x1f93:
+ return 0x3b7;
+ case 0x1f94:
+ return 0x3b7;
+ case 0x1f95:
+ return 0x3b7;
+ case 0x1f96:
+ return 0x3b7;
+ case 0x1f97:
+ return 0x3b7;
+ case 0x1f98:
+ return 0x397;
+ case 0x1f99:
+ return 0x397;
+ case 0x1f9a:
+ return 0x397;
+ case 0x1f9b:
+ return 0x397;
+ case 0x1f9c:
+ return 0x397;
+ case 0x1f9d:
+ return 0x397;
+ case 0x1f9e:
+ return 0x397;
+ case 0x1f9f:
+ return 0x397;
+ case 0x1fa0:
+ return 0x3c9;
+ case 0x1fa1:
+ return 0x3c9;
+ case 0x1fa2:
+ return 0x3c9;
+ case 0x1fa3:
+ return 0x3c9;
+ case 0x1fa4:
+ return 0x3c9;
+ case 0x1fa5:
+ return 0x3c9;
+ case 0x1fa6:
+ return 0x3c9;
+ case 0x1fa7:
+ return 0x3c9;
+ case 0x1fa8:
+ return 0x3a9;
+ case 0x1fa9:
+ return 0x3a9;
+ case 0x1faa:
+ return 0x3a9;
+ case 0x1fab:
+ return 0x3a9;
+ case 0x1fac:
+ return 0x3a9;
+ case 0x1fad:
+ return 0x3a9;
+ case 0x1fae:
+ return 0x3a9;
+ case 0x1faf:
+ return 0x3a9;
+ case 0x1fb0:
+ return 0x3b1;
+ case 0x1fb1:
+ return 0x3b1;
+ case 0x1fb2:
+ return 0x3b1;
+ case 0x1fb3:
+ return 0x3b1;
+ case 0x1fb4:
+ return 0x3b1;
+ case 0x1fb6:
+ return 0x3b1;
+ case 0x1fb7:
+ return 0x3b1;
+ case 0x1fb8:
+ return 0x391;
+ case 0x1fb9:
+ return 0x391;
+ case 0x1fba:
+ return 0x391;
+ case 0x1fbb:
+ return 0x391;
+ case 0x1fbc:
+ return 0x391;
+ case 0x1fbe:
+ return 0x3b9;
+ case 0x1fc2:
+ return 0x3b7;
+ case 0x1fc3:
+ return 0x3b7;
+ case 0x1fc4:
+ return 0x3b7;
+ case 0x1fc6:
+ return 0x3b7;
+ case 0x1fc7:
+ return 0x3b7;
+ case 0x1fc8:
+ return 0x395;
+ case 0x1fc9:
+ return 0x395;
+ case 0x1fca:
+ return 0x397;
+ case 0x1fcb:
+ return 0x397;
+ case 0x1fcc:
+ return 0x397;
+ case 0x1fd0:
+ return 0x3b9;
+ case 0x1fd1:
+ return 0x3b9;
+ case 0x1fd2:
+ return 0x3b9;
+ case 0x1fd3:
+ return 0x3b9;
+ case 0x1fd6:
+ return 0x3b9;
+ case 0x1fd7:
+ return 0x3b9;
+ case 0x1fd8:
+ return 0x399;
+ case 0x1fd9:
+ return 0x399;
+ case 0x1fda:
+ return 0x399;
+ case 0x1fdb:
+ return 0x399;
+ case 0x1fe0:
+ return 0x3c5;
+ case 0x1fe1:
+ return 0x3c5;
+ case 0x1fe2:
+ return 0x3c5;
+ case 0x1fe3:
+ return 0x3c5;
+ case 0x1fe4:
+ return 0x3c1;
+ case 0x1fe5:
+ return 0x3c1;
+ case 0x1fe6:
+ return 0x3c5;
+ case 0x1fe7:
+ return 0x3c5;
+ case 0x1fe8:
+ return 0x3a5;
+ case 0x1fe9:
+ return 0x3a5;
+ case 0x1fea:
+ return 0x3a5;
+ case 0x1feb:
+ return 0x3a5;
+ case 0x1fec:
+ return 0x3a1;
+ case 0x1ff2:
+ return 0x3c9;
+ case 0x1ff3:
+ return 0x3c9;
+ case 0x1ff4:
+ return 0x3c9;
+ case 0x1ff6:
+ return 0x3c9;
+ case 0x1ff7:
+ return 0x3c9;
+ case 0x1ff8:
+ return 0x39f;
+ case 0x1ff9:
+ return 0x39f;
+ case 0x1ffa:
+ return 0x3a9;
+ case 0x1ffb:
+ return 0x3a9;
+ case 0x1ffc:
+ return 0x3a9;
+ case 0x2000:
+ return 0x2002;
+ case 0x2001:
+ return 0x2003;
+ case 0x2126:
+ return 0x3a9;
+ case 0x212a:
+ return 0x4b;
+ case 0x212b:
+ return 0x41;
+ case 0x219a:
+ return 0x2190;
+ case 0x219b:
+ return 0x2192;
+ case 0x21ae:
+ return 0x2194;
+ case 0x21cd:
+ return 0x21d0;
+ case 0x21ce:
+ return 0x21d4;
+ case 0x21cf:
+ return 0x21d2;
+ case 0x2204:
+ return 0x2203;
+ case 0x2209:
+ return 0x2208;
+ case 0x220c:
+ return 0x220b;
+ case 0x2224:
+ return 0x2223;
+ case 0x2226:
+ return 0x2225;
+ case 0x2241:
+ return 0x223c;
+ case 0x2244:
+ return 0x2243;
+ case 0x2247:
+ return 0x2245;
+ case 0x2249:
+ return 0x2248;
+ case 0x2260:
+ return 0x3d;
+ case 0x2262:
+ return 0x2261;
+ case 0x226d:
+ return 0x224d;
+ case 0x226e:
+ return 0x3c;
+ case 0x226f:
+ return 0x3e;
+ case 0x2270:
+ return 0x2264;
+ case 0x2271:
+ return 0x2265;
+ case 0x2274:
+ return 0x2272;
+ case 0x2275:
+ return 0x2273;
+ case 0x2278:
+ return 0x2276;
+ case 0x2279:
+ return 0x2277;
+ case 0x2280:
+ return 0x227a;
+ case 0x2281:
+ return 0x227b;
+ case 0x2284:
+ return 0x2282;
+ case 0x2285:
+ return 0x2283;
+ case 0x2288:
+ return 0x2286;
+ case 0x2289:
+ return 0x2287;
+ case 0x22ac:
+ return 0x22a2;
+ case 0x22ad:
+ return 0x22a8;
+ case 0x22ae:
+ return 0x22a9;
+ case 0x22af:
+ return 0x22ab;
+ case 0x22e0:
+ return 0x227c;
+ case 0x22e1:
+ return 0x227d;
+ case 0x22e2:
+ return 0x2291;
+ case 0x22e3:
+ return 0x2292;
+ case 0x22ea:
+ return 0x22b2;
+ case 0x22eb:
+ return 0x22b3;
+ case 0x22ec:
+ return 0x22b4;
+ case 0x22ed:
+ return 0x22b5;
+ case 0x2329:
+ return 0x3008;
+ case 0x232a:
+ return 0x3009;
+ case 0x2adc:
+ return 0x2add;
+ case 0x304c:
+ return 0x304b;
+ case 0x304e:
+ return 0x304d;
+ case 0x3050:
+ return 0x304f;
+ case 0x3052:
+ return 0x3051;
+ case 0x3054:
+ return 0x3053;
+ case 0x3056:
+ return 0x3055;
+ case 0x3058:
+ return 0x3057;
+ case 0x305a:
+ return 0x3059;
+ case 0x305c:
+ return 0x305b;
+ case 0x305e:
+ return 0x305d;
+ case 0x3060:
+ return 0x305f;
+ case 0x3062:
+ return 0x3061;
+ case 0x3065:
+ return 0x3064;
+ case 0x3067:
+ return 0x3066;
+ case 0x3069:
+ return 0x3068;
+ case 0x3070:
+ return 0x306f;
+ case 0x3071:
+ return 0x306f;
+ case 0x3073:
+ return 0x3072;
+ case 0x3074:
+ return 0x3072;
+ case 0x3076:
+ return 0x3075;
+ case 0x3077:
+ return 0x3075;
+ case 0x3079:
+ return 0x3078;
+ case 0x307a:
+ return 0x3078;
+ case 0x307c:
+ return 0x307b;
+ case 0x307d:
+ return 0x307b;
+ case 0x3094:
+ return 0x3046;
+ case 0x309e:
+ return 0x309d;
+ case 0x30ac:
+ return 0x30ab;
+ case 0x30ae:
+ return 0x30ad;
+ case 0x30b0:
+ return 0x30af;
+ case 0x30b2:
+ return 0x30b1;
+ case 0x30b4:
+ return 0x30b3;
+ case 0x30b6:
+ return 0x30b5;
+ case 0x30b8:
+ return 0x30b7;
+ case 0x30ba:
+ return 0x30b9;
+ case 0x30bc:
+ return 0x30bb;
+ case 0x30be:
+ return 0x30bd;
+ case 0x30c0:
+ return 0x30bf;
+ case 0x30c2:
+ return 0x30c1;
+ case 0x30c5:
+ return 0x30c4;
+ case 0x30c7:
+ return 0x30c6;
+ case 0x30c9:
+ return 0x30c8;
+ case 0x30d0:
+ return 0x30cf;
+ case 0x30d1:
+ return 0x30cf;
+ case 0x30d3:
+ return 0x30d2;
+ case 0x30d4:
+ return 0x30d2;
+ case 0x30d6:
+ return 0x30d5;
+ case 0x30d7:
+ return 0x30d5;
+ case 0x30d9:
+ return 0x30d8;
+ case 0x30da:
+ return 0x30d8;
+ case 0x30dc:
+ return 0x30db;
+ case 0x30dd:
+ return 0x30db;
+ case 0x30f4:
+ return 0x30a6;
+ case 0x30f7:
+ return 0x30ef;
+ case 0x30f8:
+ return 0x30f0;
+ case 0x30f9:
+ return 0x30f1;
+ case 0x30fa:
+ return 0x30f2;
+ case 0x30fe:
+ return 0x30fd;
+ case 0xf900:
+ return 0x8c48;
+ case 0xf901:
+ return 0x66f4;
+ case 0xf902:
+ return 0x8eca;
+ case 0xf903:
+ return 0x8cc8;
+ case 0xf904:
+ return 0x6ed1;
+ case 0xf905:
+ return 0x4e32;
+ case 0xf906:
+ return 0x53e5;
+ case 0xf907:
+ return 0x9f9c;
+ case 0xf908:
+ return 0x9f9c;
+ case 0xf909:
+ return 0x5951;
+ case 0xf90a:
+ return 0x91d1;
+ case 0xf90b:
+ return 0x5587;
+ case 0xf90c:
+ return 0x5948;
+ case 0xf90d:
+ return 0x61f6;
+ case 0xf90e:
+ return 0x7669;
+ case 0xf90f:
+ return 0x7f85;
+ case 0xf910:
+ return 0x863f;
+ case 0xf911:
+ return 0x87ba;
+ case 0xf912:
+ return 0x88f8;
+ case 0xf913:
+ return 0x908f;
+ case 0xf914:
+ return 0x6a02;
+ case 0xf915:
+ return 0x6d1b;
+ case 0xf916:
+ return 0x70d9;
+ case 0xf917:
+ return 0x73de;
+ case 0xf918:
+ return 0x843d;
+ case 0xf919:
+ return 0x916a;
+ case 0xf91a:
+ return 0x99f1;
+ case 0xf91b:
+ return 0x4e82;
+ case 0xf91c:
+ return 0x5375;
+ case 0xf91d:
+ return 0x6b04;
+ case 0xf91e:
+ return 0x721b;
+ case 0xf91f:
+ return 0x862d;
+ case 0xf920:
+ return 0x9e1e;
+ case 0xf921:
+ return 0x5d50;
+ case 0xf922:
+ return 0x6feb;
+ case 0xf923:
+ return 0x85cd;
+ case 0xf924:
+ return 0x8964;
+ case 0xf925:
+ return 0x62c9;
+ case 0xf926:
+ return 0x81d8;
+ case 0xf927:
+ return 0x881f;
+ case 0xf928:
+ return 0x5eca;
+ case 0xf929:
+ return 0x6717;
+ case 0xf92a:
+ return 0x6d6a;
+ case 0xf92b:
+ return 0x72fc;
+ case 0xf92c:
+ return 0x90ce;
+ case 0xf92d:
+ return 0x4f86;
+ case 0xf92e:
+ return 0x51b7;
+ case 0xf92f:
+ return 0x52de;
+ case 0xf930:
+ return 0x64c4;
+ case 0xf931:
+ return 0x6ad3;
+ case 0xf932:
+ return 0x7210;
+ case 0xf933:
+ return 0x76e7;
+ case 0xf934:
+ return 0x8001;
+ case 0xf935:
+ return 0x8606;
+ case 0xf936:
+ return 0x865c;
+ case 0xf937:
+ return 0x8def;
+ case 0xf938:
+ return 0x9732;
+ case 0xf939:
+ return 0x9b6f;
+ case 0xf93a:
+ return 0x9dfa;
+ case 0xf93b:
+ return 0x788c;
+ case 0xf93c:
+ return 0x797f;
+ case 0xf93d:
+ return 0x7da0;
+ case 0xf93e:
+ return 0x83c9;
+ case 0xf93f:
+ return 0x9304;
+ case 0xf940:
+ return 0x9e7f;
+ case 0xf941:
+ return 0x8ad6;
+ case 0xf942:
+ return 0x58df;
+ case 0xf943:
+ return 0x5f04;
+ case 0xf944:
+ return 0x7c60;
+ case 0xf945:
+ return 0x807e;
+ case 0xf946:
+ return 0x7262;
+ case 0xf947:
+ return 0x78ca;
+ case 0xf948:
+ return 0x8cc2;
+ case 0xf949:
+ return 0x96f7;
+ case 0xf94a:
+ return 0x58d8;
+ case 0xf94b:
+ return 0x5c62;
+ case 0xf94c:
+ return 0x6a13;
+ case 0xf94d:
+ return 0x6dda;
+ case 0xf94e:
+ return 0x6f0f;
+ case 0xf94f:
+ return 0x7d2f;
+ case 0xf950:
+ return 0x7e37;
+ case 0xf951:
+ return 0x964b;
+ case 0xf952:
+ return 0x52d2;
+ case 0xf953:
+ return 0x808b;
+ case 0xf954:
+ return 0x51dc;
+ case 0xf955:
+ return 0x51cc;
+ case 0xf956:
+ return 0x7a1c;
+ case 0xf957:
+ return 0x7dbe;
+ case 0xf958:
+ return 0x83f1;
+ case 0xf959:
+ return 0x9675;
+ case 0xf95a:
+ return 0x8b80;
+ case 0xf95b:
+ return 0x62cf;
+ case 0xf95c:
+ return 0x6a02;
+ case 0xf95d:
+ return 0x8afe;
+ case 0xf95e:
+ return 0x4e39;
+ case 0xf95f:
+ return 0x5be7;
+ case 0xf960:
+ return 0x6012;
+ case 0xf961:
+ return 0x7387;
+ case 0xf962:
+ return 0x7570;
+ case 0xf963:
+ return 0x5317;
+ case 0xf964:
+ return 0x78fb;
+ case 0xf965:
+ return 0x4fbf;
+ case 0xf966:
+ return 0x5fa9;
+ case 0xf967:
+ return 0x4e0d;
+ case 0xf968:
+ return 0x6ccc;
+ case 0xf969:
+ return 0x6578;
+ case 0xf96a:
+ return 0x7d22;
+ case 0xf96b:
+ return 0x53c3;
+ case 0xf96c:
+ return 0x585e;
+ case 0xf96d:
+ return 0x7701;
+ case 0xf96e:
+ return 0x8449;
+ case 0xf96f:
+ return 0x8aaa;
+ case 0xf970:
+ return 0x6bba;
+ case 0xf971:
+ return 0x8fb0;
+ case 0xf972:
+ return 0x6c88;
+ case 0xf973:
+ return 0x62fe;
+ case 0xf974:
+ return 0x82e5;
+ case 0xf975:
+ return 0x63a0;
+ case 0xf976:
+ return 0x7565;
+ case 0xf977:
+ return 0x4eae;
+ case 0xf978:
+ return 0x5169;
+ case 0xf979:
+ return 0x51c9;
+ case 0xf97a:
+ return 0x6881;
+ case 0xf97b:
+ return 0x7ce7;
+ case 0xf97c:
+ return 0x826f;
+ case 0xf97d:
+ return 0x8ad2;
+ case 0xf97e:
+ return 0x91cf;
+ case 0xf97f:
+ return 0x52f5;
+ case 0xf980:
+ return 0x5442;
+ case 0xf981:
+ return 0x5973;
+ case 0xf982:
+ return 0x5eec;
+ case 0xf983:
+ return 0x65c5;
+ case 0xf984:
+ return 0x6ffe;
+ case 0xf985:
+ return 0x792a;
+ case 0xf986:
+ return 0x95ad;
+ case 0xf987:
+ return 0x9a6a;
+ case 0xf988:
+ return 0x9e97;
+ case 0xf989:
+ return 0x9ece;
+ case 0xf98a:
+ return 0x529b;
+ case 0xf98b:
+ return 0x66c6;
+ case 0xf98c:
+ return 0x6b77;
+ case 0xf98d:
+ return 0x8f62;
+ case 0xf98e:
+ return 0x5e74;
+ case 0xf98f:
+ return 0x6190;
+ case 0xf990:
+ return 0x6200;
+ case 0xf991:
+ return 0x649a;
+ case 0xf992:
+ return 0x6f23;
+ case 0xf993:
+ return 0x7149;
+ case 0xf994:
+ return 0x7489;
+ case 0xf995:
+ return 0x79ca;
+ case 0xf996:
+ return 0x7df4;
+ case 0xf997:
+ return 0x806f;
+ case 0xf998:
+ return 0x8f26;
+ case 0xf999:
+ return 0x84ee;
+ case 0xf99a:
+ return 0x9023;
+ case 0xf99b:
+ return 0x934a;
+ case 0xf99c:
+ return 0x5217;
+ case 0xf99d:
+ return 0x52a3;
+ case 0xf99e:
+ return 0x54bd;
+ case 0xf99f:
+ return 0x70c8;
+ case 0xf9a0:
+ return 0x88c2;
+ case 0xf9a1:
+ return 0x8aaa;
+ case 0xf9a2:
+ return 0x5ec9;
+ case 0xf9a3:
+ return 0x5ff5;
+ case 0xf9a4:
+ return 0x637b;
+ case 0xf9a5:
+ return 0x6bae;
+ case 0xf9a6:
+ return 0x7c3e;
+ case 0xf9a7:
+ return 0x7375;
+ case 0xf9a8:
+ return 0x4ee4;
+ case 0xf9a9:
+ return 0x56f9;
+ case 0xf9aa:
+ return 0x5be7;
+ case 0xf9ab:
+ return 0x5dba;
+ case 0xf9ac:
+ return 0x601c;
+ case 0xf9ad:
+ return 0x73b2;
+ case 0xf9ae:
+ return 0x7469;
+ case 0xf9af:
+ return 0x7f9a;
+ case 0xf9b0:
+ return 0x8046;
+ case 0xf9b1:
+ return 0x9234;
+ case 0xf9b2:
+ return 0x96f6;
+ case 0xf9b3:
+ return 0x9748;
+ case 0xf9b4:
+ return 0x9818;
+ case 0xf9b5:
+ return 0x4f8b;
+ case 0xf9b6:
+ return 0x79ae;
+ case 0xf9b7:
+ return 0x91b4;
+ case 0xf9b8:
+ return 0x96b8;
+ case 0xf9b9:
+ return 0x60e1;
+ case 0xf9ba:
+ return 0x4e86;
+ case 0xf9bb:
+ return 0x50da;
+ case 0xf9bc:
+ return 0x5bee;
+ case 0xf9bd:
+ return 0x5c3f;
+ case 0xf9be:
+ return 0x6599;
+ case 0xf9bf:
+ return 0x6a02;
+ case 0xf9c0:
+ return 0x71ce;
+ case 0xf9c1:
+ return 0x7642;
+ case 0xf9c2:
+ return 0x84fc;
+ case 0xf9c3:
+ return 0x907c;
+ case 0xf9c4:
+ return 0x9f8d;
+ case 0xf9c5:
+ return 0x6688;
+ case 0xf9c6:
+ return 0x962e;
+ case 0xf9c7:
+ return 0x5289;
+ case 0xf9c8:
+ return 0x677b;
+ case 0xf9c9:
+ return 0x67f3;
+ case 0xf9ca:
+ return 0x6d41;
+ case 0xf9cb:
+ return 0x6e9c;
+ case 0xf9cc:
+ return 0x7409;
+ case 0xf9cd:
+ return 0x7559;
+ case 0xf9ce:
+ return 0x786b;
+ case 0xf9cf:
+ return 0x7d10;
+ case 0xf9d0:
+ return 0x985e;
+ case 0xf9d1:
+ return 0x516d;
+ case 0xf9d2:
+ return 0x622e;
+ case 0xf9d3:
+ return 0x9678;
+ case 0xf9d4:
+ return 0x502b;
+ case 0xf9d5:
+ return 0x5d19;
+ case 0xf9d6:
+ return 0x6dea;
+ case 0xf9d7:
+ return 0x8f2a;
+ case 0xf9d8:
+ return 0x5f8b;
+ case 0xf9d9:
+ return 0x6144;
+ case 0xf9da:
+ return 0x6817;
+ case 0xf9db:
+ return 0x7387;
+ case 0xf9dc:
+ return 0x9686;
+ case 0xf9dd:
+ return 0x5229;
+ case 0xf9de:
+ return 0x540f;
+ case 0xf9df:
+ return 0x5c65;
+ case 0xf9e0:
+ return 0x6613;
+ case 0xf9e1:
+ return 0x674e;
+ case 0xf9e2:
+ return 0x68a8;
+ case 0xf9e3:
+ return 0x6ce5;
+ case 0xf9e4:
+ return 0x7406;
+ case 0xf9e5:
+ return 0x75e2;
+ case 0xf9e6:
+ return 0x7f79;
+ case 0xf9e7:
+ return 0x88cf;
+ case 0xf9e8:
+ return 0x88e1;
+ case 0xf9e9:
+ return 0x91cc;
+ case 0xf9ea:
+ return 0x96e2;
+ case 0xf9eb:
+ return 0x533f;
+ case 0xf9ec:
+ return 0x6eba;
+ case 0xf9ed:
+ return 0x541d;
+ case 0xf9ee:
+ return 0x71d0;
+ case 0xf9ef:
+ return 0x7498;
+ case 0xf9f0:
+ return 0x85fa;
+ case 0xf9f1:
+ return 0x96a3;
+ case 0xf9f2:
+ return 0x9c57;
+ case 0xf9f3:
+ return 0x9e9f;
+ case 0xf9f4:
+ return 0x6797;
+ case 0xf9f5:
+ return 0x6dcb;
+ case 0xf9f6:
+ return 0x81e8;
+ case 0xf9f7:
+ return 0x7acb;
+ case 0xf9f8:
+ return 0x7b20;
+ case 0xf9f9:
+ return 0x7c92;
+ case 0xf9fa:
+ return 0x72c0;
+ case 0xf9fb:
+ return 0x7099;
+ case 0xf9fc:
+ return 0x8b58;
+ case 0xf9fd:
+ return 0x4ec0;
+ case 0xf9fe:
+ return 0x8336;
+ case 0xf9ff:
+ return 0x523a;
+ case 0xfa00:
+ return 0x5207;
+ case 0xfa01:
+ return 0x5ea6;
+ case 0xfa02:
+ return 0x62d3;
+ case 0xfa03:
+ return 0x7cd6;
+ case 0xfa04:
+ return 0x5b85;
+ case 0xfa05:
+ return 0x6d1e;
+ case 0xfa06:
+ return 0x66b4;
+ case 0xfa07:
+ return 0x8f3b;
+ case 0xfa08:
+ return 0x884c;
+ case 0xfa09:
+ return 0x964d;
+ case 0xfa0a:
+ return 0x898b;
+ case 0xfa0b:
+ return 0x5ed3;
+ case 0xfa0c:
+ return 0x5140;
+ case 0xfa0d:
+ return 0x55c0;
+ case 0xfa10:
+ return 0x585a;
+ case 0xfa12:
+ return 0x6674;
+ case 0xfa15:
+ return 0x51de;
+ case 0xfa16:
+ return 0x732a;
+ case 0xfa17:
+ return 0x76ca;
+ case 0xfa18:
+ return 0x793c;
+ case 0xfa19:
+ return 0x795e;
+ case 0xfa1a:
+ return 0x7965;
+ case 0xfa1b:
+ return 0x798f;
+ case 0xfa1c:
+ return 0x9756;
+ case 0xfa1d:
+ return 0x7cbe;
+ case 0xfa1e:
+ return 0x7fbd;
+ case 0xfa20:
+ return 0x8612;
+ case 0xfa22:
+ return 0x8af8;
+ case 0xfa25:
+ return 0x9038;
+ case 0xfa26:
+ return 0x90fd;
+ case 0xfa2a:
+ return 0x98ef;
+ case 0xfa2b:
+ return 0x98fc;
+ case 0xfa2c:
+ return 0x9928;
+ case 0xfa2d:
+ return 0x9db4;
+ case 0xfa2e:
+ return 0x90de;
+ case 0xfa2f:
+ return 0x96b7;
+ case 0xfa30:
+ return 0x4fae;
+ case 0xfa31:
+ return 0x50e7;
+ case 0xfa32:
+ return 0x514d;
+ case 0xfa33:
+ return 0x52c9;
+ case 0xfa34:
+ return 0x52e4;
+ case 0xfa35:
+ return 0x5351;
+ case 0xfa36:
+ return 0x559d;
+ case 0xfa37:
+ return 0x5606;
+ case 0xfa38:
+ return 0x5668;
+ case 0xfa39:
+ return 0x5840;
+ case 0xfa3a:
+ return 0x58a8;
+ case 0xfa3b:
+ return 0x5c64;
+ case 0xfa3c:
+ return 0x5c6e;
+ case 0xfa3d:
+ return 0x6094;
+ case 0xfa3e:
+ return 0x6168;
+ case 0xfa3f:
+ return 0x618e;
+ case 0xfa40:
+ return 0x61f2;
+ case 0xfa41:
+ return 0x654f;
+ case 0xfa42:
+ return 0x65e2;
+ case 0xfa43:
+ return 0x6691;
+ case 0xfa44:
+ return 0x6885;
+ case 0xfa45:
+ return 0x6d77;
+ case 0xfa46:
+ return 0x6e1a;
+ case 0xfa47:
+ return 0x6f22;
+ case 0xfa48:
+ return 0x716e;
+ case 0xfa49:
+ return 0x722b;
+ case 0xfa4a:
+ return 0x7422;
+ case 0xfa4b:
+ return 0x7891;
+ case 0xfa4c:
+ return 0x793e;
+ case 0xfa4d:
+ return 0x7949;
+ case 0xfa4e:
+ return 0x7948;
+ case 0xfa4f:
+ return 0x7950;
+ case 0xfa50:
+ return 0x7956;
+ case 0xfa51:
+ return 0x795d;
+ case 0xfa52:
+ return 0x798d;
+ case 0xfa53:
+ return 0x798e;
+ case 0xfa54:
+ return 0x7a40;
+ case 0xfa55:
+ return 0x7a81;
+ case 0xfa56:
+ return 0x7bc0;
+ case 0xfa57:
+ return 0x7df4;
+ case 0xfa58:
+ return 0x7e09;
+ case 0xfa59:
+ return 0x7e41;
+ case 0xfa5a:
+ return 0x7f72;
+ case 0xfa5b:
+ return 0x8005;
+ case 0xfa5c:
+ return 0x81ed;
+ case 0xfa5d:
+ return 0x8279;
+ case 0xfa5e:
+ return 0x8279;
+ case 0xfa5f:
+ return 0x8457;
+ case 0xfa60:
+ return 0x8910;
+ case 0xfa61:
+ return 0x8996;
+ case 0xfa62:
+ return 0x8b01;
+ case 0xfa63:
+ return 0x8b39;
+ case 0xfa64:
+ return 0x8cd3;
+ case 0xfa65:
+ return 0x8d08;
+ case 0xfa66:
+ return 0x8fb6;
+ case 0xfa67:
+ return 0x9038;
+ case 0xfa68:
+ return 0x96e3;
+ case 0xfa69:
+ return 0x97ff;
+ case 0xfa6a:
+ return 0x983b;
+ case 0xfa6b:
+ return 0x6075;
+ case 0xfa6c:
+ return 0x242ee;
+ case 0xfa6d:
+ return 0x8218;
+ case 0xfa70:
+ return 0x4e26;
+ case 0xfa71:
+ return 0x51b5;
+ case 0xfa72:
+ return 0x5168;
+ case 0xfa73:
+ return 0x4f80;
+ case 0xfa74:
+ return 0x5145;
+ case 0xfa75:
+ return 0x5180;
+ case 0xfa76:
+ return 0x52c7;
+ case 0xfa77:
+ return 0x52fa;
+ case 0xfa78:
+ return 0x559d;
+ case 0xfa79:
+ return 0x5555;
+ case 0xfa7a:
+ return 0x5599;
+ case 0xfa7b:
+ return 0x55e2;
+ case 0xfa7c:
+ return 0x585a;
+ case 0xfa7d:
+ return 0x58b3;
+ case 0xfa7e:
+ return 0x5944;
+ case 0xfa7f:
+ return 0x5954;
+ case 0xfa80:
+ return 0x5a62;
+ case 0xfa81:
+ return 0x5b28;
+ case 0xfa82:
+ return 0x5ed2;
+ case 0xfa83:
+ return 0x5ed9;
+ case 0xfa84:
+ return 0x5f69;
+ case 0xfa85:
+ return 0x5fad;
+ case 0xfa86:
+ return 0x60d8;
+ case 0xfa87:
+ return 0x614e;
+ case 0xfa88:
+ return 0x6108;
+ case 0xfa89:
+ return 0x618e;
+ case 0xfa8a:
+ return 0x6160;
+ case 0xfa8b:
+ return 0x61f2;
+ case 0xfa8c:
+ return 0x6234;
+ case 0xfa8d:
+ return 0x63c4;
+ case 0xfa8e:
+ return 0x641c;
+ case 0xfa8f:
+ return 0x6452;
+ case 0xfa90:
+ return 0x6556;
+ case 0xfa91:
+ return 0x6674;
+ case 0xfa92:
+ return 0x6717;
+ case 0xfa93:
+ return 0x671b;
+ case 0xfa94:
+ return 0x6756;
+ case 0xfa95:
+ return 0x6b79;
+ case 0xfa96:
+ return 0x6bba;
+ case 0xfa97:
+ return 0x6d41;
+ case 0xfa98:
+ return 0x6edb;
+ case 0xfa99:
+ return 0x6ecb;
+ case 0xfa9a:
+ return 0x6f22;
+ case 0xfa9b:
+ return 0x701e;
+ case 0xfa9c:
+ return 0x716e;
+ case 0xfa9d:
+ return 0x77a7;
+ case 0xfa9e:
+ return 0x7235;
+ case 0xfa9f:
+ return 0x72af;
+ case 0xfaa0:
+ return 0x732a;
+ case 0xfaa1:
+ return 0x7471;
+ case 0xfaa2:
+ return 0x7506;
+ case 0xfaa3:
+ return 0x753b;
+ case 0xfaa4:
+ return 0x761d;
+ case 0xfaa5:
+ return 0x761f;
+ case 0xfaa6:
+ return 0x76ca;
+ case 0xfaa7:
+ return 0x76db;
+ case 0xfaa8:
+ return 0x76f4;
+ case 0xfaa9:
+ return 0x774a;
+ case 0xfaaa:
+ return 0x7740;
+ case 0xfaab:
+ return 0x78cc;
+ case 0xfaac:
+ return 0x7ab1;
+ case 0xfaad:
+ return 0x7bc0;
+ case 0xfaae:
+ return 0x7c7b;
+ case 0xfaaf:
+ return 0x7d5b;
+ case 0xfab0:
+ return 0x7df4;
+ case 0xfab1:
+ return 0x7f3e;
+ case 0xfab2:
+ return 0x8005;
+ case 0xfab3:
+ return 0x8352;
+ case 0xfab4:
+ return 0x83ef;
+ case 0xfab5:
+ return 0x8779;
+ case 0xfab6:
+ return 0x8941;
+ case 0xfab7:
+ return 0x8986;
+ case 0xfab8:
+ return 0x8996;
+ case 0xfab9:
+ return 0x8abf;
+ case 0xfaba:
+ return 0x8af8;
+ case 0xfabb:
+ return 0x8acb;
+ case 0xfabc:
+ return 0x8b01;
+ case 0xfabd:
+ return 0x8afe;
+ case 0xfabe:
+ return 0x8aed;
+ case 0xfabf:
+ return 0x8b39;
+ case 0xfac0:
+ return 0x8b8a;
+ case 0xfac1:
+ return 0x8d08;
+ case 0xfac2:
+ return 0x8f38;
+ case 0xfac3:
+ return 0x9072;
+ case 0xfac4:
+ return 0x9199;
+ case 0xfac5:
+ return 0x9276;
+ case 0xfac6:
+ return 0x967c;
+ case 0xfac7:
+ return 0x96e3;
+ case 0xfac8:
+ return 0x9756;
+ case 0xfac9:
+ return 0x97db;
+ case 0xfaca:
+ return 0x97ff;
+ case 0xfacb:
+ return 0x980b;
+ case 0xfacc:
+ return 0x983b;
+ case 0xfacd:
+ return 0x9b12;
+ case 0xface:
+ return 0x9f9c;
+ case 0xfacf:
+ return 0x2284a;
+ case 0xfad0:
+ return 0x22844;
+ case 0xfad1:
+ return 0x233d5;
+ case 0xfad2:
+ return 0x3b9d;
+ case 0xfad3:
+ return 0x4018;
+ case 0xfad4:
+ return 0x4039;
+ case 0xfad5:
+ return 0x25249;
+ case 0xfad6:
+ return 0x25cd0;
+ case 0xfad7:
+ return 0x27ed3;
+ case 0xfad8:
+ return 0x9f43;
+ case 0xfad9:
+ return 0x9f8e;
+ case 0xfb1d:
+ return 0x5d9;
+ case 0xfb1f:
+ return 0x5f2;
+ case 0xfb2a:
+ return 0x5e9;
+ case 0xfb2b:
+ return 0x5e9;
+ case 0xfb2c:
+ return 0x5e9;
+ case 0xfb2d:
+ return 0x5e9;
+ case 0xfb2e:
+ return 0x5d0;
+ case 0xfb2f:
+ return 0x5d0;
+ case 0xfb30:
+ return 0x5d0;
+ case 0xfb31:
+ return 0x5d1;
+ case 0xfb32:
+ return 0x5d2;
+ case 0xfb33:
+ return 0x5d3;
+ case 0xfb34:
+ return 0x5d4;
+ case 0xfb35:
+ return 0x5d5;
+ case 0xfb36:
+ return 0x5d6;
+ case 0xfb38:
+ return 0x5d8;
+ case 0xfb39:
+ return 0x5d9;
+ case 0xfb3a:
+ return 0x5da;
+ case 0xfb3b:
+ return 0x5db;
+ case 0xfb3c:
+ return 0x5dc;
+ case 0xfb3e:
+ return 0x5de;
+ case 0xfb40:
+ return 0x5e0;
+ case 0xfb41:
+ return 0x5e1;
+ case 0xfb43:
+ return 0x5e3;
+ case 0xfb44:
+ return 0x5e4;
+ case 0xfb46:
+ return 0x5e6;
+ case 0xfb47:
+ return 0x5e7;
+ case 0xfb48:
+ return 0x5e8;
+ case 0xfb49:
+ return 0x5e9;
+ case 0xfb4a:
+ return 0x5ea;
+ case 0xfb4b:
+ return 0x5d5;
+ case 0xfb4c:
+ return 0x5d1;
+ case 0xfb4d:
+ return 0x5db;
+ case 0xfb4e:
+ return 0x5e4;
+ case 0x1109a:
+ return 0x11099;
+ case 0x1109c:
+ return 0x1109b;
+ case 0x110ab:
+ return 0x110a5;
+ case 0x2f800:
+ return 0x4e3d;
+ case 0x2f801:
+ return 0x4e38;
+ case 0x2f802:
+ return 0x4e41;
+ case 0x2f803:
+ return 0x20122;
+ case 0x2f804:
+ return 0x4f60;
+ case 0x2f805:
+ return 0x4fae;
+ case 0x2f806:
+ return 0x4fbb;
+ case 0x2f807:
+ return 0x5002;
+ case 0x2f808:
+ return 0x507a;
+ case 0x2f809:
+ return 0x5099;
+ case 0x2f80a:
+ return 0x50e7;
+ case 0x2f80b:
+ return 0x50cf;
+ case 0x2f80c:
+ return 0x349e;
+ case 0x2f80d:
+ return 0x2063a;
+ case 0x2f80e:
+ return 0x514d;
+ case 0x2f80f:
+ return 0x5154;
+ case 0x2f810:
+ return 0x5164;
+ case 0x2f811:
+ return 0x5177;
+ case 0x2f812:
+ return 0x2051c;
+ case 0x2f813:
+ return 0x34b9;
+ case 0x2f814:
+ return 0x5167;
+ case 0x2f815:
+ return 0x518d;
+ case 0x2f816:
+ return 0x2054b;
+ case 0x2f817:
+ return 0x5197;
+ case 0x2f818:
+ return 0x51a4;
+ case 0x2f819:
+ return 0x4ecc;
+ case 0x2f81a:
+ return 0x51ac;
+ case 0x2f81b:
+ return 0x51b5;
+ case 0x2f81c:
+ return 0x291df;
+ case 0x2f81d:
+ return 0x51f5;
+ case 0x2f81e:
+ return 0x5203;
+ case 0x2f81f:
+ return 0x34df;
+ case 0x2f820:
+ return 0x523b;
+ case 0x2f821:
+ return 0x5246;
+ case 0x2f822:
+ return 0x5272;
+ case 0x2f823:
+ return 0x5277;
+ case 0x2f824:
+ return 0x3515;
+ case 0x2f825:
+ return 0x52c7;
+ case 0x2f826:
+ return 0x52c9;
+ case 0x2f827:
+ return 0x52e4;
+ case 0x2f828:
+ return 0x52fa;
+ case 0x2f829:
+ return 0x5305;
+ case 0x2f82a:
+ return 0x5306;
+ case 0x2f82b:
+ return 0x5317;
+ case 0x2f82c:
+ return 0x5349;
+ case 0x2f82d:
+ return 0x5351;
+ case 0x2f82e:
+ return 0x535a;
+ case 0x2f82f:
+ return 0x5373;
+ case 0x2f830:
+ return 0x537d;
+ case 0x2f831:
+ return 0x537f;
+ case 0x2f832:
+ return 0x537f;
+ case 0x2f833:
+ return 0x537f;
+ case 0x2f834:
+ return 0x20a2c;
+ case 0x2f835:
+ return 0x7070;
+ case 0x2f836:
+ return 0x53ca;
+ case 0x2f837:
+ return 0x53df;
+ case 0x2f838:
+ return 0x20b63;
+ case 0x2f839:
+ return 0x53eb;
+ case 0x2f83a:
+ return 0x53f1;
+ case 0x2f83b:
+ return 0x5406;
+ case 0x2f83c:
+ return 0x549e;
+ case 0x2f83d:
+ return 0x5438;
+ case 0x2f83e:
+ return 0x5448;
+ case 0x2f83f:
+ return 0x5468;
+ case 0x2f840:
+ return 0x54a2;
+ case 0x2f841:
+ return 0x54f6;
+ case 0x2f842:
+ return 0x5510;
+ case 0x2f843:
+ return 0x5553;
+ case 0x2f844:
+ return 0x5563;
+ case 0x2f845:
+ return 0x5584;
+ case 0x2f846:
+ return 0x5584;
+ case 0x2f847:
+ return 0x5599;
+ case 0x2f848:
+ return 0x55ab;
+ case 0x2f849:
+ return 0x55b3;
+ case 0x2f84a:
+ return 0x55c2;
+ case 0x2f84b:
+ return 0x5716;
+ case 0x2f84c:
+ return 0x5606;
+ case 0x2f84d:
+ return 0x5717;
+ case 0x2f84e:
+ return 0x5651;
+ case 0x2f84f:
+ return 0x5674;
+ case 0x2f850:
+ return 0x5207;
+ case 0x2f851:
+ return 0x58ee;
+ case 0x2f852:
+ return 0x57ce;
+ case 0x2f853:
+ return 0x57f4;
+ case 0x2f854:
+ return 0x580d;
+ case 0x2f855:
+ return 0x578b;
+ case 0x2f856:
+ return 0x5832;
+ case 0x2f857:
+ return 0x5831;
+ case 0x2f858:
+ return 0x58ac;
+ case 0x2f859:
+ return 0x214e4;
+ case 0x2f85a:
+ return 0x58f2;
+ case 0x2f85b:
+ return 0x58f7;
+ case 0x2f85c:
+ return 0x5906;
+ case 0x2f85d:
+ return 0x591a;
+ case 0x2f85e:
+ return 0x5922;
+ case 0x2f85f:
+ return 0x5962;
+ case 0x2f860:
+ return 0x216a8;
+ case 0x2f861:
+ return 0x216ea;
+ case 0x2f862:
+ return 0x59ec;
+ case 0x2f863:
+ return 0x5a1b;
+ case 0x2f864:
+ return 0x5a27;
+ case 0x2f865:
+ return 0x59d8;
+ case 0x2f866:
+ return 0x5a66;
+ case 0x2f867:
+ return 0x36ee;
+ case 0x2f868:
+ return 0x36fc;
+ case 0x2f869:
+ return 0x5b08;
+ case 0x2f86a:
+ return 0x5b3e;
+ case 0x2f86b:
+ return 0x5b3e;
+ case 0x2f86c:
+ return 0x219c8;
+ case 0x2f86d:
+ return 0x5bc3;
+ case 0x2f86e:
+ return 0x5bd8;
+ case 0x2f86f:
+ return 0x5be7;
+ case 0x2f870:
+ return 0x5bf3;
+ case 0x2f871:
+ return 0x21b18;
+ case 0x2f872:
+ return 0x5bff;
+ case 0x2f873:
+ return 0x5c06;
+ case 0x2f874:
+ return 0x5f53;
+ case 0x2f875:
+ return 0x5c22;
+ case 0x2f876:
+ return 0x3781;
+ case 0x2f877:
+ return 0x5c60;
+ case 0x2f878:
+ return 0x5c6e;
+ case 0x2f879:
+ return 0x5cc0;
+ case 0x2f87a:
+ return 0x5c8d;
+ case 0x2f87b:
+ return 0x21de4;
+ case 0x2f87c:
+ return 0x5d43;
+ case 0x2f87d:
+ return 0x21de6;
+ case 0x2f87e:
+ return 0x5d6e;
+ case 0x2f87f:
+ return 0x5d6b;
+ case 0x2f880:
+ return 0x5d7c;
+ case 0x2f881:
+ return 0x5de1;
+ case 0x2f882:
+ return 0x5de2;
+ case 0x2f883:
+ return 0x382f;
+ case 0x2f884:
+ return 0x5dfd;
+ case 0x2f885:
+ return 0x5e28;
+ case 0x2f886:
+ return 0x5e3d;
+ case 0x2f887:
+ return 0x5e69;
+ case 0x2f888:
+ return 0x3862;
+ case 0x2f889:
+ return 0x22183;
+ case 0x2f88a:
+ return 0x387c;
+ case 0x2f88b:
+ return 0x5eb0;
+ case 0x2f88c:
+ return 0x5eb3;
+ case 0x2f88d:
+ return 0x5eb6;
+ case 0x2f88e:
+ return 0x5eca;
+ case 0x2f88f:
+ return 0x2a392;
+ case 0x2f890:
+ return 0x5efe;
+ case 0x2f891:
+ return 0x22331;
+ case 0x2f892:
+ return 0x22331;
+ case 0x2f893:
+ return 0x8201;
+ case 0x2f894:
+ return 0x5f22;
+ case 0x2f895:
+ return 0x5f22;
+ case 0x2f896:
+ return 0x38c7;
+ case 0x2f897:
+ return 0x232b8;
+ case 0x2f898:
+ return 0x261da;
+ case 0x2f899:
+ return 0x5f62;
+ case 0x2f89a:
+ return 0x5f6b;
+ case 0x2f89b:
+ return 0x38e3;
+ case 0x2f89c:
+ return 0x5f9a;
+ case 0x2f89d:
+ return 0x5fcd;
+ case 0x2f89e:
+ return 0x5fd7;
+ case 0x2f89f:
+ return 0x5ff9;
+ case 0x2f8a0:
+ return 0x6081;
+ case 0x2f8a1:
+ return 0x393a;
+ case 0x2f8a2:
+ return 0x391c;
+ case 0x2f8a3:
+ return 0x6094;
+ case 0x2f8a4:
+ return 0x226d4;
+ case 0x2f8a5:
+ return 0x60c7;
+ case 0x2f8a6:
+ return 0x6148;
+ case 0x2f8a7:
+ return 0x614c;
+ case 0x2f8a8:
+ return 0x614e;
+ case 0x2f8a9:
+ return 0x614c;
+ case 0x2f8aa:
+ return 0x617a;
+ case 0x2f8ab:
+ return 0x618e;
+ case 0x2f8ac:
+ return 0x61b2;
+ case 0x2f8ad:
+ return 0x61a4;
+ case 0x2f8ae:
+ return 0x61af;
+ case 0x2f8af:
+ return 0x61de;
+ case 0x2f8b0:
+ return 0x61f2;
+ case 0x2f8b1:
+ return 0x61f6;
+ case 0x2f8b2:
+ return 0x6210;
+ case 0x2f8b3:
+ return 0x621b;
+ case 0x2f8b4:
+ return 0x625d;
+ case 0x2f8b5:
+ return 0x62b1;
+ case 0x2f8b6:
+ return 0x62d4;
+ case 0x2f8b7:
+ return 0x6350;
+ case 0x2f8b8:
+ return 0x22b0c;
+ case 0x2f8b9:
+ return 0x633d;
+ case 0x2f8ba:
+ return 0x62fc;
+ case 0x2f8bb:
+ return 0x6368;
+ case 0x2f8bc:
+ return 0x6383;
+ case 0x2f8bd:
+ return 0x63e4;
+ case 0x2f8be:
+ return 0x22bf1;
+ case 0x2f8bf:
+ return 0x6422;
+ case 0x2f8c0:
+ return 0x63c5;
+ case 0x2f8c1:
+ return 0x63a9;
+ case 0x2f8c2:
+ return 0x3a2e;
+ case 0x2f8c3:
+ return 0x6469;
+ case 0x2f8c4:
+ return 0x647e;
+ case 0x2f8c5:
+ return 0x649d;
+ case 0x2f8c6:
+ return 0x6477;
+ case 0x2f8c7:
+ return 0x3a6c;
+ case 0x2f8c8:
+ return 0x654f;
+ case 0x2f8c9:
+ return 0x656c;
+ case 0x2f8ca:
+ return 0x2300a;
+ case 0x2f8cb:
+ return 0x65e3;
+ case 0x2f8cc:
+ return 0x66f8;
+ case 0x2f8cd:
+ return 0x6649;
+ case 0x2f8ce:
+ return 0x3b19;
+ case 0x2f8cf:
+ return 0x6691;
+ case 0x2f8d0:
+ return 0x3b08;
+ case 0x2f8d1:
+ return 0x3ae4;
+ case 0x2f8d2:
+ return 0x5192;
+ case 0x2f8d3:
+ return 0x5195;
+ case 0x2f8d4:
+ return 0x6700;
+ case 0x2f8d5:
+ return 0x669c;
+ case 0x2f8d6:
+ return 0x80ad;
+ case 0x2f8d7:
+ return 0x43d9;
+ case 0x2f8d8:
+ return 0x6717;
+ case 0x2f8d9:
+ return 0x671b;
+ case 0x2f8da:
+ return 0x6721;
+ case 0x2f8db:
+ return 0x675e;
+ case 0x2f8dc:
+ return 0x6753;
+ case 0x2f8dd:
+ return 0x233c3;
+ case 0x2f8de:
+ return 0x3b49;
+ case 0x2f8df:
+ return 0x67fa;
+ case 0x2f8e0:
+ return 0x6785;
+ case 0x2f8e1:
+ return 0x6852;
+ case 0x2f8e2:
+ return 0x6885;
+ case 0x2f8e3:
+ return 0x2346d;
+ case 0x2f8e4:
+ return 0x688e;
+ case 0x2f8e5:
+ return 0x681f;
+ case 0x2f8e6:
+ return 0x6914;
+ case 0x2f8e7:
+ return 0x3b9d;
+ case 0x2f8e8:
+ return 0x6942;
+ case 0x2f8e9:
+ return 0x69a3;
+ case 0x2f8ea:
+ return 0x69ea;
+ case 0x2f8eb:
+ return 0x6aa8;
+ case 0x2f8ec:
+ return 0x236a3;
+ case 0x2f8ed:
+ return 0x6adb;
+ case 0x2f8ee:
+ return 0x3c18;
+ case 0x2f8ef:
+ return 0x6b21;
+ case 0x2f8f0:
+ return 0x238a7;
+ case 0x2f8f1:
+ return 0x6b54;
+ case 0x2f8f2:
+ return 0x3c4e;
+ case 0x2f8f3:
+ return 0x6b72;
+ case 0x2f8f4:
+ return 0x6b9f;
+ case 0x2f8f5:
+ return 0x6bba;
+ case 0x2f8f6:
+ return 0x6bbb;
+ case 0x2f8f7:
+ return 0x23a8d;
+ case 0x2f8f8:
+ return 0x21d0b;
+ case 0x2f8f9:
+ return 0x23afa;
+ case 0x2f8fa:
+ return 0x6c4e;
+ case 0x2f8fb:
+ return 0x23cbc;
+ case 0x2f8fc:
+ return 0x6cbf;
+ case 0x2f8fd:
+ return 0x6ccd;
+ case 0x2f8fe:
+ return 0x6c67;
+ case 0x2f8ff:
+ return 0x6d16;
+ case 0x2f900:
+ return 0x6d3e;
+ case 0x2f901:
+ return 0x6d77;
+ case 0x2f902:
+ return 0x6d41;
+ case 0x2f903:
+ return 0x6d69;
+ case 0x2f904:
+ return 0x6d78;
+ case 0x2f905:
+ return 0x6d85;
+ case 0x2f906:
+ return 0x23d1e;
+ case 0x2f907:
+ return 0x6d34;
+ case 0x2f908:
+ return 0x6e2f;
+ case 0x2f909:
+ return 0x6e6e;
+ case 0x2f90a:
+ return 0x3d33;
+ case 0x2f90b:
+ return 0x6ecb;
+ case 0x2f90c:
+ return 0x6ec7;
+ case 0x2f90d:
+ return 0x23ed1;
+ case 0x2f90e:
+ return 0x6df9;
+ case 0x2f90f:
+ return 0x6f6e;
+ case 0x2f910:
+ return 0x23f5e;
+ case 0x2f911:
+ return 0x23f8e;
+ case 0x2f912:
+ return 0x6fc6;
+ case 0x2f913:
+ return 0x7039;
+ case 0x2f914:
+ return 0x701e;
+ case 0x2f915:
+ return 0x701b;
+ case 0x2f916:
+ return 0x3d96;
+ case 0x2f917:
+ return 0x704a;
+ case 0x2f918:
+ return 0x707d;
+ case 0x2f919:
+ return 0x7077;
+ case 0x2f91a:
+ return 0x70ad;
+ case 0x2f91b:
+ return 0x20525;
+ case 0x2f91c:
+ return 0x7145;
+ case 0x2f91d:
+ return 0x24263;
+ case 0x2f91e:
+ return 0x719c;
+ case 0x2f91f:
+ return 0x243ab;
+ case 0x2f920:
+ return 0x7228;
+ case 0x2f921:
+ return 0x7235;
+ case 0x2f922:
+ return 0x7250;
+ case 0x2f923:
+ return 0x24608;
+ case 0x2f924:
+ return 0x7280;
+ case 0x2f925:
+ return 0x7295;
+ case 0x2f926:
+ return 0x24735;
+ case 0x2f927:
+ return 0x24814;
+ case 0x2f928:
+ return 0x737a;
+ case 0x2f929:
+ return 0x738b;
+ case 0x2f92a:
+ return 0x3eac;
+ case 0x2f92b:
+ return 0x73a5;
+ case 0x2f92c:
+ return 0x3eb8;
+ case 0x2f92d:
+ return 0x3eb8;
+ case 0x2f92e:
+ return 0x7447;
+ case 0x2f92f:
+ return 0x745c;
+ case 0x2f930:
+ return 0x7471;
+ case 0x2f931:
+ return 0x7485;
+ case 0x2f932:
+ return 0x74ca;
+ case 0x2f933:
+ return 0x3f1b;
+ case 0x2f934:
+ return 0x7524;
+ case 0x2f935:
+ return 0x24c36;
+ case 0x2f936:
+ return 0x753e;
+ case 0x2f937:
+ return 0x24c92;
+ case 0x2f938:
+ return 0x7570;
+ case 0x2f939:
+ return 0x2219f;
+ case 0x2f93a:
+ return 0x7610;
+ case 0x2f93b:
+ return 0x24fa1;
+ case 0x2f93c:
+ return 0x24fb8;
+ case 0x2f93d:
+ return 0x25044;
+ case 0x2f93e:
+ return 0x3ffc;
+ case 0x2f93f:
+ return 0x4008;
+ case 0x2f940:
+ return 0x76f4;
+ case 0x2f941:
+ return 0x250f3;
+ case 0x2f942:
+ return 0x250f2;
+ case 0x2f943:
+ return 0x25119;
+ case 0x2f944:
+ return 0x25133;
+ case 0x2f945:
+ return 0x771e;
+ case 0x2f946:
+ return 0x771f;
+ case 0x2f947:
+ return 0x771f;
+ case 0x2f948:
+ return 0x774a;
+ case 0x2f949:
+ return 0x4039;
+ case 0x2f94a:
+ return 0x778b;
+ case 0x2f94b:
+ return 0x4046;
+ case 0x2f94c:
+ return 0x4096;
+ case 0x2f94d:
+ return 0x2541d;
+ case 0x2f94e:
+ return 0x784e;
+ case 0x2f94f:
+ return 0x788c;
+ case 0x2f950:
+ return 0x78cc;
+ case 0x2f951:
+ return 0x40e3;
+ case 0x2f952:
+ return 0x25626;
+ case 0x2f953:
+ return 0x7956;
+ case 0x2f954:
+ return 0x2569a;
+ case 0x2f955:
+ return 0x256c5;
+ case 0x2f956:
+ return 0x798f;
+ case 0x2f957:
+ return 0x79eb;
+ case 0x2f958:
+ return 0x412f;
+ case 0x2f959:
+ return 0x7a40;
+ case 0x2f95a:
+ return 0x7a4a;
+ case 0x2f95b:
+ return 0x7a4f;
+ case 0x2f95c:
+ return 0x2597c;
+ case 0x2f95d:
+ return 0x25aa7;
+ case 0x2f95e:
+ return 0x25aa7;
+ case 0x2f95f:
+ return 0x7aee;
+ case 0x2f960:
+ return 0x4202;
+ case 0x2f961:
+ return 0x25bab;
+ case 0x2f962:
+ return 0x7bc6;
+ case 0x2f963:
+ return 0x7bc9;
+ case 0x2f964:
+ return 0x4227;
+ case 0x2f965:
+ return 0x25c80;
+ case 0x2f966:
+ return 0x7cd2;
+ case 0x2f967:
+ return 0x42a0;
+ case 0x2f968:
+ return 0x7ce8;
+ case 0x2f969:
+ return 0x7ce3;
+ case 0x2f96a:
+ return 0x7d00;
+ case 0x2f96b:
+ return 0x25f86;
+ case 0x2f96c:
+ return 0x7d63;
+ case 0x2f96d:
+ return 0x4301;
+ case 0x2f96e:
+ return 0x7dc7;
+ case 0x2f96f:
+ return 0x7e02;
+ case 0x2f970:
+ return 0x7e45;
+ case 0x2f971:
+ return 0x4334;
+ case 0x2f972:
+ return 0x26228;
+ case 0x2f973:
+ return 0x26247;
+ case 0x2f974:
+ return 0x4359;
+ case 0x2f975:
+ return 0x262d9;
+ case 0x2f976:
+ return 0x7f7a;
+ case 0x2f977:
+ return 0x2633e;
+ case 0x2f978:
+ return 0x7f95;
+ case 0x2f979:
+ return 0x7ffa;
+ case 0x2f97a:
+ return 0x8005;
+ case 0x2f97b:
+ return 0x264da;
+ case 0x2f97c:
+ return 0x26523;
+ case 0x2f97d:
+ return 0x8060;
+ case 0x2f97e:
+ return 0x265a8;
+ case 0x2f97f:
+ return 0x8070;
+ case 0x2f980:
+ return 0x2335f;
+ case 0x2f981:
+ return 0x43d5;
+ case 0x2f982:
+ return 0x80b2;
+ case 0x2f983:
+ return 0x8103;
+ case 0x2f984:
+ return 0x440b;
+ case 0x2f985:
+ return 0x813e;
+ case 0x2f986:
+ return 0x5ab5;
+ case 0x2f987:
+ return 0x267a7;
+ case 0x2f988:
+ return 0x267b5;
+ case 0x2f989:
+ return 0x23393;
+ case 0x2f98a:
+ return 0x2339c;
+ case 0x2f98b:
+ return 0x8201;
+ case 0x2f98c:
+ return 0x8204;
+ case 0x2f98d:
+ return 0x8f9e;
+ case 0x2f98e:
+ return 0x446b;
+ case 0x2f98f:
+ return 0x8291;
+ case 0x2f990:
+ return 0x828b;
+ case 0x2f991:
+ return 0x829d;
+ case 0x2f992:
+ return 0x52b3;
+ case 0x2f993:
+ return 0x82b1;
+ case 0x2f994:
+ return 0x82b3;
+ case 0x2f995:
+ return 0x82bd;
+ case 0x2f996:
+ return 0x82e6;
+ case 0x2f997:
+ return 0x26b3c;
+ case 0x2f998:
+ return 0x82e5;
+ case 0x2f999:
+ return 0x831d;
+ case 0x2f99a:
+ return 0x8363;
+ case 0x2f99b:
+ return 0x83ad;
+ case 0x2f99c:
+ return 0x8323;
+ case 0x2f99d:
+ return 0x83bd;
+ case 0x2f99e:
+ return 0x83e7;
+ case 0x2f99f:
+ return 0x8457;
+ case 0x2f9a0:
+ return 0x8353;
+ case 0x2f9a1:
+ return 0x83ca;
+ case 0x2f9a2:
+ return 0x83cc;
+ case 0x2f9a3:
+ return 0x83dc;
+ case 0x2f9a4:
+ return 0x26c36;
+ case 0x2f9a5:
+ return 0x26d6b;
+ case 0x2f9a6:
+ return 0x26cd5;
+ case 0x2f9a7:
+ return 0x452b;
+ case 0x2f9a8:
+ return 0x84f1;
+ case 0x2f9a9:
+ return 0x84f3;
+ case 0x2f9aa:
+ return 0x8516;
+ case 0x2f9ab:
+ return 0x273ca;
+ case 0x2f9ac:
+ return 0x8564;
+ case 0x2f9ad:
+ return 0x26f2c;
+ case 0x2f9ae:
+ return 0x455d;
+ case 0x2f9af:
+ return 0x4561;
+ case 0x2f9b0:
+ return 0x26fb1;
+ case 0x2f9b1:
+ return 0x270d2;
+ case 0x2f9b2:
+ return 0x456b;
+ case 0x2f9b3:
+ return 0x8650;
+ case 0x2f9b4:
+ return 0x865c;
+ case 0x2f9b5:
+ return 0x8667;
+ case 0x2f9b6:
+ return 0x8669;
+ case 0x2f9b7:
+ return 0x86a9;
+ case 0x2f9b8:
+ return 0x8688;
+ case 0x2f9b9:
+ return 0x870e;
+ case 0x2f9ba:
+ return 0x86e2;
+ case 0x2f9bb:
+ return 0x8779;
+ case 0x2f9bc:
+ return 0x8728;
+ case 0x2f9bd:
+ return 0x876b;
+ case 0x2f9be:
+ return 0x8786;
+ case 0x2f9bf:
+ return 0x45d7;
+ case 0x2f9c0:
+ return 0x87e1;
+ case 0x2f9c1:
+ return 0x8801;
+ case 0x2f9c2:
+ return 0x45f9;
+ case 0x2f9c3:
+ return 0x8860;
+ case 0x2f9c4:
+ return 0x8863;
+ case 0x2f9c5:
+ return 0x27667;
+ case 0x2f9c6:
+ return 0x88d7;
+ case 0x2f9c7:
+ return 0x88de;
+ case 0x2f9c8:
+ return 0x4635;
+ case 0x2f9c9:
+ return 0x88fa;
+ case 0x2f9ca:
+ return 0x34bb;
+ case 0x2f9cb:
+ return 0x278ae;
+ case 0x2f9cc:
+ return 0x27966;
+ case 0x2f9cd:
+ return 0x46be;
+ case 0x2f9ce:
+ return 0x46c7;
+ case 0x2f9cf:
+ return 0x8aa0;
+ case 0x2f9d0:
+ return 0x8aed;
+ case 0x2f9d1:
+ return 0x8b8a;
+ case 0x2f9d2:
+ return 0x8c55;
+ case 0x2f9d3:
+ return 0x27ca8;
+ case 0x2f9d4:
+ return 0x8cab;
+ case 0x2f9d5:
+ return 0x8cc1;
+ case 0x2f9d6:
+ return 0x8d1b;
+ case 0x2f9d7:
+ return 0x8d77;
+ case 0x2f9d8:
+ return 0x27f2f;
+ case 0x2f9d9:
+ return 0x20804;
+ case 0x2f9da:
+ return 0x8dcb;
+ case 0x2f9db:
+ return 0x8dbc;
+ case 0x2f9dc:
+ return 0x8df0;
+ case 0x2f9dd:
+ return 0x208de;
+ case 0x2f9de:
+ return 0x8ed4;
+ case 0x2f9df:
+ return 0x8f38;
+ case 0x2f9e0:
+ return 0x285d2;
+ case 0x2f9e1:
+ return 0x285ed;
+ case 0x2f9e2:
+ return 0x9094;
+ case 0x2f9e3:
+ return 0x90f1;
+ case 0x2f9e4:
+ return 0x9111;
+ case 0x2f9e5:
+ return 0x2872e;
+ case 0x2f9e6:
+ return 0x911b;
+ case 0x2f9e7:
+ return 0x9238;
+ case 0x2f9e8:
+ return 0x92d7;
+ case 0x2f9e9:
+ return 0x92d8;
+ case 0x2f9ea:
+ return 0x927c;
+ case 0x2f9eb:
+ return 0x93f9;
+ case 0x2f9ec:
+ return 0x9415;
+ case 0x2f9ed:
+ return 0x28bfa;
+ case 0x2f9ee:
+ return 0x958b;
+ case 0x2f9ef:
+ return 0x4995;
+ case 0x2f9f0:
+ return 0x95b7;
+ case 0x2f9f1:
+ return 0x28d77;
+ case 0x2f9f2:
+ return 0x49e6;
+ case 0x2f9f3:
+ return 0x96c3;
+ case 0x2f9f4:
+ return 0x5db2;
+ case 0x2f9f5:
+ return 0x9723;
+ case 0x2f9f6:
+ return 0x29145;
+ case 0x2f9f7:
+ return 0x2921a;
+ case 0x2f9f8:
+ return 0x4a6e;
+ case 0x2f9f9:
+ return 0x4a76;
+ case 0x2f9fa:
+ return 0x97e0;
+ case 0x2f9fb:
+ return 0x2940a;
+ case 0x2f9fc:
+ return 0x4ab2;
+ case 0x2f9fd:
+ return 0x29496;
+ case 0x2f9fe:
+ return 0x980b;
+ case 0x2f9ff:
+ return 0x980b;
+ case 0x2fa00:
+ return 0x9829;
+ case 0x2fa01:
+ return 0x295b6;
+ case 0x2fa02:
+ return 0x98e2;
+ case 0x2fa03:
+ return 0x4b33;
+ case 0x2fa04:
+ return 0x9929;
+ case 0x2fa05:
+ return 0x99a7;
+ case 0x2fa06:
+ return 0x99c2;
+ case 0x2fa07:
+ return 0x99fe;
+ case 0x2fa08:
+ return 0x4bce;
+ case 0x2fa09:
+ return 0x29b30;
+ case 0x2fa0a:
+ return 0x9b12;
+ case 0x2fa0b:
+ return 0x9c40;
+ case 0x2fa0c:
+ return 0x9cfd;
+ case 0x2fa0d:
+ return 0x4cce;
+ case 0x2fa0e:
+ return 0x4ced;
+ case 0x2fa0f:
+ return 0x9d67;
+ case 0x2fa10:
+ return 0x2a0ce;
+ case 0x2fa11:
+ return 0x4cf8;
+ case 0x2fa12:
+ return 0x2a105;
+ case 0x2fa13:
+ return 0x2a20e;
+ case 0x2fa14:
+ return 0x2a291;
+ case 0x2fa15:
+ return 0x9ebb;
+ case 0x2fa16:
+ return 0x4d56;
+ case 0x2fa17:
+ return 0x9ef9;
+ case 0x2fa18:
+ return 0x9efe;
+ case 0x2fa19:
+ return 0x9f05;
+ case 0x2fa1a:
+ return 0x9f0f;
+ case 0x2fa1b:
+ return 0x9f16;
+ case 0x2fa1c:
+ return 0x9f3b;
+ case 0x2fa1d:
+ return 0x2a600;
+ default:
+ return codepoint;
+ }
+}
+} // namespace unicode
+} // namespace mongo
diff --git a/src/mongo/db/fts/unicode/codepoints_test.cpp b/src/mongo/db/fts/unicode/codepoints_test.cpp
new file mode 100644
index 00000000000..90510666cba
--- /dev/null
+++ b/src/mongo/db/fts/unicode/codepoints_test.cpp
@@ -0,0 +1,94 @@
+/**
+ * Copyright (C) 2015 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/db/fts/unicode/codepoints.h"
+#include "mongo/unittest/unittest.h"
+
+namespace mongo {
+namespace unicode {
+
+/**
+ * Above most of the arrays in this file are the UTF-32 character literals that correspond to the
+ * codepoints in the array.
+ */
+
+TEST(UnicodeCodepoints, Diacritics) {
+    // Combining marks have no character literals, so they are listed by value.
+    const char32_t combiningMarks[] = {0x0301, 0x0339, 0x1AB4, 0x1DC5, 0xA69D};
+
+    // Equivalent literals: U'-', U'.', U'\'', U'*', U'm'
+    const char32_t nonMarks[] = {0x2D, 0x2E, 0x27, 0x2A, 0x6D};
+
+    for (const char32_t mark : combiningMarks) {
+        ASSERT(codepointIsDiacritic(mark));
+    }
+
+    for (const char32_t nonMark : nonMarks) {
+        ASSERT_FALSE(codepointIsDiacritic(nonMark));
+    }
+}
+
+TEST(UnicodeCodepoints, Delimiters) {
+    // Equivalent literals: U'-', U'.', U'"', U'¿', U'«'
+    const char32_t delimiters[] = {0x2D, 0x2E, 0x22, 0xBF, 0xAB};
+    // Equivalent literals: U'a', U'ê', U'π', U'Ω', U'å'
+    const char32_t nonDelimiters[] = {0x61, 0xEA, 0x3C0, 0x3A9, 0xE5};
+
+    for (const char32_t cp : delimiters) {
+        ASSERT(codepointIsDelimiter(cp, DelimiterListLanguage::kEnglish));
+        ASSERT(codepointIsDelimiter(cp, DelimiterListLanguage::kNotEnglish));
+    }
+
+    for (const char32_t cp : nonDelimiters) {
+        ASSERT_FALSE(codepointIsDelimiter(cp, DelimiterListLanguage::kEnglish));
+        ASSERT_FALSE(codepointIsDelimiter(cp, DelimiterListLanguage::kNotEnglish));
+    }
+
+    // The apostrophe (0x27) is expected to be a delimiter for kNotEnglish but not for kEnglish.
+    ASSERT(codepointIsDelimiter(0x27, DelimiterListLanguage::kNotEnglish));
+    ASSERT_FALSE(codepointIsDelimiter(0x27, DelimiterListLanguage::kEnglish));
+}
+
+TEST(UnicodeCodepoints, RemoveDiacritics) {
+    // Each entry pairs an accented codepoint with the codepoint expected after stripping:
+    // U'á' -> U'a', U'ê' -> U'e', U'ñ' -> U'n', U'å' -> U'a', U'ç' -> U'c'
+    const struct {
+        char32_t original;
+        char32_t clean;
+    } cases[] = {{0xE1, 0x61}, {0xEA, 0x65}, {0xF1, 0x6E}, {0xE5, 0x61}, {0xE7, 0x63}};
+
+    for (const auto& testCase : cases) {
+        ASSERT_EQUALS(testCase.clean, codepointRemoveDiacritics(testCase.original));
+    }
+}
+
+TEST(UnicodeCodepoints, ToLower) {
+    // Each entry pairs an uppercase codepoint with its expected lowercase form:
+    // U'Á' -> U'á', U'Ê' -> U'ê', U'Ñ' -> U'ñ', U'Å' -> U'å', U'Ç' -> U'ç'
+    const struct {
+        char32_t upper;
+        char32_t lower;
+    } cases[] = {{0xC1, 0xE1}, {0xCA, 0xEA}, {0xD1, 0xF1}, {0xC5, 0xE5}, {0xC7, 0xE7}};
+
+    for (const auto& testCase : cases) {
+        ASSERT_EQUALS(testCase.lower, codepointToLower(testCase.upper));
+    }
+}
+
+} // namespace unicode
+} // namespace mongo
diff --git a/src/mongo/db/fts/unicode/gen_casefold_map.py b/src/mongo/db/fts/unicode/gen_casefold_map.py
new file mode 100644
index 00000000000..fc55cdd57c7
--- /dev/null
+++ b/src/mongo/db/fts/unicode/gen_casefold_map.py
@@ -0,0 +1,76 @@
+ #!/usr/bin/python
+ # -*- coding: utf-8 -*-
+import os
+import sys
+
+from gen_helper import getCopyrightNotice, openNamespaces, closeNamespaces, \
+ include
+
+def generate(unicode_casefold_file, target):
+    """Generate a C++ source file that defines the Unicode case folding
+    function.
+
+    The emitted codepointToLower() starts with a special case for Turkish and
+    then contains a switch statement with one case for every codepoint that
+    has a "Common" (C) or "Simple" (S) mapping in unicode_casefold_file.
+
+    unicode_casefold_file -- path of the Unicode case folding data file.
+    target -- path of the C++ source file to write.
+    """
+    case_mappings = {}
+
+    # Parse the input before opening the output, so that a malformed data file
+    # does not leave a truncated target behind.
+    with open(unicode_casefold_file, 'r') as cf_file:
+        for line in cf_file:
+            # Drop any trailing '#' comment, then skip lines with no data.
+            data = line.partition('#')[0]
+            if not data.strip():
+                continue
+
+            # Data lines are expected to hold four ';'-separated fields:
+            # codepoint, status, mapping, and a trailing remainder.
+            values = [value.strip() for value in data.split(';')]
+            assert len(values) == 4
+
+            # We only include the "Common" and "Simple" mappings. "Full" case
+            # folding mappings expand certain letters to multiple codepoints,
+            # which we currently do not support.
+            if values[1] in ('C', 'S'):
+                case_mappings[int(values[0], 16)] = int(values[2], 16)
+
+    with open(target, "w") as out:
+        out.write(getCopyrightNotice())
+        out.write(include("mongo/db/fts/unicode/codepoints.h"))
+        out.write("\n")
+        out.write(openNamespaces())
+
+        out.write("""char32_t codepointToLower(char32_t codepoint, CaseFoldMode \
+mode) {
+    if (mode == CaseFoldMode::kTurkish) {
+        if (codepoint == 0x049) {  // I -> ı
+            return 0x131;
+        } else if (codepoint == 0x130) {  // İ -> i
+            return 0x069;
+        }
+    }
+
+    switch (codepoint) {\n""")
+
+        # Emit the cases in ascending codepoint order so the output is stable.
+        for original, folded in sorted(case_mappings.items()):
+            out.write("        case " + hex(original) + ": return " +
+                      hex(folded) + ";\n")
+
+        out.write("        default: return codepoint;\n    }\n}")
+
+        out.write(closeNamespaces())
+
+# Command-line entry point: argv[1] is the Unicode case folding file to read,
+# argv[2] is the C++ source file to write.
+if __name__ == "__main__":
+ generate(sys.argv[1], sys.argv[2])
diff --git a/src/mongo/db/fts/unicode/gen_delimiter_list.py b/src/mongo/db/fts/unicode/gen_delimiter_list.py
new file mode 100644
index 00000000000..52b79544c6b
--- /dev/null
+++ b/src/mongo/db/fts/unicode/gen_delimiter_list.py
@@ -0,0 +1,80 @@
+ #!/usr/bin/python
+ # -*- coding: utf-8 -*-
+import sys
+
+from gen_helper import getCopyrightNotice, openNamespaces, closeNamespaces, \
+ include
+
+def generate(unicode_proplist_file, target):
+    """Generate a C++ source file that defines the delimiter checking function.
+
+    The emitted codepointIsDelimiter() contains a switch statement with one
+    case for every codepoint listed in unicode_proplist_file under one of the
+    properties in delim_properties. The apostrophe is special-cased so that it
+    is not reported as a delimiter for English.
+
+    unicode_proplist_file -- path of the Unicode property list data file.
+    target -- path of the C++ source file to write.
+    """
+    delim_properties = frozenset(["White_Space",
+                                  "Dash",
+                                  "Hyphen",
+                                  "Quotation_Mark",
+                                  "Terminal_Punctuation",
+                                  "Pattern_Syntax",
+                                  "STerm"])
+
+    delim_codepoints = set()
+
+    # Parse the input before opening the output, so that a malformed data file
+    # does not leave a truncated target behind.
+    with open(unicode_proplist_file, 'r') as proplist_file:
+        for line in proplist_file:
+            # Drop any trailing '#' comment, then skip lines with no data.
+            data = line.partition('#')[0]
+            if not data.strip():
+                continue
+
+            # Data lines hold two ';'-separated fields: codepoint(s), property.
+            values = [value.strip() for value in data.split(';')]
+            assert len(values) == 2
+
+            if values[1] not in delim_properties:
+                continue
+
+            # The first field is either "START..END" (inclusive) or a single
+            # codepoint, both in hexadecimal.
+            bounds = values[0].split('..')
+            if len(bounds) == 2:
+                delim_codepoints.update(
+                    range(int(bounds[0], 16), int(bounds[1], 16) + 1))
+            else:
+                delim_codepoints.add(int(values[0], 16))
+
+    # As of Unicode 8.0.0, all of the delimiters we used for text index
+    # version 2 are also in the list.
+
+    with open(target, "w") as out:
+        out.write(getCopyrightNotice())
+        out.write(include("mongo/db/fts/unicode/codepoints.h"))
+        out.write("\n")
+        out.write(openNamespaces())
+
+        out.write("""bool codepointIsDelimiter(char32_t codepoint, \
+DelimiterListLanguage lang) {
+    if (lang == DelimiterListLanguage::kEnglish && codepoint == '\\'') {
+        return false;
+    }
+
+    switch (codepoint) {\n""")
+
+        for delim in sorted(delim_codepoints):
+            out.write("        case " + hex(delim) + ": return true;\n")
+
+        out.write("        default: return false;\n    }\n}")
+
+        out.write(closeNamespaces())
+
+# Command-line entry point: argv[1] is the Unicode property list file to read,
+# argv[2] is the C++ source file to write.
+if __name__ == "__main__":
+ generate(sys.argv[1], sys.argv[2])
diff --git a/src/mongo/db/fts/unicode/gen_diacritic_list.py b/src/mongo/db/fts/unicode/gen_diacritic_list.py
new file mode 100644
index 00000000000..260a85307af
--- /dev/null
+++ b/src/mongo/db/fts/unicode/gen_diacritic_list.py
@@ -0,0 +1,63 @@
+ #!/usr/bin/python
+ # -*- coding: utf-8 -*-
+import sys
+
+from gen_helper import getCopyrightNotice, openNamespaces, closeNamespaces, \
+ include
+
+def generate(unicode_proplist_file, target):
+    """Generate a C++ source file that defines the diacritic checking function.
+
+    The emitted codepointIsDiacritic() contains a switch statement with one
+    case for every codepoint listed in unicode_proplist_file under the
+    "Diacritic" property.
+
+    unicode_proplist_file -- path of the Unicode property list data file.
+    target -- path of the C++ source file to write.
+    """
+    diacritics = set()
+
+    # Parse the input before opening the output, so that a malformed data file
+    # does not leave a truncated target behind.
+    with open(unicode_proplist_file, 'r') as proplist_file:
+        for line in proplist_file:
+            # Drop any trailing '#' comment, then skip lines with no data.
+            data = line.partition('#')[0]
+            if not data.strip():
+                continue
+
+            # Data lines hold two ';'-separated fields: codepoint(s), property.
+            values = [value.strip() for value in data.split(';')]
+            assert len(values) == 2
+
+            # Compare for equality: a substring test (x in "Diacritic") would
+            # also accept an empty or partial property name.
+            if values[1] != "Diacritic":
+                continue
+
+            # The first field is either "START..END" (inclusive) or a single
+            # codepoint, both in hexadecimal.
+            bounds = values[0].split('..')
+            if len(bounds) == 2:
+                diacritics.update(
+                    range(int(bounds[0], 16), int(bounds[1], 16) + 1))
+            else:
+                diacritics.add(int(values[0], 16))
+
+    with open(target, "w") as out:
+        out.write(getCopyrightNotice())
+        out.write(include("mongo/db/fts/unicode/codepoints.h"))
+        out.write("\n")
+        out.write(openNamespaces())
+
+        out.write("""bool codepointIsDiacritic(char32_t codepoint) {
+    switch (codepoint) {\n""")
+
+        for diacritic in sorted(diacritics):
+            out.write("        case " + hex(diacritic) + ": return true;\n")
+
+        out.write("        default: return false;\n    }\n}")
+
+        out.write(closeNamespaces())
+
+# Command-line entry point: argv[1] is the Unicode property list file to read,
+# argv[2] is the C++ source file to write.
+if __name__ == "__main__":
+ generate(sys.argv[1], sys.argv[2])
diff --git a/src/mongo/db/fts/unicode/gen_diacritic_map.py b/src/mongo/db/fts/unicode/gen_diacritic_map.py
new file mode 100644
index 00000000000..d002a1acbac
--- /dev/null
+++ b/src/mongo/db/fts/unicode/gen_diacritic_map.py
@@ -0,0 +1,105 @@
+ #!/usr/bin/python
+ # -*- coding: utf-8 -*-
+import sys
+from unicodedata import normalize, category, unidata_version
+
+from gen_helper import getCopyrightNotice, openNamespaces, closeNamespaces, \
+ include
+
+# Codepoints that carry the "Diacritic" property; filled in by
+# load_diacritics() and consulted by add_diacritic_mapping().
+diacritics = set()
+
+def load_diacritics(unicode_proplist_file):
+    """Add every codepoint with the "Diacritic" property to the module-level
+    diacritics set.
+
+    unicode_proplist_file -- path of the Unicode property list data file.
+    """
+    with open(unicode_proplist_file, 'r') as proplist_file:
+        for line in proplist_file:
+            # Drop any trailing '#' comment, then skip lines with no data.
+            data = line.partition('#')[0]
+            if not data.strip():
+                continue
+
+            # Data lines hold two ';'-separated fields: codepoint(s), property.
+            values = [value.strip() for value in data.split(';')]
+            assert len(values) == 2
+
+            if values[1] != "Diacritic":
+                continue
+
+            # The first field is either "START..END" (inclusive) or a single
+            # codepoint, both in hexadecimal.
+            bounds = values[0].split('..')
+            if len(bounds) == 2:
+                diacritics.update(
+                    range(int(bounds[0], 16), int(bounds[1], 16) + 1))
+            else:
+                diacritics.add(int(values[0], 16))
+
+# Maps a codepoint to the single codepoint left after its diacritics are
+# removed; filled in by add_diacritic_mapping().
+diacritic_mappings = {}
+
+def add_diacritic_mapping(codepoint):
+    """Record the diacritic-free form of codepoint in diacritic_mappings.
+
+    The character is decomposed (NFD), every codepoint found in the
+    module-level diacritics set is dropped, and the remainder is recomposed
+    (NFC). A mapping is stored only when the result differs from the original
+    character and is exactly one codepoint long.
+    """
+    original = chr(codepoint)
+    decomposed = normalize('NFD', original)
+    stripped = u''.join(ch for ch in decomposed if ord(ch) not in diacritics)
+    recomposed = normalize('NFC', stripped)
+
+    # Skip characters that are unchanged or whose stripped form is not a
+    # single codepoint.
+    if original == recomposed or len(recomposed) != 1:
+        return
+
+    diacritic_mappings[codepoint] = ord(recomposed)
+
+def add_diacritic_range(start, end):
+    """Call add_diacritic_mapping() for each codepoint from start to end,
+    inclusive.
+    """
+    past_end = end + 1
+    for codepoint in range(start, past_end):
+        add_diacritic_mapping(codepoint)
+
+def generate(target):
+    """Generate a C++ source file that defines the diacritic removal mapping
+    function.
+
+    The emitted codepointRemoveDiacritics() contains a switch statement with
+    one case for every Unicode character whose diacritics can be removed to
+    leave a single codepoint. The mappings are computed with the module-level
+    diacritics set, so load_diacritics() must have been called first.
+
+    target -- path of the C++ source file to write.
+    """
+    # Map diacritics from 0 to the maximum Unicode codepoint
+    add_diacritic_range(0x0000, 0x10FFFF)
+
+    with open(target, "w") as out:
+        out.write(getCopyrightNotice())
+        out.write(include("mongo/db/fts/unicode/codepoints.h"))
+        out.write("\n")
+        out.write(openNamespaces())
+
+        out.write("""char32_t codepointRemoveDiacritics(char32_t codepoint) {
+    switch (codepoint) {\n""")
+
+        # Emit the cases in ascending codepoint order so the output is stable.
+        for original, stripped in sorted(diacritic_mappings.items()):
+            out.write("        case " + hex(original) + ": return " +
+                      hex(stripped) + ";\n")
+
+        out.write("        default: return codepoint;\n    }\n}")
+
+        out.write(closeNamespaces())
+
+# Command-line entry point: argv[1] is the Unicode property list file to read,
+# argv[2] is the C++ source file to write.
+if __name__ == "__main__":
+    if unidata_version != '8.0.0':
+        # sys.exit() with a string argument prints it to stderr and exits with
+        # status 1, keeping the error out of stdout.
+        sys.exit("ERROR: This script must be run with a version of Python "
+                 "that contains the Unicode 8.0.0 Character Database.")
+    load_diacritics(sys.argv[1])
+    generate(sys.argv[2])
diff --git a/src/mongo/db/fts/unicode/gen_helper.py b/src/mongo/db/fts/unicode/gen_helper.py
new file mode 100644
index 00000000000..d3698e7894e
--- /dev/null
+++ b/src/mongo/db/fts/unicode/gen_helper.py
@@ -0,0 +1,39 @@
+def getCopyrightNotice():
+ """Return the C++ license header comment written at the top of each
+ generated file, including the "generated file" warning and two trailing
+ newlines.
+ """
+ return """/**
+ * Copyright (C) 2015 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ *
+ * THIS IS A GENERATED FILE, DO NOT MODIFY.
+ */\n\n"""
+
+def openNamespaces():
+    """Return the C++ text that opens the mongo and unicode namespaces,
+    followed by a blank line.
+    """
+    opening_lines = ["namespace mongo {", "namespace unicode {", "", ""]
+    return "\n".join(opening_lines)
+
+def closeNamespaces():
+    """Return the C++ text that closes the unicode and mongo namespaces,
+    preceded by a newline.
+    """
+    closing_lines = ["", "} // namespace unicode", "} // namespace mongo", ""]
+    return "\n".join(closing_lines)
+
+def include(header):
+    """Return a C++ #include line, ending in a newline, that names header in
+    double quotes.
+    """
+    return "".join(('#include "', header, '"\n'))
diff --git a/src/mongo/db/fts/unicode/string.cpp b/src/mongo/db/fts/unicode/string.cpp
new file mode 100644
index 00000000000..24c6ff8027e
--- /dev/null
+++ b/src/mongo/db/fts/unicode/string.cpp
@@ -0,0 +1,157 @@
+/**
+ * Copyright (C) 2015 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/db/fts/unicode/string.h"
+
+#include <algorithm>
+
+#include "mongo/shell/linenoise_utf8.h"
+#include "mongo/util/assert_util.h"
+
+namespace mongo {
+namespace unicode {
+
+using linenoise_utf8::copyString32to8;
+using linenoise_utf8::copyString8to32;
+
+using std::u32string;
+
+/**
+ * Constructs a String from UTF-8 input by converting 'utf8_src' to UTF-32 with copyString8to32.
+ * Fails with uassert code 28755 when the conversion reports a non-zero result.
+ */
+String::String(const StringData utf8_src) {
+ // _data is the target, resize it so that it's guaranteed to fit all of the input characters,
+ // plus a null character if there isn't one.
+ _data.resize(utf8_src.size() + 1);
+
+ int result = 0;
+ size_t resultSize = 0;
+
+ // Although utf8_src.rawData() is not guaranteed to be null-terminated, copyString8to32 won't
+ // access bad memory because it is limited by the size of its output buffer, which is set to the
+ // size of utf8_src.
+ // NOTE(review): the limit actually passed below is _data.size(), i.e. utf8_src.size() + 1, not
+ // utf8_src.size() -- confirm that copyString8to32 cannot read one byte past the input.
+ copyString8to32(&_data[0],
+ reinterpret_cast<const unsigned char*>(&utf8_src.rawData()[0]),
+ _data.size(),
+ resultSize,
+ result);
+
+ uassert(28755, "text contains invalid UTF-8", result == 0);
+
+ // Resize _data so it is only as big as what it contains.
+ _data.resize(resultSize);
+}
+
+// Constructs a String directly from UTF-32 data by moving 'src' into _data; no conversion is done.
+String::String(u32string&& src) : _data(std::move(src)) {}
+
+/**
+ * Returns the contents as a std::string produced by copyString32to8. The scratch buffer reserves
+ * four bytes per code point plus one extra byte, and is then trimmed to the size that
+ * copyString32to8 returns.
+ */
+std::string String::toString() const {
+ // output is the target, resize it so that it's guaranteed to fit all of the input characters,
+ // plus a null character if there isn't one.
+ std::string output(_data.size() * 4 + 1, '\0');
+ size_t resultSize =
+ copyString32to8(reinterpret_cast<unsigned char*>(&output[0]), &_data[0], output.size());
+
+ // Resize output so it is only as large as what it contains.
+ output.resize(resultSize);
+ return output;
+}
+
+// Returns the number of UTF-32 code points stored in _data (not the UTF-8 byte length).
+size_t String::size() const {
+ return _data.size();
+}
+
+// Returns a reference to the code point at index 'i'. The index is forwarded to
+// u32string::operator[] as-is, so no bounds check is performed here.
+const char32_t& String::operator[](int i) const {
+ return _data[i];
+}
+
+// Returns a new String holding the result of u32string::substr(pos, len) on _data; 'pos' and
+// 'len' are therefore measured in code points.
+String String::substr(size_t pos, size_t len) const {
+ return String(_data.substr(pos, len));
+}
+
+// Returns a copy of this String in which every code point has been passed through
+// codepointToLower with the given case folding mode. The result has the same length as the source.
+String String::toLower(CaseFoldMode mode) const {
+    u32string folded(_data.size(), 0);
+    std::transform(_data.begin(), _data.end(), folded.begin(), [mode](char32_t cp) {
+        return codepointToLower(cp, mode);
+    });
+
+    return String(std::move(folded));
+}
+
+// Returns a copy of this String in which code points that are themselves diacritics are dropped
+// and every remaining code point is mapped through codepointRemoveDiacritics.
+String String::removeDiacritics() const {
+    u32string stripped;
+    stripped.reserve(_data.size());
+
+    for (const char32_t cp : _data) {
+        if (codepointIsDiacritic(cp)) {
+            continue;  // Diacritic code points are omitted from the result.
+        }
+        stripped.push_back(codepointRemoveDiacritics(cp));
+    }
+
+    return String(std::move(stripped));
+}
+
+/**
+ * Returns true if 'find' occurs as a contiguous run of code points inside 'str', using std::search.
+ *
+ * 'options' selects case and diacritic sensitivity through the kCaseSensitive and
+ * kDiacriticSensitive bits. 'cfMode' selects the case folding rules. The function normalizes its
+ * inputs and then calls itself with the matching sensitivity bit set:
+ *   - kTurkish: both strings are lowercased first, then compared case-sensitively in kNormal mode.
+ *   - diacritic-insensitive: diacritics are removed from both strings, then compared
+ *     diacritic-sensitively.
+ *
+ * NOTE(review): std::search returns its first iterator for an empty needle, so an empty 'find'
+ * matches any non-empty 'str' but not an empty 'str' -- confirm this is intended.
+ */
+bool String::substrMatch(const String& str,
+ const String& find,
+ SubstrMatchOptions options,
+ CaseFoldMode cfMode) {
+ // In Turkish, lowercasing needs to be applied first because the letter İ has a different case
+ // folding mapping than the letter I, but removing diacritics removes the dot from İ.
+ if (cfMode == CaseFoldMode::kTurkish) {
+ String cleanStr = str.toLower(cfMode);
+ String cleanFind = find.toLower(cfMode);
+ return substrMatch(cleanStr, cleanFind, options | kCaseSensitive, CaseFoldMode::kNormal);
+ }
+
+ if (options & kDiacriticSensitive) {
+ if (options & kCaseSensitive) {
+ // Case sensitive and diacritic sensitive.
+ return std::search(str._data.cbegin(),
+ str._data.cend(),
+ find._data.cbegin(),
+ find._data.cend(),
+ [&](char32_t c1, char32_t c2) { return (c1 == c2); }) !=
+ str._data.cend();
+ }
+
+ // Case insensitive and diacritic sensitive.
+ return std::search(str._data.cbegin(),
+ str._data.cend(),
+ find._data.cbegin(),
+ find._data.cend(),
+ [&](char32_t c1, char32_t c2) {
+ return (codepointToLower(c1, cfMode) ==
+ codepointToLower(c2, cfMode));
+ }) != str._data.cend();
+ }
+
+ // Diacritic insensitive: strip diacritics from both strings, then retry with the
+ // diacritic-sensitive bit set so the branch above does the comparison.
+ String cleanStr = str.removeDiacritics();
+ String cleanFind = find.removeDiacritics();
+
+ return substrMatch(cleanStr, cleanFind, options | kDiacriticSensitive, cfMode);
+}
+
+} // namespace unicode
+} // namespace mongo
diff --git a/src/mongo/db/fts/unicode/string.h b/src/mongo/db/fts/unicode/string.h
new file mode 100644
index 00000000000..1fa77af2f3f
--- /dev/null
+++ b/src/mongo/db/fts/unicode/string.h
@@ -0,0 +1,139 @@
+/**
+ * Copyright (C) 2015 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include <cstdint>
+#include <string>
+
+#include "mongo/base/string_data.h"
+#include "mongo/db/fts/unicode/codepoints.h"
+
+namespace mongo {
+namespace unicode {
+
+/**
+ * A string class that supports basic Unicode functionality such as removing diacritic marks and
+ * lowercasing. The String is constructed with UTF-8 source data, and is converted under the hood to
+ * a u32string (UTF-32) so operations can be easily done with individual Unicode code points.
+ */
+class String {
+public:
+ String() = default;
+
+ // Hand-written move operations, compiled only for MSVC with _MSC_VER < 1900; presumably those
+ // compilers do not generate them implicitly (TODO confirm).
+#if defined(_MSC_VER) && _MSC_VER < 1900
+ String(String&& other) : _data(std::move(other._data)) {}
+
+ String& operator=(String&& other) {
+ _data = std::move(other._data);
+ return *this;
+ }
+#endif
+
+ /**
+ * Construct a String with UTF-8 source data (supports standard C++ string literals, and
+ * std::strings).
+ */
+ explicit String(StringData utf8_src);
+
+ /**
+ * Return a lowercased version of the String instance, produced by applying codepointToLower()
+ * with the given mode to every code point.
+ */
+ String toLower(CaseFoldMode mode = CaseFoldMode::kNormal) const;
+
+ /**
+ * Returns a version of the String instance with diacritics and combining marks removed.
+ */
+ String removeDiacritics() const;
+
+ /**
+ * Returns a substring of the String instance, using the same semantics as std::string::substr.
+ * NOTE(review): std::string::substr takes (pos, count), yet the second parameter here is named
+ * 'end' - confirm against the implementation which of the two it really is.
+ */
+ String substr(size_t begin, size_t end) const;
+
+ /**
+ * Returns a UTF-8 encoded std::string version of the String instance.
+ */
+ std::string toString() const;
+
+ /**
+ * Returns the number of Unicode codepoints in the String.
+ */
+ size_t size() const;
+
+ /**
+ * Returns the Unicode codepoint at index i of the String.
+ */
+ const char32_t& operator[](int i) const;
+
+ /**
+ * Options for the substrMatch method. The values below are bit flags and may be OR-ed together.
+ */
+ using SubstrMatchOptions = uint8_t;
+
+ /**
+ * No options (case insensitive and diacritic insensitive).
+ */
+ static const SubstrMatchOptions kNone = 0;
+
+ /**
+ * Perform case sensitive substring match.
+ */
+ static const SubstrMatchOptions kCaseSensitive = 1 << 0;
+
+ /**
+ * Perform diacritic sensitive substring match.
+ */
+ static const SubstrMatchOptions kDiacriticSensitive = 1 << 1;
+
+ /**
+ * Search the string 'str' for the string 'find'. If 'find' exists in 'str', return true, else
+ * return false. By default the search is case insensitive and diacritic insensitive; 'options'
+ * can make it case sensitive and/or diacritic sensitive. If the search is case insensitive,
+ * non-Turkish case folding is used unless CaseFoldMode::kTurkish is passed as 'mode'.
+ */
+ static bool substrMatch(const String& str,
+ const String& find,
+ SubstrMatchOptions options,
+ CaseFoldMode mode = CaseFoldMode::kNormal);
+
+private:
+ /**
+ * Private constructor used by substr, toLower, and removeDiacritics to build a String from
+ * UTF-32 data.
+ */
+ String(std::u32string&& src);
+
+ /**
+ * The underlying UTF-32 data.
+ */
+ std::u32string _data;
+};
+
+} // namespace unicode
+} // namespace mongo
diff --git a/src/mongo/db/fts/unicode/string_test.cpp b/src/mongo/db/fts/unicode/string_test.cpp
new file mode 100644
index 00000000000..9354f7bf25c
--- /dev/null
+++ b/src/mongo/db/fts/unicode/string_test.cpp
@@ -0,0 +1,187 @@
+/**
+ * Copyright (C) 2015 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/db/fts/unicode/string.h"
+#include "mongo/shell/linenoise_utf8.h"
+#include "mongo/unittest/unittest.h"
+#include "mongo/util/text.h"
+
+#ifdef _MSC_VER
+// Microsoft VS 2013 does not handle UTF-8 strings in char literal strings, error C4566.
+// The Microsoft compiler can be tricked into using UTF-8 strings as follows:
+// 1. The file has a UTF-8 BOM
+// 2. The string literal is a wide character string literal (ie, prefixed with L), which is
+// then converted to UTF-8 with toUtf8String().
+// The guard must test _MSC_VER (the macro MSVC predefines); a plain MSC_VER is never defined.
+#define UTF8(x) toUtf8String(L##x)
+#else
+#define UTF8(x) x
+#endif
+
+namespace mongo {
+namespace unicode {
+
+using linenoise_utf8::copyString32to8;
+
+// removeDiacritics() must handle both precomposed and decomposed accented text.
+TEST(UnicodeString, RemoveDiacritics) {
+    // NFC normalized text: each accented letter is a single precomposed code point.
+    const String precomposed(UTF8("¿CUÁNTOS AÑOS TIENES TÚ?"));
+    ASSERT_EQUALS(UTF8("¿CUANTOS ANOS TIENES TU?"), precomposed.removeDiacritics().toString());
+
+    // NFD normalized text ("Café"): a plain 'e' followed by the bytes CC 81, which are the UTF-8
+    // encoding of U+0301.
+    const char decomposed[] = {
+        'C', 'a', 'f', 'e', static_cast<char>(0xcc), static_cast<char>(0x81), 0};
+    ASSERT_EQUALS(UTF8("Cafe"), String(decomposed).removeDiacritics().toString());
+}
+
+// toLower() with the default fold mode, on Cyrillic and on accented Latin text.
+TEST(UnicodeString, CaseFolding) {
+    const String cyrillicUpper(UTF8("СКОЛЬКО ТЕБЕ ЛЕТ?"));
+    ASSERT_EQUALS(UTF8("сколько тебе лет?"), cyrillicUpper.toLower().toString());
+
+    // Lowercasing alone must leave the accents in place.
+    const String latinUpper(UTF8("¿CUÁNTOS AÑOS TIENES TÚ?"));
+    ASSERT_EQUALS(UTF8("¿cuántos años tienes tú?"), latinUpper.toLower().toString());
+}
+
+// In Turkish mode 'I' lowers to dotless 'ı' while 'İ' lowers to dotted 'i'.
+TEST(UnicodeString, CaseFoldingTurkish) {
+    const String dotlessCapitals(UTF8("KAC YASINDASINIZ"));
+    ASSERT_EQUALS(UTF8("kac yasındasınız"),
+                  dotlessCapitals.toLower(CaseFoldMode::kTurkish).toString());
+
+    const String dottedCapitals(UTF8("KAC YASİNDASİNİZ"));
+    ASSERT_EQUALS(UTF8("kac yasindasiniz"),
+                  dottedCapitals.toLower(CaseFoldMode::kTurkish).toString());
+}
+
+// Chains toLower() and removeDiacritics() on precomposed and decomposed input.
+TEST(UnicodeString, CaseFoldingAndRemoveDiacritics) {
+    // NFC normalized text.
+    const String greek(UTF8("Πόσο χρονών είσαι?"));
+    ASSERT_EQUALS(UTF8("ποσο χρονων εισαι?"), greek.toLower().removeDiacritics().toString());
+
+    const String spanish(UTF8("¿CUÁNTOS AÑOS TIENES TÚ?"));
+    ASSERT_EQUALS(UTF8("¿cuantos anos tienes tu?"),
+                  spanish.toLower().removeDiacritics().toString());
+
+    // NFD normalized text ("CAFÉ"): 'E' followed by the bytes CC 81, the UTF-8 encoding of U+0301.
+    const char decomposed[] = {
+        'C', 'A', 'F', 'E', static_cast<char>(0xcc), static_cast<char>(0x81), 0};
+    ASSERT_EQUALS(UTF8("cafe"), String(decomposed).toLower().removeDiacritics().toString());
+}
+
+// Exercises substrMatch() with every combination of the two sensitivity flags.
+TEST(UnicodeString, SubstringMatch) {
+    const String haystack(UTF8("Одумайся! Престол свой сохрани; И ярость укроти."));
+    const String::SubstrMatchOptions fullySensitive =
+        String::kDiacriticSensitive | String::kCaseSensitive;
+
+    // Case insensitive & diacritic insensitive.
+    ASSERT(String::substrMatch(haystack, String(UTF8("ПРЁСТОЛ СВОИ")), String::kNone));
+    ASSERT_FALSE(String::substrMatch(haystack, String(UTF8("Престол сохрани")), String::kNone));
+
+    // Case sensitive & diacritic insensitive.
+    ASSERT(String::substrMatch(haystack, String(UTF8("Одумаися!")), String::kCaseSensitive));
+    ASSERT_FALSE(String::substrMatch(haystack, String(UTF8("одумайся!")), String::kCaseSensitive));
+
+    // Case insensitive & diacritic sensitive.
+    ASSERT(String::substrMatch(haystack, String(UTF8("одумайся!")), String::kDiacriticSensitive));
+    ASSERT_FALSE(
+        String::substrMatch(haystack, String(UTF8("Одумаися!")), String::kDiacriticSensitive));
+
+    // Case sensitive & diacritic sensitive.
+    ASSERT(String::substrMatch(haystack, String(UTF8("Одумайся!")), fullySensitive));
+    ASSERT_FALSE(String::substrMatch(haystack, String(UTF8("Одумаися!")), fullySensitive));
+}
+
+// Exercises substrMatch() with Turkish case folding, where 'I' only matches dotless 'ı'.
+TEST(UnicodeString, SubstringMatchTurkish) {
+    const String haystack(UTF8("KAÇ YAŞINDASINIZ?"));
+    const CaseFoldMode turkish = CaseFoldMode::kTurkish;
+
+    // Case insensitive & diacritic insensitive.
+    ASSERT(String::substrMatch(haystack, String(UTF8("yasındasınız")), String::kNone, turkish));
+    ASSERT_FALSE(
+        String::substrMatch(haystack, String(UTF8("yasindasiniz")), String::kNone, turkish));
+
+    // Case insensitive & diacritic sensitive.
+    ASSERT(String::substrMatch(
+        haystack, String(UTF8("yaşındasınız")), String::kDiacriticSensitive, turkish));
+    ASSERT_FALSE(String::substrMatch(
+        haystack, String(UTF8("yaşindasiniz")), String::kDiacriticSensitive, turkish));
+}
+
+// Constructing a String from malformed UTF-8 must throw an AssertionException.
+TEST(UnicodeString, BadUTF8) {
+    // Overlong: C0 AF is a two-byte encoding of U+002F, which fits in one byte.
+    const char overlong[] = {static_cast<char>(0xC0), static_cast<char>(0xAF), 0};
+    ASSERT_THROWS(String fromOverlong(overlong), AssertionException);
+
+    // Invalid code position: ED A0 80 decodes to U+D800, a surrogate.
+    const char surrogate[] = {
+        static_cast<char>(0xED), static_cast<char>(0xA0), static_cast<char>(0x80), 0};
+    ASSERT_THROWS(String fromSurrogate(surrogate), AssertionException);
+
+    // Two-byte lead (C2) followed by bytes that are not continuation bytes.
+    const char missingContinuation[] = {
+        static_cast<char>(0xC2), static_cast<char>(0x41), static_cast<char>(0x42), 0};
+    ASSERT_THROWS(String fromMissingContinuation(missingContinuation), AssertionException);
+
+    // ASCII letters interleaved with truncated sequences and stray continuation bytes.
+    const char interleaved[] = {static_cast<char>(0x61),
+                                static_cast<char>(0xF1),
+                                static_cast<char>(0x80),
+                                static_cast<char>(0x80),
+                                static_cast<char>(0xE1),
+                                static_cast<char>(0x80),
+                                static_cast<char>(0xC2),
+                                static_cast<char>(0x62),
+                                static_cast<char>(0x80),
+                                static_cast<char>(0x63),
+                                static_cast<char>(0x80),
+                                static_cast<char>(0xBF),
+                                static_cast<char>(0x64),
+                                0};
+    ASSERT_THROWS(String fromInterleaved(interleaved), AssertionException);
+}
+
+// copyString32to8() must encode code points of every UTF-8 length (one to four bytes).
+TEST(UnicodeString, UTF32ToUTF8) {
+    // U+004D, U+0430, U+4E8C and U+10302 need 1, 2, 3 and 4 bytes; the last entry is a NUL.
+    const char32_t codepoints[] = {0x004D, 0x0430, 0x4E8C, 0x10302, 0};
+    std::u32string original(codepoints, sizeof(codepoints) / sizeof(codepoints[0]));
+
+    // 1 + 2 + 3 + 4 encoded bytes plus the NUL: 11 bytes in total.
+    const char encoded[] = {static_cast<char>(0x4D),
+                            static_cast<char>(0xD0),
+                            static_cast<char>(0xB0),
+                            static_cast<char>(0xE4),
+                            static_cast<char>(0xBA),
+                            static_cast<char>(0x8C),
+                            static_cast<char>(0xF0),
+                            static_cast<char>(0x90),
+                            static_cast<char>(0x8C),
+                            static_cast<char>(0x82),
+                            0};
+    const std::string expected_result(encoded, sizeof(encoded));
+
+    std::string result(11, '\0');
+    copyString32to8(reinterpret_cast<unsigned char*>(&result[0]), &original[0], 11);
+
+    ASSERT_EQUALS(expected_result, result);
+}
+
+} // namespace unicode
+} // namespace mongo
diff --git a/src/mongo/shell/linenoise_utf8.h b/src/mongo/shell/linenoise_utf8.h
index 4bd4c2bdc7e..aab3e6f73f2 100644
--- a/src/mongo/shell/linenoise_utf8.h
+++ b/src/mongo/shell/linenoise_utf8.h
@@ -31,10 +31,14 @@
#include <memory>
#include <string.h>
+#if defined(_MSC_VER) && _MSC_VER < 1900
+#include <string>
+#endif
+
namespace linenoise_utf8 {
typedef unsigned char UChar8; // UTF-8 octet
-typedef unsigned int UChar32; // Unicode code point
+typedef char32_t UChar32; // Unicode code point
// Error bits (or-ed together) returned from utf8toUChar32string
//
diff --git a/src/third_party/unicode-8.0.0/CaseFolding.txt b/src/third_party/unicode-8.0.0/CaseFolding.txt
new file mode 100644
index 00000000000..0197a6c40fb
--- /dev/null
+++ b/src/third_party/unicode-8.0.0/CaseFolding.txt
@@ -0,0 +1,1414 @@
+# CaseFolding-8.0.0.txt
+# Date: 2015-01-13, 18:16:36 GMT [MD]
+#
+# Unicode Character Database
+# Copyright (c) 1991-2015 Unicode, Inc.
+# For terms of use, see http://www.unicode.org/terms_of_use.html
+# For documentation, see http://www.unicode.org/reports/tr44/
+#
+# Case Folding Properties
+#
+# This file is a supplement to the UnicodeData file.
+# It provides a case folding mapping generated from the Unicode Character Database.
+# If all characters are mapped according to the full mapping below, then
+# case differences (according to UnicodeData.txt and SpecialCasing.txt)
+# are eliminated.
+#
+# The data supports both implementations that require simple case foldings
+# (where string lengths don't change), and implementations that allow full case folding
+# (where string lengths may grow). Note that where they can be supported, the
+# full case foldings are superior: for example, they allow "MASSE" and "Maße" to match.
+#
+# All code points not listed in this file map to themselves.
+#
+# NOTE: case folding does not preserve normalization formats!
+#
+# For information on case folding, including how to have case folding
+# preserve normalization formats, see Section 3.13 Default Case Algorithms in
+# The Unicode Standard.
+#
+# ================================================================================
+# Format
+# ================================================================================
+# The entries in this file are in the following machine-readable format:
+#
+# <code>; <status>; <mapping>; # <name>
+#
+# The status field is:
+# C: common case folding, common mappings shared by both simple and full mappings.
+# F: full case folding, mappings that cause strings to grow in length. Multiple characters are separated by spaces.
+# S: simple case folding, mappings to single characters where different from F.
+# T: special case for uppercase I and dotted uppercase I
+# - For non-Turkic languages, this mapping is normally not used.
+# - For Turkic languages (tr, az), this mapping can be used instead of the normal mapping for these characters.
+# Note that the Turkic mappings do not maintain canonical equivalence without additional processing.
+# See the discussions of case mapping in the Unicode Standard for more information.
+#
+# Usage:
+# A. To do a simple case folding, use the mappings with status C + S.
+# B. To do a full case folding, use the mappings with status C + F.
+#
+# The mappings with status T can be used or omitted depending on the desired case-folding
+# behavior. (The default option is to exclude them.)
+#
+# =================================================================
+
+# Property: Case_Folding
+
+# All code points not explicitly listed for Case_Folding
+# have the value C for the status field, and the code point itself for the mapping field.
+
+# =================================================================
+0041; C; 0061; # LATIN CAPITAL LETTER A
+0042; C; 0062; # LATIN CAPITAL LETTER B
+0043; C; 0063; # LATIN CAPITAL LETTER C
+0044; C; 0064; # LATIN CAPITAL LETTER D
+0045; C; 0065; # LATIN CAPITAL LETTER E
+0046; C; 0066; # LATIN CAPITAL LETTER F
+0047; C; 0067; # LATIN CAPITAL LETTER G
+0048; C; 0068; # LATIN CAPITAL LETTER H
+0049; C; 0069; # LATIN CAPITAL LETTER I
+0049; T; 0131; # LATIN CAPITAL LETTER I
+004A; C; 006A; # LATIN CAPITAL LETTER J
+004B; C; 006B; # LATIN CAPITAL LETTER K
+004C; C; 006C; # LATIN CAPITAL LETTER L
+004D; C; 006D; # LATIN CAPITAL LETTER M
+004E; C; 006E; # LATIN CAPITAL LETTER N
+004F; C; 006F; # LATIN CAPITAL LETTER O
+0050; C; 0070; # LATIN CAPITAL LETTER P
+0051; C; 0071; # LATIN CAPITAL LETTER Q
+0052; C; 0072; # LATIN CAPITAL LETTER R
+0053; C; 0073; # LATIN CAPITAL LETTER S
+0054; C; 0074; # LATIN CAPITAL LETTER T
+0055; C; 0075; # LATIN CAPITAL LETTER U
+0056; C; 0076; # LATIN CAPITAL LETTER V
+0057; C; 0077; # LATIN CAPITAL LETTER W
+0058; C; 0078; # LATIN CAPITAL LETTER X
+0059; C; 0079; # LATIN CAPITAL LETTER Y
+005A; C; 007A; # LATIN CAPITAL LETTER Z
+00B5; C; 03BC; # MICRO SIGN
+00C0; C; 00E0; # LATIN CAPITAL LETTER A WITH GRAVE
+00C1; C; 00E1; # LATIN CAPITAL LETTER A WITH ACUTE
+00C2; C; 00E2; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+00C3; C; 00E3; # LATIN CAPITAL LETTER A WITH TILDE
+00C4; C; 00E4; # LATIN CAPITAL LETTER A WITH DIAERESIS
+00C5; C; 00E5; # LATIN CAPITAL LETTER A WITH RING ABOVE
+00C6; C; 00E6; # LATIN CAPITAL LETTER AE
+00C7; C; 00E7; # LATIN CAPITAL LETTER C WITH CEDILLA
+00C8; C; 00E8; # LATIN CAPITAL LETTER E WITH GRAVE
+00C9; C; 00E9; # LATIN CAPITAL LETTER E WITH ACUTE
+00CA; C; 00EA; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+00CB; C; 00EB; # LATIN CAPITAL LETTER E WITH DIAERESIS
+00CC; C; 00EC; # LATIN CAPITAL LETTER I WITH GRAVE
+00CD; C; 00ED; # LATIN CAPITAL LETTER I WITH ACUTE
+00CE; C; 00EE; # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+00CF; C; 00EF; # LATIN CAPITAL LETTER I WITH DIAERESIS
+00D0; C; 00F0; # LATIN CAPITAL LETTER ETH
+00D1; C; 00F1; # LATIN CAPITAL LETTER N WITH TILDE
+00D2; C; 00F2; # LATIN CAPITAL LETTER O WITH GRAVE
+00D3; C; 00F3; # LATIN CAPITAL LETTER O WITH ACUTE
+00D4; C; 00F4; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+00D5; C; 00F5; # LATIN CAPITAL LETTER O WITH TILDE
+00D6; C; 00F6; # LATIN CAPITAL LETTER O WITH DIAERESIS
+00D8; C; 00F8; # LATIN CAPITAL LETTER O WITH STROKE
+00D9; C; 00F9; # LATIN CAPITAL LETTER U WITH GRAVE
+00DA; C; 00FA; # LATIN CAPITAL LETTER U WITH ACUTE
+00DB; C; 00FB; # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+00DC; C; 00FC; # LATIN CAPITAL LETTER U WITH DIAERESIS
+00DD; C; 00FD; # LATIN CAPITAL LETTER Y WITH ACUTE
+00DE; C; 00FE; # LATIN CAPITAL LETTER THORN
+00DF; F; 0073 0073; # LATIN SMALL LETTER SHARP S
+0100; C; 0101; # LATIN CAPITAL LETTER A WITH MACRON
+0102; C; 0103; # LATIN CAPITAL LETTER A WITH BREVE
+0104; C; 0105; # LATIN CAPITAL LETTER A WITH OGONEK
+0106; C; 0107; # LATIN CAPITAL LETTER C WITH ACUTE
+0108; C; 0109; # LATIN CAPITAL LETTER C WITH CIRCUMFLEX
+010A; C; 010B; # LATIN CAPITAL LETTER C WITH DOT ABOVE
+010C; C; 010D; # LATIN CAPITAL LETTER C WITH CARON
+010E; C; 010F; # LATIN CAPITAL LETTER D WITH CARON
+0110; C; 0111; # LATIN CAPITAL LETTER D WITH STROKE
+0112; C; 0113; # LATIN CAPITAL LETTER E WITH MACRON
+0114; C; 0115; # LATIN CAPITAL LETTER E WITH BREVE
+0116; C; 0117; # LATIN CAPITAL LETTER E WITH DOT ABOVE
+0118; C; 0119; # LATIN CAPITAL LETTER E WITH OGONEK
+011A; C; 011B; # LATIN CAPITAL LETTER E WITH CARON
+011C; C; 011D; # LATIN CAPITAL LETTER G WITH CIRCUMFLEX
+011E; C; 011F; # LATIN CAPITAL LETTER G WITH BREVE
+0120; C; 0121; # LATIN CAPITAL LETTER G WITH DOT ABOVE
+0122; C; 0123; # LATIN CAPITAL LETTER G WITH CEDILLA
+0124; C; 0125; # LATIN CAPITAL LETTER H WITH CIRCUMFLEX
+0126; C; 0127; # LATIN CAPITAL LETTER H WITH STROKE
+0128; C; 0129; # LATIN CAPITAL LETTER I WITH TILDE
+012A; C; 012B; # LATIN CAPITAL LETTER I WITH MACRON
+012C; C; 012D; # LATIN CAPITAL LETTER I WITH BREVE
+012E; C; 012F; # LATIN CAPITAL LETTER I WITH OGONEK
+0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE
+0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE
+0132; C; 0133; # LATIN CAPITAL LIGATURE IJ
+0134; C; 0135; # LATIN CAPITAL LETTER J WITH CIRCUMFLEX
+0136; C; 0137; # LATIN CAPITAL LETTER K WITH CEDILLA
+0139; C; 013A; # LATIN CAPITAL LETTER L WITH ACUTE
+013B; C; 013C; # LATIN CAPITAL LETTER L WITH CEDILLA
+013D; C; 013E; # LATIN CAPITAL LETTER L WITH CARON
+013F; C; 0140; # LATIN CAPITAL LETTER L WITH MIDDLE DOT
+0141; C; 0142; # LATIN CAPITAL LETTER L WITH STROKE
+0143; C; 0144; # LATIN CAPITAL LETTER N WITH ACUTE
+0145; C; 0146; # LATIN CAPITAL LETTER N WITH CEDILLA
+0147; C; 0148; # LATIN CAPITAL LETTER N WITH CARON
+0149; F; 02BC 006E; # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
+014A; C; 014B; # LATIN CAPITAL LETTER ENG
+014C; C; 014D; # LATIN CAPITAL LETTER O WITH MACRON
+014E; C; 014F; # LATIN CAPITAL LETTER O WITH BREVE
+0150; C; 0151; # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
+0152; C; 0153; # LATIN CAPITAL LIGATURE OE
+0154; C; 0155; # LATIN CAPITAL LETTER R WITH ACUTE
+0156; C; 0157; # LATIN CAPITAL LETTER R WITH CEDILLA
+0158; C; 0159; # LATIN CAPITAL LETTER R WITH CARON
+015A; C; 015B; # LATIN CAPITAL LETTER S WITH ACUTE
+015C; C; 015D; # LATIN CAPITAL LETTER S WITH CIRCUMFLEX
+015E; C; 015F; # LATIN CAPITAL LETTER S WITH CEDILLA
+0160; C; 0161; # LATIN CAPITAL LETTER S WITH CARON
+0162; C; 0163; # LATIN CAPITAL LETTER T WITH CEDILLA
+0164; C; 0165; # LATIN CAPITAL LETTER T WITH CARON
+0166; C; 0167; # LATIN CAPITAL LETTER T WITH STROKE
+0168; C; 0169; # LATIN CAPITAL LETTER U WITH TILDE
+016A; C; 016B; # LATIN CAPITAL LETTER U WITH MACRON
+016C; C; 016D; # LATIN CAPITAL LETTER U WITH BREVE
+016E; C; 016F; # LATIN CAPITAL LETTER U WITH RING ABOVE
+0170; C; 0171; # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
+0172; C; 0173; # LATIN CAPITAL LETTER U WITH OGONEK
+0174; C; 0175; # LATIN CAPITAL LETTER W WITH CIRCUMFLEX
+0176; C; 0177; # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
+0178; C; 00FF; # LATIN CAPITAL LETTER Y WITH DIAERESIS
+0179; C; 017A; # LATIN CAPITAL LETTER Z WITH ACUTE
+017B; C; 017C; # LATIN CAPITAL LETTER Z WITH DOT ABOVE
+017D; C; 017E; # LATIN CAPITAL LETTER Z WITH CARON
+017F; C; 0073; # LATIN SMALL LETTER LONG S
+0181; C; 0253; # LATIN CAPITAL LETTER B WITH HOOK
+0182; C; 0183; # LATIN CAPITAL LETTER B WITH TOPBAR
+0184; C; 0185; # LATIN CAPITAL LETTER TONE SIX
+0186; C; 0254; # LATIN CAPITAL LETTER OPEN O
+0187; C; 0188; # LATIN CAPITAL LETTER C WITH HOOK
+0189; C; 0256; # LATIN CAPITAL LETTER AFRICAN D
+018A; C; 0257; # LATIN CAPITAL LETTER D WITH HOOK
+018B; C; 018C; # LATIN CAPITAL LETTER D WITH TOPBAR
+018E; C; 01DD; # LATIN CAPITAL LETTER REVERSED E
+018F; C; 0259; # LATIN CAPITAL LETTER SCHWA
+0190; C; 025B; # LATIN CAPITAL LETTER OPEN E
+0191; C; 0192; # LATIN CAPITAL LETTER F WITH HOOK
+0193; C; 0260; # LATIN CAPITAL LETTER G WITH HOOK
+0194; C; 0263; # LATIN CAPITAL LETTER GAMMA
+0196; C; 0269; # LATIN CAPITAL LETTER IOTA
+0197; C; 0268; # LATIN CAPITAL LETTER I WITH STROKE
+0198; C; 0199; # LATIN CAPITAL LETTER K WITH HOOK
+019C; C; 026F; # LATIN CAPITAL LETTER TURNED M
+019D; C; 0272; # LATIN CAPITAL LETTER N WITH LEFT HOOK
+019F; C; 0275; # LATIN CAPITAL LETTER O WITH MIDDLE TILDE
+01A0; C; 01A1; # LATIN CAPITAL LETTER O WITH HORN
+01A2; C; 01A3; # LATIN CAPITAL LETTER OI
+01A4; C; 01A5; # LATIN CAPITAL LETTER P WITH HOOK
+01A6; C; 0280; # LATIN LETTER YR
+01A7; C; 01A8; # LATIN CAPITAL LETTER TONE TWO
+01A9; C; 0283; # LATIN CAPITAL LETTER ESH
+01AC; C; 01AD; # LATIN CAPITAL LETTER T WITH HOOK
+01AE; C; 0288; # LATIN CAPITAL LETTER T WITH RETROFLEX HOOK
+01AF; C; 01B0; # LATIN CAPITAL LETTER U WITH HORN
+01B1; C; 028A; # LATIN CAPITAL LETTER UPSILON
+01B2; C; 028B; # LATIN CAPITAL LETTER V WITH HOOK
+01B3; C; 01B4; # LATIN CAPITAL LETTER Y WITH HOOK
+01B5; C; 01B6; # LATIN CAPITAL LETTER Z WITH STROKE
+01B7; C; 0292; # LATIN CAPITAL LETTER EZH
+01B8; C; 01B9; # LATIN CAPITAL LETTER EZH REVERSED
+01BC; C; 01BD; # LATIN CAPITAL LETTER TONE FIVE
+01C4; C; 01C6; # LATIN CAPITAL LETTER DZ WITH CARON
+01C5; C; 01C6; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
+01C7; C; 01C9; # LATIN CAPITAL LETTER LJ
+01C8; C; 01C9; # LATIN CAPITAL LETTER L WITH SMALL LETTER J
+01CA; C; 01CC; # LATIN CAPITAL LETTER NJ
+01CB; C; 01CC; # LATIN CAPITAL LETTER N WITH SMALL LETTER J
+01CD; C; 01CE; # LATIN CAPITAL LETTER A WITH CARON
+01CF; C; 01D0; # LATIN CAPITAL LETTER I WITH CARON
+01D1; C; 01D2; # LATIN CAPITAL LETTER O WITH CARON
+01D3; C; 01D4; # LATIN CAPITAL LETTER U WITH CARON
+01D5; C; 01D6; # LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
+01D7; C; 01D8; # LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE
+01D9; C; 01DA; # LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON
+01DB; C; 01DC; # LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE
+01DE; C; 01DF; # LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON
+01E0; C; 01E1; # LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON
+01E2; C; 01E3; # LATIN CAPITAL LETTER AE WITH MACRON
+01E4; C; 01E5; # LATIN CAPITAL LETTER G WITH STROKE
+01E6; C; 01E7; # LATIN CAPITAL LETTER G WITH CARON
+01E8; C; 01E9; # LATIN CAPITAL LETTER K WITH CARON
+01EA; C; 01EB; # LATIN CAPITAL LETTER O WITH OGONEK
+01EC; C; 01ED; # LATIN CAPITAL LETTER O WITH OGONEK AND MACRON
+01EE; C; 01EF; # LATIN CAPITAL LETTER EZH WITH CARON
+01F0; F; 006A 030C; # LATIN SMALL LETTER J WITH CARON
+01F1; C; 01F3; # LATIN CAPITAL LETTER DZ
+01F2; C; 01F3; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z
+01F4; C; 01F5; # LATIN CAPITAL LETTER G WITH ACUTE
+01F6; C; 0195; # LATIN CAPITAL LETTER HWAIR
+01F7; C; 01BF; # LATIN CAPITAL LETTER WYNN
+01F8; C; 01F9; # LATIN CAPITAL LETTER N WITH GRAVE
+01FA; C; 01FB; # LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE
+01FC; C; 01FD; # LATIN CAPITAL LETTER AE WITH ACUTE
+01FE; C; 01FF; # LATIN CAPITAL LETTER O WITH STROKE AND ACUTE
+0200; C; 0201; # LATIN CAPITAL LETTER A WITH DOUBLE GRAVE
+0202; C; 0203; # LATIN CAPITAL LETTER A WITH INVERTED BREVE
+0204; C; 0205; # LATIN CAPITAL LETTER E WITH DOUBLE GRAVE
+0206; C; 0207; # LATIN CAPITAL LETTER E WITH INVERTED BREVE
+0208; C; 0209; # LATIN CAPITAL LETTER I WITH DOUBLE GRAVE
+020A; C; 020B; # LATIN CAPITAL LETTER I WITH INVERTED BREVE
+020C; C; 020D; # LATIN CAPITAL LETTER O WITH DOUBLE GRAVE
+020E; C; 020F; # LATIN CAPITAL LETTER O WITH INVERTED BREVE
+0210; C; 0211; # LATIN CAPITAL LETTER R WITH DOUBLE GRAVE
+0212; C; 0213; # LATIN CAPITAL LETTER R WITH INVERTED BREVE
+0214; C; 0215; # LATIN CAPITAL LETTER U WITH DOUBLE GRAVE
+0216; C; 0217; # LATIN CAPITAL LETTER U WITH INVERTED BREVE
+0218; C; 0219; # LATIN CAPITAL LETTER S WITH COMMA BELOW
+021A; C; 021B; # LATIN CAPITAL LETTER T WITH COMMA BELOW
+021C; C; 021D; # LATIN CAPITAL LETTER YOGH
+021E; C; 021F; # LATIN CAPITAL LETTER H WITH CARON
+0220; C; 019E; # LATIN CAPITAL LETTER N WITH LONG RIGHT LEG
+0222; C; 0223; # LATIN CAPITAL LETTER OU
+0224; C; 0225; # LATIN CAPITAL LETTER Z WITH HOOK
+0226; C; 0227; # LATIN CAPITAL LETTER A WITH DOT ABOVE
+0228; C; 0229; # LATIN CAPITAL LETTER E WITH CEDILLA
+022A; C; 022B; # LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON
+022C; C; 022D; # LATIN CAPITAL LETTER O WITH TILDE AND MACRON
+022E; C; 022F; # LATIN CAPITAL LETTER O WITH DOT ABOVE
+0230; C; 0231; # LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON
+0232; C; 0233; # LATIN CAPITAL LETTER Y WITH MACRON
+023A; C; 2C65; # LATIN CAPITAL LETTER A WITH STROKE
+023B; C; 023C; # LATIN CAPITAL LETTER C WITH STROKE
+023D; C; 019A; # LATIN CAPITAL LETTER L WITH BAR
+023E; C; 2C66; # LATIN CAPITAL LETTER T WITH DIAGONAL STROKE
+0241; C; 0242; # LATIN CAPITAL LETTER GLOTTAL STOP
+0243; C; 0180; # LATIN CAPITAL LETTER B WITH STROKE
+0244; C; 0289; # LATIN CAPITAL LETTER U BAR
+0245; C; 028C; # LATIN CAPITAL LETTER TURNED V
+0246; C; 0247; # LATIN CAPITAL LETTER E WITH STROKE
+0248; C; 0249; # LATIN CAPITAL LETTER J WITH STROKE
+024A; C; 024B; # LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL
+024C; C; 024D; # LATIN CAPITAL LETTER R WITH STROKE
+024E; C; 024F; # LATIN CAPITAL LETTER Y WITH STROKE
+0345; C; 03B9; # COMBINING GREEK YPOGEGRAMMENI
+0370; C; 0371; # GREEK CAPITAL LETTER HETA
+0372; C; 0373; # GREEK CAPITAL LETTER ARCHAIC SAMPI
+0376; C; 0377; # GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA
+037F; C; 03F3; # GREEK CAPITAL LETTER YOT
+0386; C; 03AC; # GREEK CAPITAL LETTER ALPHA WITH TONOS
+0388; C; 03AD; # GREEK CAPITAL LETTER EPSILON WITH TONOS
+0389; C; 03AE; # GREEK CAPITAL LETTER ETA WITH TONOS
+038A; C; 03AF; # GREEK CAPITAL LETTER IOTA WITH TONOS
+038C; C; 03CC; # GREEK CAPITAL LETTER OMICRON WITH TONOS
+038E; C; 03CD; # GREEK CAPITAL LETTER UPSILON WITH TONOS
+038F; C; 03CE; # GREEK CAPITAL LETTER OMEGA WITH TONOS
+0390; F; 03B9 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
+0391; C; 03B1; # GREEK CAPITAL LETTER ALPHA
+0392; C; 03B2; # GREEK CAPITAL LETTER BETA
+0393; C; 03B3; # GREEK CAPITAL LETTER GAMMA
+0394; C; 03B4; # GREEK CAPITAL LETTER DELTA
+0395; C; 03B5; # GREEK CAPITAL LETTER EPSILON
+0396; C; 03B6; # GREEK CAPITAL LETTER ZETA
+0397; C; 03B7; # GREEK CAPITAL LETTER ETA
+0398; C; 03B8; # GREEK CAPITAL LETTER THETA
+0399; C; 03B9; # GREEK CAPITAL LETTER IOTA
+039A; C; 03BA; # GREEK CAPITAL LETTER KAPPA
+039B; C; 03BB; # GREEK CAPITAL LETTER LAMDA
+039C; C; 03BC; # GREEK CAPITAL LETTER MU
+039D; C; 03BD; # GREEK CAPITAL LETTER NU
+039E; C; 03BE; # GREEK CAPITAL LETTER XI
+039F; C; 03BF; # GREEK CAPITAL LETTER OMICRON
+03A0; C; 03C0; # GREEK CAPITAL LETTER PI
+03A1; C; 03C1; # GREEK CAPITAL LETTER RHO
+03A3; C; 03C3; # GREEK CAPITAL LETTER SIGMA
+03A4; C; 03C4; # GREEK CAPITAL LETTER TAU
+03A5; C; 03C5; # GREEK CAPITAL LETTER UPSILON
+03A6; C; 03C6; # GREEK CAPITAL LETTER PHI
+03A7; C; 03C7; # GREEK CAPITAL LETTER CHI
+03A8; C; 03C8; # GREEK CAPITAL LETTER PSI
+03A9; C; 03C9; # GREEK CAPITAL LETTER OMEGA
+03AA; C; 03CA; # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
+03AB; C; 03CB; # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
+03B0; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
+03C2; C; 03C3; # GREEK SMALL LETTER FINAL SIGMA
+03CF; C; 03D7; # GREEK CAPITAL KAI SYMBOL
+03D0; C; 03B2; # GREEK BETA SYMBOL
+03D1; C; 03B8; # GREEK THETA SYMBOL
+03D5; C; 03C6; # GREEK PHI SYMBOL
+03D6; C; 03C0; # GREEK PI SYMBOL
+03D8; C; 03D9; # GREEK LETTER ARCHAIC KOPPA
+03DA; C; 03DB; # GREEK LETTER STIGMA
+03DC; C; 03DD; # GREEK LETTER DIGAMMA
+03DE; C; 03DF; # GREEK LETTER KOPPA
+03E0; C; 03E1; # GREEK LETTER SAMPI
+03E2; C; 03E3; # COPTIC CAPITAL LETTER SHEI
+03E4; C; 03E5; # COPTIC CAPITAL LETTER FEI
+03E6; C; 03E7; # COPTIC CAPITAL LETTER KHEI
+03E8; C; 03E9; # COPTIC CAPITAL LETTER HORI
+03EA; C; 03EB; # COPTIC CAPITAL LETTER GANGIA
+03EC; C; 03ED; # COPTIC CAPITAL LETTER SHIMA
+03EE; C; 03EF; # COPTIC CAPITAL LETTER DEI
+03F0; C; 03BA; # GREEK KAPPA SYMBOL
+03F1; C; 03C1; # GREEK RHO SYMBOL
+03F4; C; 03B8; # GREEK CAPITAL THETA SYMBOL
+03F5; C; 03B5; # GREEK LUNATE EPSILON SYMBOL
+03F7; C; 03F8; # GREEK CAPITAL LETTER SHO
+03F9; C; 03F2; # GREEK CAPITAL LUNATE SIGMA SYMBOL
+03FA; C; 03FB; # GREEK CAPITAL LETTER SAN
+03FD; C; 037B; # GREEK CAPITAL REVERSED LUNATE SIGMA SYMBOL
+03FE; C; 037C; # GREEK CAPITAL DOTTED LUNATE SIGMA SYMBOL
+03FF; C; 037D; # GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL
+0400; C; 0450; # CYRILLIC CAPITAL LETTER IE WITH GRAVE
+0401; C; 0451; # CYRILLIC CAPITAL LETTER IO
+0402; C; 0452; # CYRILLIC CAPITAL LETTER DJE
+0403; C; 0453; # CYRILLIC CAPITAL LETTER GJE
+0404; C; 0454; # CYRILLIC CAPITAL LETTER UKRAINIAN IE
+0405; C; 0455; # CYRILLIC CAPITAL LETTER DZE
+0406; C; 0456; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
+0407; C; 0457; # CYRILLIC CAPITAL LETTER YI
+0408; C; 0458; # CYRILLIC CAPITAL LETTER JE
+0409; C; 0459; # CYRILLIC CAPITAL LETTER LJE
+040A; C; 045A; # CYRILLIC CAPITAL LETTER NJE
+040B; C; 045B; # CYRILLIC CAPITAL LETTER TSHE
+040C; C; 045C; # CYRILLIC CAPITAL LETTER KJE
+040D; C; 045D; # CYRILLIC CAPITAL LETTER I WITH GRAVE
+040E; C; 045E; # CYRILLIC CAPITAL LETTER SHORT U
+040F; C; 045F; # CYRILLIC CAPITAL LETTER DZHE
+0410; C; 0430; # CYRILLIC CAPITAL LETTER A
+0411; C; 0431; # CYRILLIC CAPITAL LETTER BE
+0412; C; 0432; # CYRILLIC CAPITAL LETTER VE
+0413; C; 0433; # CYRILLIC CAPITAL LETTER GHE
+0414; C; 0434; # CYRILLIC CAPITAL LETTER DE
+0415; C; 0435; # CYRILLIC CAPITAL LETTER IE
+0416; C; 0436; # CYRILLIC CAPITAL LETTER ZHE
+0417; C; 0437; # CYRILLIC CAPITAL LETTER ZE
+0418; C; 0438; # CYRILLIC CAPITAL LETTER I
+0419; C; 0439; # CYRILLIC CAPITAL LETTER SHORT I
+041A; C; 043A; # CYRILLIC CAPITAL LETTER KA
+041B; C; 043B; # CYRILLIC CAPITAL LETTER EL
+041C; C; 043C; # CYRILLIC CAPITAL LETTER EM
+041D; C; 043D; # CYRILLIC CAPITAL LETTER EN
+041E; C; 043E; # CYRILLIC CAPITAL LETTER O
+041F; C; 043F; # CYRILLIC CAPITAL LETTER PE
+0420; C; 0440; # CYRILLIC CAPITAL LETTER ER
+0421; C; 0441; # CYRILLIC CAPITAL LETTER ES
+0422; C; 0442; # CYRILLIC CAPITAL LETTER TE
+0423; C; 0443; # CYRILLIC CAPITAL LETTER U
+0424; C; 0444; # CYRILLIC CAPITAL LETTER EF
+0425; C; 0445; # CYRILLIC CAPITAL LETTER HA
+0426; C; 0446; # CYRILLIC CAPITAL LETTER TSE
+0427; C; 0447; # CYRILLIC CAPITAL LETTER CHE
+0428; C; 0448; # CYRILLIC CAPITAL LETTER SHA
+0429; C; 0449; # CYRILLIC CAPITAL LETTER SHCHA
+042A; C; 044A; # CYRILLIC CAPITAL LETTER HARD SIGN
+042B; C; 044B; # CYRILLIC CAPITAL LETTER YERU
+042C; C; 044C; # CYRILLIC CAPITAL LETTER SOFT SIGN
+042D; C; 044D; # CYRILLIC CAPITAL LETTER E
+042E; C; 044E; # CYRILLIC CAPITAL LETTER YU
+042F; C; 044F; # CYRILLIC CAPITAL LETTER YA
+0460; C; 0461; # CYRILLIC CAPITAL LETTER OMEGA
+0462; C; 0463; # CYRILLIC CAPITAL LETTER YAT
+0464; C; 0465; # CYRILLIC CAPITAL LETTER IOTIFIED E
+0466; C; 0467; # CYRILLIC CAPITAL LETTER LITTLE YUS
+0468; C; 0469; # CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS
+046A; C; 046B; # CYRILLIC CAPITAL LETTER BIG YUS
+046C; C; 046D; # CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS
+046E; C; 046F; # CYRILLIC CAPITAL LETTER KSI
+0470; C; 0471; # CYRILLIC CAPITAL LETTER PSI
+0472; C; 0473; # CYRILLIC CAPITAL LETTER FITA
+0474; C; 0475; # CYRILLIC CAPITAL LETTER IZHITSA
+0476; C; 0477; # CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT
+0478; C; 0479; # CYRILLIC CAPITAL LETTER UK
+047A; C; 047B; # CYRILLIC CAPITAL LETTER ROUND OMEGA
+047C; C; 047D; # CYRILLIC CAPITAL LETTER OMEGA WITH TITLO
+047E; C; 047F; # CYRILLIC CAPITAL LETTER OT
+0480; C; 0481; # CYRILLIC CAPITAL LETTER KOPPA
+048A; C; 048B; # CYRILLIC CAPITAL LETTER SHORT I WITH TAIL
+048C; C; 048D; # CYRILLIC CAPITAL LETTER SEMISOFT SIGN
+048E; C; 048F; # CYRILLIC CAPITAL LETTER ER WITH TICK
+0490; C; 0491; # CYRILLIC CAPITAL LETTER GHE WITH UPTURN
+0492; C; 0493; # CYRILLIC CAPITAL LETTER GHE WITH STROKE
+0494; C; 0495; # CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK
+0496; C; 0497; # CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER
+0498; C; 0499; # CYRILLIC CAPITAL LETTER ZE WITH DESCENDER
+049A; C; 049B; # CYRILLIC CAPITAL LETTER KA WITH DESCENDER
+049C; C; 049D; # CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE
+049E; C; 049F; # CYRILLIC CAPITAL LETTER KA WITH STROKE
+04A0; C; 04A1; # CYRILLIC CAPITAL LETTER BASHKIR KA
+04A2; C; 04A3; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER
+04A4; C; 04A5; # CYRILLIC CAPITAL LIGATURE EN GHE
+04A6; C; 04A7; # CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK
+04A8; C; 04A9; # CYRILLIC CAPITAL LETTER ABKHASIAN HA
+04AA; C; 04AB; # CYRILLIC CAPITAL LETTER ES WITH DESCENDER
+04AC; C; 04AD; # CYRILLIC CAPITAL LETTER TE WITH DESCENDER
+04AE; C; 04AF; # CYRILLIC CAPITAL LETTER STRAIGHT U
+04B0; C; 04B1; # CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE
+04B2; C; 04B3; # CYRILLIC CAPITAL LETTER HA WITH DESCENDER
+04B4; C; 04B5; # CYRILLIC CAPITAL LIGATURE TE TSE
+04B6; C; 04B7; # CYRILLIC CAPITAL LETTER CHE WITH DESCENDER
+04B8; C; 04B9; # CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE
+04BA; C; 04BB; # CYRILLIC CAPITAL LETTER SHHA
+04BC; C; 04BD; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE
+04BE; C; 04BF; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER
+04C0; C; 04CF; # CYRILLIC LETTER PALOCHKA
+04C1; C; 04C2; # CYRILLIC CAPITAL LETTER ZHE WITH BREVE
+04C3; C; 04C4; # CYRILLIC CAPITAL LETTER KA WITH HOOK
+04C5; C; 04C6; # CYRILLIC CAPITAL LETTER EL WITH TAIL
+04C7; C; 04C8; # CYRILLIC CAPITAL LETTER EN WITH HOOK
+04C9; C; 04CA; # CYRILLIC CAPITAL LETTER EN WITH TAIL
+04CB; C; 04CC; # CYRILLIC CAPITAL LETTER KHAKASSIAN CHE
+04CD; C; 04CE; # CYRILLIC CAPITAL LETTER EM WITH TAIL
+04D0; C; 04D1; # CYRILLIC CAPITAL LETTER A WITH BREVE
+04D2; C; 04D3; # CYRILLIC CAPITAL LETTER A WITH DIAERESIS
+04D4; C; 04D5; # CYRILLIC CAPITAL LIGATURE A IE
+04D6; C; 04D7; # CYRILLIC CAPITAL LETTER IE WITH BREVE
+04D8; C; 04D9; # CYRILLIC CAPITAL LETTER SCHWA
+04DA; C; 04DB; # CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS
+04DC; C; 04DD; # CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS
+04DE; C; 04DF; # CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS
+04E0; C; 04E1; # CYRILLIC CAPITAL LETTER ABKHASIAN DZE
+04E2; C; 04E3; # CYRILLIC CAPITAL LETTER I WITH MACRON
+04E4; C; 04E5; # CYRILLIC CAPITAL LETTER I WITH DIAERESIS
+04E6; C; 04E7; # CYRILLIC CAPITAL LETTER O WITH DIAERESIS
+04E8; C; 04E9; # CYRILLIC CAPITAL LETTER BARRED O
+04EA; C; 04EB; # CYRILLIC CAPITAL LETTER BARRED O WITH DIAERESIS
+04EC; C; 04ED; # CYRILLIC CAPITAL LETTER E WITH DIAERESIS
+04EE; C; 04EF; # CYRILLIC CAPITAL LETTER U WITH MACRON
+04F0; C; 04F1; # CYRILLIC CAPITAL LETTER U WITH DIAERESIS
+04F2; C; 04F3; # CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE
+04F4; C; 04F5; # CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS
+04F6; C; 04F7; # CYRILLIC CAPITAL LETTER GHE WITH DESCENDER
+04F8; C; 04F9; # CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS
+04FA; C; 04FB; # CYRILLIC CAPITAL LETTER GHE WITH STROKE AND HOOK
+04FC; C; 04FD; # CYRILLIC CAPITAL LETTER HA WITH HOOK
+04FE; C; 04FF; # CYRILLIC CAPITAL LETTER HA WITH STROKE
+0500; C; 0501; # CYRILLIC CAPITAL LETTER KOMI DE
+0502; C; 0503; # CYRILLIC CAPITAL LETTER KOMI DJE
+0504; C; 0505; # CYRILLIC CAPITAL LETTER KOMI ZJE
+0506; C; 0507; # CYRILLIC CAPITAL LETTER KOMI DZJE
+0508; C; 0509; # CYRILLIC CAPITAL LETTER KOMI LJE
+050A; C; 050B; # CYRILLIC CAPITAL LETTER KOMI NJE
+050C; C; 050D; # CYRILLIC CAPITAL LETTER KOMI SJE
+050E; C; 050F; # CYRILLIC CAPITAL LETTER KOMI TJE
+0510; C; 0511; # CYRILLIC CAPITAL LETTER REVERSED ZE
+0512; C; 0513; # CYRILLIC CAPITAL LETTER EL WITH HOOK
+0514; C; 0515; # CYRILLIC CAPITAL LETTER LHA
+0516; C; 0517; # CYRILLIC CAPITAL LETTER RHA
+0518; C; 0519; # CYRILLIC CAPITAL LETTER YAE
+051A; C; 051B; # CYRILLIC CAPITAL LETTER QA
+051C; C; 051D; # CYRILLIC CAPITAL LETTER WE
+051E; C; 051F; # CYRILLIC CAPITAL LETTER ALEUT KA
+0520; C; 0521; # CYRILLIC CAPITAL LETTER EL WITH MIDDLE HOOK
+0522; C; 0523; # CYRILLIC CAPITAL LETTER EN WITH MIDDLE HOOK
+0524; C; 0525; # CYRILLIC CAPITAL LETTER PE WITH DESCENDER
+0526; C; 0527; # CYRILLIC CAPITAL LETTER SHHA WITH DESCENDER
+0528; C; 0529; # CYRILLIC CAPITAL LETTER EN WITH LEFT HOOK
+052A; C; 052B; # CYRILLIC CAPITAL LETTER DZZHE
+052C; C; 052D; # CYRILLIC CAPITAL LETTER DCHE
+052E; C; 052F; # CYRILLIC CAPITAL LETTER EL WITH DESCENDER
+0531; C; 0561; # ARMENIAN CAPITAL LETTER AYB
+0532; C; 0562; # ARMENIAN CAPITAL LETTER BEN
+0533; C; 0563; # ARMENIAN CAPITAL LETTER GIM
+0534; C; 0564; # ARMENIAN CAPITAL LETTER DA
+0535; C; 0565; # ARMENIAN CAPITAL LETTER ECH
+0536; C; 0566; # ARMENIAN CAPITAL LETTER ZA
+0537; C; 0567; # ARMENIAN CAPITAL LETTER EH
+0538; C; 0568; # ARMENIAN CAPITAL LETTER ET
+0539; C; 0569; # ARMENIAN CAPITAL LETTER TO
+053A; C; 056A; # ARMENIAN CAPITAL LETTER ZHE
+053B; C; 056B; # ARMENIAN CAPITAL LETTER INI
+053C; C; 056C; # ARMENIAN CAPITAL LETTER LIWN
+053D; C; 056D; # ARMENIAN CAPITAL LETTER XEH
+053E; C; 056E; # ARMENIAN CAPITAL LETTER CA
+053F; C; 056F; # ARMENIAN CAPITAL LETTER KEN
+0540; C; 0570; # ARMENIAN CAPITAL LETTER HO
+0541; C; 0571; # ARMENIAN CAPITAL LETTER JA
+0542; C; 0572; # ARMENIAN CAPITAL LETTER GHAD
+0543; C; 0573; # ARMENIAN CAPITAL LETTER CHEH
+0544; C; 0574; # ARMENIAN CAPITAL LETTER MEN
+0545; C; 0575; # ARMENIAN CAPITAL LETTER YI
+0546; C; 0576; # ARMENIAN CAPITAL LETTER NOW
+0547; C; 0577; # ARMENIAN CAPITAL LETTER SHA
+0548; C; 0578; # ARMENIAN CAPITAL LETTER VO
+0549; C; 0579; # ARMENIAN CAPITAL LETTER CHA
+054A; C; 057A; # ARMENIAN CAPITAL LETTER PEH
+054B; C; 057B; # ARMENIAN CAPITAL LETTER JHEH
+054C; C; 057C; # ARMENIAN CAPITAL LETTER RA
+054D; C; 057D; # ARMENIAN CAPITAL LETTER SEH
+054E; C; 057E; # ARMENIAN CAPITAL LETTER VEW
+054F; C; 057F; # ARMENIAN CAPITAL LETTER TIWN
+0550; C; 0580; # ARMENIAN CAPITAL LETTER REH
+0551; C; 0581; # ARMENIAN CAPITAL LETTER CO
+0552; C; 0582; # ARMENIAN CAPITAL LETTER YIWN
+0553; C; 0583; # ARMENIAN CAPITAL LETTER PIWR
+0554; C; 0584; # ARMENIAN CAPITAL LETTER KEH
+0555; C; 0585; # ARMENIAN CAPITAL LETTER OH
+0556; C; 0586; # ARMENIAN CAPITAL LETTER FEH
+0587; F; 0565 0582; # ARMENIAN SMALL LIGATURE ECH YIWN
+10A0; C; 2D00; # GEORGIAN CAPITAL LETTER AN
+10A1; C; 2D01; # GEORGIAN CAPITAL LETTER BAN
+10A2; C; 2D02; # GEORGIAN CAPITAL LETTER GAN
+10A3; C; 2D03; # GEORGIAN CAPITAL LETTER DON
+10A4; C; 2D04; # GEORGIAN CAPITAL LETTER EN
+10A5; C; 2D05; # GEORGIAN CAPITAL LETTER VIN
+10A6; C; 2D06; # GEORGIAN CAPITAL LETTER ZEN
+10A7; C; 2D07; # GEORGIAN CAPITAL LETTER TAN
+10A8; C; 2D08; # GEORGIAN CAPITAL LETTER IN
+10A9; C; 2D09; # GEORGIAN CAPITAL LETTER KAN
+10AA; C; 2D0A; # GEORGIAN CAPITAL LETTER LAS
+10AB; C; 2D0B; # GEORGIAN CAPITAL LETTER MAN
+10AC; C; 2D0C; # GEORGIAN CAPITAL LETTER NAR
+10AD; C; 2D0D; # GEORGIAN CAPITAL LETTER ON
+10AE; C; 2D0E; # GEORGIAN CAPITAL LETTER PAR
+10AF; C; 2D0F; # GEORGIAN CAPITAL LETTER ZHAR
+10B0; C; 2D10; # GEORGIAN CAPITAL LETTER RAE
+10B1; C; 2D11; # GEORGIAN CAPITAL LETTER SAN
+10B2; C; 2D12; # GEORGIAN CAPITAL LETTER TAR
+10B3; C; 2D13; # GEORGIAN CAPITAL LETTER UN
+10B4; C; 2D14; # GEORGIAN CAPITAL LETTER PHAR
+10B5; C; 2D15; # GEORGIAN CAPITAL LETTER KHAR
+10B6; C; 2D16; # GEORGIAN CAPITAL LETTER GHAN
+10B7; C; 2D17; # GEORGIAN CAPITAL LETTER QAR
+10B8; C; 2D18; # GEORGIAN CAPITAL LETTER SHIN
+10B9; C; 2D19; # GEORGIAN CAPITAL LETTER CHIN
+10BA; C; 2D1A; # GEORGIAN CAPITAL LETTER CAN
+10BB; C; 2D1B; # GEORGIAN CAPITAL LETTER JIL
+10BC; C; 2D1C; # GEORGIAN CAPITAL LETTER CIL
+10BD; C; 2D1D; # GEORGIAN CAPITAL LETTER CHAR
+10BE; C; 2D1E; # GEORGIAN CAPITAL LETTER XAN
+10BF; C; 2D1F; # GEORGIAN CAPITAL LETTER JHAN
+10C0; C; 2D20; # GEORGIAN CAPITAL LETTER HAE
+10C1; C; 2D21; # GEORGIAN CAPITAL LETTER HE
+10C2; C; 2D22; # GEORGIAN CAPITAL LETTER HIE
+10C3; C; 2D23; # GEORGIAN CAPITAL LETTER WE
+10C4; C; 2D24; # GEORGIAN CAPITAL LETTER HAR
+10C5; C; 2D25; # GEORGIAN CAPITAL LETTER HOE
+10C7; C; 2D27; # GEORGIAN CAPITAL LETTER YN
+10CD; C; 2D2D; # GEORGIAN CAPITAL LETTER AEN
+13F8; C; 13F0; # CHEROKEE SMALL LETTER YE
+13F9; C; 13F1; # CHEROKEE SMALL LETTER YI
+13FA; C; 13F2; # CHEROKEE SMALL LETTER YO
+13FB; C; 13F3; # CHEROKEE SMALL LETTER YU
+13FC; C; 13F4; # CHEROKEE SMALL LETTER YV
+13FD; C; 13F5; # CHEROKEE SMALL LETTER MV
+1E00; C; 1E01; # LATIN CAPITAL LETTER A WITH RING BELOW
+1E02; C; 1E03; # LATIN CAPITAL LETTER B WITH DOT ABOVE
+1E04; C; 1E05; # LATIN CAPITAL LETTER B WITH DOT BELOW
+1E06; C; 1E07; # LATIN CAPITAL LETTER B WITH LINE BELOW
+1E08; C; 1E09; # LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE
+1E0A; C; 1E0B; # LATIN CAPITAL LETTER D WITH DOT ABOVE
+1E0C; C; 1E0D; # LATIN CAPITAL LETTER D WITH DOT BELOW
+1E0E; C; 1E0F; # LATIN CAPITAL LETTER D WITH LINE BELOW
+1E10; C; 1E11; # LATIN CAPITAL LETTER D WITH CEDILLA
+1E12; C; 1E13; # LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW
+1E14; C; 1E15; # LATIN CAPITAL LETTER E WITH MACRON AND GRAVE
+1E16; C; 1E17; # LATIN CAPITAL LETTER E WITH MACRON AND ACUTE
+1E18; C; 1E19; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW
+1E1A; C; 1E1B; # LATIN CAPITAL LETTER E WITH TILDE BELOW
+1E1C; C; 1E1D; # LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE
+1E1E; C; 1E1F; # LATIN CAPITAL LETTER F WITH DOT ABOVE
+1E20; C; 1E21; # LATIN CAPITAL LETTER G WITH MACRON
+1E22; C; 1E23; # LATIN CAPITAL LETTER H WITH DOT ABOVE
+1E24; C; 1E25; # LATIN CAPITAL LETTER H WITH DOT BELOW
+1E26; C; 1E27; # LATIN CAPITAL LETTER H WITH DIAERESIS
+1E28; C; 1E29; # LATIN CAPITAL LETTER H WITH CEDILLA
+1E2A; C; 1E2B; # LATIN CAPITAL LETTER H WITH BREVE BELOW
+1E2C; C; 1E2D; # LATIN CAPITAL LETTER I WITH TILDE BELOW
+1E2E; C; 1E2F; # LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE
+1E30; C; 1E31; # LATIN CAPITAL LETTER K WITH ACUTE
+1E32; C; 1E33; # LATIN CAPITAL LETTER K WITH DOT BELOW
+1E34; C; 1E35; # LATIN CAPITAL LETTER K WITH LINE BELOW
+1E36; C; 1E37; # LATIN CAPITAL LETTER L WITH DOT BELOW
+1E38; C; 1E39; # LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON
+1E3A; C; 1E3B; # LATIN CAPITAL LETTER L WITH LINE BELOW
+1E3C; C; 1E3D; # LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW
+1E3E; C; 1E3F; # LATIN CAPITAL LETTER M WITH ACUTE
+1E40; C; 1E41; # LATIN CAPITAL LETTER M WITH DOT ABOVE
+1E42; C; 1E43; # LATIN CAPITAL LETTER M WITH DOT BELOW
+1E44; C; 1E45; # LATIN CAPITAL LETTER N WITH DOT ABOVE
+1E46; C; 1E47; # LATIN CAPITAL LETTER N WITH DOT BELOW
+1E48; C; 1E49; # LATIN CAPITAL LETTER N WITH LINE BELOW
+1E4A; C; 1E4B; # LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW
+1E4C; C; 1E4D; # LATIN CAPITAL LETTER O WITH TILDE AND ACUTE
+1E4E; C; 1E4F; # LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS
+1E50; C; 1E51; # LATIN CAPITAL LETTER O WITH MACRON AND GRAVE
+1E52; C; 1E53; # LATIN CAPITAL LETTER O WITH MACRON AND ACUTE
+1E54; C; 1E55; # LATIN CAPITAL LETTER P WITH ACUTE
+1E56; C; 1E57; # LATIN CAPITAL LETTER P WITH DOT ABOVE
+1E58; C; 1E59; # LATIN CAPITAL LETTER R WITH DOT ABOVE
+1E5A; C; 1E5B; # LATIN CAPITAL LETTER R WITH DOT BELOW
+1E5C; C; 1E5D; # LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON
+1E5E; C; 1E5F; # LATIN CAPITAL LETTER R WITH LINE BELOW
+1E60; C; 1E61; # LATIN CAPITAL LETTER S WITH DOT ABOVE
+1E62; C; 1E63; # LATIN CAPITAL LETTER S WITH DOT BELOW
+1E64; C; 1E65; # LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE
+1E66; C; 1E67; # LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE
+1E68; C; 1E69; # LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE
+1E6A; C; 1E6B; # LATIN CAPITAL LETTER T WITH DOT ABOVE
+1E6C; C; 1E6D; # LATIN CAPITAL LETTER T WITH DOT BELOW
+1E6E; C; 1E6F; # LATIN CAPITAL LETTER T WITH LINE BELOW
+1E70; C; 1E71; # LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW
+1E72; C; 1E73; # LATIN CAPITAL LETTER U WITH DIAERESIS BELOW
+1E74; C; 1E75; # LATIN CAPITAL LETTER U WITH TILDE BELOW
+1E76; C; 1E77; # LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW
+1E78; C; 1E79; # LATIN CAPITAL LETTER U WITH TILDE AND ACUTE
+1E7A; C; 1E7B; # LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS
+1E7C; C; 1E7D; # LATIN CAPITAL LETTER V WITH TILDE
+1E7E; C; 1E7F; # LATIN CAPITAL LETTER V WITH DOT BELOW
+1E80; C; 1E81; # LATIN CAPITAL LETTER W WITH GRAVE
+1E82; C; 1E83; # LATIN CAPITAL LETTER W WITH ACUTE
+1E84; C; 1E85; # LATIN CAPITAL LETTER W WITH DIAERESIS
+1E86; C; 1E87; # LATIN CAPITAL LETTER W WITH DOT ABOVE
+1E88; C; 1E89; # LATIN CAPITAL LETTER W WITH DOT BELOW
+1E8A; C; 1E8B; # LATIN CAPITAL LETTER X WITH DOT ABOVE
+1E8C; C; 1E8D; # LATIN CAPITAL LETTER X WITH DIAERESIS
+1E8E; C; 1E8F; # LATIN CAPITAL LETTER Y WITH DOT ABOVE
+1E90; C; 1E91; # LATIN CAPITAL LETTER Z WITH CIRCUMFLEX
+1E92; C; 1E93; # LATIN CAPITAL LETTER Z WITH DOT BELOW
+1E94; C; 1E95; # LATIN CAPITAL LETTER Z WITH LINE BELOW
+1E96; F; 0068 0331; # LATIN SMALL LETTER H WITH LINE BELOW
+1E97; F; 0074 0308; # LATIN SMALL LETTER T WITH DIAERESIS
+1E98; F; 0077 030A; # LATIN SMALL LETTER W WITH RING ABOVE
+1E99; F; 0079 030A; # LATIN SMALL LETTER Y WITH RING ABOVE
+1E9A; F; 0061 02BE; # LATIN SMALL LETTER A WITH RIGHT HALF RING
+1E9B; C; 1E61; # LATIN SMALL LETTER LONG S WITH DOT ABOVE
+1E9E; F; 0073 0073; # LATIN CAPITAL LETTER SHARP S
+1E9E; S; 00DF; # LATIN CAPITAL LETTER SHARP S
+1EA0; C; 1EA1; # LATIN CAPITAL LETTER A WITH DOT BELOW
+1EA2; C; 1EA3; # LATIN CAPITAL LETTER A WITH HOOK ABOVE
+1EA4; C; 1EA5; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE
+1EA6; C; 1EA7; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE
+1EA8; C; 1EA9; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE
+1EAA; C; 1EAB; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE
+1EAC; C; 1EAD; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW
+1EAE; C; 1EAF; # LATIN CAPITAL LETTER A WITH BREVE AND ACUTE
+1EB0; C; 1EB1; # LATIN CAPITAL LETTER A WITH BREVE AND GRAVE
+1EB2; C; 1EB3; # LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE
+1EB4; C; 1EB5; # LATIN CAPITAL LETTER A WITH BREVE AND TILDE
+1EB6; C; 1EB7; # LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW
+1EB8; C; 1EB9; # LATIN CAPITAL LETTER E WITH DOT BELOW
+1EBA; C; 1EBB; # LATIN CAPITAL LETTER E WITH HOOK ABOVE
+1EBC; C; 1EBD; # LATIN CAPITAL LETTER E WITH TILDE
+1EBE; C; 1EBF; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE
+1EC0; C; 1EC1; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE
+1EC2; C; 1EC3; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE
+1EC4; C; 1EC5; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE
+1EC6; C; 1EC7; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW
+1EC8; C; 1EC9; # LATIN CAPITAL LETTER I WITH HOOK ABOVE
+1ECA; C; 1ECB; # LATIN CAPITAL LETTER I WITH DOT BELOW
+1ECC; C; 1ECD; # LATIN CAPITAL LETTER O WITH DOT BELOW
+1ECE; C; 1ECF; # LATIN CAPITAL LETTER O WITH HOOK ABOVE
+1ED0; C; 1ED1; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE
+1ED2; C; 1ED3; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE
+1ED4; C; 1ED5; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE
+1ED6; C; 1ED7; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE
+1ED8; C; 1ED9; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW
+1EDA; C; 1EDB; # LATIN CAPITAL LETTER O WITH HORN AND ACUTE
+1EDC; C; 1EDD; # LATIN CAPITAL LETTER O WITH HORN AND GRAVE
+1EDE; C; 1EDF; # LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE
+1EE0; C; 1EE1; # LATIN CAPITAL LETTER O WITH HORN AND TILDE
+1EE2; C; 1EE3; # LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW
+1EE4; C; 1EE5; # LATIN CAPITAL LETTER U WITH DOT BELOW
+1EE6; C; 1EE7; # LATIN CAPITAL LETTER U WITH HOOK ABOVE
+1EE8; C; 1EE9; # LATIN CAPITAL LETTER U WITH HORN AND ACUTE
+1EEA; C; 1EEB; # LATIN CAPITAL LETTER U WITH HORN AND GRAVE
+1EEC; C; 1EED; # LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE
+1EEE; C; 1EEF; # LATIN CAPITAL LETTER U WITH HORN AND TILDE
+1EF0; C; 1EF1; # LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW
+1EF2; C; 1EF3; # LATIN CAPITAL LETTER Y WITH GRAVE
+1EF4; C; 1EF5; # LATIN CAPITAL LETTER Y WITH DOT BELOW
+1EF6; C; 1EF7; # LATIN CAPITAL LETTER Y WITH HOOK ABOVE
+1EF8; C; 1EF9; # LATIN CAPITAL LETTER Y WITH TILDE
+1EFA; C; 1EFB; # LATIN CAPITAL LETTER MIDDLE-WELSH LL
+1EFC; C; 1EFD; # LATIN CAPITAL LETTER MIDDLE-WELSH V
+1EFE; C; 1EFF; # LATIN CAPITAL LETTER Y WITH LOOP
+1F08; C; 1F00; # GREEK CAPITAL LETTER ALPHA WITH PSILI
+1F09; C; 1F01; # GREEK CAPITAL LETTER ALPHA WITH DASIA
+1F0A; C; 1F02; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA
+1F0B; C; 1F03; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA
+1F0C; C; 1F04; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA
+1F0D; C; 1F05; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA
+1F0E; C; 1F06; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI
+1F0F; C; 1F07; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI
+1F18; C; 1F10; # GREEK CAPITAL LETTER EPSILON WITH PSILI
+1F19; C; 1F11; # GREEK CAPITAL LETTER EPSILON WITH DASIA
+1F1A; C; 1F12; # GREEK CAPITAL LETTER EPSILON WITH PSILI AND VARIA
+1F1B; C; 1F13; # GREEK CAPITAL LETTER EPSILON WITH DASIA AND VARIA
+1F1C; C; 1F14; # GREEK CAPITAL LETTER EPSILON WITH PSILI AND OXIA
+1F1D; C; 1F15; # GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA
+1F28; C; 1F20; # GREEK CAPITAL LETTER ETA WITH PSILI
+1F29; C; 1F21; # GREEK CAPITAL LETTER ETA WITH DASIA
+1F2A; C; 1F22; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA
+1F2B; C; 1F23; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA
+1F2C; C; 1F24; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA
+1F2D; C; 1F25; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA
+1F2E; C; 1F26; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI
+1F2F; C; 1F27; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI
+1F38; C; 1F30; # GREEK CAPITAL LETTER IOTA WITH PSILI
+1F39; C; 1F31; # GREEK CAPITAL LETTER IOTA WITH DASIA
+1F3A; C; 1F32; # GREEK CAPITAL LETTER IOTA WITH PSILI AND VARIA
+1F3B; C; 1F33; # GREEK CAPITAL LETTER IOTA WITH DASIA AND VARIA
+1F3C; C; 1F34; # GREEK CAPITAL LETTER IOTA WITH PSILI AND OXIA
+1F3D; C; 1F35; # GREEK CAPITAL LETTER IOTA WITH DASIA AND OXIA
+1F3E; C; 1F36; # GREEK CAPITAL LETTER IOTA WITH PSILI AND PERISPOMENI
+1F3F; C; 1F37; # GREEK CAPITAL LETTER IOTA WITH DASIA AND PERISPOMENI
+1F48; C; 1F40; # GREEK CAPITAL LETTER OMICRON WITH PSILI
+1F49; C; 1F41; # GREEK CAPITAL LETTER OMICRON WITH DASIA
+1F4A; C; 1F42; # GREEK CAPITAL LETTER OMICRON WITH PSILI AND VARIA
+1F4B; C; 1F43; # GREEK CAPITAL LETTER OMICRON WITH DASIA AND VARIA
+1F4C; C; 1F44; # GREEK CAPITAL LETTER OMICRON WITH PSILI AND OXIA
+1F4D; C; 1F45; # GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA
+1F50; F; 03C5 0313; # GREEK SMALL LETTER UPSILON WITH PSILI
+1F52; F; 03C5 0313 0300; # GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA
+1F54; F; 03C5 0313 0301; # GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA
+1F56; F; 03C5 0313 0342; # GREEK SMALL LETTER UPSILON WITH PSILI AND PERISPOMENI
+1F59; C; 1F51; # GREEK CAPITAL LETTER UPSILON WITH DASIA
+1F5B; C; 1F53; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA
+1F5D; C; 1F55; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA
+1F5F; C; 1F57; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI
+1F68; C; 1F60; # GREEK CAPITAL LETTER OMEGA WITH PSILI
+1F69; C; 1F61; # GREEK CAPITAL LETTER OMEGA WITH DASIA
+1F6A; C; 1F62; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA
+1F6B; C; 1F63; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA
+1F6C; C; 1F64; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA
+1F6D; C; 1F65; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA
+1F6E; C; 1F66; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI
+1F6F; C; 1F67; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI
+1F80; F; 1F00 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI
+1F81; F; 1F01 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND YPOGEGRAMMENI
+1F82; F; 1F02 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA AND YPOGEGRAMMENI
+1F83; F; 1F03 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA AND YPOGEGRAMMENI
+1F84; F; 1F04 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA AND YPOGEGRAMMENI
+1F85; F; 1F05 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA AND YPOGEGRAMMENI
+1F86; F; 1F06 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
+1F87; F; 1F07 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
+1F88; F; 1F00 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI
+1F88; S; 1F80; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI
+1F89; F; 1F01 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI
+1F89; S; 1F81; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI
+1F8A; F; 1F02 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI
+1F8A; S; 1F82; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI
+1F8B; F; 1F03 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI
+1F8B; S; 1F83; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI
+1F8C; F; 1F04 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI
+1F8C; S; 1F84; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI
+1F8D; F; 1F05 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI
+1F8D; S; 1F85; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI
+1F8E; F; 1F06 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
+1F8E; S; 1F86; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
+1F8F; F; 1F07 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
+1F8F; S; 1F87; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
+1F90; F; 1F20 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI
+1F91; F; 1F21 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND YPOGEGRAMMENI
+1F92; F; 1F22 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND VARIA AND YPOGEGRAMMENI
+1F93; F; 1F23 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND VARIA AND YPOGEGRAMMENI
+1F94; F; 1F24 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND OXIA AND YPOGEGRAMMENI
+1F95; F; 1F25 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI
+1F96; F; 1F26 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
+1F97; F; 1F27 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
+1F98; F; 1F20 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI
+1F98; S; 1F90; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI
+1F99; F; 1F21 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI
+1F99; S; 1F91; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI
+1F9A; F; 1F22 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI
+1F9A; S; 1F92; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI
+1F9B; F; 1F23 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI
+1F9B; S; 1F93; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI
+1F9C; F; 1F24 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI
+1F9C; S; 1F94; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI
+1F9D; F; 1F25 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI
+1F9D; S; 1F95; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI
+1F9E; F; 1F26 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
+1F9E; S; 1F96; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
+1F9F; F; 1F27 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
+1F9F; S; 1F97; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
+1FA0; F; 1F60 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI
+1FA1; F; 1F61 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND YPOGEGRAMMENI
+1FA2; F; 1F62 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI
+1FA3; F; 1F63 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA AND YPOGEGRAMMENI
+1FA4; F; 1F64 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA AND YPOGEGRAMMENI
+1FA5; F; 1F65 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA AND YPOGEGRAMMENI
+1FA6; F; 1F66 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
+1FA7; F; 1F67 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
+1FA8; F; 1F60 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI
+1FA8; S; 1FA0; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI
+1FA9; F; 1F61 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI
+1FA9; S; 1FA1; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI
+1FAA; F; 1F62 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI
+1FAA; S; 1FA2; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI
+1FAB; F; 1F63 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI
+1FAB; S; 1FA3; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI
+1FAC; F; 1F64 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI
+1FAC; S; 1FA4; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI
+1FAD; F; 1F65 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI
+1FAD; S; 1FA5; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI
+1FAE; F; 1F66 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
+1FAE; S; 1FA6; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
+1FAF; F; 1F67 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
+1FAF; S; 1FA7; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
+1FB2; F; 1F70 03B9; # GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI
+1FB3; F; 03B1 03B9; # GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI
+1FB4; F; 03AC 03B9; # GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI
+1FB6; F; 03B1 0342; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI
+1FB7; F; 03B1 0342 03B9; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
+1FB8; C; 1FB0; # GREEK CAPITAL LETTER ALPHA WITH VRACHY
+1FB9; C; 1FB1; # GREEK CAPITAL LETTER ALPHA WITH MACRON
+1FBA; C; 1F70; # GREEK CAPITAL LETTER ALPHA WITH VARIA
+1FBB; C; 1F71; # GREEK CAPITAL LETTER ALPHA WITH OXIA
+1FBC; F; 03B1 03B9; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
+1FBC; S; 1FB3; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
+1FBE; C; 03B9; # GREEK PROSGEGRAMMENI
+1FC2; F; 1F74 03B9; # GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI
+1FC3; F; 03B7 03B9; # GREEK SMALL LETTER ETA WITH YPOGEGRAMMENI
+1FC4; F; 03AE 03B9; # GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI
+1FC6; F; 03B7 0342; # GREEK SMALL LETTER ETA WITH PERISPOMENI
+1FC7; F; 03B7 0342 03B9; # GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI
+1FC8; C; 1F72; # GREEK CAPITAL LETTER EPSILON WITH VARIA
+1FC9; C; 1F73; # GREEK CAPITAL LETTER EPSILON WITH OXIA
+1FCA; C; 1F74; # GREEK CAPITAL LETTER ETA WITH VARIA
+1FCB; C; 1F75; # GREEK CAPITAL LETTER ETA WITH OXIA
+1FCC; F; 03B7 03B9; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
+1FCC; S; 1FC3; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
+1FD2; F; 03B9 0308 0300; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA
+1FD3; F; 03B9 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
+1FD6; F; 03B9 0342; # GREEK SMALL LETTER IOTA WITH PERISPOMENI
+1FD7; F; 03B9 0308 0342; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI
+1FD8; C; 1FD0; # GREEK CAPITAL LETTER IOTA WITH VRACHY
+1FD9; C; 1FD1; # GREEK CAPITAL LETTER IOTA WITH MACRON
+1FDA; C; 1F76; # GREEK CAPITAL LETTER IOTA WITH VARIA
+1FDB; C; 1F77; # GREEK CAPITAL LETTER IOTA WITH OXIA
+1FE2; F; 03C5 0308 0300; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA
+1FE3; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
+1FE4; F; 03C1 0313; # GREEK SMALL LETTER RHO WITH PSILI
+1FE6; F; 03C5 0342; # GREEK SMALL LETTER UPSILON WITH PERISPOMENI
+1FE7; F; 03C5 0308 0342; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI
+1FE8; C; 1FE0; # GREEK CAPITAL LETTER UPSILON WITH VRACHY
+1FE9; C; 1FE1; # GREEK CAPITAL LETTER UPSILON WITH MACRON
+1FEA; C; 1F7A; # GREEK CAPITAL LETTER UPSILON WITH VARIA
+1FEB; C; 1F7B; # GREEK CAPITAL LETTER UPSILON WITH OXIA
+1FEC; C; 1FE5; # GREEK CAPITAL LETTER RHO WITH DASIA
+1FF2; F; 1F7C 03B9; # GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI
+1FF3; F; 03C9 03B9; # GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI
+1FF4; F; 03CE 03B9; # GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
+1FF6; F; 03C9 0342; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI
+1FF7; F; 03C9 0342 03B9; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI
+1FF8; C; 1F78; # GREEK CAPITAL LETTER OMICRON WITH VARIA
+1FF9; C; 1F79; # GREEK CAPITAL LETTER OMICRON WITH OXIA
+1FFA; C; 1F7C; # GREEK CAPITAL LETTER OMEGA WITH VARIA
+1FFB; C; 1F7D; # GREEK CAPITAL LETTER OMEGA WITH OXIA
+1FFC; F; 03C9 03B9; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
+1FFC; S; 1FF3; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
+2126; C; 03C9; # OHM SIGN
+212A; C; 006B; # KELVIN SIGN
+212B; C; 00E5; # ANGSTROM SIGN
+2132; C; 214E; # TURNED CAPITAL F
+2160; C; 2170; # ROMAN NUMERAL ONE
+2161; C; 2171; # ROMAN NUMERAL TWO
+2162; C; 2172; # ROMAN NUMERAL THREE
+2163; C; 2173; # ROMAN NUMERAL FOUR
+2164; C; 2174; # ROMAN NUMERAL FIVE
+2165; C; 2175; # ROMAN NUMERAL SIX
+2166; C; 2176; # ROMAN NUMERAL SEVEN
+2167; C; 2177; # ROMAN NUMERAL EIGHT
+2168; C; 2178; # ROMAN NUMERAL NINE
+2169; C; 2179; # ROMAN NUMERAL TEN
+216A; C; 217A; # ROMAN NUMERAL ELEVEN
+216B; C; 217B; # ROMAN NUMERAL TWELVE
+216C; C; 217C; # ROMAN NUMERAL FIFTY
+216D; C; 217D; # ROMAN NUMERAL ONE HUNDRED
+216E; C; 217E; # ROMAN NUMERAL FIVE HUNDRED
+216F; C; 217F; # ROMAN NUMERAL ONE THOUSAND
+2183; C; 2184; # ROMAN NUMERAL REVERSED ONE HUNDRED
+24B6; C; 24D0; # CIRCLED LATIN CAPITAL LETTER A
+24B7; C; 24D1; # CIRCLED LATIN CAPITAL LETTER B
+24B8; C; 24D2; # CIRCLED LATIN CAPITAL LETTER C
+24B9; C; 24D3; # CIRCLED LATIN CAPITAL LETTER D
+24BA; C; 24D4; # CIRCLED LATIN CAPITAL LETTER E
+24BB; C; 24D5; # CIRCLED LATIN CAPITAL LETTER F
+24BC; C; 24D6; # CIRCLED LATIN CAPITAL LETTER G
+24BD; C; 24D7; # CIRCLED LATIN CAPITAL LETTER H
+24BE; C; 24D8; # CIRCLED LATIN CAPITAL LETTER I
+24BF; C; 24D9; # CIRCLED LATIN CAPITAL LETTER J
+24C0; C; 24DA; # CIRCLED LATIN CAPITAL LETTER K
+24C1; C; 24DB; # CIRCLED LATIN CAPITAL LETTER L
+24C2; C; 24DC; # CIRCLED LATIN CAPITAL LETTER M
+24C3; C; 24DD; # CIRCLED LATIN CAPITAL LETTER N
+24C4; C; 24DE; # CIRCLED LATIN CAPITAL LETTER O
+24C5; C; 24DF; # CIRCLED LATIN CAPITAL LETTER P
+24C6; C; 24E0; # CIRCLED LATIN CAPITAL LETTER Q
+24C7; C; 24E1; # CIRCLED LATIN CAPITAL LETTER R
+24C8; C; 24E2; # CIRCLED LATIN CAPITAL LETTER S
+24C9; C; 24E3; # CIRCLED LATIN CAPITAL LETTER T
+24CA; C; 24E4; # CIRCLED LATIN CAPITAL LETTER U
+24CB; C; 24E5; # CIRCLED LATIN CAPITAL LETTER V
+24CC; C; 24E6; # CIRCLED LATIN CAPITAL LETTER W
+24CD; C; 24E7; # CIRCLED LATIN CAPITAL LETTER X
+24CE; C; 24E8; # CIRCLED LATIN CAPITAL LETTER Y
+24CF; C; 24E9; # CIRCLED LATIN CAPITAL LETTER Z
+2C00; C; 2C30; # GLAGOLITIC CAPITAL LETTER AZU
+2C01; C; 2C31; # GLAGOLITIC CAPITAL LETTER BUKY
+2C02; C; 2C32; # GLAGOLITIC CAPITAL LETTER VEDE
+2C03; C; 2C33; # GLAGOLITIC CAPITAL LETTER GLAGOLI
+2C04; C; 2C34; # GLAGOLITIC CAPITAL LETTER DOBRO
+2C05; C; 2C35; # GLAGOLITIC CAPITAL LETTER YESTU
+2C06; C; 2C36; # GLAGOLITIC CAPITAL LETTER ZHIVETE
+2C07; C; 2C37; # GLAGOLITIC CAPITAL LETTER DZELO
+2C08; C; 2C38; # GLAGOLITIC CAPITAL LETTER ZEMLJA
+2C09; C; 2C39; # GLAGOLITIC CAPITAL LETTER IZHE
+2C0A; C; 2C3A; # GLAGOLITIC CAPITAL LETTER INITIAL IZHE
+2C0B; C; 2C3B; # GLAGOLITIC CAPITAL LETTER I
+2C0C; C; 2C3C; # GLAGOLITIC CAPITAL LETTER DJERVI
+2C0D; C; 2C3D; # GLAGOLITIC CAPITAL LETTER KAKO
+2C0E; C; 2C3E; # GLAGOLITIC CAPITAL LETTER LJUDIJE
+2C0F; C; 2C3F; # GLAGOLITIC CAPITAL LETTER MYSLITE
+2C10; C; 2C40; # GLAGOLITIC CAPITAL LETTER NASHI
+2C11; C; 2C41; # GLAGOLITIC CAPITAL LETTER ONU
+2C12; C; 2C42; # GLAGOLITIC CAPITAL LETTER POKOJI
+2C13; C; 2C43; # GLAGOLITIC CAPITAL LETTER RITSI
+2C14; C; 2C44; # GLAGOLITIC CAPITAL LETTER SLOVO
+2C15; C; 2C45; # GLAGOLITIC CAPITAL LETTER TVRIDO
+2C16; C; 2C46; # GLAGOLITIC CAPITAL LETTER UKU
+2C17; C; 2C47; # GLAGOLITIC CAPITAL LETTER FRITU
+2C18; C; 2C48; # GLAGOLITIC CAPITAL LETTER HERU
+2C19; C; 2C49; # GLAGOLITIC CAPITAL LETTER OTU
+2C1A; C; 2C4A; # GLAGOLITIC CAPITAL LETTER PE
+2C1B; C; 2C4B; # GLAGOLITIC CAPITAL LETTER SHTA
+2C1C; C; 2C4C; # GLAGOLITIC CAPITAL LETTER TSI
+2C1D; C; 2C4D; # GLAGOLITIC CAPITAL LETTER CHRIVI
+2C1E; C; 2C4E; # GLAGOLITIC CAPITAL LETTER SHA
+2C1F; C; 2C4F; # GLAGOLITIC CAPITAL LETTER YERU
+2C20; C; 2C50; # GLAGOLITIC CAPITAL LETTER YERI
+2C21; C; 2C51; # GLAGOLITIC CAPITAL LETTER YATI
+2C22; C; 2C52; # GLAGOLITIC CAPITAL LETTER SPIDERY HA
+2C23; C; 2C53; # GLAGOLITIC CAPITAL LETTER YU
+2C24; C; 2C54; # GLAGOLITIC CAPITAL LETTER SMALL YUS
+2C25; C; 2C55; # GLAGOLITIC CAPITAL LETTER SMALL YUS WITH TAIL
+2C26; C; 2C56; # GLAGOLITIC CAPITAL LETTER YO
+2C27; C; 2C57; # GLAGOLITIC CAPITAL LETTER IOTATED SMALL YUS
+2C28; C; 2C58; # GLAGOLITIC CAPITAL LETTER BIG YUS
+2C29; C; 2C59; # GLAGOLITIC CAPITAL LETTER IOTATED BIG YUS
+2C2A; C; 2C5A; # GLAGOLITIC CAPITAL LETTER FITA
+2C2B; C; 2C5B; # GLAGOLITIC CAPITAL LETTER IZHITSA
+2C2C; C; 2C5C; # GLAGOLITIC CAPITAL LETTER SHTAPIC
+2C2D; C; 2C5D; # GLAGOLITIC CAPITAL LETTER TROKUTASTI A
+2C2E; C; 2C5E; # GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE
+2C60; C; 2C61; # LATIN CAPITAL LETTER L WITH DOUBLE BAR
+2C62; C; 026B; # LATIN CAPITAL LETTER L WITH MIDDLE TILDE
+2C63; C; 1D7D; # LATIN CAPITAL LETTER P WITH STROKE
+2C64; C; 027D; # LATIN CAPITAL LETTER R WITH TAIL
+2C67; C; 2C68; # LATIN CAPITAL LETTER H WITH DESCENDER
+2C69; C; 2C6A; # LATIN CAPITAL LETTER K WITH DESCENDER
+2C6B; C; 2C6C; # LATIN CAPITAL LETTER Z WITH DESCENDER
+2C6D; C; 0251; # LATIN CAPITAL LETTER ALPHA
+2C6E; C; 0271; # LATIN CAPITAL LETTER M WITH HOOK
+2C6F; C; 0250; # LATIN CAPITAL LETTER TURNED A
+2C70; C; 0252; # LATIN CAPITAL LETTER TURNED ALPHA
+2C72; C; 2C73; # LATIN CAPITAL LETTER W WITH HOOK
+2C75; C; 2C76; # LATIN CAPITAL LETTER HALF H
+2C7E; C; 023F; # LATIN CAPITAL LETTER S WITH SWASH TAIL
+2C7F; C; 0240; # LATIN CAPITAL LETTER Z WITH SWASH TAIL
+2C80; C; 2C81; # COPTIC CAPITAL LETTER ALFA
+2C82; C; 2C83; # COPTIC CAPITAL LETTER VIDA
+2C84; C; 2C85; # COPTIC CAPITAL LETTER GAMMA
+2C86; C; 2C87; # COPTIC CAPITAL LETTER DALDA
+2C88; C; 2C89; # COPTIC CAPITAL LETTER EIE
+2C8A; C; 2C8B; # COPTIC CAPITAL LETTER SOU
+2C8C; C; 2C8D; # COPTIC CAPITAL LETTER ZATA
+2C8E; C; 2C8F; # COPTIC CAPITAL LETTER HATE
+2C90; C; 2C91; # COPTIC CAPITAL LETTER THETHE
+2C92; C; 2C93; # COPTIC CAPITAL LETTER IAUDA
+2C94; C; 2C95; # COPTIC CAPITAL LETTER KAPA
+2C96; C; 2C97; # COPTIC CAPITAL LETTER LAULA
+2C98; C; 2C99; # COPTIC CAPITAL LETTER MI
+2C9A; C; 2C9B; # COPTIC CAPITAL LETTER NI
+2C9C; C; 2C9D; # COPTIC CAPITAL LETTER KSI
+2C9E; C; 2C9F; # COPTIC CAPITAL LETTER O
+2CA0; C; 2CA1; # COPTIC CAPITAL LETTER PI
+2CA2; C; 2CA3; # COPTIC CAPITAL LETTER RO
+2CA4; C; 2CA5; # COPTIC CAPITAL LETTER SIMA
+2CA6; C; 2CA7; # COPTIC CAPITAL LETTER TAU
+2CA8; C; 2CA9; # COPTIC CAPITAL LETTER UA
+2CAA; C; 2CAB; # COPTIC CAPITAL LETTER FI
+2CAC; C; 2CAD; # COPTIC CAPITAL LETTER KHI
+2CAE; C; 2CAF; # COPTIC CAPITAL LETTER PSI
+2CB0; C; 2CB1; # COPTIC CAPITAL LETTER OOU
+2CB2; C; 2CB3; # COPTIC CAPITAL LETTER DIALECT-P ALEF
+2CB4; C; 2CB5; # COPTIC CAPITAL LETTER OLD COPTIC AIN
+2CB6; C; 2CB7; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC EIE
+2CB8; C; 2CB9; # COPTIC CAPITAL LETTER DIALECT-P KAPA
+2CBA; C; 2CBB; # COPTIC CAPITAL LETTER DIALECT-P NI
+2CBC; C; 2CBD; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC NI
+2CBE; C; 2CBF; # COPTIC CAPITAL LETTER OLD COPTIC OOU
+2CC0; C; 2CC1; # COPTIC CAPITAL LETTER SAMPI
+2CC2; C; 2CC3; # COPTIC CAPITAL LETTER CROSSED SHEI
+2CC4; C; 2CC5; # COPTIC CAPITAL LETTER OLD COPTIC SHEI
+2CC6; C; 2CC7; # COPTIC CAPITAL LETTER OLD COPTIC ESH
+2CC8; C; 2CC9; # COPTIC CAPITAL LETTER AKHMIMIC KHEI
+2CCA; C; 2CCB; # COPTIC CAPITAL LETTER DIALECT-P HORI
+2CCC; C; 2CCD; # COPTIC CAPITAL LETTER OLD COPTIC HORI
+2CCE; C; 2CCF; # COPTIC CAPITAL LETTER OLD COPTIC HA
+2CD0; C; 2CD1; # COPTIC CAPITAL LETTER L-SHAPED HA
+2CD2; C; 2CD3; # COPTIC CAPITAL LETTER OLD COPTIC HEI
+2CD4; C; 2CD5; # COPTIC CAPITAL LETTER OLD COPTIC HAT
+2CD6; C; 2CD7; # COPTIC CAPITAL LETTER OLD COPTIC GANGIA
+2CD8; C; 2CD9; # COPTIC CAPITAL LETTER OLD COPTIC DJA
+2CDA; C; 2CDB; # COPTIC CAPITAL LETTER OLD COPTIC SHIMA
+2CDC; C; 2CDD; # COPTIC CAPITAL LETTER OLD NUBIAN SHIMA
+2CDE; C; 2CDF; # COPTIC CAPITAL LETTER OLD NUBIAN NGI
+2CE0; C; 2CE1; # COPTIC CAPITAL LETTER OLD NUBIAN NYI
+2CE2; C; 2CE3; # COPTIC CAPITAL LETTER OLD NUBIAN WAU
+2CEB; C; 2CEC; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI
+2CED; C; 2CEE; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA
+2CF2; C; 2CF3; # COPTIC CAPITAL LETTER BOHAIRIC KHEI
+A640; C; A641; # CYRILLIC CAPITAL LETTER ZEMLYA
+A642; C; A643; # CYRILLIC CAPITAL LETTER DZELO
+A644; C; A645; # CYRILLIC CAPITAL LETTER REVERSED DZE
+A646; C; A647; # CYRILLIC CAPITAL LETTER IOTA
+A648; C; A649; # CYRILLIC CAPITAL LETTER DJERV
+A64A; C; A64B; # CYRILLIC CAPITAL LETTER MONOGRAPH UK
+A64C; C; A64D; # CYRILLIC CAPITAL LETTER BROAD OMEGA
+A64E; C; A64F; # CYRILLIC CAPITAL LETTER NEUTRAL YER
+A650; C; A651; # CYRILLIC CAPITAL LETTER YERU WITH BACK YER
+A652; C; A653; # CYRILLIC CAPITAL LETTER IOTIFIED YAT
+A654; C; A655; # CYRILLIC CAPITAL LETTER REVERSED YU
+A656; C; A657; # CYRILLIC CAPITAL LETTER IOTIFIED A
+A658; C; A659; # CYRILLIC CAPITAL LETTER CLOSED LITTLE YUS
+A65A; C; A65B; # CYRILLIC CAPITAL LETTER BLENDED YUS
+A65C; C; A65D; # CYRILLIC CAPITAL LETTER IOTIFIED CLOSED LITTLE YUS
+A65E; C; A65F; # CYRILLIC CAPITAL LETTER YN
+A660; C; A661; # CYRILLIC CAPITAL LETTER REVERSED TSE
+A662; C; A663; # CYRILLIC CAPITAL LETTER SOFT DE
+A664; C; A665; # CYRILLIC CAPITAL LETTER SOFT EL
+A666; C; A667; # CYRILLIC CAPITAL LETTER SOFT EM
+A668; C; A669; # CYRILLIC CAPITAL LETTER MONOCULAR O
+A66A; C; A66B; # CYRILLIC CAPITAL LETTER BINOCULAR O
+A66C; C; A66D; # CYRILLIC CAPITAL LETTER DOUBLE MONOCULAR O
+A680; C; A681; # CYRILLIC CAPITAL LETTER DWE
+A682; C; A683; # CYRILLIC CAPITAL LETTER DZWE
+A684; C; A685; # CYRILLIC CAPITAL LETTER ZHWE
+A686; C; A687; # CYRILLIC CAPITAL LETTER CCHE
+A688; C; A689; # CYRILLIC CAPITAL LETTER DZZE
+A68A; C; A68B; # CYRILLIC CAPITAL LETTER TE WITH MIDDLE HOOK
+A68C; C; A68D; # CYRILLIC CAPITAL LETTER TWE
+A68E; C; A68F; # CYRILLIC CAPITAL LETTER TSWE
+A690; C; A691; # CYRILLIC CAPITAL LETTER TSSE
+A692; C; A693; # CYRILLIC CAPITAL LETTER TCHE
+A694; C; A695; # CYRILLIC CAPITAL LETTER HWE
+A696; C; A697; # CYRILLIC CAPITAL LETTER SHWE
+A698; C; A699; # CYRILLIC CAPITAL LETTER DOUBLE O
+A69A; C; A69B; # CYRILLIC CAPITAL LETTER CROSSED O
+A722; C; A723; # LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF
+A724; C; A725; # LATIN CAPITAL LETTER EGYPTOLOGICAL AIN
+A726; C; A727; # LATIN CAPITAL LETTER HENG
+A728; C; A729; # LATIN CAPITAL LETTER TZ
+A72A; C; A72B; # LATIN CAPITAL LETTER TRESILLO
+A72C; C; A72D; # LATIN CAPITAL LETTER CUATRILLO
+A72E; C; A72F; # LATIN CAPITAL LETTER CUATRILLO WITH COMMA
+A732; C; A733; # LATIN CAPITAL LETTER AA
+A734; C; A735; # LATIN CAPITAL LETTER AO
+A736; C; A737; # LATIN CAPITAL LETTER AU
+A738; C; A739; # LATIN CAPITAL LETTER AV
+A73A; C; A73B; # LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR
+A73C; C; A73D; # LATIN CAPITAL LETTER AY
+A73E; C; A73F; # LATIN CAPITAL LETTER REVERSED C WITH DOT
+A740; C; A741; # LATIN CAPITAL LETTER K WITH STROKE
+A742; C; A743; # LATIN CAPITAL LETTER K WITH DIAGONAL STROKE
+A744; C; A745; # LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE
+A746; C; A747; # LATIN CAPITAL LETTER BROKEN L
+A748; C; A749; # LATIN CAPITAL LETTER L WITH HIGH STROKE
+A74A; C; A74B; # LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY
+A74C; C; A74D; # LATIN CAPITAL LETTER O WITH LOOP
+A74E; C; A74F; # LATIN CAPITAL LETTER OO
+A750; C; A751; # LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER
+A752; C; A753; # LATIN CAPITAL LETTER P WITH FLOURISH
+A754; C; A755; # LATIN CAPITAL LETTER P WITH SQUIRREL TAIL
+A756; C; A757; # LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER
+A758; C; A759; # LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE
+A75A; C; A75B; # LATIN CAPITAL LETTER R ROTUNDA
+A75C; C; A75D; # LATIN CAPITAL LETTER RUM ROTUNDA
+A75E; C; A75F; # LATIN CAPITAL LETTER V WITH DIAGONAL STROKE
+A760; C; A761; # LATIN CAPITAL LETTER VY
+A762; C; A763; # LATIN CAPITAL LETTER VISIGOTHIC Z
+A764; C; A765; # LATIN CAPITAL LETTER THORN WITH STROKE
+A766; C; A767; # LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER
+A768; C; A769; # LATIN CAPITAL LETTER VEND
+A76A; C; A76B; # LATIN CAPITAL LETTER ET
+A76C; C; A76D; # LATIN CAPITAL LETTER IS
+A76E; C; A76F; # LATIN CAPITAL LETTER CON
+A779; C; A77A; # LATIN CAPITAL LETTER INSULAR D
+A77B; C; A77C; # LATIN CAPITAL LETTER INSULAR F
+A77D; C; 1D79; # LATIN CAPITAL LETTER INSULAR G
+A77E; C; A77F; # LATIN CAPITAL LETTER TURNED INSULAR G
+A780; C; A781; # LATIN CAPITAL LETTER TURNED L
+A782; C; A783; # LATIN CAPITAL LETTER INSULAR R
+A784; C; A785; # LATIN CAPITAL LETTER INSULAR S
+A786; C; A787; # LATIN CAPITAL LETTER INSULAR T
+A78B; C; A78C; # LATIN CAPITAL LETTER SALTILLO
+A78D; C; 0265; # LATIN CAPITAL LETTER TURNED H
+A790; C; A791; # LATIN CAPITAL LETTER N WITH DESCENDER
+A792; C; A793; # LATIN CAPITAL LETTER C WITH BAR
+A796; C; A797; # LATIN CAPITAL LETTER B WITH FLOURISH
+A798; C; A799; # LATIN CAPITAL LETTER F WITH STROKE
+A79A; C; A79B; # LATIN CAPITAL LETTER VOLAPUK AE
+A79C; C; A79D; # LATIN CAPITAL LETTER VOLAPUK OE
+A79E; C; A79F; # LATIN CAPITAL LETTER VOLAPUK UE
+A7A0; C; A7A1; # LATIN CAPITAL LETTER G WITH OBLIQUE STROKE
+A7A2; C; A7A3; # LATIN CAPITAL LETTER K WITH OBLIQUE STROKE
+A7A4; C; A7A5; # LATIN CAPITAL LETTER N WITH OBLIQUE STROKE
+A7A6; C; A7A7; # LATIN CAPITAL LETTER R WITH OBLIQUE STROKE
+A7A8; C; A7A9; # LATIN CAPITAL LETTER S WITH OBLIQUE STROKE
+A7AA; C; 0266; # LATIN CAPITAL LETTER H WITH HOOK
+A7AB; C; 025C; # LATIN CAPITAL LETTER REVERSED OPEN E
+A7AC; C; 0261; # LATIN CAPITAL LETTER SCRIPT G
+A7AD; C; 026C; # LATIN CAPITAL LETTER L WITH BELT
+A7B0; C; 029E; # LATIN CAPITAL LETTER TURNED K
+A7B1; C; 0287; # LATIN CAPITAL LETTER TURNED T
+A7B2; C; 029D; # LATIN CAPITAL LETTER J WITH CROSSED-TAIL
+A7B3; C; AB53; # LATIN CAPITAL LETTER CHI
+A7B4; C; A7B5; # LATIN CAPITAL LETTER BETA
+A7B6; C; A7B7; # LATIN CAPITAL LETTER OMEGA
+AB70; C; 13A0; # CHEROKEE SMALL LETTER A
+AB71; C; 13A1; # CHEROKEE SMALL LETTER E
+AB72; C; 13A2; # CHEROKEE SMALL LETTER I
+AB73; C; 13A3; # CHEROKEE SMALL LETTER O
+AB74; C; 13A4; # CHEROKEE SMALL LETTER U
+AB75; C; 13A5; # CHEROKEE SMALL LETTER V
+AB76; C; 13A6; # CHEROKEE SMALL LETTER GA
+AB77; C; 13A7; # CHEROKEE SMALL LETTER KA
+AB78; C; 13A8; # CHEROKEE SMALL LETTER GE
+AB79; C; 13A9; # CHEROKEE SMALL LETTER GI
+AB7A; C; 13AA; # CHEROKEE SMALL LETTER GO
+AB7B; C; 13AB; # CHEROKEE SMALL LETTER GU
+AB7C; C; 13AC; # CHEROKEE SMALL LETTER GV
+AB7D; C; 13AD; # CHEROKEE SMALL LETTER HA
+AB7E; C; 13AE; # CHEROKEE SMALL LETTER HE
+AB7F; C; 13AF; # CHEROKEE SMALL LETTER HI
+AB80; C; 13B0; # CHEROKEE SMALL LETTER HO
+AB81; C; 13B1; # CHEROKEE SMALL LETTER HU
+AB82; C; 13B2; # CHEROKEE SMALL LETTER HV
+AB83; C; 13B3; # CHEROKEE SMALL LETTER LA
+AB84; C; 13B4; # CHEROKEE SMALL LETTER LE
+AB85; C; 13B5; # CHEROKEE SMALL LETTER LI
+AB86; C; 13B6; # CHEROKEE SMALL LETTER LO
+AB87; C; 13B7; # CHEROKEE SMALL LETTER LU
+AB88; C; 13B8; # CHEROKEE SMALL LETTER LV
+AB89; C; 13B9; # CHEROKEE SMALL LETTER MA
+AB8A; C; 13BA; # CHEROKEE SMALL LETTER ME
+AB8B; C; 13BB; # CHEROKEE SMALL LETTER MI
+AB8C; C; 13BC; # CHEROKEE SMALL LETTER MO
+AB8D; C; 13BD; # CHEROKEE SMALL LETTER MU
+AB8E; C; 13BE; # CHEROKEE SMALL LETTER NA
+AB8F; C; 13BF; # CHEROKEE SMALL LETTER HNA
+AB90; C; 13C0; # CHEROKEE SMALL LETTER NAH
+AB91; C; 13C1; # CHEROKEE SMALL LETTER NE
+AB92; C; 13C2; # CHEROKEE SMALL LETTER NI
+AB93; C; 13C3; # CHEROKEE SMALL LETTER NO
+AB94; C; 13C4; # CHEROKEE SMALL LETTER NU
+AB95; C; 13C5; # CHEROKEE SMALL LETTER NV
+AB96; C; 13C6; # CHEROKEE SMALL LETTER QUA
+AB97; C; 13C7; # CHEROKEE SMALL LETTER QUE
+AB98; C; 13C8; # CHEROKEE SMALL LETTER QUI
+AB99; C; 13C9; # CHEROKEE SMALL LETTER QUO
+AB9A; C; 13CA; # CHEROKEE SMALL LETTER QUU
+AB9B; C; 13CB; # CHEROKEE SMALL LETTER QUV
+AB9C; C; 13CC; # CHEROKEE SMALL LETTER SA
+AB9D; C; 13CD; # CHEROKEE SMALL LETTER S
+AB9E; C; 13CE; # CHEROKEE SMALL LETTER SE
+AB9F; C; 13CF; # CHEROKEE SMALL LETTER SI
+ABA0; C; 13D0; # CHEROKEE SMALL LETTER SO
+ABA1; C; 13D1; # CHEROKEE SMALL LETTER SU
+ABA2; C; 13D2; # CHEROKEE SMALL LETTER SV
+ABA3; C; 13D3; # CHEROKEE SMALL LETTER DA
+ABA4; C; 13D4; # CHEROKEE SMALL LETTER TA
+ABA5; C; 13D5; # CHEROKEE SMALL LETTER DE
+ABA6; C; 13D6; # CHEROKEE SMALL LETTER TE
+ABA7; C; 13D7; # CHEROKEE SMALL LETTER DI
+ABA8; C; 13D8; # CHEROKEE SMALL LETTER TI
+ABA9; C; 13D9; # CHEROKEE SMALL LETTER DO
+ABAA; C; 13DA; # CHEROKEE SMALL LETTER DU
+ABAB; C; 13DB; # CHEROKEE SMALL LETTER DV
+ABAC; C; 13DC; # CHEROKEE SMALL LETTER DLA
+ABAD; C; 13DD; # CHEROKEE SMALL LETTER TLA
+ABAE; C; 13DE; # CHEROKEE SMALL LETTER TLE
+ABAF; C; 13DF; # CHEROKEE SMALL LETTER TLI
+ABB0; C; 13E0; # CHEROKEE SMALL LETTER TLO
+ABB1; C; 13E1; # CHEROKEE SMALL LETTER TLU
+ABB2; C; 13E2; # CHEROKEE SMALL LETTER TLV
+ABB3; C; 13E3; # CHEROKEE SMALL LETTER TSA
+ABB4; C; 13E4; # CHEROKEE SMALL LETTER TSE
+ABB5; C; 13E5; # CHEROKEE SMALL LETTER TSI
+ABB6; C; 13E6; # CHEROKEE SMALL LETTER TSO
+ABB7; C; 13E7; # CHEROKEE SMALL LETTER TSU
+ABB8; C; 13E8; # CHEROKEE SMALL LETTER TSV
+ABB9; C; 13E9; # CHEROKEE SMALL LETTER WA
+ABBA; C; 13EA; # CHEROKEE SMALL LETTER WE
+ABBB; C; 13EB; # CHEROKEE SMALL LETTER WI
+ABBC; C; 13EC; # CHEROKEE SMALL LETTER WO
+ABBD; C; 13ED; # CHEROKEE SMALL LETTER WU
+ABBE; C; 13EE; # CHEROKEE SMALL LETTER WV
+ABBF; C; 13EF; # CHEROKEE SMALL LETTER YA
+FB00; F; 0066 0066; # LATIN SMALL LIGATURE FF
+FB01; F; 0066 0069; # LATIN SMALL LIGATURE FI
+FB02; F; 0066 006C; # LATIN SMALL LIGATURE FL
+FB03; F; 0066 0066 0069; # LATIN SMALL LIGATURE FFI
+FB04; F; 0066 0066 006C; # LATIN SMALL LIGATURE FFL
+FB05; F; 0073 0074; # LATIN SMALL LIGATURE LONG S T
+FB06; F; 0073 0074; # LATIN SMALL LIGATURE ST
+FB13; F; 0574 0576; # ARMENIAN SMALL LIGATURE MEN NOW
+FB14; F; 0574 0565; # ARMENIAN SMALL LIGATURE MEN ECH
+FB15; F; 0574 056B; # ARMENIAN SMALL LIGATURE MEN INI
+FB16; F; 057E 0576; # ARMENIAN SMALL LIGATURE VEW NOW
+FB17; F; 0574 056D; # ARMENIAN SMALL LIGATURE MEN XEH
+FF21; C; FF41; # FULLWIDTH LATIN CAPITAL LETTER A
+FF22; C; FF42; # FULLWIDTH LATIN CAPITAL LETTER B
+FF23; C; FF43; # FULLWIDTH LATIN CAPITAL LETTER C
+FF24; C; FF44; # FULLWIDTH LATIN CAPITAL LETTER D
+FF25; C; FF45; # FULLWIDTH LATIN CAPITAL LETTER E
+FF26; C; FF46; # FULLWIDTH LATIN CAPITAL LETTER F
+FF27; C; FF47; # FULLWIDTH LATIN CAPITAL LETTER G
+FF28; C; FF48; # FULLWIDTH LATIN CAPITAL LETTER H
+FF29; C; FF49; # FULLWIDTH LATIN CAPITAL LETTER I
+FF2A; C; FF4A; # FULLWIDTH LATIN CAPITAL LETTER J
+FF2B; C; FF4B; # FULLWIDTH LATIN CAPITAL LETTER K
+FF2C; C; FF4C; # FULLWIDTH LATIN CAPITAL LETTER L
+FF2D; C; FF4D; # FULLWIDTH LATIN CAPITAL LETTER M
+FF2E; C; FF4E; # FULLWIDTH LATIN CAPITAL LETTER N
+FF2F; C; FF4F; # FULLWIDTH LATIN CAPITAL LETTER O
+FF30; C; FF50; # FULLWIDTH LATIN CAPITAL LETTER P
+FF31; C; FF51; # FULLWIDTH LATIN CAPITAL LETTER Q
+FF32; C; FF52; # FULLWIDTH LATIN CAPITAL LETTER R
+FF33; C; FF53; # FULLWIDTH LATIN CAPITAL LETTER S
+FF34; C; FF54; # FULLWIDTH LATIN CAPITAL LETTER T
+FF35; C; FF55; # FULLWIDTH LATIN CAPITAL LETTER U
+FF36; C; FF56; # FULLWIDTH LATIN CAPITAL LETTER V
+FF37; C; FF57; # FULLWIDTH LATIN CAPITAL LETTER W
+FF38; C; FF58; # FULLWIDTH LATIN CAPITAL LETTER X
+FF39; C; FF59; # FULLWIDTH LATIN CAPITAL LETTER Y
+FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z
+10400; C; 10428; # DESERET CAPITAL LETTER LONG I
+10401; C; 10429; # DESERET CAPITAL LETTER LONG E
+10402; C; 1042A; # DESERET CAPITAL LETTER LONG A
+10403; C; 1042B; # DESERET CAPITAL LETTER LONG AH
+10404; C; 1042C; # DESERET CAPITAL LETTER LONG O
+10405; C; 1042D; # DESERET CAPITAL LETTER LONG OO
+10406; C; 1042E; # DESERET CAPITAL LETTER SHORT I
+10407; C; 1042F; # DESERET CAPITAL LETTER SHORT E
+10408; C; 10430; # DESERET CAPITAL LETTER SHORT A
+10409; C; 10431; # DESERET CAPITAL LETTER SHORT AH
+1040A; C; 10432; # DESERET CAPITAL LETTER SHORT O
+1040B; C; 10433; # DESERET CAPITAL LETTER SHORT OO
+1040C; C; 10434; # DESERET CAPITAL LETTER AY
+1040D; C; 10435; # DESERET CAPITAL LETTER OW
+1040E; C; 10436; # DESERET CAPITAL LETTER WU
+1040F; C; 10437; # DESERET CAPITAL LETTER YEE
+10410; C; 10438; # DESERET CAPITAL LETTER H
+10411; C; 10439; # DESERET CAPITAL LETTER PEE
+10412; C; 1043A; # DESERET CAPITAL LETTER BEE
+10413; C; 1043B; # DESERET CAPITAL LETTER TEE
+10414; C; 1043C; # DESERET CAPITAL LETTER DEE
+10415; C; 1043D; # DESERET CAPITAL LETTER CHEE
+10416; C; 1043E; # DESERET CAPITAL LETTER JEE
+10417; C; 1043F; # DESERET CAPITAL LETTER KAY
+10418; C; 10440; # DESERET CAPITAL LETTER GAY
+10419; C; 10441; # DESERET CAPITAL LETTER EF
+1041A; C; 10442; # DESERET CAPITAL LETTER VEE
+1041B; C; 10443; # DESERET CAPITAL LETTER ETH
+1041C; C; 10444; # DESERET CAPITAL LETTER THEE
+1041D; C; 10445; # DESERET CAPITAL LETTER ES
+1041E; C; 10446; # DESERET CAPITAL LETTER ZEE
+1041F; C; 10447; # DESERET CAPITAL LETTER ESH
+10420; C; 10448; # DESERET CAPITAL LETTER ZHEE
+10421; C; 10449; # DESERET CAPITAL LETTER ER
+10422; C; 1044A; # DESERET CAPITAL LETTER EL
+10423; C; 1044B; # DESERET CAPITAL LETTER EM
+10424; C; 1044C; # DESERET CAPITAL LETTER EN
+10425; C; 1044D; # DESERET CAPITAL LETTER ENG
+10426; C; 1044E; # DESERET CAPITAL LETTER OI
+10427; C; 1044F; # DESERET CAPITAL LETTER EW
+10C80; C; 10CC0; # OLD HUNGARIAN CAPITAL LETTER A
+10C81; C; 10CC1; # OLD HUNGARIAN CAPITAL LETTER AA
+10C82; C; 10CC2; # OLD HUNGARIAN CAPITAL LETTER EB
+10C83; C; 10CC3; # OLD HUNGARIAN CAPITAL LETTER AMB
+10C84; C; 10CC4; # OLD HUNGARIAN CAPITAL LETTER EC
+10C85; C; 10CC5; # OLD HUNGARIAN CAPITAL LETTER ENC
+10C86; C; 10CC6; # OLD HUNGARIAN CAPITAL LETTER ECS
+10C87; C; 10CC7; # OLD HUNGARIAN CAPITAL LETTER ED
+10C88; C; 10CC8; # OLD HUNGARIAN CAPITAL LETTER AND
+10C89; C; 10CC9; # OLD HUNGARIAN CAPITAL LETTER E
+10C8A; C; 10CCA; # OLD HUNGARIAN CAPITAL LETTER CLOSE E
+10C8B; C; 10CCB; # OLD HUNGARIAN CAPITAL LETTER EE
+10C8C; C; 10CCC; # OLD HUNGARIAN CAPITAL LETTER EF
+10C8D; C; 10CCD; # OLD HUNGARIAN CAPITAL LETTER EG
+10C8E; C; 10CCE; # OLD HUNGARIAN CAPITAL LETTER EGY
+10C8F; C; 10CCF; # OLD HUNGARIAN CAPITAL LETTER EH
+10C90; C; 10CD0; # OLD HUNGARIAN CAPITAL LETTER I
+10C91; C; 10CD1; # OLD HUNGARIAN CAPITAL LETTER II
+10C92; C; 10CD2; # OLD HUNGARIAN CAPITAL LETTER EJ
+10C93; C; 10CD3; # OLD HUNGARIAN CAPITAL LETTER EK
+10C94; C; 10CD4; # OLD HUNGARIAN CAPITAL LETTER AK
+10C95; C; 10CD5; # OLD HUNGARIAN CAPITAL LETTER UNK
+10C96; C; 10CD6; # OLD HUNGARIAN CAPITAL LETTER EL
+10C97; C; 10CD7; # OLD HUNGARIAN CAPITAL LETTER ELY
+10C98; C; 10CD8; # OLD HUNGARIAN CAPITAL LETTER EM
+10C99; C; 10CD9; # OLD HUNGARIAN CAPITAL LETTER EN
+10C9A; C; 10CDA; # OLD HUNGARIAN CAPITAL LETTER ENY
+10C9B; C; 10CDB; # OLD HUNGARIAN CAPITAL LETTER O
+10C9C; C; 10CDC; # OLD HUNGARIAN CAPITAL LETTER OO
+10C9D; C; 10CDD; # OLD HUNGARIAN CAPITAL LETTER NIKOLSBURG OE
+10C9E; C; 10CDE; # OLD HUNGARIAN CAPITAL LETTER RUDIMENTA OE
+10C9F; C; 10CDF; # OLD HUNGARIAN CAPITAL LETTER OEE
+10CA0; C; 10CE0; # OLD HUNGARIAN CAPITAL LETTER EP
+10CA1; C; 10CE1; # OLD HUNGARIAN CAPITAL LETTER EMP
+10CA2; C; 10CE2; # OLD HUNGARIAN CAPITAL LETTER ER
+10CA3; C; 10CE3; # OLD HUNGARIAN CAPITAL LETTER SHORT ER
+10CA4; C; 10CE4; # OLD HUNGARIAN CAPITAL LETTER ES
+10CA5; C; 10CE5; # OLD HUNGARIAN CAPITAL LETTER ESZ
+10CA6; C; 10CE6; # OLD HUNGARIAN CAPITAL LETTER ET
+10CA7; C; 10CE7; # OLD HUNGARIAN CAPITAL LETTER ENT
+10CA8; C; 10CE8; # OLD HUNGARIAN CAPITAL LETTER ETY
+10CA9; C; 10CE9; # OLD HUNGARIAN CAPITAL LETTER ECH
+10CAA; C; 10CEA; # OLD HUNGARIAN CAPITAL LETTER U
+10CAB; C; 10CEB; # OLD HUNGARIAN CAPITAL LETTER UU
+10CAC; C; 10CEC; # OLD HUNGARIAN CAPITAL LETTER NIKOLSBURG UE
+10CAD; C; 10CED; # OLD HUNGARIAN CAPITAL LETTER RUDIMENTA UE
+10CAE; C; 10CEE; # OLD HUNGARIAN CAPITAL LETTER EV
+10CAF; C; 10CEF; # OLD HUNGARIAN CAPITAL LETTER EZ
+10CB0; C; 10CF0; # OLD HUNGARIAN CAPITAL LETTER EZS
+10CB1; C; 10CF1; # OLD HUNGARIAN CAPITAL LETTER ENT-SHAPED SIGN
+10CB2; C; 10CF2; # OLD HUNGARIAN CAPITAL LETTER US
+118A0; C; 118C0; # WARANG CITI CAPITAL LETTER NGAA
+118A1; C; 118C1; # WARANG CITI CAPITAL LETTER A
+118A2; C; 118C2; # WARANG CITI CAPITAL LETTER WI
+118A3; C; 118C3; # WARANG CITI CAPITAL LETTER YU
+118A4; C; 118C4; # WARANG CITI CAPITAL LETTER YA
+118A5; C; 118C5; # WARANG CITI CAPITAL LETTER YO
+118A6; C; 118C6; # WARANG CITI CAPITAL LETTER II
+118A7; C; 118C7; # WARANG CITI CAPITAL LETTER UU
+118A8; C; 118C8; # WARANG CITI CAPITAL LETTER E
+118A9; C; 118C9; # WARANG CITI CAPITAL LETTER O
+118AA; C; 118CA; # WARANG CITI CAPITAL LETTER ANG
+118AB; C; 118CB; # WARANG CITI CAPITAL LETTER GA
+118AC; C; 118CC; # WARANG CITI CAPITAL LETTER KO
+118AD; C; 118CD; # WARANG CITI CAPITAL LETTER ENY
+118AE; C; 118CE; # WARANG CITI CAPITAL LETTER YUJ
+118AF; C; 118CF; # WARANG CITI CAPITAL LETTER UC
+118B0; C; 118D0; # WARANG CITI CAPITAL LETTER ENN
+118B1; C; 118D1; # WARANG CITI CAPITAL LETTER ODD
+118B2; C; 118D2; # WARANG CITI CAPITAL LETTER TTE
+118B3; C; 118D3; # WARANG CITI CAPITAL LETTER NUNG
+118B4; C; 118D4; # WARANG CITI CAPITAL LETTER DA
+118B5; C; 118D5; # WARANG CITI CAPITAL LETTER AT
+118B6; C; 118D6; # WARANG CITI CAPITAL LETTER AM
+118B7; C; 118D7; # WARANG CITI CAPITAL LETTER BU
+118B8; C; 118D8; # WARANG CITI CAPITAL LETTER PU
+118B9; C; 118D9; # WARANG CITI CAPITAL LETTER HIYO
+118BA; C; 118DA; # WARANG CITI CAPITAL LETTER HOLO
+118BB; C; 118DB; # WARANG CITI CAPITAL LETTER HORR
+118BC; C; 118DC; # WARANG CITI CAPITAL LETTER HAR
+118BD; C; 118DD; # WARANG CITI CAPITAL LETTER SSUU
+118BE; C; 118DE; # WARANG CITI CAPITAL LETTER SII
+118BF; C; 118DF; # WARANG CITI CAPITAL LETTER VIYO
+#
+# EOF
diff --git a/src/third_party/unicode-8.0.0/PropList.txt b/src/third_party/unicode-8.0.0/PropList.txt
new file mode 100644
index 00000000000..2eb2926e072
--- /dev/null
+++ b/src/third_party/unicode-8.0.0/PropList.txt
@@ -0,0 +1,1525 @@
+# PropList-8.0.0.txt
+# Date: 2015-05-16, 17:50:38 GMT [MD]
+#
+# Unicode Character Database
+# Copyright (c) 1991-2015 Unicode, Inc.
+# For terms of use, see http://www.unicode.org/terms_of_use.html
+# For documentation, see http://www.unicode.org/reports/tr44/
+
+# ================================================
+
+0009..000D ; White_Space # Cc [5] <control-0009>..<control-000D>
+0020 ; White_Space # Zs SPACE
+0085 ; White_Space # Cc <control-0085>
+00A0 ; White_Space # Zs NO-BREAK SPACE
+1680 ; White_Space # Zs OGHAM SPACE MARK
+2000..200A ; White_Space # Zs [11] EN QUAD..HAIR SPACE
+2028 ; White_Space # Zl LINE SEPARATOR
+2029 ; White_Space # Zp PARAGRAPH SEPARATOR
+202F ; White_Space # Zs NARROW NO-BREAK SPACE
+205F ; White_Space # Zs MEDIUM MATHEMATICAL SPACE
+3000 ; White_Space # Zs IDEOGRAPHIC SPACE
+
+# Total code points: 25
+
+# ================================================
+
+061C ; Bidi_Control # Cf ARABIC LETTER MARK
+200E..200F ; Bidi_Control # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK
+202A..202E ; Bidi_Control # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE
+2066..2069 ; Bidi_Control # Cf [4] LEFT-TO-RIGHT ISOLATE..POP DIRECTIONAL ISOLATE
+
+# Total code points: 12
+
+# ================================================
+
+200C..200D ; Join_Control # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER
+
+# Total code points: 2
+
+# ================================================
+
+002D ; Dash # Pd HYPHEN-MINUS
+058A ; Dash # Pd ARMENIAN HYPHEN
+05BE ; Dash # Pd HEBREW PUNCTUATION MAQAF
+1400 ; Dash # Pd CANADIAN SYLLABICS HYPHEN
+1806 ; Dash # Pd MONGOLIAN TODO SOFT HYPHEN
+2010..2015 ; Dash # Pd [6] HYPHEN..HORIZONTAL BAR
+2053 ; Dash # Po SWUNG DASH
+207B ; Dash # Sm SUPERSCRIPT MINUS
+208B ; Dash # Sm SUBSCRIPT MINUS
+2212 ; Dash # Sm MINUS SIGN
+2E17 ; Dash # Pd DOUBLE OBLIQUE HYPHEN
+2E1A ; Dash # Pd HYPHEN WITH DIAERESIS
+2E3A..2E3B ; Dash # Pd [2] TWO-EM DASH..THREE-EM DASH
+2E40 ; Dash # Pd DOUBLE HYPHEN
+301C ; Dash # Pd WAVE DASH
+3030 ; Dash # Pd WAVY DASH
+30A0 ; Dash # Pd KATAKANA-HIRAGANA DOUBLE HYPHEN
+FE31..FE32 ; Dash # Pd [2] PRESENTATION FORM FOR VERTICAL EM DASH..PRESENTATION FORM FOR VERTICAL EN DASH
+FE58 ; Dash # Pd SMALL EM DASH
+FE63 ; Dash # Pd SMALL HYPHEN-MINUS
+FF0D ; Dash # Pd FULLWIDTH HYPHEN-MINUS
+
+# Total code points: 28
+
+# ================================================
+
+002D ; Hyphen # Pd HYPHEN-MINUS
+00AD ; Hyphen # Cf SOFT HYPHEN
+058A ; Hyphen # Pd ARMENIAN HYPHEN
+1806 ; Hyphen # Pd MONGOLIAN TODO SOFT HYPHEN
+2010..2011 ; Hyphen # Pd [2] HYPHEN..NON-BREAKING HYPHEN
+2E17 ; Hyphen # Pd DOUBLE OBLIQUE HYPHEN
+30FB ; Hyphen # Po KATAKANA MIDDLE DOT
+FE63 ; Hyphen # Pd SMALL HYPHEN-MINUS
+FF0D ; Hyphen # Pd FULLWIDTH HYPHEN-MINUS
+FF65 ; Hyphen # Po HALFWIDTH KATAKANA MIDDLE DOT
+
+# Total code points: 11
+
+# ================================================
+
+0022 ; Quotation_Mark # Po QUOTATION MARK
+0027 ; Quotation_Mark # Po APOSTROPHE
+00AB ; Quotation_Mark # Pi LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+00BB ; Quotation_Mark # Pf RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+2018 ; Quotation_Mark # Pi LEFT SINGLE QUOTATION MARK
+2019 ; Quotation_Mark # Pf RIGHT SINGLE QUOTATION MARK
+201A ; Quotation_Mark # Ps SINGLE LOW-9 QUOTATION MARK
+201B..201C ; Quotation_Mark # Pi [2] SINGLE HIGH-REVERSED-9 QUOTATION MARK..LEFT DOUBLE QUOTATION MARK
+201D ; Quotation_Mark # Pf RIGHT DOUBLE QUOTATION MARK
+201E ; Quotation_Mark # Ps DOUBLE LOW-9 QUOTATION MARK
+201F ; Quotation_Mark # Pi DOUBLE HIGH-REVERSED-9 QUOTATION MARK
+2039 ; Quotation_Mark # Pi SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+203A ; Quotation_Mark # Pf SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+2E42 ; Quotation_Mark # Ps DOUBLE LOW-REVERSED-9 QUOTATION MARK
+300C ; Quotation_Mark # Ps LEFT CORNER BRACKET
+300D ; Quotation_Mark # Pe RIGHT CORNER BRACKET
+300E ; Quotation_Mark # Ps LEFT WHITE CORNER BRACKET
+300F ; Quotation_Mark # Pe RIGHT WHITE CORNER BRACKET
+301D ; Quotation_Mark # Ps REVERSED DOUBLE PRIME QUOTATION MARK
+301E..301F ; Quotation_Mark # Pe [2] DOUBLE PRIME QUOTATION MARK..LOW DOUBLE PRIME QUOTATION MARK
+FE41 ; Quotation_Mark # Ps PRESENTATION FORM FOR VERTICAL LEFT CORNER BRACKET
+FE42 ; Quotation_Mark # Pe PRESENTATION FORM FOR VERTICAL RIGHT CORNER BRACKET
+FE43 ; Quotation_Mark # Ps PRESENTATION FORM FOR VERTICAL LEFT WHITE CORNER BRACKET
+FE44 ; Quotation_Mark # Pe PRESENTATION FORM FOR VERTICAL RIGHT WHITE CORNER BRACKET
+FF02 ; Quotation_Mark # Po FULLWIDTH QUOTATION MARK
+FF07 ; Quotation_Mark # Po FULLWIDTH APOSTROPHE
+FF62 ; Quotation_Mark # Ps HALFWIDTH LEFT CORNER BRACKET
+FF63 ; Quotation_Mark # Pe HALFWIDTH RIGHT CORNER BRACKET
+
+# Total code points: 30
+
+# ================================================
+
+0021 ; Terminal_Punctuation # Po EXCLAMATION MARK
+002C ; Terminal_Punctuation # Po COMMA
+002E ; Terminal_Punctuation # Po FULL STOP
+003A..003B ; Terminal_Punctuation # Po [2] COLON..SEMICOLON
+003F ; Terminal_Punctuation # Po QUESTION MARK
+037E ; Terminal_Punctuation # Po GREEK QUESTION MARK
+0387 ; Terminal_Punctuation # Po GREEK ANO TELEIA
+0589 ; Terminal_Punctuation # Po ARMENIAN FULL STOP
+05C3 ; Terminal_Punctuation # Po HEBREW PUNCTUATION SOF PASUQ
+060C ; Terminal_Punctuation # Po ARABIC COMMA
+061B ; Terminal_Punctuation # Po ARABIC SEMICOLON
+061F ; Terminal_Punctuation # Po ARABIC QUESTION MARK
+06D4 ; Terminal_Punctuation # Po ARABIC FULL STOP
+0700..070A ; Terminal_Punctuation # Po [11] SYRIAC END OF PARAGRAPH..SYRIAC CONTRACTION
+070C ; Terminal_Punctuation # Po SYRIAC HARKLEAN METOBELUS
+07F8..07F9 ; Terminal_Punctuation # Po [2] NKO COMMA..NKO EXCLAMATION MARK
+0830..083E ; Terminal_Punctuation # Po [15] SAMARITAN PUNCTUATION NEQUDAA..SAMARITAN PUNCTUATION ANNAAU
+085E ; Terminal_Punctuation # Po MANDAIC PUNCTUATION
+0964..0965 ; Terminal_Punctuation # Po [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA
+0E5A..0E5B ; Terminal_Punctuation # Po [2] THAI CHARACTER ANGKHANKHU..THAI CHARACTER KHOMUT
+0F08 ; Terminal_Punctuation # Po TIBETAN MARK SBRUL SHAD
+0F0D..0F12 ; Terminal_Punctuation # Po [6] TIBETAN MARK SHAD..TIBETAN MARK RGYA GRAM SHAD
+104A..104B ; Terminal_Punctuation # Po [2] MYANMAR SIGN LITTLE SECTION..MYANMAR SIGN SECTION
+1361..1368 ; Terminal_Punctuation # Po [8] ETHIOPIC WORDSPACE..ETHIOPIC PARAGRAPH SEPARATOR
+166D..166E ; Terminal_Punctuation # Po [2] CANADIAN SYLLABICS CHI SIGN..CANADIAN SYLLABICS FULL STOP
+16EB..16ED ; Terminal_Punctuation # Po [3] RUNIC SINGLE PUNCTUATION..RUNIC CROSS PUNCTUATION
+1735..1736 ; Terminal_Punctuation # Po [2] PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DOUBLE PUNCTUATION
+17D4..17D6 ; Terminal_Punctuation # Po [3] KHMER SIGN KHAN..KHMER SIGN CAMNUC PII KUUH
+17DA ; Terminal_Punctuation # Po KHMER SIGN KOOMUUT
+1802..1805 ; Terminal_Punctuation # Po [4] MONGOLIAN COMMA..MONGOLIAN FOUR DOTS
+1808..1809 ; Terminal_Punctuation # Po [2] MONGOLIAN MANCHU COMMA..MONGOLIAN MANCHU FULL STOP
+1944..1945 ; Terminal_Punctuation # Po [2] LIMBU EXCLAMATION MARK..LIMBU QUESTION MARK
+1AA8..1AAB ; Terminal_Punctuation # Po [4] TAI THAM SIGN KAAN..TAI THAM SIGN SATKAANKUU
+1B5A..1B5B ; Terminal_Punctuation # Po [2] BALINESE PANTI..BALINESE PAMADA
+1B5D..1B5F ; Terminal_Punctuation # Po [3] BALINESE CARIK PAMUNGKAH..BALINESE CARIK PAREREN
+1C3B..1C3F ; Terminal_Punctuation # Po [5] LEPCHA PUNCTUATION TA-ROL..LEPCHA PUNCTUATION TSHOOK
+1C7E..1C7F ; Terminal_Punctuation # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD
+203C..203D ; Terminal_Punctuation # Po [2] DOUBLE EXCLAMATION MARK..INTERROBANG
+2047..2049 ; Terminal_Punctuation # Po [3] DOUBLE QUESTION MARK..EXCLAMATION QUESTION MARK
+2E2E ; Terminal_Punctuation # Po REVERSED QUESTION MARK
+2E3C ; Terminal_Punctuation # Po STENOGRAPHIC FULL STOP
+2E41 ; Terminal_Punctuation # Po REVERSED COMMA
+3001..3002 ; Terminal_Punctuation # Po [2] IDEOGRAPHIC COMMA..IDEOGRAPHIC FULL STOP
+A4FE..A4FF ; Terminal_Punctuation # Po [2] LISU PUNCTUATION COMMA..LISU PUNCTUATION FULL STOP
+A60D..A60F ; Terminal_Punctuation # Po [3] VAI COMMA..VAI QUESTION MARK
+A6F3..A6F7 ; Terminal_Punctuation # Po [5] BAMUM FULL STOP..BAMUM QUESTION MARK
+A876..A877 ; Terminal_Punctuation # Po [2] PHAGS-PA MARK SHAD..PHAGS-PA MARK DOUBLE SHAD
+A8CE..A8CF ; Terminal_Punctuation # Po [2] SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA
+A92F ; Terminal_Punctuation # Po KAYAH LI SIGN SHYA
+A9C7..A9C9 ; Terminal_Punctuation # Po [3] JAVANESE PADA PANGKAT..JAVANESE PADA LUNGSI
+AA5D..AA5F ; Terminal_Punctuation # Po [3] CHAM PUNCTUATION DANDA..CHAM PUNCTUATION TRIPLE DANDA
+AADF ; Terminal_Punctuation # Po TAI VIET SYMBOL KOI KOI
+AAF0..AAF1 ; Terminal_Punctuation # Po [2] MEETEI MAYEK CHEIKHAN..MEETEI MAYEK AHANG KHUDAM
+ABEB ; Terminal_Punctuation # Po MEETEI MAYEK CHEIKHEI
+FE50..FE52 ; Terminal_Punctuation # Po [3] SMALL COMMA..SMALL FULL STOP
+FE54..FE57 ; Terminal_Punctuation # Po [4] SMALL SEMICOLON..SMALL EXCLAMATION MARK
+FF01 ; Terminal_Punctuation # Po FULLWIDTH EXCLAMATION MARK
+FF0C ; Terminal_Punctuation # Po FULLWIDTH COMMA
+FF0E ; Terminal_Punctuation # Po FULLWIDTH FULL STOP
+FF1A..FF1B ; Terminal_Punctuation # Po [2] FULLWIDTH COLON..FULLWIDTH SEMICOLON
+FF1F ; Terminal_Punctuation # Po FULLWIDTH QUESTION MARK
+FF61 ; Terminal_Punctuation # Po HALFWIDTH IDEOGRAPHIC FULL STOP
+FF64 ; Terminal_Punctuation # Po HALFWIDTH IDEOGRAPHIC COMMA
+1039F ; Terminal_Punctuation # Po UGARITIC WORD DIVIDER
+103D0 ; Terminal_Punctuation # Po OLD PERSIAN WORD DIVIDER
+10857 ; Terminal_Punctuation # Po IMPERIAL ARAMAIC SECTION SIGN
+1091F ; Terminal_Punctuation # Po PHOENICIAN WORD SEPARATOR
+10A56..10A57 ; Terminal_Punctuation # Po [2] KHAROSHTHI PUNCTUATION DANDA..KHAROSHTHI PUNCTUATION DOUBLE DANDA
+10AF0..10AF5 ; Terminal_Punctuation # Po [6] MANICHAEAN PUNCTUATION STAR..MANICHAEAN PUNCTUATION TWO DOTS
+10B3A..10B3F ; Terminal_Punctuation # Po [6] TINY TWO DOTS OVER ONE DOT PUNCTUATION..LARGE ONE RING OVER TWO RINGS PUNCTUATION
+10B99..10B9C ; Terminal_Punctuation # Po [4] PSALTER PAHLAVI SECTION MARK..PSALTER PAHLAVI FOUR DOTS WITH DOT
+11047..1104D ; Terminal_Punctuation # Po [7] BRAHMI DANDA..BRAHMI PUNCTUATION LOTUS
+110BE..110C1 ; Terminal_Punctuation # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA
+11141..11143 ; Terminal_Punctuation # Po [3] CHAKMA DANDA..CHAKMA QUESTION MARK
+111C5..111C6 ; Terminal_Punctuation # Po [2] SHARADA DANDA..SHARADA DOUBLE DANDA
+111CD ; Terminal_Punctuation # Po SHARADA SUTRA MARK
+111DE..111DF ; Terminal_Punctuation # Po [2] SHARADA SECTION MARK-1..SHARADA SECTION MARK-2
+11238..1123C ; Terminal_Punctuation # Po [5] KHOJKI DANDA..KHOJKI DOUBLE SECTION MARK
+112A9 ; Terminal_Punctuation # Po MULTANI SECTION MARK
+115C2..115C5 ; Terminal_Punctuation # Po [4] SIDDHAM DANDA..SIDDHAM SEPARATOR BAR
+115C9..115D7 ; Terminal_Punctuation # Po [15] SIDDHAM END OF TEXT MARK..SIDDHAM SECTION MARK WITH CIRCLES AND FOUR ENCLOSURES
+11641..11642 ; Terminal_Punctuation # Po [2] MODI DANDA..MODI DOUBLE DANDA
+1173C..1173E ; Terminal_Punctuation # Po [3] AHOM SIGN SMALL SECTION..AHOM SIGN RULAI
+12470..12474 ; Terminal_Punctuation # Po [5] CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER..CUNEIFORM PUNCTUATION SIGN DIAGONAL QUADCOLON
+16A6E..16A6F ; Terminal_Punctuation # Po [2] MRO DANDA..MRO DOUBLE DANDA
+16AF5 ; Terminal_Punctuation # Po BASSA VAH FULL STOP
+16B37..16B39 ; Terminal_Punctuation # Po [3] PAHAWH HMONG SIGN VOS THOM..PAHAWH HMONG SIGN CIM CHEEM
+16B44 ; Terminal_Punctuation # Po PAHAWH HMONG SIGN XAUS
+1BC9F ; Terminal_Punctuation # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP
+1DA87..1DA8A ; Terminal_Punctuation # Po [4] SIGNWRITING COMMA..SIGNWRITING COLON
+
+# Total code points: 238
+
+# ================================================
+
+005E ; Other_Math # Sk CIRCUMFLEX ACCENT
+03D0..03D2 ; Other_Math # L& [3] GREEK BETA SYMBOL..GREEK UPSILON WITH HOOK SYMBOL
+03D5 ; Other_Math # L& GREEK PHI SYMBOL
+03F0..03F1 ; Other_Math # L& [2] GREEK KAPPA SYMBOL..GREEK RHO SYMBOL
+03F4..03F5 ; Other_Math # L& [2] GREEK CAPITAL THETA SYMBOL..GREEK LUNATE EPSILON SYMBOL
+2016 ; Other_Math # Po DOUBLE VERTICAL LINE
+2032..2034 ; Other_Math # Po [3] PRIME..TRIPLE PRIME
+2040 ; Other_Math # Pc CHARACTER TIE
+2061..2064 ; Other_Math # Cf [4] FUNCTION APPLICATION..INVISIBLE PLUS
+207D ; Other_Math # Ps SUPERSCRIPT LEFT PARENTHESIS
+207E ; Other_Math # Pe SUPERSCRIPT RIGHT PARENTHESIS
+208D ; Other_Math # Ps SUBSCRIPT LEFT PARENTHESIS
+208E ; Other_Math # Pe SUBSCRIPT RIGHT PARENTHESIS
+20D0..20DC ; Other_Math # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE
+20E1 ; Other_Math # Mn COMBINING LEFT RIGHT ARROW ABOVE
+20E5..20E6 ; Other_Math # Mn [2] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING DOUBLE VERTICAL STROKE OVERLAY
+20EB..20EF ; Other_Math # Mn [5] COMBINING LONG DOUBLE SOLIDUS OVERLAY..COMBINING RIGHT ARROW BELOW
+2102 ; Other_Math # L& DOUBLE-STRUCK CAPITAL C
+2107 ; Other_Math # L& EULER CONSTANT
+210A..2113 ; Other_Math # L& [10] SCRIPT SMALL G..SCRIPT SMALL L
+2115 ; Other_Math # L& DOUBLE-STRUCK CAPITAL N
+2119..211D ; Other_Math # L& [5] DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R
+2124 ; Other_Math # L& DOUBLE-STRUCK CAPITAL Z
+2128 ; Other_Math # L& BLACK-LETTER CAPITAL Z
+2129 ; Other_Math # So TURNED GREEK SMALL LETTER IOTA
+212C..212D ; Other_Math # L& [2] SCRIPT CAPITAL B..BLACK-LETTER CAPITAL C
+212F..2131 ; Other_Math # L& [3] SCRIPT SMALL E..SCRIPT CAPITAL F
+2133..2134 ; Other_Math # L& [2] SCRIPT CAPITAL M..SCRIPT SMALL O
+2135..2138 ; Other_Math # Lo [4] ALEF SYMBOL..DALET SYMBOL
+213C..213F ; Other_Math # L& [4] DOUBLE-STRUCK SMALL PI..DOUBLE-STRUCK CAPITAL PI
+2145..2149 ; Other_Math # L& [5] DOUBLE-STRUCK ITALIC CAPITAL D..DOUBLE-STRUCK ITALIC SMALL J
+2195..2199 ; Other_Math # So [5] UP DOWN ARROW..SOUTH WEST ARROW
+219C..219F ; Other_Math # So [4] LEFTWARDS WAVE ARROW..UPWARDS TWO HEADED ARROW
+21A1..21A2 ; Other_Math # So [2] DOWNWARDS TWO HEADED ARROW..LEFTWARDS ARROW WITH TAIL
+21A4..21A5 ; Other_Math # So [2] LEFTWARDS ARROW FROM BAR..UPWARDS ARROW FROM BAR
+21A7 ; Other_Math # So DOWNWARDS ARROW FROM BAR
+21A9..21AD ; Other_Math # So [5] LEFTWARDS ARROW WITH HOOK..LEFT RIGHT WAVE ARROW
+21B0..21B1 ; Other_Math # So [2] UPWARDS ARROW WITH TIP LEFTWARDS..UPWARDS ARROW WITH TIP RIGHTWARDS
+21B6..21B7 ; Other_Math # So [2] ANTICLOCKWISE TOP SEMICIRCLE ARROW..CLOCKWISE TOP SEMICIRCLE ARROW
+21BC..21CD ; Other_Math # So [18] LEFTWARDS HARPOON WITH BARB UPWARDS..LEFTWARDS DOUBLE ARROW WITH STROKE
+21D0..21D1 ; Other_Math # So [2] LEFTWARDS DOUBLE ARROW..UPWARDS DOUBLE ARROW
+21D3 ; Other_Math # So DOWNWARDS DOUBLE ARROW
+21D5..21DB ; Other_Math # So [7] UP DOWN DOUBLE ARROW..RIGHTWARDS TRIPLE ARROW
+21DD ; Other_Math # So RIGHTWARDS SQUIGGLE ARROW
+21E4..21E5 ; Other_Math # So [2] LEFTWARDS ARROW TO BAR..RIGHTWARDS ARROW TO BAR
+2308 ; Other_Math # Ps LEFT CEILING
+2309 ; Other_Math # Pe RIGHT CEILING
+230A ; Other_Math # Ps LEFT FLOOR
+230B ; Other_Math # Pe RIGHT FLOOR
+23B4..23B5 ; Other_Math # So [2] TOP SQUARE BRACKET..BOTTOM SQUARE BRACKET
+23B7 ; Other_Math # So RADICAL SYMBOL BOTTOM
+23D0 ; Other_Math # So VERTICAL LINE EXTENSION
+23E2 ; Other_Math # So WHITE TRAPEZIUM
+25A0..25A1 ; Other_Math # So [2] BLACK SQUARE..WHITE SQUARE
+25AE..25B6 ; Other_Math # So [9] BLACK VERTICAL RECTANGLE..BLACK RIGHT-POINTING TRIANGLE
+25BC..25C0 ; Other_Math # So [5] BLACK DOWN-POINTING TRIANGLE..BLACK LEFT-POINTING TRIANGLE
+25C6..25C7 ; Other_Math # So [2] BLACK DIAMOND..WHITE DIAMOND
+25CA..25CB ; Other_Math # So [2] LOZENGE..WHITE CIRCLE
+25CF..25D3 ; Other_Math # So [5] BLACK CIRCLE..CIRCLE WITH UPPER HALF BLACK
+25E2 ; Other_Math # So BLACK LOWER RIGHT TRIANGLE
+25E4 ; Other_Math # So BLACK UPPER LEFT TRIANGLE
+25E7..25EC ; Other_Math # So [6] SQUARE WITH LEFT HALF BLACK..WHITE UP-POINTING TRIANGLE WITH DOT
+2605..2606 ; Other_Math # So [2] BLACK STAR..WHITE STAR
+2640 ; Other_Math # So FEMALE SIGN
+2642 ; Other_Math # So MALE SIGN
+2660..2663 ; Other_Math # So [4] BLACK SPADE SUIT..BLACK CLUB SUIT
+266D..266E ; Other_Math # So [2] MUSIC FLAT SIGN..MUSIC NATURAL SIGN
+27C5 ; Other_Math # Ps LEFT S-SHAPED BAG DELIMITER
+27C6 ; Other_Math # Pe RIGHT S-SHAPED BAG DELIMITER
+27E6 ; Other_Math # Ps MATHEMATICAL LEFT WHITE SQUARE BRACKET
+27E7 ; Other_Math # Pe MATHEMATICAL RIGHT WHITE SQUARE BRACKET
+27E8 ; Other_Math # Ps MATHEMATICAL LEFT ANGLE BRACKET
+27E9 ; Other_Math # Pe MATHEMATICAL RIGHT ANGLE BRACKET
+27EA ; Other_Math # Ps MATHEMATICAL LEFT DOUBLE ANGLE BRACKET
+27EB ; Other_Math # Pe MATHEMATICAL RIGHT DOUBLE ANGLE BRACKET
+27EC ; Other_Math # Ps MATHEMATICAL LEFT WHITE TORTOISE SHELL BRACKET
+27ED ; Other_Math # Pe MATHEMATICAL RIGHT WHITE TORTOISE SHELL BRACKET
+27EE ; Other_Math # Ps MATHEMATICAL LEFT FLATTENED PARENTHESIS
+27EF ; Other_Math # Pe MATHEMATICAL RIGHT FLATTENED PARENTHESIS
+2983 ; Other_Math # Ps LEFT WHITE CURLY BRACKET
+2984 ; Other_Math # Pe RIGHT WHITE CURLY BRACKET
+2985 ; Other_Math # Ps LEFT WHITE PARENTHESIS
+2986 ; Other_Math # Pe RIGHT WHITE PARENTHESIS
+2987 ; Other_Math # Ps Z NOTATION LEFT IMAGE BRACKET
+2988 ; Other_Math # Pe Z NOTATION RIGHT IMAGE BRACKET
+2989 ; Other_Math # Ps Z NOTATION LEFT BINDING BRACKET
+298A ; Other_Math # Pe Z NOTATION RIGHT BINDING BRACKET
+298B ; Other_Math # Ps LEFT SQUARE BRACKET WITH UNDERBAR
+298C ; Other_Math # Pe RIGHT SQUARE BRACKET WITH UNDERBAR
+298D ; Other_Math # Ps LEFT SQUARE BRACKET WITH TICK IN TOP CORNER
+298E ; Other_Math # Pe RIGHT SQUARE BRACKET WITH TICK IN BOTTOM CORNER
+298F ; Other_Math # Ps LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER
+2990 ; Other_Math # Pe RIGHT SQUARE BRACKET WITH TICK IN TOP CORNER
+2991 ; Other_Math # Ps LEFT ANGLE BRACKET WITH DOT
+2992 ; Other_Math # Pe RIGHT ANGLE BRACKET WITH DOT
+2993 ; Other_Math # Ps LEFT ARC LESS-THAN BRACKET
+2994 ; Other_Math # Pe RIGHT ARC GREATER-THAN BRACKET
+2995 ; Other_Math # Ps DOUBLE LEFT ARC GREATER-THAN BRACKET
+2996 ; Other_Math # Pe DOUBLE RIGHT ARC LESS-THAN BRACKET
+2997 ; Other_Math # Ps LEFT BLACK TORTOISE SHELL BRACKET
+2998 ; Other_Math # Pe RIGHT BLACK TORTOISE SHELL BRACKET
+29D8 ; Other_Math # Ps LEFT WIGGLY FENCE
+29D9 ; Other_Math # Pe RIGHT WIGGLY FENCE
+29DA ; Other_Math # Ps LEFT DOUBLE WIGGLY FENCE
+29DB ; Other_Math # Pe RIGHT DOUBLE WIGGLY FENCE
+29FC ; Other_Math # Ps LEFT-POINTING CURVED ANGLE BRACKET
+29FD ; Other_Math # Pe RIGHT-POINTING CURVED ANGLE BRACKET
+FE61 ; Other_Math # Po SMALL ASTERISK
+FE63 ; Other_Math # Pd SMALL HYPHEN-MINUS
+FE68 ; Other_Math # Po SMALL REVERSE SOLIDUS
+FF3C ; Other_Math # Po FULLWIDTH REVERSE SOLIDUS
+FF3E ; Other_Math # Sk FULLWIDTH CIRCUMFLEX ACCENT
+1D400..1D454 ; Other_Math # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G
+1D456..1D49C ; Other_Math # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A
+1D49E..1D49F ; Other_Math # L& [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D
+1D4A2 ; Other_Math # L& MATHEMATICAL SCRIPT CAPITAL G
+1D4A5..1D4A6 ; Other_Math # L& [2] MATHEMATICAL SCRIPT CAPITAL J..MATHEMATICAL SCRIPT CAPITAL K
+1D4A9..1D4AC ; Other_Math # L& [4] MATHEMATICAL SCRIPT CAPITAL N..MATHEMATICAL SCRIPT CAPITAL Q
+1D4AE..1D4B9 ; Other_Math # L& [12] MATHEMATICAL SCRIPT CAPITAL S..MATHEMATICAL SCRIPT SMALL D
+1D4BB ; Other_Math # L& MATHEMATICAL SCRIPT SMALL F
+1D4BD..1D4C3 ; Other_Math # L& [7] MATHEMATICAL SCRIPT SMALL H..MATHEMATICAL SCRIPT SMALL N
+1D4C5..1D505 ; Other_Math # L& [65] MATHEMATICAL SCRIPT SMALL P..MATHEMATICAL FRAKTUR CAPITAL B
+1D507..1D50A ; Other_Math # L& [4] MATHEMATICAL FRAKTUR CAPITAL D..MATHEMATICAL FRAKTUR CAPITAL G
+1D50D..1D514 ; Other_Math # L& [8] MATHEMATICAL FRAKTUR CAPITAL J..MATHEMATICAL FRAKTUR CAPITAL Q
+1D516..1D51C ; Other_Math # L& [7] MATHEMATICAL FRAKTUR CAPITAL S..MATHEMATICAL FRAKTUR CAPITAL Y
+1D51E..1D539 ; Other_Math # L& [28] MATHEMATICAL FRAKTUR SMALL A..MATHEMATICAL DOUBLE-STRUCK CAPITAL B
+1D53B..1D53E ; Other_Math # L& [4] MATHEMATICAL DOUBLE-STRUCK CAPITAL D..MATHEMATICAL DOUBLE-STRUCK CAPITAL G
+1D540..1D544 ; Other_Math # L& [5] MATHEMATICAL DOUBLE-STRUCK CAPITAL I..MATHEMATICAL DOUBLE-STRUCK CAPITAL M
+1D546 ; Other_Math # L& MATHEMATICAL DOUBLE-STRUCK CAPITAL O
+1D54A..1D550 ; Other_Math # L& [7] MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEMATICAL DOUBLE-STRUCK CAPITAL Y
+1D552..1D6A5 ; Other_Math # L& [340] MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMATICAL ITALIC SMALL DOTLESS J
+1D6A8..1D6C0 ; Other_Math # L& [25] MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICAL BOLD CAPITAL OMEGA
+1D6C2..1D6DA ; Other_Math # L& [25] MATHEMATICAL BOLD SMALL ALPHA..MATHEMATICAL BOLD SMALL OMEGA
+1D6DC..1D6FA ; Other_Math # L& [31] MATHEMATICAL BOLD EPSILON SYMBOL..MATHEMATICAL ITALIC CAPITAL OMEGA
+1D6FC..1D714 ; Other_Math # L& [25] MATHEMATICAL ITALIC SMALL ALPHA..MATHEMATICAL ITALIC SMALL OMEGA
+1D716..1D734 ; Other_Math # L& [31] MATHEMATICAL ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD ITALIC CAPITAL OMEGA
+1D736..1D74E ; Other_Math # L& [25] MATHEMATICAL BOLD ITALIC SMALL ALPHA..MATHEMATICAL BOLD ITALIC SMALL OMEGA
+1D750..1D76E ; Other_Math # L& [31] MATHEMATICAL BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA
+1D770..1D788 ; Other_Math # L& [25] MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA
+1D78A..1D7A8 ; Other_Math # L& [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA
+1D7AA..1D7C2 ; Other_Math # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA
+1D7C4..1D7CB ; Other_Math # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA
+1D7CE..1D7FF ; Other_Math # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE
+1EE00..1EE03 ; Other_Math # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL
+1EE05..1EE1F ; Other_Math # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF
+1EE21..1EE22 ; Other_Math # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM
+1EE24 ; Other_Math # Lo ARABIC MATHEMATICAL INITIAL HEH
+1EE27 ; Other_Math # Lo ARABIC MATHEMATICAL INITIAL HAH
+1EE29..1EE32 ; Other_Math # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF
+1EE34..1EE37 ; Other_Math # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH
+1EE39 ; Other_Math # Lo ARABIC MATHEMATICAL INITIAL DAD
+1EE3B ; Other_Math # Lo ARABIC MATHEMATICAL INITIAL GHAIN
+1EE42 ; Other_Math # Lo ARABIC MATHEMATICAL TAILED JEEM
+1EE47 ; Other_Math # Lo ARABIC MATHEMATICAL TAILED HAH
+1EE49 ; Other_Math # Lo ARABIC MATHEMATICAL TAILED YEH
+1EE4B ; Other_Math # Lo ARABIC MATHEMATICAL TAILED LAM
+1EE4D..1EE4F ; Other_Math # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN
+1EE51..1EE52 ; Other_Math # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF
+1EE54 ; Other_Math # Lo ARABIC MATHEMATICAL TAILED SHEEN
+1EE57 ; Other_Math # Lo ARABIC MATHEMATICAL TAILED KHAH
+1EE59 ; Other_Math # Lo ARABIC MATHEMATICAL TAILED DAD
+1EE5B ; Other_Math # Lo ARABIC MATHEMATICAL TAILED GHAIN
+1EE5D ; Other_Math # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON
+1EE5F ; Other_Math # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF
+1EE61..1EE62 ; Other_Math # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM
+1EE64 ; Other_Math # Lo ARABIC MATHEMATICAL STRETCHED HEH
+1EE67..1EE6A ; Other_Math # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF
+1EE6C..1EE72 ; Other_Math # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF
+1EE74..1EE77 ; Other_Math # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH
+1EE79..1EE7C ; Other_Math # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH
+1EE7E ; Other_Math # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH
+1EE80..1EE89 ; Other_Math # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH
+1EE8B..1EE9B ; Other_Math # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN
+1EEA1..1EEA3 ; Other_Math # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL
+1EEA5..1EEA9 ; Other_Math # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH
+1EEAB..1EEBB ; Other_Math # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN
+
+# Total code points: 1362
+
+# ================================================
+
+0030..0039 ; Hex_Digit # Nd [10] DIGIT ZERO..DIGIT NINE
+0041..0046 ; Hex_Digit # L& [6] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER F
+0061..0066 ; Hex_Digit # L& [6] LATIN SMALL LETTER A..LATIN SMALL LETTER F
+FF10..FF19 ; Hex_Digit # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE
+FF21..FF26 ; Hex_Digit # L& [6] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER F
+FF41..FF46 ; Hex_Digit # L& [6] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER F
+
+# Total code points: 44
+
+# ================================================
+
+0030..0039 ; ASCII_Hex_Digit # Nd [10] DIGIT ZERO..DIGIT NINE
+0041..0046 ; ASCII_Hex_Digit # L& [6] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER F
+0061..0066 ; ASCII_Hex_Digit # L& [6] LATIN SMALL LETTER A..LATIN SMALL LETTER F
+
+# Total code points: 22
+
+# ================================================
+
+0345 ; Other_Alphabetic # Mn COMBINING GREEK YPOGEGRAMMENI
+05B0..05BD ; Other_Alphabetic # Mn [14] HEBREW POINT SHEVA..HEBREW POINT METEG
+05BF ; Other_Alphabetic # Mn HEBREW POINT RAFE
+05C1..05C2 ; Other_Alphabetic # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT
+05C4..05C5 ; Other_Alphabetic # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT
+05C7 ; Other_Alphabetic # Mn HEBREW POINT QAMATS QATAN
+0610..061A ; Other_Alphabetic # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA
+064B..0657 ; Other_Alphabetic # Mn [13] ARABIC FATHATAN..ARABIC INVERTED DAMMA
+0659..065F ; Other_Alphabetic # Mn [7] ARABIC ZWARAKAY..ARABIC WAVY HAMZA BELOW
+0670 ; Other_Alphabetic # Mn ARABIC LETTER SUPERSCRIPT ALEF
+06D6..06DC ; Other_Alphabetic # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN
+06E1..06E4 ; Other_Alphabetic # Mn [4] ARABIC SMALL HIGH DOTLESS HEAD OF KHAH..ARABIC SMALL HIGH MADDA
+06E7..06E8 ; Other_Alphabetic # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON
+06ED ; Other_Alphabetic # Mn ARABIC SMALL LOW MEEM
+0711 ; Other_Alphabetic # Mn SYRIAC LETTER SUPERSCRIPT ALAPH
+0730..073F ; Other_Alphabetic # Mn [16] SYRIAC PTHAHA ABOVE..SYRIAC RWAHA
+07A6..07B0 ; Other_Alphabetic # Mn [11] THAANA ABAFILI..THAANA SUKUN
+0816..0817 ; Other_Alphabetic # Mn [2] SAMARITAN MARK IN..SAMARITAN MARK IN-ALAF
+081B..0823 ; Other_Alphabetic # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A
+0825..0827 ; Other_Alphabetic # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U
+0829..082C ; Other_Alphabetic # Mn [4] SAMARITAN VOWEL SIGN LONG I..SAMARITAN VOWEL SIGN SUKUN
+08E3..08E9 ; Other_Alphabetic # Mn [7] ARABIC TURNED DAMMA BELOW..ARABIC CURLY KASRATAN
+08F0..0902 ; Other_Alphabetic # Mn [19] ARABIC OPEN FATHATAN..DEVANAGARI SIGN ANUSVARA
+0903 ; Other_Alphabetic # Mc DEVANAGARI SIGN VISARGA
+093A ; Other_Alphabetic # Mn DEVANAGARI VOWEL SIGN OE
+093B ; Other_Alphabetic # Mc DEVANAGARI VOWEL SIGN OOE
+093E..0940 ; Other_Alphabetic # Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II
+0941..0948 ; Other_Alphabetic # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI
+0949..094C ; Other_Alphabetic # Mc [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU
+094E..094F ; Other_Alphabetic # Mc [2] DEVANAGARI VOWEL SIGN PRISHTHAMATRA E..DEVANAGARI VOWEL SIGN AW
+0955..0957 ; Other_Alphabetic # Mn [3] DEVANAGARI VOWEL SIGN CANDRA LONG E..DEVANAGARI VOWEL SIGN UUE
+0962..0963 ; Other_Alphabetic # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL
+0981 ; Other_Alphabetic # Mn BENGALI SIGN CANDRABINDU
+0982..0983 ; Other_Alphabetic # Mc [2] BENGALI SIGN ANUSVARA..BENGALI SIGN VISARGA
+09BE..09C0 ; Other_Alphabetic # Mc [3] BENGALI VOWEL SIGN AA..BENGALI VOWEL SIGN II
+09C1..09C4 ; Other_Alphabetic # Mn [4] BENGALI VOWEL SIGN U..BENGALI VOWEL SIGN VOCALIC RR
+09C7..09C8 ; Other_Alphabetic # Mc [2] BENGALI VOWEL SIGN E..BENGALI VOWEL SIGN AI
+09CB..09CC ; Other_Alphabetic # Mc [2] BENGALI VOWEL SIGN O..BENGALI VOWEL SIGN AU
+09D7 ; Other_Alphabetic # Mc BENGALI AU LENGTH MARK
+09E2..09E3 ; Other_Alphabetic # Mn [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL
+0A01..0A02 ; Other_Alphabetic # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI
+0A03 ; Other_Alphabetic # Mc GURMUKHI SIGN VISARGA
+0A3E..0A40 ; Other_Alphabetic # Mc [3] GURMUKHI VOWEL SIGN AA..GURMUKHI VOWEL SIGN II
+0A41..0A42 ; Other_Alphabetic # Mn [2] GURMUKHI VOWEL SIGN U..GURMUKHI VOWEL SIGN UU
+0A47..0A48 ; Other_Alphabetic # Mn [2] GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI
+0A4B..0A4C ; Other_Alphabetic # Mn [2] GURMUKHI VOWEL SIGN OO..GURMUKHI VOWEL SIGN AU
+0A51 ; Other_Alphabetic # Mn GURMUKHI SIGN UDAAT
+0A70..0A71 ; Other_Alphabetic # Mn [2] GURMUKHI TIPPI..GURMUKHI ADDAK
+0A75 ; Other_Alphabetic # Mn GURMUKHI SIGN YAKASH
+0A81..0A82 ; Other_Alphabetic # Mn [2] GUJARATI SIGN CANDRABINDU..GUJARATI SIGN ANUSVARA
+0A83 ; Other_Alphabetic # Mc GUJARATI SIGN VISARGA
+0ABE..0AC0 ; Other_Alphabetic # Mc [3] GUJARATI VOWEL SIGN AA..GUJARATI VOWEL SIGN II
+0AC1..0AC5 ; Other_Alphabetic # Mn [5] GUJARATI VOWEL SIGN U..GUJARATI VOWEL SIGN CANDRA E
+0AC7..0AC8 ; Other_Alphabetic # Mn [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI
+0AC9 ; Other_Alphabetic # Mc GUJARATI VOWEL SIGN CANDRA O
+0ACB..0ACC ; Other_Alphabetic # Mc [2] GUJARATI VOWEL SIGN O..GUJARATI VOWEL SIGN AU
+0AE2..0AE3 ; Other_Alphabetic # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL
+0B01 ; Other_Alphabetic # Mn ORIYA SIGN CANDRABINDU
+0B02..0B03 ; Other_Alphabetic # Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA
+0B3E ; Other_Alphabetic # Mc ORIYA VOWEL SIGN AA
+0B3F ; Other_Alphabetic # Mn ORIYA VOWEL SIGN I
+0B40 ; Other_Alphabetic # Mc ORIYA VOWEL SIGN II
+0B41..0B44 ; Other_Alphabetic # Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR
+0B47..0B48 ; Other_Alphabetic # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI
+0B4B..0B4C ; Other_Alphabetic # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU
+0B56 ; Other_Alphabetic # Mn ORIYA AI LENGTH MARK
+0B57 ; Other_Alphabetic # Mc ORIYA AU LENGTH MARK
+0B62..0B63 ; Other_Alphabetic # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL
+0B82 ; Other_Alphabetic # Mn TAMIL SIGN ANUSVARA
+0BBE..0BBF ; Other_Alphabetic # Mc [2] TAMIL VOWEL SIGN AA..TAMIL VOWEL SIGN I
+0BC0 ; Other_Alphabetic # Mn TAMIL VOWEL SIGN II
+0BC1..0BC2 ; Other_Alphabetic # Mc [2] TAMIL VOWEL SIGN U..TAMIL VOWEL SIGN UU
+0BC6..0BC8 ; Other_Alphabetic # Mc [3] TAMIL VOWEL SIGN E..TAMIL VOWEL SIGN AI
+0BCA..0BCC ; Other_Alphabetic # Mc [3] TAMIL VOWEL SIGN O..TAMIL VOWEL SIGN AU
+0BD7 ; Other_Alphabetic # Mc TAMIL AU LENGTH MARK
+0C00 ; Other_Alphabetic # Mn TELUGU SIGN COMBINING CANDRABINDU ABOVE
+0C01..0C03 ; Other_Alphabetic # Mc [3] TELUGU SIGN CANDRABINDU..TELUGU SIGN VISARGA
+0C3E..0C40 ; Other_Alphabetic # Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II
+0C41..0C44 ; Other_Alphabetic # Mc [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR
+0C46..0C48 ; Other_Alphabetic # Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI
+0C4A..0C4C ; Other_Alphabetic # Mn [3] TELUGU VOWEL SIGN O..TELUGU VOWEL SIGN AU
+0C55..0C56 ; Other_Alphabetic # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK
+0C62..0C63 ; Other_Alphabetic # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL
+0C81 ; Other_Alphabetic # Mn KANNADA SIGN CANDRABINDU
+0C82..0C83 ; Other_Alphabetic # Mc [2] KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA
+0CBE ; Other_Alphabetic # Mc KANNADA VOWEL SIGN AA
+0CBF ; Other_Alphabetic # Mn KANNADA VOWEL SIGN I
+0CC0..0CC4 ; Other_Alphabetic # Mc [5] KANNADA VOWEL SIGN II..KANNADA VOWEL SIGN VOCALIC RR
+0CC6 ; Other_Alphabetic # Mn KANNADA VOWEL SIGN E
+0CC7..0CC8 ; Other_Alphabetic # Mc [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI
+0CCA..0CCB ; Other_Alphabetic # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO
+0CCC ; Other_Alphabetic # Mn KANNADA VOWEL SIGN AU
+0CD5..0CD6 ; Other_Alphabetic # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK
+0CE2..0CE3 ; Other_Alphabetic # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL
+0D01 ; Other_Alphabetic # Mn MALAYALAM SIGN CANDRABINDU
+0D02..0D03 ; Other_Alphabetic # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA
+0D3E..0D40 ; Other_Alphabetic # Mc [3] MALAYALAM VOWEL SIGN AA..MALAYALAM VOWEL SIGN II
+0D41..0D44 ; Other_Alphabetic # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR
+0D46..0D48 ; Other_Alphabetic # Mc [3] MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI
+0D4A..0D4C ; Other_Alphabetic # Mc [3] MALAYALAM VOWEL SIGN O..MALAYALAM VOWEL SIGN AU
+0D57 ; Other_Alphabetic # Mc MALAYALAM AU LENGTH MARK
+0D62..0D63 ; Other_Alphabetic # Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL
+0D82..0D83 ; Other_Alphabetic # Mc [2] SINHALA SIGN ANUSVARAYA..SINHALA SIGN VISARGAYA
+0DCF..0DD1 ; Other_Alphabetic # Mc [3] SINHALA VOWEL SIGN AELA-PILLA..SINHALA VOWEL SIGN DIGA AEDA-PILLA
+0DD2..0DD4 ; Other_Alphabetic # Mn [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA
+0DD6 ; Other_Alphabetic # Mn SINHALA VOWEL SIGN DIGA PAA-PILLA
+0DD8..0DDF ; Other_Alphabetic # Mc [8] SINHALA VOWEL SIGN GAETTA-PILLA..SINHALA VOWEL SIGN GAYANUKITTA
+0DF2..0DF3 ; Other_Alphabetic # Mc [2] SINHALA VOWEL SIGN DIGA GAETTA-PILLA..SINHALA VOWEL SIGN DIGA GAYANUKITTA
+0E31 ; Other_Alphabetic # Mn THAI CHARACTER MAI HAN-AKAT
+0E34..0E3A ; Other_Alphabetic # Mn [7] THAI CHARACTER SARA I..THAI CHARACTER PHINTHU
+0E4D ; Other_Alphabetic # Mn THAI CHARACTER NIKHAHIT
+0EB1 ; Other_Alphabetic # Mn LAO VOWEL SIGN MAI KAN
+0EB4..0EB9 ; Other_Alphabetic # Mn [6] LAO VOWEL SIGN I..LAO VOWEL SIGN UU
+0EBB..0EBC ; Other_Alphabetic # Mn [2] LAO VOWEL SIGN MAI KON..LAO SEMIVOWEL SIGN LO
+0ECD ; Other_Alphabetic # Mn LAO NIGGAHITA
+0F71..0F7E ; Other_Alphabetic # Mn [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO
+0F7F ; Other_Alphabetic # Mc TIBETAN SIGN RNAM BCAD
+0F80..0F81 ; Other_Alphabetic # Mn [2] TIBETAN VOWEL SIGN REVERSED I..TIBETAN VOWEL SIGN REVERSED II
+0F8D..0F97 ; Other_Alphabetic # Mn [11] TIBETAN SUBJOINED SIGN LCE TSA CAN..TIBETAN SUBJOINED LETTER JA
+0F99..0FBC ; Other_Alphabetic # Mn [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA
+102B..102C ; Other_Alphabetic # Mc [2] MYANMAR VOWEL SIGN TALL AA..MYANMAR VOWEL SIGN AA
+102D..1030 ; Other_Alphabetic # Mn [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU
+1031 ; Other_Alphabetic # Mc MYANMAR VOWEL SIGN E
+1032..1036 ; Other_Alphabetic # Mn [5] MYANMAR VOWEL SIGN AI..MYANMAR SIGN ANUSVARA
+1038 ; Other_Alphabetic # Mc MYANMAR SIGN VISARGA
+103B..103C ; Other_Alphabetic # Mc [2] MYANMAR CONSONANT SIGN MEDIAL YA..MYANMAR CONSONANT SIGN MEDIAL RA
+103D..103E ; Other_Alphabetic # Mn [2] MYANMAR CONSONANT SIGN MEDIAL WA..MYANMAR CONSONANT SIGN MEDIAL HA
+1056..1057 ; Other_Alphabetic # Mc [2] MYANMAR VOWEL SIGN VOCALIC R..MYANMAR VOWEL SIGN VOCALIC RR
+1058..1059 ; Other_Alphabetic # Mn [2] MYANMAR VOWEL SIGN VOCALIC L..MYANMAR VOWEL SIGN VOCALIC LL
+105E..1060 ; Other_Alphabetic # Mn [3] MYANMAR CONSONANT SIGN MON MEDIAL NA..MYANMAR CONSONANT SIGN MON MEDIAL LA
+1062 ; Other_Alphabetic # Mc MYANMAR VOWEL SIGN SGAW KAREN EU
+1067..1068 ; Other_Alphabetic # Mc [2] MYANMAR VOWEL SIGN WESTERN PWO KAREN EU..MYANMAR VOWEL SIGN WESTERN PWO KAREN UE
+1071..1074 ; Other_Alphabetic # Mn [4] MYANMAR VOWEL SIGN GEBA KAREN I..MYANMAR VOWEL SIGN KAYAH EE
+1082 ; Other_Alphabetic # Mn MYANMAR CONSONANT SIGN SHAN MEDIAL WA
+1083..1084 ; Other_Alphabetic # Mc [2] MYANMAR VOWEL SIGN SHAN AA..MYANMAR VOWEL SIGN SHAN E
+1085..1086 ; Other_Alphabetic # Mn [2] MYANMAR VOWEL SIGN SHAN E ABOVE..MYANMAR VOWEL SIGN SHAN FINAL Y
+109C ; Other_Alphabetic # Mc MYANMAR VOWEL SIGN AITON A
+109D ; Other_Alphabetic # Mn MYANMAR VOWEL SIGN AITON AI
+135F ; Other_Alphabetic # Mn ETHIOPIC COMBINING GEMINATION MARK
+1712..1713 ; Other_Alphabetic # Mn [2] TAGALOG VOWEL SIGN I..TAGALOG VOWEL SIGN U
+1732..1733 ; Other_Alphabetic # Mn [2] HANUNOO VOWEL SIGN I..HANUNOO VOWEL SIGN U
+1752..1753 ; Other_Alphabetic # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U
+1772..1773 ; Other_Alphabetic # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U
+17B6 ; Other_Alphabetic # Mc KHMER VOWEL SIGN AA
+17B7..17BD ; Other_Alphabetic # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA
+17BE..17C5 ; Other_Alphabetic # Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU
+17C6 ; Other_Alphabetic # Mn KHMER SIGN NIKAHIT
+17C7..17C8 ; Other_Alphabetic # Mc [2] KHMER SIGN REAHMUK..KHMER SIGN YUUKALEAPINTU
+18A9 ; Other_Alphabetic # Mn MONGOLIAN LETTER ALI GALI DAGALGA
+1920..1922 ; Other_Alphabetic # Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U
+1923..1926 ; Other_Alphabetic # Mc [4] LIMBU VOWEL SIGN EE..LIMBU VOWEL SIGN AU
+1927..1928 ; Other_Alphabetic # Mn [2] LIMBU VOWEL SIGN E..LIMBU VOWEL SIGN O
+1929..192B ; Other_Alphabetic # Mc [3] LIMBU SUBJOINED LETTER YA..LIMBU SUBJOINED LETTER WA
+1930..1931 ; Other_Alphabetic # Mc [2] LIMBU SMALL LETTER KA..LIMBU SMALL LETTER NGA
+1932 ; Other_Alphabetic # Mn LIMBU SMALL LETTER ANUSVARA
+1933..1938 ; Other_Alphabetic # Mc [6] LIMBU SMALL LETTER TA..LIMBU SMALL LETTER LA
+1A17..1A18 ; Other_Alphabetic # Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U
+1A19..1A1A ; Other_Alphabetic # Mc [2] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN O
+1A1B ; Other_Alphabetic # Mn BUGINESE VOWEL SIGN AE
+1A55 ; Other_Alphabetic # Mc TAI THAM CONSONANT SIGN MEDIAL RA
+1A56 ; Other_Alphabetic # Mn TAI THAM CONSONANT SIGN MEDIAL LA
+1A57 ; Other_Alphabetic # Mc TAI THAM CONSONANT SIGN LA TANG LAI
+1A58..1A5E ; Other_Alphabetic # Mn [7] TAI THAM SIGN MAI KANG LAI..TAI THAM CONSONANT SIGN SA
+1A61 ; Other_Alphabetic # Mc TAI THAM VOWEL SIGN A
+1A62 ; Other_Alphabetic # Mn TAI THAM VOWEL SIGN MAI SAT
+1A63..1A64 ; Other_Alphabetic # Mc [2] TAI THAM VOWEL SIGN AA..TAI THAM VOWEL SIGN TALL AA
+1A65..1A6C ; Other_Alphabetic # Mn [8] TAI THAM VOWEL SIGN I..TAI THAM VOWEL SIGN OA BELOW
+1A6D..1A72 ; Other_Alphabetic # Mc [6] TAI THAM VOWEL SIGN OY..TAI THAM VOWEL SIGN THAM AI
+1A73..1A74 ; Other_Alphabetic # Mn [2] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN MAI KANG
+1B00..1B03 ; Other_Alphabetic # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG
+1B04 ; Other_Alphabetic # Mc BALINESE SIGN BISAH
+1B35 ; Other_Alphabetic # Mc BALINESE VOWEL SIGN TEDUNG
+1B36..1B3A ; Other_Alphabetic # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA
+1B3B ; Other_Alphabetic # Mc BALINESE VOWEL SIGN RA REPA TEDUNG
+1B3C ; Other_Alphabetic # Mn BALINESE VOWEL SIGN LA LENGA
+1B3D..1B41 ; Other_Alphabetic # Mc [5] BALINESE VOWEL SIGN LA LENGA TEDUNG..BALINESE VOWEL SIGN TALING REPA TEDUNG
+1B42 ; Other_Alphabetic # Mn BALINESE VOWEL SIGN PEPET
+1B43 ; Other_Alphabetic # Mc BALINESE VOWEL SIGN PEPET TEDUNG
+1B80..1B81 ; Other_Alphabetic # Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR
+1B82 ; Other_Alphabetic # Mc SUNDANESE SIGN PANGWISAD
+1BA1 ; Other_Alphabetic # Mc SUNDANESE CONSONANT SIGN PAMINGKAL
+1BA2..1BA5 ; Other_Alphabetic # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU
+1BA6..1BA7 ; Other_Alphabetic # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG
+1BA8..1BA9 ; Other_Alphabetic # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG
+1BAC..1BAD ; Other_Alphabetic # Mn [2] SUNDANESE CONSONANT SIGN PASANGAN MA..SUNDANESE CONSONANT SIGN PASANGAN WA
+1BE7 ; Other_Alphabetic # Mc BATAK VOWEL SIGN E
+1BE8..1BE9 ; Other_Alphabetic # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE
+1BEA..1BEC ; Other_Alphabetic # Mc [3] BATAK VOWEL SIGN I..BATAK VOWEL SIGN O
+1BED ; Other_Alphabetic # Mn BATAK VOWEL SIGN KARO O
+1BEE ; Other_Alphabetic # Mc BATAK VOWEL SIGN U
+1BEF..1BF1 ; Other_Alphabetic # Mn [3] BATAK VOWEL SIGN U FOR SIMALUNGUN SA..BATAK CONSONANT SIGN H
+1C24..1C2B ; Other_Alphabetic # Mc [8] LEPCHA SUBJOINED LETTER YA..LEPCHA VOWEL SIGN UU
+1C2C..1C33 ; Other_Alphabetic # Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T
+1C34..1C35 ; Other_Alphabetic # Mc [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG
+1CF2..1CF3 ; Other_Alphabetic # Mc [2] VEDIC SIGN ARDHAVISARGA..VEDIC SIGN ROTATED ARDHAVISARGA
+1DE7..1DF4 ; Other_Alphabetic # Mn [14] COMBINING LATIN SMALL LETTER ALPHA..COMBINING LATIN SMALL LETTER U WITH DIAERESIS
+24B6..24E9 ; Other_Alphabetic # So [52] CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN SMALL LETTER Z
+2DE0..2DFF ; Other_Alphabetic # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS
+A674..A67B ; Other_Alphabetic # Mn [8] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC LETTER OMEGA
+A69E..A69F ; Other_Alphabetic # Mn [2] COMBINING CYRILLIC LETTER EF..COMBINING CYRILLIC LETTER IOTIFIED E
+A823..A824 ; Other_Alphabetic # Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I
+A825..A826 ; Other_Alphabetic # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E
+A827 ; Other_Alphabetic # Mc SYLOTI NAGRI VOWEL SIGN OO
+A880..A881 ; Other_Alphabetic # Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA
+A8B4..A8C3 ; Other_Alphabetic # Mc [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN AU
+A926..A92A ; Other_Alphabetic # Mn [5] KAYAH LI VOWEL UE..KAYAH LI VOWEL O
+A947..A951 ; Other_Alphabetic # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R
+A952 ; Other_Alphabetic # Mc REJANG CONSONANT SIGN H
+A980..A982 ; Other_Alphabetic # Mn [3] JAVANESE SIGN PANYANGGA..JAVANESE SIGN LAYAR
+A983 ; Other_Alphabetic # Mc JAVANESE SIGN WIGNYAN
+A9B4..A9B5 ; Other_Alphabetic # Mc [2] JAVANESE VOWEL SIGN TARUNG..JAVANESE VOWEL SIGN TOLONG
+A9B6..A9B9 ; Other_Alphabetic # Mn [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT
+A9BA..A9BB ; Other_Alphabetic # Mc [2] JAVANESE VOWEL SIGN TALING..JAVANESE VOWEL SIGN DIRGA MURE
+A9BC ; Other_Alphabetic # Mn JAVANESE VOWEL SIGN PEPET
+A9BD..A9BF ; Other_Alphabetic # Mc [3] JAVANESE CONSONANT SIGN KERET..JAVANESE CONSONANT SIGN CAKRA
+AA29..AA2E ; Other_Alphabetic # Mn [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE
+AA2F..AA30 ; Other_Alphabetic # Mc [2] CHAM VOWEL SIGN O..CHAM VOWEL SIGN AI
+AA31..AA32 ; Other_Alphabetic # Mn [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE
+AA33..AA34 ; Other_Alphabetic # Mc [2] CHAM CONSONANT SIGN YA..CHAM CONSONANT SIGN RA
+AA35..AA36 ; Other_Alphabetic # Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONANT SIGN WA
+AA43 ; Other_Alphabetic # Mn CHAM CONSONANT SIGN FINAL NG
+AA4C ; Other_Alphabetic # Mn CHAM CONSONANT SIGN FINAL M
+AA4D ; Other_Alphabetic # Mc CHAM CONSONANT SIGN FINAL H
+AAB0 ; Other_Alphabetic # Mn TAI VIET MAI KANG
+AAB2..AAB4 ; Other_Alphabetic # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U
+AAB7..AAB8 ; Other_Alphabetic # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA
+AABE ; Other_Alphabetic # Mn TAI VIET VOWEL AM
+AAEB ; Other_Alphabetic # Mc MEETEI MAYEK VOWEL SIGN II
+AAEC..AAED ; Other_Alphabetic # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI
+AAEE..AAEF ; Other_Alphabetic # Mc [2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU
+AAF5 ; Other_Alphabetic # Mc MEETEI MAYEK VOWEL SIGN VISARGA
+ABE3..ABE4 ; Other_Alphabetic # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP
+ABE5 ; Other_Alphabetic # Mn MEETEI MAYEK VOWEL SIGN ANAP
+ABE6..ABE7 ; Other_Alphabetic # Mc [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP
+ABE8 ; Other_Alphabetic # Mn MEETEI MAYEK VOWEL SIGN UNAP
+ABE9..ABEA ; Other_Alphabetic # Mc [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEETEI MAYEK VOWEL SIGN NUNG
+FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA
+10376..1037A ; Other_Alphabetic # Mn [5] COMBINING OLD PERMIC LETTER AN..COMBINING OLD PERMIC LETTER SII
+10A01..10A03 ; Other_Alphabetic # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R
+10A05..10A06 ; Other_Alphabetic # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O
+10A0C..10A0F ; Other_Alphabetic # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA
+11000 ; Other_Alphabetic # Mc BRAHMI SIGN CANDRABINDU
+11001 ; Other_Alphabetic # Mn BRAHMI SIGN ANUSVARA
+11002 ; Other_Alphabetic # Mc BRAHMI SIGN VISARGA
+11038..11045 ; Other_Alphabetic # Mn [14] BRAHMI VOWEL SIGN AA..BRAHMI VOWEL SIGN AU
+11082 ; Other_Alphabetic # Mc KAITHI SIGN VISARGA
+110B0..110B2 ; Other_Alphabetic # Mc [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II
+110B3..110B6 ; Other_Alphabetic # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI
+110B7..110B8 ; Other_Alphabetic # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU
+11100..11102 ; Other_Alphabetic # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA
+11127..1112B ; Other_Alphabetic # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU
+1112C ; Other_Alphabetic # Mc CHAKMA VOWEL SIGN E
+1112D..11132 ; Other_Alphabetic # Mn [6] CHAKMA VOWEL SIGN AI..CHAKMA AU MARK
+11180..11181 ; Other_Alphabetic # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA
+11182 ; Other_Alphabetic # Mc SHARADA SIGN VISARGA
+111B3..111B5 ; Other_Alphabetic # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II
+111B6..111BE ; Other_Alphabetic # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O
+111BF ; Other_Alphabetic # Mc SHARADA VOWEL SIGN AU
+1122C..1122E ; Other_Alphabetic # Mc [3] KHOJKI VOWEL SIGN AA..KHOJKI VOWEL SIGN II
+1122F..11231 ; Other_Alphabetic # Mn [3] KHOJKI VOWEL SIGN U..KHOJKI VOWEL SIGN AI
+11232..11233 ; Other_Alphabetic # Mc [2] KHOJKI VOWEL SIGN O..KHOJKI VOWEL SIGN AU
+11234 ; Other_Alphabetic # Mn KHOJKI SIGN ANUSVARA
+11237 ; Other_Alphabetic # Mn KHOJKI SIGN SHADDA
+112DF ; Other_Alphabetic # Mn KHUDAWADI SIGN ANUSVARA
+112E0..112E2 ; Other_Alphabetic # Mc [3] KHUDAWADI VOWEL SIGN AA..KHUDAWADI VOWEL SIGN II
+112E3..112E8 ; Other_Alphabetic # Mn [6] KHUDAWADI VOWEL SIGN U..KHUDAWADI VOWEL SIGN AU
+11300..11301 ; Other_Alphabetic # Mn [2] GRANTHA SIGN COMBINING ANUSVARA ABOVE..GRANTHA SIGN CANDRABINDU
+11302..11303 ; Other_Alphabetic # Mc [2] GRANTHA SIGN ANUSVARA..GRANTHA SIGN VISARGA
+1133E..1133F ; Other_Alphabetic # Mc [2] GRANTHA VOWEL SIGN AA..GRANTHA VOWEL SIGN I
+11340 ; Other_Alphabetic # Mn GRANTHA VOWEL SIGN II
+11341..11344 ; Other_Alphabetic # Mc [4] GRANTHA VOWEL SIGN U..GRANTHA VOWEL SIGN VOCALIC RR
+11347..11348 ; Other_Alphabetic # Mc [2] GRANTHA VOWEL SIGN EE..GRANTHA VOWEL SIGN AI
+1134B..1134C ; Other_Alphabetic # Mc [2] GRANTHA VOWEL SIGN OO..GRANTHA VOWEL SIGN AU
+11357 ; Other_Alphabetic # Mc GRANTHA AU LENGTH MARK
+11362..11363 ; Other_Alphabetic # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL
+114B0..114B2 ; Other_Alphabetic # Mc [3] TIRHUTA VOWEL SIGN AA..TIRHUTA VOWEL SIGN II
+114B3..114B8 ; Other_Alphabetic # Mn [6] TIRHUTA VOWEL SIGN U..TIRHUTA VOWEL SIGN VOCALIC LL
+114B9 ; Other_Alphabetic # Mc TIRHUTA VOWEL SIGN E
+114BA ; Other_Alphabetic # Mn TIRHUTA VOWEL SIGN SHORT E
+114BB..114BE ; Other_Alphabetic # Mc [4] TIRHUTA VOWEL SIGN AI..TIRHUTA VOWEL SIGN AU
+114BF..114C0 ; Other_Alphabetic # Mn [2] TIRHUTA SIGN CANDRABINDU..TIRHUTA SIGN ANUSVARA
+114C1 ; Other_Alphabetic # Mc TIRHUTA SIGN VISARGA
+115AF..115B1 ; Other_Alphabetic # Mc [3] SIDDHAM VOWEL SIGN AA..SIDDHAM VOWEL SIGN II
+115B2..115B5 ; Other_Alphabetic # Mn [4] SIDDHAM VOWEL SIGN U..SIDDHAM VOWEL SIGN VOCALIC RR
+115B8..115BB ; Other_Alphabetic # Mc [4] SIDDHAM VOWEL SIGN E..SIDDHAM VOWEL SIGN AU
+115BC..115BD ; Other_Alphabetic # Mn [2] SIDDHAM SIGN CANDRABINDU..SIDDHAM SIGN ANUSVARA
+115BE ; Other_Alphabetic # Mc SIDDHAM SIGN VISARGA
+115DC..115DD ; Other_Alphabetic # Mn [2] SIDDHAM VOWEL SIGN ALTERNATE U..SIDDHAM VOWEL SIGN ALTERNATE UU
+11630..11632 ; Other_Alphabetic # Mc [3] MODI VOWEL SIGN AA..MODI VOWEL SIGN II
+11633..1163A ; Other_Alphabetic # Mn [8] MODI VOWEL SIGN U..MODI VOWEL SIGN AI
+1163B..1163C ; Other_Alphabetic # Mc [2] MODI VOWEL SIGN O..MODI VOWEL SIGN AU
+1163D ; Other_Alphabetic # Mn MODI SIGN ANUSVARA
+1163E ; Other_Alphabetic # Mc MODI SIGN VISARGA
+11640 ; Other_Alphabetic # Mn MODI SIGN ARDHACANDRA
+116AB ; Other_Alphabetic # Mn TAKRI SIGN ANUSVARA
+116AC ; Other_Alphabetic # Mc TAKRI SIGN VISARGA
+116AD ; Other_Alphabetic # Mn TAKRI VOWEL SIGN AA
+116AE..116AF ; Other_Alphabetic # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II
+116B0..116B5 ; Other_Alphabetic # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU
+1171D..1171F ; Other_Alphabetic # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA
+11720..11721 ; Other_Alphabetic # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA
+11722..11725 ; Other_Alphabetic # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU
+11726 ; Other_Alphabetic # Mc AHOM VOWEL SIGN E
+11727..1172A ; Other_Alphabetic # Mn [4] AHOM VOWEL SIGN AW..AHOM VOWEL SIGN AM
+16B30..16B36 ; Other_Alphabetic # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM
+16F51..16F7E ; Other_Alphabetic # Mc [46] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN NG
+1BC9E ; Other_Alphabetic # Mn DUPLOYAN DOUBLE MARK
+1F130..1F149 ; Other_Alphabetic # So [26] SQUARED LATIN CAPITAL LETTER A..SQUARED LATIN CAPITAL LETTER Z
+1F150..1F169 ; Other_Alphabetic # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z
+1F170..1F189 ; Other_Alphabetic # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z
+
+# Total code points: 1116
+
+# ================================================
+
+3006 ; Ideographic # Lo IDEOGRAPHIC CLOSING MARK
+3007 ; Ideographic # Nl IDEOGRAPHIC NUMBER ZERO
+3021..3029 ; Ideographic # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE
+3038..303A ; Ideographic # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY
+3400..4DB5 ; Ideographic # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5
+4E00..9FD5 ; Ideographic # Lo [20950] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FD5
+F900..FA6D ; Ideographic # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D
+FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9
+20000..2A6D6 ; Ideographic # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6
+2A700..2B734 ; Ideographic # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734
+2B740..2B81D ; Ideographic # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
+2B820..2CEA1 ; Ideographic # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
+2F800..2FA1D ; Ideographic # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
+
+# Total code points: 81404
+
+# ================================================
+
+005E ; Diacritic # Sk CIRCUMFLEX ACCENT
+0060 ; Diacritic # Sk GRAVE ACCENT
+00A8 ; Diacritic # Sk DIAERESIS
+00AF ; Diacritic # Sk MACRON
+00B4 ; Diacritic # Sk ACUTE ACCENT
+00B7 ; Diacritic # Po MIDDLE DOT
+00B8 ; Diacritic # Sk CEDILLA
+02B0..02C1 ; Diacritic # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP
+02C2..02C5 ; Diacritic # Sk [4] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER DOWN ARROWHEAD
+02C6..02D1 ; Diacritic # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON
+02D2..02DF ; Diacritic # Sk [14] MODIFIER LETTER CENTRED RIGHT HALF RING..MODIFIER LETTER CROSS ACCENT
+02E0..02E4 ; Diacritic # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP
+02E5..02EB ; Diacritic # Sk [7] MODIFIER LETTER EXTRA-HIGH TONE BAR..MODIFIER LETTER YANG DEPARTING TONE MARK
+02EC ; Diacritic # Lm MODIFIER LETTER VOICING
+02ED ; Diacritic # Sk MODIFIER LETTER UNASPIRATED
+02EE ; Diacritic # Lm MODIFIER LETTER DOUBLE APOSTROPHE
+02EF..02FF ; Diacritic # Sk [17] MODIFIER LETTER LOW DOWN ARROWHEAD..MODIFIER LETTER LOW LEFT ARROW
+0300..034E ; Diacritic # Mn [79] COMBINING GRAVE ACCENT..COMBINING UPWARDS ARROW BELOW
+0350..0357 ; Diacritic # Mn [8] COMBINING RIGHT ARROWHEAD ABOVE..COMBINING RIGHT HALF RING ABOVE
+035D..0362 ; Diacritic # Mn [6] COMBINING DOUBLE BREVE..COMBINING DOUBLE RIGHTWARDS ARROW BELOW
+0374 ; Diacritic # Lm GREEK NUMERAL SIGN
+0375 ; Diacritic # Sk GREEK LOWER NUMERAL SIGN
+037A ; Diacritic # Lm GREEK YPOGEGRAMMENI
+0384..0385 ; Diacritic # Sk [2] GREEK TONOS..GREEK DIALYTIKA TONOS
+0483..0487 ; Diacritic # Mn [5] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC POKRYTIE
+0559 ; Diacritic # Lm ARMENIAN MODIFIER LETTER LEFT HALF RING
+0591..05A1 ; Diacritic # Mn [17] HEBREW ACCENT ETNAHTA..HEBREW ACCENT PAZER
+05A3..05BD ; Diacritic # Mn [27] HEBREW ACCENT MUNAH..HEBREW POINT METEG
+05BF ; Diacritic # Mn HEBREW POINT RAFE
+05C1..05C2 ; Diacritic # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT
+05C4 ; Diacritic # Mn HEBREW MARK UPPER DOT
+064B..0652 ; Diacritic # Mn [8] ARABIC FATHATAN..ARABIC SUKUN
+0657..0658 ; Diacritic # Mn [2] ARABIC INVERTED DAMMA..ARABIC MARK NOON GHUNNA
+06DF..06E0 ; Diacritic # Mn [2] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH UPRIGHT RECTANGULAR ZERO
+06E5..06E6 ; Diacritic # Lm [2] ARABIC SMALL WAW..ARABIC SMALL YEH
+06EA..06EC ; Diacritic # Mn [3] ARABIC EMPTY CENTRE LOW STOP..ARABIC ROUNDED HIGH STOP WITH FILLED CENTRE
+0730..074A ; Diacritic # Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH
+07A6..07B0 ; Diacritic # Mn [11] THAANA ABAFILI..THAANA SUKUN
+07EB..07F3 ; Diacritic # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE
+07F4..07F5 ; Diacritic # Lm [2] NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE
+0818..0819 ; Diacritic # Mn [2] SAMARITAN MARK OCCLUSION..SAMARITAN MARK DAGESH
+08E3..08FE ; Diacritic # Mn [28] ARABIC TURNED DAMMA BELOW..ARABIC DAMMA WITH DOT
+093C ; Diacritic # Mn DEVANAGARI SIGN NUKTA
+094D ; Diacritic # Mn DEVANAGARI SIGN VIRAMA
+0951..0954 ; Diacritic # Mn [4] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI ACUTE ACCENT
+0971 ; Diacritic # Lm DEVANAGARI SIGN HIGH SPACING DOT
+09BC ; Diacritic # Mn BENGALI SIGN NUKTA
+09CD ; Diacritic # Mn BENGALI SIGN VIRAMA
+0A3C ; Diacritic # Mn GURMUKHI SIGN NUKTA
+0A4D ; Diacritic # Mn GURMUKHI SIGN VIRAMA
+0ABC ; Diacritic # Mn GUJARATI SIGN NUKTA
+0ACD ; Diacritic # Mn GUJARATI SIGN VIRAMA
+0B3C ; Diacritic # Mn ORIYA SIGN NUKTA
+0B4D ; Diacritic # Mn ORIYA SIGN VIRAMA
+0BCD ; Diacritic # Mn TAMIL SIGN VIRAMA
+0C4D ; Diacritic # Mn TELUGU SIGN VIRAMA
+0CBC ; Diacritic # Mn KANNADA SIGN NUKTA
+0CCD ; Diacritic # Mn KANNADA SIGN VIRAMA
+0D4D ; Diacritic # Mn MALAYALAM SIGN VIRAMA
+0DCA ; Diacritic # Mn SINHALA SIGN AL-LAKUNA
+0E47..0E4C ; Diacritic # Mn [6] THAI CHARACTER MAITAIKHU..THAI CHARACTER THANTHAKHAT
+0E4E ; Diacritic # Mn THAI CHARACTER YAMAKKAN
+0EC8..0ECC ; Diacritic # Mn [5] LAO TONE MAI EK..LAO CANCELLATION MARK
+0F18..0F19 ; Diacritic # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS
+0F35 ; Diacritic # Mn TIBETAN MARK NGAS BZUNG NYI ZLA
+0F37 ; Diacritic # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS
+0F39 ; Diacritic # Mn TIBETAN MARK TSA -PHRU
+0F3E..0F3F ; Diacritic # Mc [2] TIBETAN SIGN YAR TSHES..TIBETAN SIGN MAR TSHES
+0F82..0F84 ; Diacritic # Mn [3] TIBETAN SIGN NYI ZLA NAA DA..TIBETAN MARK HALANTA
+0F86..0F87 ; Diacritic # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS
+0FC6 ; Diacritic # Mn TIBETAN SYMBOL PADMA GDAN
+1037 ; Diacritic # Mn MYANMAR SIGN DOT BELOW
+1039..103A ; Diacritic # Mn [2] MYANMAR SIGN VIRAMA..MYANMAR SIGN ASAT
+1087..108C ; Diacritic # Mc [6] MYANMAR SIGN SHAN TONE-2..MYANMAR SIGN SHAN COUNCIL TONE-3
+108D ; Diacritic # Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE
+108F ; Diacritic # Mc MYANMAR SIGN RUMAI PALAUNG TONE-5
+109A..109B ; Diacritic # Mc [2] MYANMAR SIGN KHAMTI TONE-1..MYANMAR SIGN KHAMTI TONE-3
+17C9..17D3 ; Diacritic # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT
+17DD ; Diacritic # Mn KHMER SIGN ATTHACAN
+1939..193B ; Diacritic # Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I
+1A75..1A7C ; Diacritic # Mn [8] TAI THAM SIGN TONE-1..TAI THAM SIGN KHUEN-LUE KARAN
+1A7F ; Diacritic # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT
+1AB0..1ABD ; Diacritic # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW
+1B34 ; Diacritic # Mn BALINESE SIGN REREKAN
+1B44 ; Diacritic # Mc BALINESE ADEG ADEG
+1B6B..1B73 ; Diacritic # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG
+1BAA ; Diacritic # Mc SUNDANESE SIGN PAMAAEH
+1BAB ; Diacritic # Mn SUNDANESE SIGN VIRAMA
+1C36..1C37 ; Diacritic # Mn [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA
+1C78..1C7D ; Diacritic # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD
+1CD0..1CD2 ; Diacritic # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA
+1CD3 ; Diacritic # Po VEDIC SIGN NIHSHVASA
+1CD4..1CE0 ; Diacritic # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA
+1CE1 ; Diacritic # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA
+1CE2..1CE8 ; Diacritic # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL
+1CED ; Diacritic # Mn VEDIC SIGN TIRYAK
+1CF4 ; Diacritic # Mn VEDIC TONE CANDRA ABOVE
+1CF8..1CF9 ; Diacritic # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE
+1D2C..1D6A ; Diacritic # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI
+1DC4..1DCF ; Diacritic # Mn [12] COMBINING MACRON-ACUTE..COMBINING ZIGZAG BELOW
+1DF5 ; Diacritic # Mn COMBINING UP TACK ABOVE
+1DFD..1DFF ; Diacritic # Mn [3] COMBINING ALMOST EQUAL TO BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
+1FBD ; Diacritic # Sk GREEK KORONIS
+1FBF..1FC1 ; Diacritic # Sk [3] GREEK PSILI..GREEK DIALYTIKA AND PERISPOMENI
+1FCD..1FCF ; Diacritic # Sk [3] GREEK PSILI AND VARIA..GREEK PSILI AND PERISPOMENI
+1FDD..1FDF ; Diacritic # Sk [3] GREEK DASIA AND VARIA..GREEK DASIA AND PERISPOMENI
+1FED..1FEF ; Diacritic # Sk [3] GREEK DIALYTIKA AND VARIA..GREEK VARIA
+1FFD..1FFE ; Diacritic # Sk [2] GREEK OXIA..GREEK DASIA
+2CEF..2CF1 ; Diacritic # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS
+2E2F ; Diacritic # Lm VERTICAL TILDE
+302A..302D ; Diacritic # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK
+302E..302F ; Diacritic # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK
+3099..309A ; Diacritic # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
+309B..309C ; Diacritic # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
+30FC ; Diacritic # Lm KATAKANA-HIRAGANA PROLONGED SOUND MARK
+A66F ; Diacritic # Mn COMBINING CYRILLIC VZMET
+A67C..A67D ; Diacritic # Mn [2] COMBINING CYRILLIC KAVYKA..COMBINING CYRILLIC PAYEROK
+A67F ; Diacritic # Lm CYRILLIC PAYEROK
+A69C..A69D ; Diacritic # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN
+A6F0..A6F1 ; Diacritic # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS
+A717..A71F ; Diacritic # Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK
+A720..A721 ; Diacritic # Sk [2] MODIFIER LETTER STRESS AND HIGH TONE..MODIFIER LETTER STRESS AND LOW TONE
+A788 ; Diacritic # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT
+A7F8..A7F9 ; Diacritic # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE
+A8C4 ; Diacritic # Mn SAURASHTRA SIGN VIRAMA
+A8E0..A8F1 ; Diacritic # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA
+A92B..A92D ; Diacritic # Mn [3] KAYAH LI TONE PLOPHU..KAYAH LI TONE CALYA PLOPHU
+A92E ; Diacritic # Po KAYAH LI SIGN CWI
+A953 ; Diacritic # Mc REJANG VIRAMA
+A9B3 ; Diacritic # Mn JAVANESE SIGN CECAK TELU
+A9C0 ; Diacritic # Mc JAVANESE PANGKON
+A9E5 ; Diacritic # Mn MYANMAR SIGN SHAN SAW
+AA7B ; Diacritic # Mc MYANMAR SIGN PAO KAREN TONE
+AA7C ; Diacritic # Mn MYANMAR SIGN TAI LAING TONE-2
+AA7D ; Diacritic # Mc MYANMAR SIGN TAI LAING TONE-5
+AABF ; Diacritic # Mn TAI VIET TONE MAI EK
+AAC0 ; Diacritic # Lo TAI VIET TONE MAI NUENG
+AAC1 ; Diacritic # Mn TAI VIET TONE MAI THO
+AAC2 ; Diacritic # Lo TAI VIET TONE MAI SONG
+AAF6 ; Diacritic # Mn MEETEI MAYEK VIRAMA
+AB5B ; Diacritic # Sk MODIFIER BREVE WITH INVERTED BREVE
+AB5C..AB5F ; Diacritic # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK
+ABEC ; Diacritic # Mc MEETEI MAYEK LUM IYEK
+ABED ; Diacritic # Mn MEETEI MAYEK APUN IYEK
+FB1E ; Diacritic # Mn HEBREW POINT JUDEO-SPANISH VARIKA
+FE20..FE2F ; Diacritic # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITLO RIGHT HALF
+FF3E ; Diacritic # Sk FULLWIDTH CIRCUMFLEX ACCENT
+FF40 ; Diacritic # Sk FULLWIDTH GRAVE ACCENT
+FF70 ; Diacritic # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
+FF9E..FF9F ; Diacritic # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
+FFE3 ; Diacritic # Sk FULLWIDTH MACRON
+102E0 ; Diacritic # Mn COPTIC EPACT THOUSANDS MARK
+10AE5..10AE6 ; Diacritic # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW
+110B9..110BA ; Diacritic # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA
+11133..11134 ; Diacritic # Mn [2] CHAKMA VIRAMA..CHAKMA MAAYYAA
+11173 ; Diacritic # Mn MAHAJANI SIGN NUKTA
+111C0 ; Diacritic # Mc SHARADA SIGN VIRAMA
+111CA..111CC ; Diacritic # Mn [3] SHARADA SIGN NUKTA..SHARADA EXTRA SHORT VOWEL MARK
+11235 ; Diacritic # Mc KHOJKI SIGN VIRAMA
+11236 ; Diacritic # Mn KHOJKI SIGN NUKTA
+112E9..112EA ; Diacritic # Mn [2] KHUDAWADI SIGN NUKTA..KHUDAWADI SIGN VIRAMA
+1133C ; Diacritic # Mn GRANTHA SIGN NUKTA
+1134D ; Diacritic # Mc GRANTHA SIGN VIRAMA
+11366..1136C ; Diacritic # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX
+11370..11374 ; Diacritic # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA
+114C2..114C3 ; Diacritic # Mn [2] TIRHUTA SIGN VIRAMA..TIRHUTA SIGN NUKTA
+115BF..115C0 ; Diacritic # Mn [2] SIDDHAM SIGN VIRAMA..SIDDHAM SIGN NUKTA
+1163F ; Diacritic # Mn MODI SIGN VIRAMA
+116B6 ; Diacritic # Mc TAKRI SIGN VIRAMA
+116B7 ; Diacritic # Mn TAKRI SIGN NUKTA
+1172B ; Diacritic # Mn AHOM SIGN KILLER
+16AF0..16AF4 ; Diacritic # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE
+16F8F..16F92 ; Diacritic # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW
+16F93..16F9F ; Diacritic # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8
+1D167..1D169 ; Diacritic # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3
+1D16D..1D172 ; Diacritic # Mc [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5
+1D17B..1D182 ; Diacritic # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE
+1D185..1D18B ; Diacritic # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE
+1D1AA..1D1AD ; Diacritic # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO
+1E8D0..1E8D6 ; Diacritic # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS
+
+# Total code points: 773
+
+# ================================================
+
+00B7 ; Extender # Po MIDDLE DOT
+02D0..02D1 ; Extender # Lm [2] MODIFIER LETTER TRIANGULAR COLON..MODIFIER LETTER HALF TRIANGULAR COLON
+0640 ; Extender # Lm ARABIC TATWEEL
+07FA ; Extender # Lm NKO LAJANYALAN
+0E46 ; Extender # Lm THAI CHARACTER MAIYAMOK
+0EC6 ; Extender # Lm LAO KO LA
+180A ; Extender # Po MONGOLIAN NIRUGU
+1843 ; Extender # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN
+1AA7 ; Extender # Lm TAI THAM SIGN MAI YAMOK
+1C36 ; Extender # Mn LEPCHA SIGN RAN
+1C7B ; Extender # Lm OL CHIKI RELAA
+3005 ; Extender # Lm IDEOGRAPHIC ITERATION MARK
+3031..3035 ; Extender # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF
+309D..309E ; Extender # Lm [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK
+30FC..30FE ; Extender # Lm [3] KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA VOICED ITERATION MARK
+A015 ; Extender # Lm YI SYLLABLE WU
+A60C ; Extender # Lm VAI SYLLABLE LENGTHENER
+A9CF ; Extender # Lm JAVANESE PANGRANGKEP
+A9E6 ; Extender # Lm MYANMAR MODIFIER LETTER SHAN REDUPLICATION
+AA70 ; Extender # Lm MYANMAR MODIFIER LETTER KHAMTI REDUPLICATION
+AADD ; Extender # Lm TAI VIET SYMBOL SAM
+AAF3..AAF4 ; Extender # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK
+FF70 ; Extender # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
+1135D ; Extender # Lo GRANTHA SIGN PLUTA
+115C6..115C8 ; Extender # Po [3] SIDDHAM REPETITION MARK-1..SIDDHAM REPETITION MARK-3
+16B42..16B43 ; Extender # Lm [2] PAHAWH HMONG SIGN VOS NRUA..PAHAWH HMONG SIGN IB YAM
+
+# Total code points: 38
+
+# ================================================
+
+00AA ; Other_Lowercase # Lo FEMININE ORDINAL INDICATOR
+00BA ; Other_Lowercase # Lo MASCULINE ORDINAL INDICATOR
+02B0..02B8 ; Other_Lowercase # Lm [9] MODIFIER LETTER SMALL H..MODIFIER LETTER SMALL Y
+02C0..02C1 ; Other_Lowercase # Lm [2] MODIFIER LETTER GLOTTAL STOP..MODIFIER LETTER REVERSED GLOTTAL STOP
+02E0..02E4 ; Other_Lowercase # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP
+0345 ; Other_Lowercase # Mn COMBINING GREEK YPOGEGRAMMENI
+037A ; Other_Lowercase # Lm GREEK YPOGEGRAMMENI
+1D2C..1D6A ; Other_Lowercase # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI
+1D78 ; Other_Lowercase # Lm MODIFIER LETTER CYRILLIC EN
+1D9B..1DBF ; Other_Lowercase # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA
+2071 ; Other_Lowercase # Lm SUPERSCRIPT LATIN SMALL LETTER I
+207F ; Other_Lowercase # Lm SUPERSCRIPT LATIN SMALL LETTER N
+2090..209C ; Other_Lowercase # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T
+2170..217F ; Other_Lowercase # Nl [16] SMALL ROMAN NUMERAL ONE..SMALL ROMAN NUMERAL ONE THOUSAND
+24D0..24E9 ; Other_Lowercase # So [26] CIRCLED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z
+2C7C..2C7D ; Other_Lowercase # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V
+A69C..A69D ; Other_Lowercase # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN
+A770 ; Other_Lowercase # Lm MODIFIER LETTER US
+A7F8..A7F9 ; Other_Lowercase # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE
+AB5C..AB5F ; Other_Lowercase # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK
+
+# Total code points: 189
+
+# ================================================
+
+2160..216F ; Other_Uppercase # Nl [16] ROMAN NUMERAL ONE..ROMAN NUMERAL ONE THOUSAND
+24B6..24CF ; Other_Uppercase # So [26] CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN CAPITAL LETTER Z
+1F130..1F149 ; Other_Uppercase # So [26] SQUARED LATIN CAPITAL LETTER A..SQUARED LATIN CAPITAL LETTER Z
+1F150..1F169 ; Other_Uppercase # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z
+1F170..1F189 ; Other_Uppercase # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z
+
+# Total code points: 120
+
+# ================================================
+
+FDD0..FDEF ; Noncharacter_Code_Point # Cn [32] <noncharacter-FDD0>..<noncharacter-FDEF>
+FFFE..FFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-FFFE>..<noncharacter-FFFF>
+1FFFE..1FFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-1FFFE>..<noncharacter-1FFFF>
+2FFFE..2FFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-2FFFE>..<noncharacter-2FFFF>
+3FFFE..3FFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-3FFFE>..<noncharacter-3FFFF>
+4FFFE..4FFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-4FFFE>..<noncharacter-4FFFF>
+5FFFE..5FFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-5FFFE>..<noncharacter-5FFFF>
+6FFFE..6FFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-6FFFE>..<noncharacter-6FFFF>
+7FFFE..7FFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-7FFFE>..<noncharacter-7FFFF>
+8FFFE..8FFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-8FFFE>..<noncharacter-8FFFF>
+9FFFE..9FFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-9FFFE>..<noncharacter-9FFFF>
+AFFFE..AFFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-AFFFE>..<noncharacter-AFFFF>
+BFFFE..BFFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-BFFFE>..<noncharacter-BFFFF>
+CFFFE..CFFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-CFFFE>..<noncharacter-CFFFF>
+DFFFE..DFFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-DFFFE>..<noncharacter-DFFFF>
+EFFFE..EFFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-EFFFE>..<noncharacter-EFFFF>
+FFFFE..FFFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-FFFFE>..<noncharacter-FFFFF>
+10FFFE..10FFFF; Noncharacter_Code_Point # Cn [2] <noncharacter-10FFFE>..<noncharacter-10FFFF>
+
+# Total code points: 66
+
+# ================================================
+
+09BE ; Other_Grapheme_Extend # Mc BENGALI VOWEL SIGN AA
+09D7 ; Other_Grapheme_Extend # Mc BENGALI AU LENGTH MARK
+0B3E ; Other_Grapheme_Extend # Mc ORIYA VOWEL SIGN AA
+0B57 ; Other_Grapheme_Extend # Mc ORIYA AU LENGTH MARK
+0BBE ; Other_Grapheme_Extend # Mc TAMIL VOWEL SIGN AA
+0BD7 ; Other_Grapheme_Extend # Mc TAMIL AU LENGTH MARK
+0CC2 ; Other_Grapheme_Extend # Mc KANNADA VOWEL SIGN UU
+0CD5..0CD6 ; Other_Grapheme_Extend # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK
+0D3E ; Other_Grapheme_Extend # Mc MALAYALAM VOWEL SIGN AA
+0D57 ; Other_Grapheme_Extend # Mc MALAYALAM AU LENGTH MARK
+0DCF ; Other_Grapheme_Extend # Mc SINHALA VOWEL SIGN AELA-PILLA
+0DDF ; Other_Grapheme_Extend # Mc SINHALA VOWEL SIGN GAYANUKITTA
+200C..200D ; Other_Grapheme_Extend # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER
+302E..302F ; Other_Grapheme_Extend # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK
+FF9E..FF9F ; Other_Grapheme_Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
+1133E ; Other_Grapheme_Extend # Mc GRANTHA VOWEL SIGN AA
+11357 ; Other_Grapheme_Extend # Mc GRANTHA AU LENGTH MARK
+114B0 ; Other_Grapheme_Extend # Mc TIRHUTA VOWEL SIGN AA
+114BD ; Other_Grapheme_Extend # Mc TIRHUTA VOWEL SIGN SHORT O
+115AF ; Other_Grapheme_Extend # Mc SIDDHAM VOWEL SIGN AA
+1D165 ; Other_Grapheme_Extend # Mc MUSICAL SYMBOL COMBINING STEM
+1D16E..1D172 ; Other_Grapheme_Extend # Mc [5] MUSICAL SYMBOL COMBINING FLAG-1..MUSICAL SYMBOL COMBINING FLAG-5
+
+# Total code points: 30
+
+# ================================================
+
+2FF0..2FF1 ; IDS_Binary_Operator # So [2] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER ABOVE TO BELOW
+2FF4..2FFB ; IDS_Binary_Operator # So [8] IDEOGRAPHIC DESCRIPTION CHARACTER FULL SURROUND..IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID
+
+# Total code points: 10
+
+# ================================================
+
+2FF2..2FF3 ; IDS_Trinary_Operator # So [2] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO MIDDLE AND RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER ABOVE TO MIDDLE AND BELOW
+
+# Total code points: 2
+
+# ================================================
+
+2E80..2E99 ; Radical # So [26] CJK RADICAL REPEAT..CJK RADICAL RAP
+2E9B..2EF3 ; Radical # So [89] CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED TURTLE
+2F00..2FD5 ; Radical # So [214] KANGXI RADICAL ONE..KANGXI RADICAL FLUTE
+
+# Total code points: 329
+
+# ================================================
+
+3400..4DB5 ; Unified_Ideograph # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5
+4E00..9FD5 ; Unified_Ideograph # Lo [20950] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FD5
+FA0E..FA0F ; Unified_Ideograph # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA0E..CJK COMPATIBILITY IDEOGRAPH-FA0F
+FA11 ; Unified_Ideograph # Lo CJK COMPATIBILITY IDEOGRAPH-FA11
+FA13..FA14 ; Unified_Ideograph # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA13..CJK COMPATIBILITY IDEOGRAPH-FA14
+FA1F ; Unified_Ideograph # Lo CJK COMPATIBILITY IDEOGRAPH-FA1F
+FA21 ; Unified_Ideograph # Lo CJK COMPATIBILITY IDEOGRAPH-FA21
+FA23..FA24 ; Unified_Ideograph # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA23..CJK COMPATIBILITY IDEOGRAPH-FA24
+FA27..FA29 ; Unified_Ideograph # Lo [3] CJK COMPATIBILITY IDEOGRAPH-FA27..CJK COMPATIBILITY IDEOGRAPH-FA29
+20000..2A6D6 ; Unified_Ideograph # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6
+2A700..2B734 ; Unified_Ideograph # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734
+2B740..2B81D ; Unified_Ideograph # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
+2B820..2CEA1 ; Unified_Ideograph # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
+
+# Total code points: 80388
+
+# ================================================
+
+034F ; Other_Default_Ignorable_Code_Point # Mn COMBINING GRAPHEME JOINER
+115F..1160 ; Other_Default_Ignorable_Code_Point # Lo [2] HANGUL CHOSEONG FILLER..HANGUL JUNGSEONG FILLER
+17B4..17B5 ; Other_Default_Ignorable_Code_Point # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA
+2065 ; Other_Default_Ignorable_Code_Point # Cn <reserved-2065>
+3164 ; Other_Default_Ignorable_Code_Point # Lo HANGUL FILLER
+FFA0 ; Other_Default_Ignorable_Code_Point # Lo HALFWIDTH HANGUL FILLER
+FFF0..FFF8 ; Other_Default_Ignorable_Code_Point # Cn [9] <reserved-FFF0>..<reserved-FFF8>
+E0000 ; Other_Default_Ignorable_Code_Point # Cn <reserved-E0000>
+E0002..E001F ; Other_Default_Ignorable_Code_Point # Cn [30] <reserved-E0002>..<reserved-E001F>
+E0080..E00FF ; Other_Default_Ignorable_Code_Point # Cn [128] <reserved-E0080>..<reserved-E00FF>
+E01F0..E0FFF ; Other_Default_Ignorable_Code_Point # Cn [3600] <reserved-E01F0>..<reserved-E0FFF>
+
+# Total code points: 3776
+
+# ================================================
+
+0149 ; Deprecated # L& LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
+0673 ; Deprecated # Lo ARABIC LETTER ALEF WITH WAVY HAMZA BELOW
+0F77 ; Deprecated # Mn TIBETAN VOWEL SIGN VOCALIC RR
+0F79 ; Deprecated # Mn TIBETAN VOWEL SIGN VOCALIC LL
+17A3..17A4 ; Deprecated # Lo [2] KHMER INDEPENDENT VOWEL QAQ..KHMER INDEPENDENT VOWEL QAA
+206A..206F ; Deprecated # Cf [6] INHIBIT SYMMETRIC SWAPPING..NOMINAL DIGIT SHAPES
+2329 ; Deprecated # Ps LEFT-POINTING ANGLE BRACKET
+232A ; Deprecated # Pe RIGHT-POINTING ANGLE BRACKET
+E0001 ; Deprecated # Cf LANGUAGE TAG
+E007F ; Deprecated # Cf CANCEL TAG
+
+# Total code points: 16
+
+# ================================================
+
+0069..006A ; Soft_Dotted # L& [2] LATIN SMALL LETTER I..LATIN SMALL LETTER J
+012F ; Soft_Dotted # L& LATIN SMALL LETTER I WITH OGONEK
+0249 ; Soft_Dotted # L& LATIN SMALL LETTER J WITH STROKE
+0268 ; Soft_Dotted # L& LATIN SMALL LETTER I WITH STROKE
+029D ; Soft_Dotted # L& LATIN SMALL LETTER J WITH CROSSED-TAIL
+02B2 ; Soft_Dotted # Lm MODIFIER LETTER SMALL J
+03F3 ; Soft_Dotted # L& GREEK LETTER YOT
+0456 ; Soft_Dotted # L& CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
+0458 ; Soft_Dotted # L& CYRILLIC SMALL LETTER JE
+1D62 ; Soft_Dotted # Lm LATIN SUBSCRIPT SMALL LETTER I
+1D96 ; Soft_Dotted # L& LATIN SMALL LETTER I WITH RETROFLEX HOOK
+1DA4 ; Soft_Dotted # Lm MODIFIER LETTER SMALL I WITH STROKE
+1DA8 ; Soft_Dotted # Lm MODIFIER LETTER SMALL J WITH CROSSED-TAIL
+1E2D ; Soft_Dotted # L& LATIN SMALL LETTER I WITH TILDE BELOW
+1ECB ; Soft_Dotted # L& LATIN SMALL LETTER I WITH DOT BELOW
+2071 ; Soft_Dotted # Lm SUPERSCRIPT LATIN SMALL LETTER I
+2148..2149 ; Soft_Dotted # L& [2] DOUBLE-STRUCK ITALIC SMALL I..DOUBLE-STRUCK ITALIC SMALL J
+2C7C ; Soft_Dotted # Lm LATIN SUBSCRIPT SMALL LETTER J
+1D422..1D423 ; Soft_Dotted # L& [2] MATHEMATICAL BOLD SMALL I..MATHEMATICAL BOLD SMALL J
+1D456..1D457 ; Soft_Dotted # L& [2] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL ITALIC SMALL J
+1D48A..1D48B ; Soft_Dotted # L& [2] MATHEMATICAL BOLD ITALIC SMALL I..MATHEMATICAL BOLD ITALIC SMALL J
+1D4BE..1D4BF ; Soft_Dotted # L& [2] MATHEMATICAL SCRIPT SMALL I..MATHEMATICAL SCRIPT SMALL J
+1D4F2..1D4F3 ; Soft_Dotted # L& [2] MATHEMATICAL BOLD SCRIPT SMALL I..MATHEMATICAL BOLD SCRIPT SMALL J
+1D526..1D527 ; Soft_Dotted # L& [2] MATHEMATICAL FRAKTUR SMALL I..MATHEMATICAL FRAKTUR SMALL J
+1D55A..1D55B ; Soft_Dotted # L& [2] MATHEMATICAL DOUBLE-STRUCK SMALL I..MATHEMATICAL DOUBLE-STRUCK SMALL J
+1D58E..1D58F ; Soft_Dotted # L& [2] MATHEMATICAL BOLD FRAKTUR SMALL I..MATHEMATICAL BOLD FRAKTUR SMALL J
+1D5C2..1D5C3 ; Soft_Dotted # L& [2] MATHEMATICAL SANS-SERIF SMALL I..MATHEMATICAL SANS-SERIF SMALL J
+1D5F6..1D5F7 ; Soft_Dotted # L& [2] MATHEMATICAL SANS-SERIF BOLD SMALL I..MATHEMATICAL SANS-SERIF BOLD SMALL J
+1D62A..1D62B ; Soft_Dotted # L& [2] MATHEMATICAL SANS-SERIF ITALIC SMALL I..MATHEMATICAL SANS-SERIF ITALIC SMALL J
+1D65E..1D65F ; Soft_Dotted # L& [2] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL I..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL J
+1D692..1D693 ; Soft_Dotted # L& [2] MATHEMATICAL MONOSPACE SMALL I..MATHEMATICAL MONOSPACE SMALL J
+
+# Total code points: 46
+
+# ================================================
+
+0E40..0E44 ; Logical_Order_Exception # Lo [5] THAI CHARACTER SARA E..THAI CHARACTER SARA AI MAIMALAI
+0EC0..0EC4 ; Logical_Order_Exception # Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI
+19B5..19B7 ; Logical_Order_Exception # Lo [3] NEW TAI LUE VOWEL SIGN E..NEW TAI LUE VOWEL SIGN O
+19BA ; Logical_Order_Exception # Lo NEW TAI LUE VOWEL SIGN AY
+AAB5..AAB6 ; Logical_Order_Exception # Lo [2] TAI VIET VOWEL E..TAI VIET VOWEL O
+AAB9 ; Logical_Order_Exception # Lo TAI VIET VOWEL UEA
+AABB..AABC ; Logical_Order_Exception # Lo [2] TAI VIET VOWEL AUE..TAI VIET VOWEL AY
+
+# Total code points: 19
+
+# ================================================
+
+2118 ; Other_ID_Start # Sm SCRIPT CAPITAL P
+212E ; Other_ID_Start # So ESTIMATED SYMBOL
+309B..309C ; Other_ID_Start # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
+
+# Total code points: 4
+
+# ================================================
+
+00B7 ; Other_ID_Continue # Po MIDDLE DOT
+0387 ; Other_ID_Continue # Po GREEK ANO TELEIA
+1369..1371 ; Other_ID_Continue # No [9] ETHIOPIC DIGIT ONE..ETHIOPIC DIGIT NINE
+19DA ; Other_ID_Continue # No NEW TAI LUE THAM DIGIT ONE
+
+# Total code points: 12
+
+# ================================================
+
+0021 ; STerm # Po EXCLAMATION MARK
+002E ; STerm # Po FULL STOP
+003F ; STerm # Po QUESTION MARK
+0589 ; STerm # Po ARMENIAN FULL STOP
+061F ; STerm # Po ARABIC QUESTION MARK
+06D4 ; STerm # Po ARABIC FULL STOP
+0700..0702 ; STerm # Po [3] SYRIAC END OF PARAGRAPH..SYRIAC SUBLINEAR FULL STOP
+07F9 ; STerm # Po NKO EXCLAMATION MARK
+0964..0965 ; STerm # Po [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA
+104A..104B ; STerm # Po [2] MYANMAR SIGN LITTLE SECTION..MYANMAR SIGN SECTION
+1362 ; STerm # Po ETHIOPIC FULL STOP
+1367..1368 ; STerm # Po [2] ETHIOPIC QUESTION MARK..ETHIOPIC PARAGRAPH SEPARATOR
+166E ; STerm # Po CANADIAN SYLLABICS FULL STOP
+1735..1736 ; STerm # Po [2] PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DOUBLE PUNCTUATION
+1803 ; STerm # Po MONGOLIAN FULL STOP
+1809 ; STerm # Po MONGOLIAN MANCHU FULL STOP
+1944..1945 ; STerm # Po [2] LIMBU EXCLAMATION MARK..LIMBU QUESTION MARK
+1AA8..1AAB ; STerm # Po [4] TAI THAM SIGN KAAN..TAI THAM SIGN SATKAANKUU
+1B5A..1B5B ; STerm # Po [2] BALINESE PANTI..BALINESE PAMADA
+1B5E..1B5F ; STerm # Po [2] BALINESE CARIK SIKI..BALINESE CARIK PAREREN
+1C3B..1C3C ; STerm # Po [2] LEPCHA PUNCTUATION TA-ROL..LEPCHA PUNCTUATION NYET THYOOM TA-ROL
+1C7E..1C7F ; STerm # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD
+203C..203D ; STerm # Po [2] DOUBLE EXCLAMATION MARK..INTERROBANG
+2047..2049 ; STerm # Po [3] DOUBLE QUESTION MARK..EXCLAMATION QUESTION MARK
+2E2E ; STerm # Po REVERSED QUESTION MARK
+2E3C ; STerm # Po STENOGRAPHIC FULL STOP
+3002 ; STerm # Po IDEOGRAPHIC FULL STOP
+A4FF ; STerm # Po LISU PUNCTUATION FULL STOP
+A60E..A60F ; STerm # Po [2] VAI FULL STOP..VAI QUESTION MARK
+A6F3 ; STerm # Po BAMUM FULL STOP
+A6F7 ; STerm # Po BAMUM QUESTION MARK
+A876..A877 ; STerm # Po [2] PHAGS-PA MARK SHAD..PHAGS-PA MARK DOUBLE SHAD
+A8CE..A8CF ; STerm # Po [2] SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA
+A92F ; STerm # Po KAYAH LI SIGN SHYA
+A9C8..A9C9 ; STerm # Po [2] JAVANESE PADA LINGSA..JAVANESE PADA LUNGSI
+AA5D..AA5F ; STerm # Po [3] CHAM PUNCTUATION DANDA..CHAM PUNCTUATION TRIPLE DANDA
+AAF0..AAF1 ; STerm # Po [2] MEETEI MAYEK CHEIKHAN..MEETEI MAYEK AHANG KHUDAM
+ABEB ; STerm # Po MEETEI MAYEK CHEIKHEI
+FE52 ; STerm # Po SMALL FULL STOP
+FE56..FE57 ; STerm # Po [2] SMALL QUESTION MARK..SMALL EXCLAMATION MARK
+FF01 ; STerm # Po FULLWIDTH EXCLAMATION MARK
+FF0E ; STerm # Po FULLWIDTH FULL STOP
+FF1F ; STerm # Po FULLWIDTH QUESTION MARK
+FF61 ; STerm # Po HALFWIDTH IDEOGRAPHIC FULL STOP
+10A56..10A57 ; STerm # Po [2] KHAROSHTHI PUNCTUATION DANDA..KHAROSHTHI PUNCTUATION DOUBLE DANDA
+11047..11048 ; STerm # Po [2] BRAHMI DANDA..BRAHMI DOUBLE DANDA
+110BE..110C1 ; STerm # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA
+11141..11143 ; STerm # Po [3] CHAKMA DANDA..CHAKMA QUESTION MARK
+111C5..111C6 ; STerm # Po [2] SHARADA DANDA..SHARADA DOUBLE DANDA
+111CD ; STerm # Po SHARADA SUTRA MARK
+111DE..111DF ; STerm # Po [2] SHARADA SECTION MARK-1..SHARADA SECTION MARK-2
+11238..11239 ; STerm # Po [2] KHOJKI DANDA..KHOJKI DOUBLE DANDA
+1123B..1123C ; STerm # Po [2] KHOJKI SECTION MARK..KHOJKI DOUBLE SECTION MARK
+112A9 ; STerm # Po MULTANI SECTION MARK
+115C2..115C3 ; STerm # Po [2] SIDDHAM DANDA..SIDDHAM DOUBLE DANDA
+115C9..115D7 ; STerm # Po [15] SIDDHAM END OF TEXT MARK..SIDDHAM SECTION MARK WITH CIRCLES AND FOUR ENCLOSURES
+11641..11642 ; STerm # Po [2] MODI DANDA..MODI DOUBLE DANDA
+1173C..1173E ; STerm # Po [3] AHOM SIGN SMALL SECTION..AHOM SIGN RULAI
+16A6E..16A6F ; STerm # Po [2] MRO DANDA..MRO DOUBLE DANDA
+16AF5 ; STerm # Po BASSA VAH FULL STOP
+16B37..16B38 ; STerm # Po [2] PAHAWH HMONG SIGN VOS THOM..PAHAWH HMONG SIGN VOS TSHAB CEEB
+16B44 ; STerm # Po PAHAWH HMONG SIGN XAUS
+1BC9F ; STerm # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP
+1DA88 ; STerm # Po SIGNWRITING FULL STOP
+
+# Total code points: 120
+
+# ================================================
+
+180B..180D ; Variation_Selector # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE
+FE00..FE0F ; Variation_Selector # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16
+E0100..E01EF ; Variation_Selector # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
+
+# Total code points: 259
+
+# ================================================
+
+0009..000D ; Pattern_White_Space # Cc [5] <control-0009>..<control-000D>
+0020 ; Pattern_White_Space # Zs SPACE
+0085 ; Pattern_White_Space # Cc <control-0085>
+200E..200F ; Pattern_White_Space # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK
+2028 ; Pattern_White_Space # Zl LINE SEPARATOR
+2029 ; Pattern_White_Space # Zp PARAGRAPH SEPARATOR
+
+# Total code points: 11
+
+# ================================================
+
+0021..0023 ; Pattern_Syntax # Po [3] EXCLAMATION MARK..NUMBER SIGN
+0024 ; Pattern_Syntax # Sc DOLLAR SIGN
+0025..0027 ; Pattern_Syntax # Po [3] PERCENT SIGN..APOSTROPHE
+0028 ; Pattern_Syntax # Ps LEFT PARENTHESIS
+0029 ; Pattern_Syntax # Pe RIGHT PARENTHESIS
+002A ; Pattern_Syntax # Po ASTERISK
+002B ; Pattern_Syntax # Sm PLUS SIGN
+002C ; Pattern_Syntax # Po COMMA
+002D ; Pattern_Syntax # Pd HYPHEN-MINUS
+002E..002F ; Pattern_Syntax # Po [2] FULL STOP..SOLIDUS
+003A..003B ; Pattern_Syntax # Po [2] COLON..SEMICOLON
+003C..003E ; Pattern_Syntax # Sm [3] LESS-THAN SIGN..GREATER-THAN SIGN
+003F..0040 ; Pattern_Syntax # Po [2] QUESTION MARK..COMMERCIAL AT
+005B ; Pattern_Syntax # Ps LEFT SQUARE BRACKET
+005C ; Pattern_Syntax # Po REVERSE SOLIDUS
+005D ; Pattern_Syntax # Pe RIGHT SQUARE BRACKET
+005E ; Pattern_Syntax # Sk CIRCUMFLEX ACCENT
+0060 ; Pattern_Syntax # Sk GRAVE ACCENT
+007B ; Pattern_Syntax # Ps LEFT CURLY BRACKET
+007C ; Pattern_Syntax # Sm VERTICAL LINE
+007D ; Pattern_Syntax # Pe RIGHT CURLY BRACKET
+007E ; Pattern_Syntax # Sm TILDE
+00A1 ; Pattern_Syntax # Po INVERTED EXCLAMATION MARK
+00A2..00A5 ; Pattern_Syntax # Sc [4] CENT SIGN..YEN SIGN
+00A6 ; Pattern_Syntax # So BROKEN BAR
+00A7 ; Pattern_Syntax # Po SECTION SIGN
+00A9 ; Pattern_Syntax # So COPYRIGHT SIGN
+00AB ; Pattern_Syntax # Pi LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+00AC ; Pattern_Syntax # Sm NOT SIGN
+00AE ; Pattern_Syntax # So REGISTERED SIGN
+00B0 ; Pattern_Syntax # So DEGREE SIGN
+00B1 ; Pattern_Syntax # Sm PLUS-MINUS SIGN
+00B6 ; Pattern_Syntax # Po PILCROW SIGN
+00BB ; Pattern_Syntax # Pf RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+00BF ; Pattern_Syntax # Po INVERTED QUESTION MARK
+00D7 ; Pattern_Syntax # Sm MULTIPLICATION SIGN
+00F7 ; Pattern_Syntax # Sm DIVISION SIGN
+2010..2015 ; Pattern_Syntax # Pd [6] HYPHEN..HORIZONTAL BAR
+2016..2017 ; Pattern_Syntax # Po [2] DOUBLE VERTICAL LINE..DOUBLE LOW LINE
+2018 ; Pattern_Syntax # Pi LEFT SINGLE QUOTATION MARK
+2019 ; Pattern_Syntax # Pf RIGHT SINGLE QUOTATION MARK
+201A ; Pattern_Syntax # Ps SINGLE LOW-9 QUOTATION MARK
+201B..201C ; Pattern_Syntax # Pi [2] SINGLE HIGH-REVERSED-9 QUOTATION MARK..LEFT DOUBLE QUOTATION MARK
+201D ; Pattern_Syntax # Pf RIGHT DOUBLE QUOTATION MARK
+201E ; Pattern_Syntax # Ps DOUBLE LOW-9 QUOTATION MARK
+201F ; Pattern_Syntax # Pi DOUBLE HIGH-REVERSED-9 QUOTATION MARK
+2020..2027 ; Pattern_Syntax # Po [8] DAGGER..HYPHENATION POINT
+2030..2038 ; Pattern_Syntax # Po [9] PER MILLE SIGN..CARET
+2039 ; Pattern_Syntax # Pi SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+203A ; Pattern_Syntax # Pf SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+203B..203E ; Pattern_Syntax # Po [4] REFERENCE MARK..OVERLINE
+2041..2043 ; Pattern_Syntax # Po [3] CARET INSERTION POINT..HYPHEN BULLET
+2044 ; Pattern_Syntax # Sm FRACTION SLASH
+2045 ; Pattern_Syntax # Ps LEFT SQUARE BRACKET WITH QUILL
+2046 ; Pattern_Syntax # Pe RIGHT SQUARE BRACKET WITH QUILL
+2047..2051 ; Pattern_Syntax # Po [11] DOUBLE QUESTION MARK..TWO ASTERISKS ALIGNED VERTICALLY
+2052 ; Pattern_Syntax # Sm COMMERCIAL MINUS SIGN
+2053 ; Pattern_Syntax # Po SWUNG DASH
+2055..205E ; Pattern_Syntax # Po [10] FLOWER PUNCTUATION MARK..VERTICAL FOUR DOTS
+2190..2194 ; Pattern_Syntax # Sm [5] LEFTWARDS ARROW..LEFT RIGHT ARROW
+2195..2199 ; Pattern_Syntax # So [5] UP DOWN ARROW..SOUTH WEST ARROW
+219A..219B ; Pattern_Syntax # Sm [2] LEFTWARDS ARROW WITH STROKE..RIGHTWARDS ARROW WITH STROKE
+219C..219F ; Pattern_Syntax # So [4] LEFTWARDS WAVE ARROW..UPWARDS TWO HEADED ARROW
+21A0 ; Pattern_Syntax # Sm RIGHTWARDS TWO HEADED ARROW
+21A1..21A2 ; Pattern_Syntax # So [2] DOWNWARDS TWO HEADED ARROW..LEFTWARDS ARROW WITH TAIL
+21A3 ; Pattern_Syntax # Sm RIGHTWARDS ARROW WITH TAIL
+21A4..21A5 ; Pattern_Syntax # So [2] LEFTWARDS ARROW FROM BAR..UPWARDS ARROW FROM BAR
+21A6 ; Pattern_Syntax # Sm RIGHTWARDS ARROW FROM BAR
+21A7..21AD ; Pattern_Syntax # So [7] DOWNWARDS ARROW FROM BAR..LEFT RIGHT WAVE ARROW
+21AE ; Pattern_Syntax # Sm LEFT RIGHT ARROW WITH STROKE
+21AF..21CD ; Pattern_Syntax # So [31] DOWNWARDS ZIGZAG ARROW..LEFTWARDS DOUBLE ARROW WITH STROKE
+21CE..21CF ; Pattern_Syntax # Sm [2] LEFT RIGHT DOUBLE ARROW WITH STROKE..RIGHTWARDS DOUBLE ARROW WITH STROKE
+21D0..21D1 ; Pattern_Syntax # So [2] LEFTWARDS DOUBLE ARROW..UPWARDS DOUBLE ARROW
+21D2 ; Pattern_Syntax # Sm RIGHTWARDS DOUBLE ARROW
+21D3 ; Pattern_Syntax # So DOWNWARDS DOUBLE ARROW
+21D4 ; Pattern_Syntax # Sm LEFT RIGHT DOUBLE ARROW
+21D5..21F3 ; Pattern_Syntax # So [31] UP DOWN DOUBLE ARROW..UP DOWN WHITE ARROW
+21F4..22FF ; Pattern_Syntax # Sm [268] RIGHT ARROW WITH SMALL CIRCLE..Z NOTATION BAG MEMBERSHIP
+2300..2307 ; Pattern_Syntax # So [8] DIAMETER SIGN..WAVY LINE
+2308 ; Pattern_Syntax # Ps LEFT CEILING
+2309 ; Pattern_Syntax # Pe RIGHT CEILING
+230A ; Pattern_Syntax # Ps LEFT FLOOR
+230B ; Pattern_Syntax # Pe RIGHT FLOOR
+230C..231F ; Pattern_Syntax # So [20] BOTTOM RIGHT CROP..BOTTOM RIGHT CORNER
+2320..2321 ; Pattern_Syntax # Sm [2] TOP HALF INTEGRAL..BOTTOM HALF INTEGRAL
+2322..2328 ; Pattern_Syntax # So [7] FROWN..KEYBOARD
+2329 ; Pattern_Syntax # Ps LEFT-POINTING ANGLE BRACKET
+232A ; Pattern_Syntax # Pe RIGHT-POINTING ANGLE BRACKET
+232B..237B ; Pattern_Syntax # So [81] ERASE TO THE LEFT..NOT CHECK MARK
+237C ; Pattern_Syntax # Sm RIGHT ANGLE WITH DOWNWARDS ZIGZAG ARROW
+237D..239A ; Pattern_Syntax # So [30] SHOULDERED OPEN BOX..CLEAR SCREEN SYMBOL
+239B..23B3 ; Pattern_Syntax # Sm [25] LEFT PARENTHESIS UPPER HOOK..SUMMATION BOTTOM
+23B4..23DB ; Pattern_Syntax # So [40] TOP SQUARE BRACKET..FUSE
+23DC..23E1 ; Pattern_Syntax # Sm [6] TOP PARENTHESIS..BOTTOM TORTOISE SHELL BRACKET
+23E2..23FA ; Pattern_Syntax # So [25] WHITE TRAPEZIUM..BLACK CIRCLE FOR RECORD
+23FB..23FF ; Pattern_Syntax # Cn [5] <reserved-23FB>..<reserved-23FF>
+2400..2426 ; Pattern_Syntax # So [39] SYMBOL FOR NULL..SYMBOL FOR SUBSTITUTE FORM TWO
+2427..243F ; Pattern_Syntax # Cn [25] <reserved-2427>..<reserved-243F>
+2440..244A ; Pattern_Syntax # So [11] OCR HOOK..OCR DOUBLE BACKSLASH
+244B..245F ; Pattern_Syntax # Cn [21] <reserved-244B>..<reserved-245F>
+2500..25B6 ; Pattern_Syntax # So [183] BOX DRAWINGS LIGHT HORIZONTAL..BLACK RIGHT-POINTING TRIANGLE
+25B7 ; Pattern_Syntax # Sm WHITE RIGHT-POINTING TRIANGLE
+25B8..25C0 ; Pattern_Syntax # So [9] BLACK RIGHT-POINTING SMALL TRIANGLE..BLACK LEFT-POINTING TRIANGLE
+25C1 ; Pattern_Syntax # Sm WHITE LEFT-POINTING TRIANGLE
+25C2..25F7 ; Pattern_Syntax # So [54] BLACK LEFT-POINTING SMALL TRIANGLE..WHITE CIRCLE WITH UPPER RIGHT QUADRANT
+25F8..25FF ; Pattern_Syntax # Sm [8] UPPER LEFT TRIANGLE..LOWER RIGHT TRIANGLE
+2600..266E ; Pattern_Syntax # So [111] BLACK SUN WITH RAYS..MUSIC NATURAL SIGN
+266F ; Pattern_Syntax # Sm MUSIC SHARP SIGN
+2670..2767 ; Pattern_Syntax # So [248] WEST SYRIAC CROSS..ROTATED FLORAL HEART BULLET
+2768 ; Pattern_Syntax # Ps MEDIUM LEFT PARENTHESIS ORNAMENT
+2769 ; Pattern_Syntax # Pe MEDIUM RIGHT PARENTHESIS ORNAMENT
+276A ; Pattern_Syntax # Ps MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT
+276B ; Pattern_Syntax # Pe MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT
+276C ; Pattern_Syntax # Ps MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT
+276D ; Pattern_Syntax # Pe MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT
+276E ; Pattern_Syntax # Ps HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT
+276F ; Pattern_Syntax # Pe HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT
+2770 ; Pattern_Syntax # Ps HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT
+2771 ; Pattern_Syntax # Pe HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT
+2772 ; Pattern_Syntax # Ps LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT
+2773 ; Pattern_Syntax # Pe LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT
+2774 ; Pattern_Syntax # Ps MEDIUM LEFT CURLY BRACKET ORNAMENT
+2775 ; Pattern_Syntax # Pe MEDIUM RIGHT CURLY BRACKET ORNAMENT
+2794..27BF ; Pattern_Syntax # So [44] HEAVY WIDE-HEADED RIGHTWARDS ARROW..DOUBLE CURLY LOOP
+27C0..27C4 ; Pattern_Syntax # Sm [5] THREE DIMENSIONAL ANGLE..OPEN SUPERSET
+27C5 ; Pattern_Syntax # Ps LEFT S-SHAPED BAG DELIMITER
+27C6 ; Pattern_Syntax # Pe RIGHT S-SHAPED BAG DELIMITER
+27C7..27E5 ; Pattern_Syntax # Sm [31] OR WITH DOT INSIDE..WHITE SQUARE WITH RIGHTWARDS TICK
+27E6 ; Pattern_Syntax # Ps MATHEMATICAL LEFT WHITE SQUARE BRACKET
+27E7 ; Pattern_Syntax # Pe MATHEMATICAL RIGHT WHITE SQUARE BRACKET
+27E8 ; Pattern_Syntax # Ps MATHEMATICAL LEFT ANGLE BRACKET
+27E9 ; Pattern_Syntax # Pe MATHEMATICAL RIGHT ANGLE BRACKET
+27EA ; Pattern_Syntax # Ps MATHEMATICAL LEFT DOUBLE ANGLE BRACKET
+27EB ; Pattern_Syntax # Pe MATHEMATICAL RIGHT DOUBLE ANGLE BRACKET
+27EC ; Pattern_Syntax # Ps MATHEMATICAL LEFT WHITE TORTOISE SHELL BRACKET
+27ED ; Pattern_Syntax # Pe MATHEMATICAL RIGHT WHITE TORTOISE SHELL BRACKET
+27EE ; Pattern_Syntax # Ps MATHEMATICAL LEFT FLATTENED PARENTHESIS
+27EF ; Pattern_Syntax # Pe MATHEMATICAL RIGHT FLATTENED PARENTHESIS
+27F0..27FF ; Pattern_Syntax # Sm [16] UPWARDS QUADRUPLE ARROW..LONG RIGHTWARDS SQUIGGLE ARROW
+2800..28FF ; Pattern_Syntax # So [256] BRAILLE PATTERN BLANK..BRAILLE PATTERN DOTS-12345678
+2900..2982 ; Pattern_Syntax # Sm [131] RIGHTWARDS TWO-HEADED ARROW WITH VERTICAL STROKE..Z NOTATION TYPE COLON
+2983 ; Pattern_Syntax # Ps LEFT WHITE CURLY BRACKET
+2984 ; Pattern_Syntax # Pe RIGHT WHITE CURLY BRACKET
+2985 ; Pattern_Syntax # Ps LEFT WHITE PARENTHESIS
+2986 ; Pattern_Syntax # Pe RIGHT WHITE PARENTHESIS
+2987 ; Pattern_Syntax # Ps Z NOTATION LEFT IMAGE BRACKET
+2988 ; Pattern_Syntax # Pe Z NOTATION RIGHT IMAGE BRACKET
+2989 ; Pattern_Syntax # Ps Z NOTATION LEFT BINDING BRACKET
+298A ; Pattern_Syntax # Pe Z NOTATION RIGHT BINDING BRACKET
+298B ; Pattern_Syntax # Ps LEFT SQUARE BRACKET WITH UNDERBAR
+298C ; Pattern_Syntax # Pe RIGHT SQUARE BRACKET WITH UNDERBAR
+298D ; Pattern_Syntax # Ps LEFT SQUARE BRACKET WITH TICK IN TOP CORNER
+298E ; Pattern_Syntax # Pe RIGHT SQUARE BRACKET WITH TICK IN BOTTOM CORNER
+298F ; Pattern_Syntax # Ps LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER
+2990 ; Pattern_Syntax # Pe RIGHT SQUARE BRACKET WITH TICK IN TOP CORNER
+2991 ; Pattern_Syntax # Ps LEFT ANGLE BRACKET WITH DOT
+2992 ; Pattern_Syntax # Pe RIGHT ANGLE BRACKET WITH DOT
+2993 ; Pattern_Syntax # Ps LEFT ARC LESS-THAN BRACKET
+2994 ; Pattern_Syntax # Pe RIGHT ARC GREATER-THAN BRACKET
+2995 ; Pattern_Syntax # Ps DOUBLE LEFT ARC GREATER-THAN BRACKET
+2996 ; Pattern_Syntax # Pe DOUBLE RIGHT ARC LESS-THAN BRACKET
+2997 ; Pattern_Syntax # Ps LEFT BLACK TORTOISE SHELL BRACKET
+2998 ; Pattern_Syntax # Pe RIGHT BLACK TORTOISE SHELL BRACKET
+2999..29D7 ; Pattern_Syntax # Sm [63] DOTTED FENCE..BLACK HOURGLASS
+29D8 ; Pattern_Syntax # Ps LEFT WIGGLY FENCE
+29D9 ; Pattern_Syntax # Pe RIGHT WIGGLY FENCE
+29DA ; Pattern_Syntax # Ps LEFT DOUBLE WIGGLY FENCE
+29DB ; Pattern_Syntax # Pe RIGHT DOUBLE WIGGLY FENCE
+29DC..29FB ; Pattern_Syntax # Sm [32] INCOMPLETE INFINITY..TRIPLE PLUS
+29FC ; Pattern_Syntax # Ps LEFT-POINTING CURVED ANGLE BRACKET
+29FD ; Pattern_Syntax # Pe RIGHT-POINTING CURVED ANGLE BRACKET
+29FE..2AFF ; Pattern_Syntax # Sm [258] TINY..N-ARY WHITE VERTICAL BAR
+2B00..2B2F ; Pattern_Syntax # So [48] NORTH EAST WHITE ARROW..WHITE VERTICAL ELLIPSE
+2B30..2B44 ; Pattern_Syntax # Sm [21] LEFT ARROW WITH SMALL CIRCLE..RIGHTWARDS ARROW THROUGH SUPERSET
+2B45..2B46 ; Pattern_Syntax # So [2] LEFTWARDS QUADRUPLE ARROW..RIGHTWARDS QUADRUPLE ARROW
+2B47..2B4C ; Pattern_Syntax # Sm [6] REVERSE TILDE OPERATOR ABOVE RIGHTWARDS ARROW..RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR
+2B4D..2B73 ; Pattern_Syntax # So [39] DOWNWARDS TRIANGLE-HEADED ZIGZAG ARROW..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR
+2B74..2B75 ; Pattern_Syntax # Cn [2] <reserved-2B74>..<reserved-2B75>
+2B76..2B95 ; Pattern_Syntax # So [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW
+2B96..2B97 ; Pattern_Syntax # Cn [2] <reserved-2B96>..<reserved-2B97>
+2B98..2BB9 ; Pattern_Syntax # So [34] THREE-D TOP-LIGHTED LEFTWARDS EQUILATERAL ARROWHEAD..UP ARROWHEAD IN A RECTANGLE BOX
+2BBA..2BBC ; Pattern_Syntax # Cn [3] <reserved-2BBA>..<reserved-2BBC>
+2BBD..2BC8 ; Pattern_Syntax # So [12] BALLOT BOX WITH LIGHT X..BLACK MEDIUM RIGHT-POINTING TRIANGLE CENTRED
+2BC9 ; Pattern_Syntax # Cn <reserved-2BC9>
+2BCA..2BD1 ; Pattern_Syntax # So [8] TOP HALF BLACK CIRCLE..UNCERTAINTY SIGN
+2BD2..2BEB ; Pattern_Syntax # Cn [26] <reserved-2BD2>..<reserved-2BEB>
+2BEC..2BEF ; Pattern_Syntax # So [4] LEFTWARDS TWO-HEADED ARROW WITH TRIANGLE ARROWHEADS..DOWNWARDS TWO-HEADED ARROW WITH TRIANGLE ARROWHEADS
+2BF0..2BFF ; Pattern_Syntax # Cn [16] <reserved-2BF0>..<reserved-2BFF>
+2E00..2E01 ; Pattern_Syntax # Po [2] RIGHT ANGLE SUBSTITUTION MARKER..RIGHT ANGLE DOTTED SUBSTITUTION MARKER
+2E02 ; Pattern_Syntax # Pi LEFT SUBSTITUTION BRACKET
+2E03 ; Pattern_Syntax # Pf RIGHT SUBSTITUTION BRACKET
+2E04 ; Pattern_Syntax # Pi LEFT DOTTED SUBSTITUTION BRACKET
+2E05 ; Pattern_Syntax # Pf RIGHT DOTTED SUBSTITUTION BRACKET
+2E06..2E08 ; Pattern_Syntax # Po [3] RAISED INTERPOLATION MARKER..DOTTED TRANSPOSITION MARKER
+2E09 ; Pattern_Syntax # Pi LEFT TRANSPOSITION BRACKET
+2E0A ; Pattern_Syntax # Pf RIGHT TRANSPOSITION BRACKET
+2E0B ; Pattern_Syntax # Po RAISED SQUARE
+2E0C ; Pattern_Syntax # Pi LEFT RAISED OMISSION BRACKET
+2E0D ; Pattern_Syntax # Pf RIGHT RAISED OMISSION BRACKET
+2E0E..2E16 ; Pattern_Syntax # Po [9] EDITORIAL CORONIS..DOTTED RIGHT-POINTING ANGLE
+2E17 ; Pattern_Syntax # Pd DOUBLE OBLIQUE HYPHEN
+2E18..2E19 ; Pattern_Syntax # Po [2] INVERTED INTERROBANG..PALM BRANCH
+2E1A ; Pattern_Syntax # Pd HYPHEN WITH DIAERESIS
+2E1B ; Pattern_Syntax # Po TILDE WITH RING ABOVE
+2E1C ; Pattern_Syntax # Pi LEFT LOW PARAPHRASE BRACKET
+2E1D ; Pattern_Syntax # Pf RIGHT LOW PARAPHRASE BRACKET
+2E1E..2E1F ; Pattern_Syntax # Po [2] TILDE WITH DOT ABOVE..TILDE WITH DOT BELOW
+2E20 ; Pattern_Syntax # Pi LEFT VERTICAL BAR WITH QUILL
+2E21 ; Pattern_Syntax # Pf RIGHT VERTICAL BAR WITH QUILL
+2E22 ; Pattern_Syntax # Ps TOP LEFT HALF BRACKET
+2E23 ; Pattern_Syntax # Pe TOP RIGHT HALF BRACKET
+2E24 ; Pattern_Syntax # Ps BOTTOM LEFT HALF BRACKET
+2E25 ; Pattern_Syntax # Pe BOTTOM RIGHT HALF BRACKET
+2E26 ; Pattern_Syntax # Ps LEFT SIDEWAYS U BRACKET
+2E27 ; Pattern_Syntax # Pe RIGHT SIDEWAYS U BRACKET
+2E28 ; Pattern_Syntax # Ps LEFT DOUBLE PARENTHESIS
+2E29 ; Pattern_Syntax # Pe RIGHT DOUBLE PARENTHESIS
+2E2A..2E2E ; Pattern_Syntax # Po [5] TWO DOTS OVER ONE DOT PUNCTUATION..REVERSED QUESTION MARK
+2E2F ; Pattern_Syntax # Lm VERTICAL TILDE
+2E30..2E39 ; Pattern_Syntax # Po [10] RING POINT..TOP HALF SECTION SIGN
+2E3A..2E3B ; Pattern_Syntax # Pd [2] TWO-EM DASH..THREE-EM DASH
+2E3C..2E3F ; Pattern_Syntax # Po [4] STENOGRAPHIC FULL STOP..CAPITULUM
+2E40 ; Pattern_Syntax # Pd DOUBLE HYPHEN
+2E41 ; Pattern_Syntax # Po REVERSED COMMA
+2E42 ; Pattern_Syntax # Ps DOUBLE LOW-REVERSED-9 QUOTATION MARK
+2E43..2E7F ; Pattern_Syntax # Cn [61] <reserved-2E43>..<reserved-2E7F>
+3001..3003 ; Pattern_Syntax # Po [3] IDEOGRAPHIC COMMA..DITTO MARK
+3008 ; Pattern_Syntax # Ps LEFT ANGLE BRACKET
+3009 ; Pattern_Syntax # Pe RIGHT ANGLE BRACKET
+300A ; Pattern_Syntax # Ps LEFT DOUBLE ANGLE BRACKET
+300B ; Pattern_Syntax # Pe RIGHT DOUBLE ANGLE BRACKET
+300C ; Pattern_Syntax # Ps LEFT CORNER BRACKET
+300D ; Pattern_Syntax # Pe RIGHT CORNER BRACKET
+300E ; Pattern_Syntax # Ps LEFT WHITE CORNER BRACKET
+300F ; Pattern_Syntax # Pe RIGHT WHITE CORNER BRACKET
+3010 ; Pattern_Syntax # Ps LEFT BLACK LENTICULAR BRACKET
+3011 ; Pattern_Syntax # Pe RIGHT BLACK LENTICULAR BRACKET
+3012..3013 ; Pattern_Syntax # So [2] POSTAL MARK..GETA MARK
+3014 ; Pattern_Syntax # Ps LEFT TORTOISE SHELL BRACKET
+3015 ; Pattern_Syntax # Pe RIGHT TORTOISE SHELL BRACKET
+3016 ; Pattern_Syntax # Ps LEFT WHITE LENTICULAR BRACKET
+3017 ; Pattern_Syntax # Pe RIGHT WHITE LENTICULAR BRACKET
+3018 ; Pattern_Syntax # Ps LEFT WHITE TORTOISE SHELL BRACKET
+3019 ; Pattern_Syntax # Pe RIGHT WHITE TORTOISE SHELL BRACKET
+301A ; Pattern_Syntax # Ps LEFT WHITE SQUARE BRACKET
+301B ; Pattern_Syntax # Pe RIGHT WHITE SQUARE BRACKET
+301C ; Pattern_Syntax # Pd WAVE DASH
+301D ; Pattern_Syntax # Ps REVERSED DOUBLE PRIME QUOTATION MARK
+301E..301F ; Pattern_Syntax # Pe [2] DOUBLE PRIME QUOTATION MARK..LOW DOUBLE PRIME QUOTATION MARK
+3020 ; Pattern_Syntax # So POSTAL MARK FACE
+3030 ; Pattern_Syntax # Pd WAVY DASH
+FD3E ; Pattern_Syntax # Pe ORNATE LEFT PARENTHESIS
+FD3F ; Pattern_Syntax # Ps ORNATE RIGHT PARENTHESIS
+FE45..FE46 ; Pattern_Syntax # Po [2] SESAME DOT..WHITE SESAME DOT
+
+# Total code points: 2760
+
+# EOF
diff --git a/src/third_party/unicode-8.0.0/ReadMe.txt b/src/third_party/unicode-8.0.0/ReadMe.txt
new file mode 100644
index 00000000000..fc4a9044b7e
--- /dev/null
+++ b/src/third_party/unicode-8.0.0/ReadMe.txt
@@ -0,0 +1,17 @@
+# Date: 2015-06-16, 20:24:00 GMT [KW]
+#
+# Unicode Character Database
+# Copyright (c) 1991-2015 Unicode, Inc.
+# For terms of use, see http://www.unicode.org/terms_of_use.html
+#
+# For documentation, see the following:
+# NamesList.html
+# UAX #38, "Unicode Han Database (Unihan)"
+# UAX #44, "Unicode Character Database."
+#
+# The UAXes can be accessed at http://www.unicode.org/versions/Unicode8.0.0/
+
+This directory contains the final data files
+for the Unicode Character Database, for Version 8.0.0 of the Unicode
+Standard.
+