diff options
-rw-r--r-- | distsrc/THIRD-PARTY-NOTICES | 44 | ||||
-rw-r--r-- | src/mongo/SConscript | 7 | ||||
-rw-r--r-- | src/mongo/db/fts/SConscript | 7 | ||||
-rw-r--r-- | src/mongo/db/fts/unicode/SConscript | 65 | ||||
-rw-r--r-- | src/mongo/db/fts/unicode/codepoints.h | 86 | ||||
-rw-r--r-- | src/mongo/db/fts/unicode/codepoints_diacritic_map.cpp | 3969 | ||||
-rw-r--r-- | src/mongo/db/fts/unicode/codepoints_test.cpp | 94 | ||||
-rw-r--r-- | src/mongo/db/fts/unicode/gen_casefold_map.py | 76 | ||||
-rw-r--r-- | src/mongo/db/fts/unicode/gen_delimiter_list.py | 80 | ||||
-rw-r--r-- | src/mongo/db/fts/unicode/gen_diacritic_list.py | 63 | ||||
-rw-r--r-- | src/mongo/db/fts/unicode/gen_diacritic_map.py | 105 | ||||
-rw-r--r-- | src/mongo/db/fts/unicode/gen_helper.py | 39 | ||||
-rw-r--r-- | src/mongo/db/fts/unicode/string.cpp | 157 | ||||
-rw-r--r-- | src/mongo/db/fts/unicode/string.h | 139 | ||||
-rw-r--r-- | src/mongo/db/fts/unicode/string_test.cpp | 187 | ||||
-rw-r--r-- | src/mongo/shell/linenoise_utf8.h | 6 | ||||
-rw-r--r-- | src/third_party/unicode-8.0.0/CaseFolding.txt | 1414 | ||||
-rw-r--r-- | src/third_party/unicode-8.0.0/PropList.txt | 1525 | ||||
-rw-r--r-- | src/third_party/unicode-8.0.0/ReadMe.txt | 17 |
19 files changed, 8076 insertions, 4 deletions
diff --git a/distsrc/THIRD-PARTY-NOTICES b/distsrc/THIRD-PARTY-NOTICES index 040be4680fd..37349340d22 100644 --- a/distsrc/THIRD-PARTY-NOTICES +++ b/distsrc/THIRD-PARTY-NOTICES @@ -501,7 +501,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -13) License Notice for SpiderMonkey +14) License Notice for SpiderMonkey ----------------------------------- |------------------------------------------------|------------------|---------------| @@ -698,7 +698,7 @@ You can contact the author at : - LZ4 source repository : http://code.google.com/p/lz4/ - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c -14) License Notice for Intel DFP Math Library +15) License Notice for Intel DFP Math Library --------------------------------------------- Copyright (c) 2011, Intel Corp. @@ -730,4 +730,44 @@ OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +16) License Notice for Unicode Data +----------------------------------- + +Copyright © 1991-2015 Unicode, Inc. All rights reserved. +Distributed under the Terms of Use in +http://www.unicode.org/copyright.html. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of the Unicode data files and any associated documentation +(the "Data Files") or Unicode software and any associated documentation +(the "Software") to deal in the Data Files or Software +without restriction, including without limitation the rights to use, +copy, modify, merge, publish, distribute, and/or sell copies of +the Data Files or Software, and to permit persons to whom the Data Files +or Software are furnished to do so, provided that +(a) this copyright and permission notice appear with all copies +of the Data Files or Software, +(b) this copyright and permission notice appear in associated +documentation, and +(c) there is clear notice in each modified Data File or in the Software +as well as in the documentation associated with the Data File(s) or +Software that the data or software has been modified. + +THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT OF THIRD PARTY RIGHTS. +IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS +NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL +DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, +DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER +TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +PERFORMANCE OF THE DATA FILES OR SOFTWARE. + +Except as contained in this notice, the name of a copyright holder +shall not be used in advertising or otherwise to promote the sale, +use or other dealings in these Data Files or Software without prior +written authorization of the copyright holder. + End diff --git a/src/mongo/SConscript b/src/mongo/SConscript index bd6407bbff8..f6e42077bfa 100644 --- a/src/mongo/SConscript +++ b/src/mongo/SConscript @@ -312,6 +312,11 @@ env.Install( 'util/options_parser/options_parser_init', ])) +env.Library("linenoise_utf8", + source=[ + "shell/linenoise_utf8.cpp", + ]) + # --- sniffer --- mongosniff_built = False if env.TargetOSIs('osx') or env["_HAVEPCAP"]: @@ -341,7 +346,6 @@ if not has_option('noshell') and (usev8 or usemozjs): "shell/bench.cpp", "shell/clientAndShell.cpp", "shell/linenoise.cpp", - "shell/linenoise_utf8.cpp", "shell/mk_wcwidth.cpp", "shell/mongo-server.cpp", "shell/shell_utils.cpp", @@ -355,6 +359,7 @@ if not has_option('noshell') and (usev8 or usemozjs): 'scripting/scripting', 'util/processinfo', 'util/signal_handlers', + 'linenoise_utf8', 'shell/mongojs', ]) diff --git a/src/mongo/db/fts/SConscript b/src/mongo/db/fts/SConscript index 25f3b467c57..3a769baca98 100644 --- a/src/mongo/db/fts/SConscript +++ b/src/mongo/db/fts/SConscript @@ -2,6 +2,12 @@ Import("env") +env.SConscript( + dirs=[ + 'unicode', + ], +) + stop_word_languages = [ 'danish', 'dutch', @@ -44,6 +50,7 @@ baseEnv.Library('base', [ 'tokenizer.cpp', ], LIBDEPS=["$BUILD_DIR/mongo/base", "$BUILD_DIR/mongo/db/common", + "$BUILD_DIR/mongo/db/fts/unicode/unicode", "$BUILD_DIR/mongo/platform/platform", "$BUILD_DIR/third_party/shim_stemmer" ]) diff --git a/src/mongo/db/fts/unicode/SConscript b/src/mongo/db/fts/unicode/SConscript new file mode 100644 index 00000000000..dc01c5b618c --- /dev/null +++ b/src/mongo/db/fts/unicode/SConscript @@ -0,0 +1,65 @@ +# -*- mode: python -*- + +Import("env") + +env.Command( + target="codepoints_casefold.cpp", + source=[ + "gen_casefold_map.py", + "#/src/third_party/unicode-8.0.0/CaseFolding.txt", + "gen_helper.py", + ], + action="$PYTHON ${SOURCES[0]} ${SOURCES[1]} $TARGETS") + +env.Command( + target="codepoints_delimiter_list.cpp", + source=[ + "gen_delimiter_list.py", + "#/src/third_party/unicode-8.0.0/PropList.txt", + "gen_helper.py", + ], + action="$PYTHON ${SOURCES[0]} ${SOURCES[1]} $TARGETS") + +env.Command( + target="codepoints_diacritic_list.cpp", + source=[ + "gen_diacritic_list.py", + "#/src/third_party/unicode-8.0.0/PropList.txt", + "gen_helper.py", + ], + action="$PYTHON ${SOURCES[0]} ${SOURCES[1]} $TARGETS") + +env.Library( + target='unicode', + source=[ + 'codepoints_casefold.cpp', + 'codepoints_delimiter_list.cpp', + 'codepoints_diacritic_list.cpp', + 'codepoints_diacritic_map.cpp', + 'string.cpp', + ], + LIBDEPS=[ + '$BUILD_DIR/mongo/base', + '$BUILD_DIR/mongo/linenoise_utf8', + ] +) + +env.CppUnitTest( + target='string_test', + source=[ + 'string_test.cpp' + ], + LIBDEPS=[ + 'unicode', + ] +) + +env.CppUnitTest( + target='codepoints_test', + source=[ + 'codepoints_test.cpp' + ], + LIBDEPS=[ + 'unicode', + ] +) diff --git a/src/mongo/db/fts/unicode/codepoints.h b/src/mongo/db/fts/unicode/codepoints.h new file mode 100644 index 00000000000..5b1e8e2b2b5 --- /dev/null +++ b/src/mongo/db/fts/unicode/codepoints.h @@ -0,0 +1,86 @@ +/** + * Copyright (C) 2015 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include <string> + +namespace mongo { +namespace unicode { + +/** + * There are currently two options supported for the delimiter list. The only difference between the + * English and NotEnglish modes is that in English, the apostrophe is considered a delimiter, while + * in NotEnglish, it is not. + */ +enum class DelimiterListLanguage { + kEnglish, + kNotEnglish, +}; + +/** + * There are currently two Case folding modes supported. The only difference between the Normal mode + * and the Turkish mode is that in the Turkish mode, the letter I is lowercased to ı, and the letter + * İ is lowercased to i. In the normal mode, the letter I is lowercased to i, and there is no + * mapping for İ. + */ +enum class CaseFoldMode { + kNormal, + kTurkish, +}; + +/** + * Returns whether or not the given codepoint is a diacritic. In 'D' normalized Unicode text, + * diacritics are removed by removing characters with these codepoints. + */ +bool codepointIsDiacritic(char32_t codepoint); + +/** + * Returns whether or not the given codepoint is considered a delimiter in the language 'lang'. + * Currently, there is only a difference between English and non-English languages (the apostrophe). + * To see which Unicode character categories were considered delimiters, see gen_delimiter_list.py. + */ +bool codepointIsDelimiter(char32_t codepoint, DelimiterListLanguage lang); + +/** + * Return a version of the given codepoint without any diacritics. These mappings are generated by + * taking all of the characters within a set of Unicode code blocks (see gen_diacritic_map.py to see + * which code blocks are used), decomposing them to the NFD normalization form, removing any + * combining marks, and renormalizing them to the NFC form. The result is a mapping from original + * codepoint to a codepoint with no diacritics. + */ +char32_t codepointRemoveDiacritics(char32_t codepoint); + +/** + * Return the lowercased version of the given codepoint, applying the special Turkish version of + * case folding if specified. + */ +char32_t codepointToLower(char32_t codepoint, CaseFoldMode mode = CaseFoldMode::kNormal); + +} // namespace unicode +} // namespace mongo diff --git a/src/mongo/db/fts/unicode/codepoints_diacritic_map.cpp b/src/mongo/db/fts/unicode/codepoints_diacritic_map.cpp new file mode 100644 index 00000000000..c6b39d328b8 --- /dev/null +++ b/src/mongo/db/fts/unicode/codepoints_diacritic_map.cpp @@ -0,0 +1,3969 @@ +/** + * Copyright (C) 2015 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + * + * THIS IS A GENERATED FILE, DO NOT MODIFY. + */ + +#include "mongo/db/fts/unicode/codepoints.h" + +namespace mongo { +namespace unicode { + +char32_t codepointRemoveDiacritics(char32_t codepoint) { + switch (codepoint) { + case 0xc0: + return 0x41; + case 0xc1: + return 0x41; + case 0xc2: + return 0x41; + case 0xc3: + return 0x41; + case 0xc4: + return 0x41; + case 0xc5: + return 0x41; + case 0xc7: + return 0x43; + case 0xc8: + return 0x45; + case 0xc9: + return 0x45; + case 0xca: + return 0x45; + case 0xcb: + return 0x45; + case 0xcc: + return 0x49; + case 0xcd: + return 0x49; + case 0xce: + return 0x49; + case 0xcf: + return 0x49; + case 0xd1: + return 0x4e; + case 0xd2: + return 0x4f; + case 0xd3: + return 0x4f; + case 0xd4: + return 0x4f; + case 0xd5: + return 0x4f; + case 0xd6: + return 0x4f; + case 0xd9: + return 0x55; + case 0xda: + return 0x55; + case 0xdb: + return 0x55; + case 0xdc: + return 0x55; + case 0xdd: + return 0x59; + case 0xe0: + return 0x61; + case 0xe1: + return 0x61; + case 0xe2: + return 0x61; + case 0xe3: + return 0x61; + case 0xe4: + return 0x61; + case 0xe5: + return 0x61; + case 0xe7: + return 0x63; + case 0xe8: + return 0x65; + case 0xe9: + return 0x65; + case 0xea: + return 0x65; + case 0xeb: + return 0x65; + case 0xec: + return 0x69; + case 0xed: + return 0x69; + case 0xee: + return 0x69; + case 0xef: + return 0x69; + case 0xf1: + return 0x6e; + case 0xf2: + return 0x6f; + case 0xf3: + return 0x6f; + case 0xf4: + return 0x6f; + case 0xf5: + return 0x6f; + case 0xf6: + return 0x6f; + case 0xf9: + return 0x75; + case 0xfa: + return 0x75; + case 0xfb: + return 0x75; + case 0xfc: + return 0x75; + case 0xfd: + return 0x79; + case 0xff: + return 0x79; + case 0x100: + return 0x41; + case 0x101: + return 0x61; + case 0x102: + return 0x41; + case 0x103: + return 0x61; + case 0x104: + return 0x41; + case 0x105: + return 0x61; + case 0x106: + return 0x43; + case 0x107: + return 0x63; + case 0x108: + return 0x43; + case 0x109: + return 0x63; + case 0x10a: + return 0x43; + case 0x10b: + return 0x63; + case 0x10c: + return 0x43; + case 0x10d: + return 0x63; + case 0x10e: + return 0x44; + case 0x10f: + return 0x64; + case 0x112: + return 0x45; + case 0x113: + return 0x65; + case 0x114: + return 0x45; + case 0x115: + return 0x65; + case 0x116: + return 0x45; + case 0x117: + return 0x65; + case 0x118: + return 0x45; + case 0x119: + return 0x65; + case 0x11a: + return 0x45; + case 0x11b: + return 0x65; + case 0x11c: + return 0x47; + case 0x11d: + return 0x67; + case 0x11e: + return 0x47; + case 0x11f: + return 0x67; + case 0x120: + return 0x47; + case 0x121: + return 0x67; + case 0x122: + return 0x47; + case 0x123: + return 0x67; + case 0x124: + return 0x48; + case 0x125: + return 0x68; + case 0x128: + return 0x49; + case 0x129: + return 0x69; + case 0x12a: + return 0x49; + case 0x12b: + return 0x69; + case 0x12c: + return 0x49; + case 0x12d: + return 0x69; + case 0x12e: + return 0x49; + case 0x12f: + return 0x69; + case 0x130: + return 0x49; + case 0x134: + return 0x4a; + case 0x135: + return 0x6a; + case 0x136: + return 0x4b; + case 0x137: + return 0x6b; + case 0x139: + return 0x4c; + case 0x13a: + return 0x6c; + case 0x13b: + return 0x4c; + case 0x13c: + return 0x6c; + case 0x13d: + return 0x4c; + case 0x13e: + return 0x6c; + case 0x143: + return 0x4e; + case 0x144: + return 0x6e; + case 0x145: + return 0x4e; + case 0x146: + return 0x6e; + case 0x147: + return 0x4e; + case 0x148: + return 0x6e; + case 0x14c: + return 0x4f; + case 0x14d: + return 0x6f; + case 0x14e: + return 0x4f; + case 0x14f: + return 0x6f; + case 0x150: + return 0x4f; + case 0x151: + return 0x6f; + case 0x154: + return 0x52; + case 0x155: + return 0x72; + case 0x156: + return 0x52; + case 0x157: + return 0x72; + case 0x158: + return 0x52; + case 0x159: + return 0x72; + case 0x15a: + return 0x53; + case 0x15b: + return 0x73; + case 0x15c: + return 0x53; + case 0x15d: + return 0x73; + case 0x15e: + return 0x53; + case 0x15f: + return 0x73; + case 0x160: + return 0x53; + case 0x161: + return 0x73; + case 0x162: + return 0x54; + case 0x163: + return 0x74; + case 0x164: + return 0x54; + case 0x165: + return 0x74; + case 0x168: + return 0x55; + case 0x169: + return 0x75; + case 0x16a: + return 0x55; + case 0x16b: + return 0x75; + case 0x16c: + return 0x55; + case 0x16d: + return 0x75; + case 0x16e: + return 0x55; + case 0x16f: + return 0x75; + case 0x170: + return 0x55; + case 0x171: + return 0x75; + case 0x172: + return 0x55; + case 0x173: + return 0x75; + case 0x174: + return 0x57; + case 0x175: + return 0x77; + case 0x176: + return 0x59; + case 0x177: + return 0x79; + case 0x178: + return 0x59; + case 0x179: + return 0x5a; + case 0x17a: + return 0x7a; + case 0x17b: + return 0x5a; + case 0x17c: + return 0x7a; + case 0x17d: + return 0x5a; + case 0x17e: + return 0x7a; + case 0x1a0: + return 0x4f; + case 0x1a1: + return 0x6f; + case 0x1af: + return 0x55; + case 0x1b0: + return 0x75; + case 0x1cd: + return 0x41; + case 0x1ce: + return 0x61; + case 0x1cf: + return 0x49; + case 0x1d0: + return 0x69; + case 0x1d1: + return 0x4f; + case 0x1d2: + return 0x6f; + case 0x1d3: + return 0x55; + case 0x1d4: + return 0x75; + case 0x1d5: + return 0x55; + case 0x1d6: + return 0x75; + case 0x1d7: + return 0x55; + case 0x1d8: + return 0x75; + case 0x1d9: + return 0x55; + case 0x1da: + return 0x75; + case 0x1db: + return 0x55; + case 0x1dc: + return 0x75; + case 0x1de: + return 0x41; + case 0x1df: + return 0x61; + case 0x1e0: + return 0x41; + case 0x1e1: + return 0x61; + case 0x1e2: + return 0xc6; + case 0x1e3: + return 0xe6; + case 0x1e6: + return 0x47; + case 0x1e7: + return 0x67; + case 0x1e8: + return 0x4b; + case 0x1e9: + return 0x6b; + case 0x1ea: + return 0x4f; + case 0x1eb: + return 0x6f; + case 0x1ec: + return 0x4f; + case 0x1ed: + return 0x6f; + case 0x1ee: + return 0x1b7; + case 0x1ef: + return 0x292; + case 0x1f0: + return 0x6a; + case 0x1f4: + return 0x47; + case 0x1f5: + return 0x67; + case 0x1f8: + return 0x4e; + case 0x1f9: + return 0x6e; + case 0x1fa: + return 0x41; + case 0x1fb: + return 0x61; + case 0x1fc: + return 0xc6; + case 0x1fd: + return 0xe6; + case 0x1fe: + return 0xd8; + case 0x1ff: + return 0xf8; + case 0x200: + return 0x41; + case 0x201: + return 0x61; + case 0x202: + return 0x41; + case 0x203: + return 0x61; + case 0x204: + return 0x45; + case 0x205: + return 0x65; + case 0x206: + return 0x45; + case 0x207: + return 0x65; + case 0x208: + return 0x49; + case 0x209: + return 0x69; + case 0x20a: + return 0x49; + case 0x20b: + return 0x69; + case 0x20c: + return 0x4f; + case 0x20d: + return 0x6f; + case 0x20e: + return 0x4f; + case 0x20f: + return 0x6f; + case 0x210: + return 0x52; + case 0x211: + return 0x72; + case 0x212: + return 0x52; + case 0x213: + return 0x72; + case 0x214: + return 0x55; + case 0x215: + return 0x75; + case 0x216: + return 0x55; + case 0x217: + return 0x75; + case 0x218: + return 0x53; + case 0x219: + return 0x73; + case 0x21a: + return 0x54; + case 0x21b: + return 0x74; + case 0x21e: + return 0x48; + case 0x21f: + return 0x68; + case 0x226: + return 0x41; + case 0x227: + return 0x61; + case 0x228: + return 0x45; + case 0x229: + return 0x65; + case 0x22a: + return 0x4f; + case 0x22b: + return 0x6f; + case 0x22c: + return 0x4f; + case 0x22d: + return 0x6f; + case 0x22e: + return 0x4f; + case 0x22f: + return 0x6f; + case 0x230: + return 0x4f; + case 0x231: + return 0x6f; + case 0x232: + return 0x59; + case 0x233: + return 0x79; + case 0x37e: + return 0x3b; + case 0x386: + return 0x391; + case 0x388: + return 0x395; + case 0x389: + return 0x397; + case 0x38a: + return 0x399; + case 0x38c: + return 0x39f; + case 0x38e: + return 0x3a5; + case 0x38f: + return 0x3a9; + case 0x390: + return 0x3b9; + case 0x3aa: + return 0x399; + case 0x3ab: + return 0x3a5; + case 0x3ac: + return 0x3b1; + case 0x3ad: + return 0x3b5; + case 0x3ae: + return 0x3b7; + case 0x3af: + return 0x3b9; + case 0x3b0: + return 0x3c5; + case 0x3ca: + return 0x3b9; + case 0x3cb: + return 0x3c5; + case 0x3cc: + return 0x3bf; + case 0x3cd: + return 0x3c5; + case 0x3ce: + return 0x3c9; + case 0x3d3: + return 0x3d2; + case 0x3d4: + return 0x3d2; + case 0x400: + return 0x415; + case 0x401: + return 0x415; + case 0x403: + return 0x413; + case 0x407: + return 0x406; + case 0x40c: + return 0x41a; + case 0x40d: + return 0x418; + case 0x40e: + return 0x423; + case 0x419: + return 0x418; + case 0x439: + return 0x438; + case 0x450: + return 0x435; + case 0x451: + return 0x435; + case 0x453: + return 0x433; + case 0x457: + return 0x456; + case 0x45c: + return 0x43a; + case 0x45d: + return 0x438; + case 0x45e: + return 0x443; + case 0x476: + return 0x474; + case 0x477: + return 0x475; + case 0x4c1: + return 0x416; + case 0x4c2: + return 0x436; + case 0x4d0: + return 0x410; + case 0x4d1: + return 0x430; + case 0x4d2: + return 0x410; + case 0x4d3: + return 0x430; + case 0x4d6: + return 0x415; + case 0x4d7: + return 0x435; + case 0x4da: + return 0x4d8; + case 0x4db: + return 0x4d9; + case 0x4dc: + return 0x416; + case 0x4dd: + return 0x436; + case 0x4de: + return 0x417; + case 0x4df: + return 0x437; + case 0x4e2: + return 0x418; + case 0x4e3: + return 0x438; + case 0x4e4: + return 0x418; + case 0x4e5: + return 0x438; + case 0x4e6: + return 0x41e; + case 0x4e7: + return 0x43e; + case 0x4ea: + return 0x4e8; + case 0x4eb: + return 0x4e9; + case 0x4ec: + return 0x42d; + case 0x4ed: + return 0x44d; + case 0x4ee: + return 0x423; + case 0x4ef: + return 0x443; + case 0x4f0: + return 0x423; + case 0x4f1: + return 0x443; + case 0x4f2: + return 0x423; + case 0x4f3: + return 0x443; + case 0x4f4: + return 0x427; + case 0x4f5: + return 0x447; + case 0x4f8: + return 0x42b; + case 0x4f9: + return 0x44b; + case 0x929: + return 0x928; + case 0x931: + return 0x930; + case 0x934: + return 0x933; + case 0x958: + return 0x915; + case 0x959: + return 0x916; + case 0x95a: + return 0x917; + case 0x95b: + return 0x91c; + case 0x95c: + return 0x921; + case 0x95d: + return 0x922; + case 0x95e: + return 0x92b; + case 0x95f: + return 0x92f; + case 0x9dc: + return 0x9a1; + case 0x9dd: + return 0x9a2; + case 0x9df: + return 0x9af; + case 0xa33: + return 0xa32; + case 0xa36: + return 0xa38; + case 0xa59: + return 0xa16; + case 0xa5a: + return 0xa17; + case 0xa5b: + return 0xa1c; + case 0xa5e: + return 0xa2b; + case 0xb5c: + return 0xb21; + case 0xb5d: + return 0xb22; + case 0xdda: + return 0xdd9; + case 0xddd: + return 0xddc; + case 0x1e00: + return 0x41; + case 0x1e01: + return 0x61; + case 0x1e02: + return 0x42; + case 0x1e03: + return 0x62; + case 0x1e04: + return 0x42; + case 0x1e05: + return 0x62; + case 0x1e06: + return 0x42; + case 0x1e07: + return 0x62; + case 0x1e08: + return 0x43; + case 0x1e09: + return 0x63; + case 0x1e0a: + return 0x44; + case 0x1e0b: + return 0x64; + case 0x1e0c: + return 0x44; + case 0x1e0d: + return 0x64; + case 0x1e0e: + return 0x44; + case 0x1e0f: + return 0x64; + case 0x1e10: + return 0x44; + case 0x1e11: + return 0x64; + case 0x1e12: + return 0x44; + case 0x1e13: + return 0x64; + case 0x1e14: + return 0x45; + case 0x1e15: + return 0x65; + case 0x1e16: + return 0x45; + case 0x1e17: + return 0x65; + case 0x1e18: + return 0x45; + case 0x1e19: + return 0x65; + case 0x1e1a: + return 0x45; + case 0x1e1b: + return 0x65; + case 0x1e1c: + return 0x45; + case 0x1e1d: + return 0x65; + case 0x1e1e: + return 0x46; + case 0x1e1f: + return 0x66; + case 0x1e20: + return 0x47; + case 0x1e21: + return 0x67; + case 0x1e22: + return 0x48; + case 0x1e23: + return 0x68; + case 0x1e24: + return 0x48; + case 0x1e25: + return 0x68; + case 0x1e26: + return 0x48; + case 0x1e27: + return 0x68; + case 0x1e28: + return 0x48; + case 0x1e29: + return 0x68; + case 0x1e2a: + return 0x48; + case 0x1e2b: + return 0x68; + case 0x1e2c: + return 0x49; + case 0x1e2d: + return 0x69; + case 0x1e2e: + return 0x49; + case 0x1e2f: + return 0x69; + case 0x1e30: + return 0x4b; + case 0x1e31: + return 0x6b; + case 0x1e32: + return 0x4b; + case 0x1e33: + return 0x6b; + case 0x1e34: + return 0x4b; + case 0x1e35: + return 0x6b; + case 0x1e36: + return 0x4c; + case 0x1e37: + return 0x6c; + case 0x1e38: + return 0x4c; + case 0x1e39: + return 0x6c; + case 0x1e3a: + return 0x4c; + case 0x1e3b: + return 0x6c; + case 0x1e3c: + return 0x4c; + case 0x1e3d: + return 0x6c; + case 0x1e3e: + return 0x4d; + case 0x1e3f: + return 0x6d; + case 0x1e40: + return 0x4d; + case 0x1e41: + return 0x6d; + case 0x1e42: + return 0x4d; + case 0x1e43: + return 0x6d; + case 0x1e44: + return 0x4e; + case 0x1e45: + return 0x6e; + case 0x1e46: + return 0x4e; + case 0x1e47: + return 0x6e; + case 0x1e48: + return 0x4e; + case 0x1e49: + return 0x6e; + case 0x1e4a: + return 0x4e; + case 0x1e4b: + return 0x6e; + case 0x1e4c: + return 0x4f; + case 0x1e4d: + return 0x6f; + case 0x1e4e: + return 0x4f; + case 0x1e4f: + return 0x6f; + case 0x1e50: + return 0x4f; + case 0x1e51: + return 0x6f; + case 0x1e52: + return 0x4f; + case 0x1e53: + return 0x6f; + case 0x1e54: + return 0x50; + case 0x1e55: + return 0x70; + case 0x1e56: + return 0x50; + case 0x1e57: + return 0x70; + case 0x1e58: + return 0x52; + case 0x1e59: + return 0x72; + case 0x1e5a: + return 0x52; + case 0x1e5b: + return 0x72; + case 0x1e5c: + return 0x52; + case 0x1e5d: + return 0x72; + case 0x1e5e: + return 0x52; + case 0x1e5f: + return 0x72; + case 0x1e60: + return 0x53; + case 0x1e61: + return 0x73; + case 0x1e62: + return 0x53; + case 0x1e63: + return 0x73; + case 0x1e64: + return 0x53; + case 0x1e65: + return 0x73; + case 0x1e66: + return 0x53; + case 0x1e67: + return 0x73; + case 0x1e68: + return 0x53; + case 0x1e69: + return 0x73; + case 0x1e6a: + return 0x54; + case 0x1e6b: + return 0x74; + case 0x1e6c: + return 0x54; + case 0x1e6d: + return 0x74; + case 0x1e6e: + return 0x54; + case 0x1e6f: + return 0x74; + case 0x1e70: + return 0x54; + case 0x1e71: + return 0x74; + case 0x1e72: + return 0x55; + case 0x1e73: + return 0x75; + case 0x1e74: + return 0x55; + case 0x1e75: + return 0x75; + case 0x1e76: + return 0x55; + case 0x1e77: + return 0x75; + case 0x1e78: + return 0x55; + case 0x1e79: + return 0x75; + case 0x1e7a: + return 0x55; + case 0x1e7b: + return 0x75; + case 0x1e7c: + return 0x56; + case 0x1e7d: + return 0x76; + case 0x1e7e: + return 0x56; + case 0x1e7f: + return 0x76; + case 0x1e80: + return 0x57; + case 0x1e81: + return 0x77; + case 0x1e82: + return 0x57; + case 0x1e83: + return 0x77; + case 0x1e84: + return 0x57; + case 0x1e85: + return 0x77; + case 0x1e86: + return 0x57; + case 0x1e87: + return 0x77; + case 0x1e88: + return 0x57; + case 0x1e89: + return 0x77; + case 0x1e8a: + return 0x58; + case 0x1e8b: + return 0x78; + case 0x1e8c: + return 0x58; + case 0x1e8d: + return 0x78; + case 0x1e8e: + return 0x59; + case 0x1e8f: + return 0x79; + case 0x1e90: + return 0x5a; + case 0x1e91: + return 0x7a; + case 0x1e92: + return 0x5a; + case 0x1e93: + return 0x7a; + case 0x1e94: + return 0x5a; + case 0x1e95: + return 0x7a; + case 0x1e96: + return 0x68; + case 0x1e97: + return 0x74; + case 0x1e98: + return 0x77; + case 0x1e99: + return 0x79; + case 0x1e9b: + return 0x17f; + case 0x1ea0: + return 0x41; + case 0x1ea1: + return 0x61; + case 0x1ea2: + return 0x41; + case 0x1ea3: + return 0x61; + case 0x1ea4: + return 0x41; + case 0x1ea5: + return 0x61; + case 0x1ea6: + return 0x41; + case 0x1ea7: + return 0x61; + case 0x1ea8: + return 0x41; + case 0x1ea9: + return 0x61; + case 0x1eaa: + return 0x41; + case 0x1eab: + return 0x61; + case 0x1eac: + return 0x41; + case 0x1ead: + return 0x61; + case 0x1eae: + return 0x41; + case 0x1eaf: + return 0x61; + case 0x1eb0: + return 0x41; + case 0x1eb1: + return 0x61; + case 0x1eb2: + return 0x41; + case 0x1eb3: + return 0x61; + case 0x1eb4: + return 0x41; + case 0x1eb5: + return 0x61; + case 0x1eb6: + return 0x41; + case 0x1eb7: + return 0x61; + case 0x1eb8: + return 0x45; + case 0x1eb9: + return 0x65; + case 0x1eba: + return 0x45; + case 0x1ebb: + return 0x65; + case 0x1ebc: + return 0x45; + case 0x1ebd: + return 0x65; + case 0x1ebe: + return 0x45; + case 0x1ebf: + return 0x65; + case 0x1ec0: + return 0x45; + case 0x1ec1: + return 0x65; + case 0x1ec2: + return 0x45; + case 0x1ec3: + return 0x65; + case 0x1ec4: + return 0x45; + case 0x1ec5: + return 0x65; + case 0x1ec6: + return 0x45; + case 0x1ec7: + return 0x65; + case 0x1ec8: + return 0x49; + case 0x1ec9: + return 0x69; + case 0x1eca: + return 0x49; + case 0x1ecb: + return 0x69; + case 0x1ecc: + return 0x4f; + case 0x1ecd: + return 0x6f; + case 0x1ece: + return 0x4f; + case 0x1ecf: + return 0x6f; + case 0x1ed0: + return 0x4f; + case 0x1ed1: + return 0x6f; + case 0x1ed2: + return 0x4f; + case 0x1ed3: + return 0x6f; + case 0x1ed4: + return 0x4f; + case 0x1ed5: + return 0x6f; + case 0x1ed6: + return 0x4f; + case 0x1ed7: + return 0x6f; + case 0x1ed8: + return 0x4f; + case 0x1ed9: + return 0x6f; + case 0x1eda: + return 0x4f; + case 0x1edb: + return 0x6f; + case 0x1edc: + return 0x4f; + case 0x1edd: + return 0x6f; + case 0x1ede: + return 0x4f; + case 0x1edf: + return 0x6f; + case 0x1ee0: + return 0x4f; + case 0x1ee1: + return 0x6f; + case 0x1ee2: + return 0x4f; + case 0x1ee3: + return 0x6f; + case 0x1ee4: + return 0x55; + case 0x1ee5: + return 0x75; + case 0x1ee6: + return 0x55; + case 0x1ee7: + return 0x75; + case 0x1ee8: + return 0x55; + case 0x1ee9: + return 0x75; + case 0x1eea: + return 0x55; + case 0x1eeb: + return 0x75; + case 0x1eec: + return 0x55; + case 0x1eed: + return 0x75; + case 0x1eee: + return 0x55; + case 0x1eef: + return 0x75; + case 0x1ef0: + return 0x55; + case 0x1ef1: + return 0x75; + case 0x1ef2: + return 0x59; + case 0x1ef3: + return 0x79; + case 0x1ef4: + return 0x59; + case 0x1ef5: + return 0x79; + case 0x1ef6: + return 0x59; + case 0x1ef7: + return 0x79; + case 0x1ef8: + return 0x59; + case 0x1ef9: + return 0x79; + case 0x1f00: + return 0x3b1; + case 0x1f01: + return 0x3b1; + case 0x1f02: + return 0x3b1; + case 0x1f03: + return 0x3b1; + case 0x1f04: + return 0x3b1; + case 0x1f05: + return 0x3b1; + case 0x1f06: + return 0x3b1; + case 0x1f07: + return 0x3b1; + case 0x1f08: + return 0x391; + case 0x1f09: + return 0x391; + case 0x1f0a: + return 0x391; + case 0x1f0b: + return 0x391; + case 0x1f0c: + return 0x391; + case 0x1f0d: + return 0x391; + case 0x1f0e: + return 0x391; + case 0x1f0f: + return 0x391; + case 0x1f10: + return 0x3b5; + case 0x1f11: + return 0x3b5; + case 0x1f12: + return 0x3b5; + case 0x1f13: + return 0x3b5; + case 0x1f14: + return 0x3b5; + case 0x1f15: + return 0x3b5; + case 0x1f18: + return 0x395; + case 0x1f19: + return 0x395; + case 0x1f1a: + return 0x395; + case 0x1f1b: + return 0x395; + case 0x1f1c: + return 0x395; + case 0x1f1d: + return 0x395; + case 0x1f20: + return 0x3b7; + case 0x1f21: + return 0x3b7; + case 0x1f22: + return 0x3b7; + case 0x1f23: + return 0x3b7; + case 0x1f24: + return 0x3b7; + case 0x1f25: + return 0x3b7; + case 0x1f26: + return 0x3b7; + case 0x1f27: + return 0x3b7; + case 0x1f28: + return 0x397; + case 0x1f29: + return 0x397; + case 0x1f2a: + return 0x397; + case 0x1f2b: + return 0x397; + case 0x1f2c: + return 0x397; + case 0x1f2d: + return 0x397; + case 0x1f2e: + return 0x397; + case 0x1f2f: + return 0x397; + case 0x1f30: + return 0x3b9; + case 0x1f31: + return 0x3b9; + case 0x1f32: + return 0x3b9; + case 0x1f33: + return 0x3b9; + case 0x1f34: + return 0x3b9; + case 0x1f35: + return 0x3b9; + case 0x1f36: + return 0x3b9; + case 0x1f37: + return 0x3b9; + case 0x1f38: + return 0x399; + case 0x1f39: + return 0x399; + case 0x1f3a: + return 0x399; + case 0x1f3b: + return 0x399; + case 0x1f3c: + return 0x399; + case 0x1f3d: + return 0x399; + case 0x1f3e: + return 0x399; + case 0x1f3f: + return 0x399; + case 0x1f40: + return 0x3bf; + case 0x1f41: + return 0x3bf; + case 0x1f42: + return 0x3bf; + case 0x1f43: + return 0x3bf; + case 0x1f44: + return 0x3bf; + case 0x1f45: + return 0x3bf; + case 0x1f48: + return 0x39f; + case 0x1f49: + return 0x39f; + case 0x1f4a: + return 0x39f; + case 0x1f4b: + return 0x39f; + case 0x1f4c: + return 0x39f; + case 0x1f4d: + return 0x39f; + case 0x1f50: + return 0x3c5; + case 0x1f51: + return 0x3c5; + case 0x1f52: + return 0x3c5; + case 0x1f53: + return 0x3c5; + case 0x1f54: + return 0x3c5; + case 0x1f55: + return 0x3c5; + case 0x1f56: + return 0x3c5; + case 0x1f57: + return 0x3c5; + case 0x1f59: + return 0x3a5; + case 0x1f5b: + return 0x3a5; + case 0x1f5d: + return 0x3a5; + case 0x1f5f: + return 0x3a5; + case 0x1f60: + return 0x3c9; + case 0x1f61: + return 0x3c9; + case 0x1f62: + return 0x3c9; + case 0x1f63: + return 0x3c9; + case 0x1f64: + return 0x3c9; + case 0x1f65: + return 0x3c9; + case 0x1f66: + return 0x3c9; + case 0x1f67: + return 0x3c9; + case 0x1f68: + return 0x3a9; + case 0x1f69: + return 0x3a9; + case 0x1f6a: + return 0x3a9; + case 0x1f6b: + return 0x3a9; + case 0x1f6c: + return 0x3a9; + case 0x1f6d: + return 0x3a9; + case 0x1f6e: + return 0x3a9; + case 0x1f6f: + return 0x3a9; + case 0x1f70: + return 0x3b1; + case 0x1f71: + return 0x3b1; + case 0x1f72: + return 0x3b5; + case 0x1f73: + return 0x3b5; + case 0x1f74: + return 0x3b7; + case 0x1f75: + return 0x3b7; + case 0x1f76: + return 0x3b9; + case 0x1f77: + return 0x3b9; + case 0x1f78: + return 0x3bf; + case 0x1f79: + return 0x3bf; + case 0x1f7a: + return 0x3c5; + case 0x1f7b: + return 0x3c5; + case 0x1f7c: + return 0x3c9; + case 0x1f7d: + return 0x3c9; + case 0x1f80: + return 0x3b1; + case 0x1f81: + return 0x3b1; + case 0x1f82: + return 0x3b1; + case 0x1f83: + return 0x3b1; + case 0x1f84: + return 0x3b1; + case 0x1f85: + return 0x3b1; + case 0x1f86: + return 0x3b1; + case 0x1f87: + return 0x3b1; + case 0x1f88: + return 0x391; + case 0x1f89: + return 0x391; + case 0x1f8a: + return 0x391; + case 0x1f8b: + return 0x391; + case 0x1f8c: + return 0x391; + case 0x1f8d: + return 0x391; + case 0x1f8e: + return 0x391; + case 0x1f8f: + return 0x391; + case 0x1f90: + return 0x3b7; + case 0x1f91: + return 0x3b7; + case 0x1f92: + return 0x3b7; + case 0x1f93: + return 0x3b7; + case 0x1f94: + return 0x3b7; + case 0x1f95: + return 0x3b7; + case 0x1f96: + return 0x3b7; + case 0x1f97: + return 0x3b7; + case 0x1f98: + return 0x397; + case 0x1f99: + return 0x397; + case 0x1f9a: + return 0x397; + case 0x1f9b: + return 0x397; + case 0x1f9c: + return 0x397; + case 0x1f9d: + return 0x397; + case 0x1f9e: + return 0x397; + case 0x1f9f: + return 0x397; + case 0x1fa0: + return 0x3c9; + case 0x1fa1: + return 0x3c9; + case 0x1fa2: + return 0x3c9; + case 0x1fa3: + return 0x3c9; + case 0x1fa4: + return 0x3c9; + case 0x1fa5: + return 0x3c9; + case 0x1fa6: + return 0x3c9; + case 0x1fa7: + return 0x3c9; + case 0x1fa8: + return 0x3a9; + case 0x1fa9: + return 0x3a9; + case 0x1faa: + return 0x3a9; + case 0x1fab: + return 0x3a9; + case 0x1fac: + return 0x3a9; + case 0x1fad: + return 0x3a9; + case 0x1fae: + return 0x3a9; + case 0x1faf: + return 0x3a9; + case 0x1fb0: + return 0x3b1; + case 0x1fb1: + return 0x3b1; + case 0x1fb2: + return 0x3b1; + case 0x1fb3: + return 0x3b1; + case 0x1fb4: + return 0x3b1; + case 0x1fb6: + return 0x3b1; + case 0x1fb7: + return 0x3b1; + case 0x1fb8: + return 0x391; + case 0x1fb9: + return 0x391; + case 0x1fba: + return 0x391; + case 0x1fbb: + return 0x391; + case 0x1fbc: + return 0x391; + case 0x1fbe: + return 0x3b9; + case 0x1fc2: + return 0x3b7; + case 0x1fc3: + return 0x3b7; + case 0x1fc4: + return 0x3b7; + case 0x1fc6: + return 0x3b7; + case 0x1fc7: + return 0x3b7; + case 0x1fc8: + return 0x395; + case 0x1fc9: + return 0x395; + case 0x1fca: + return 0x397; + case 0x1fcb: + return 0x397; + case 0x1fcc: + return 0x397; + case 0x1fd0: + return 0x3b9; + case 0x1fd1: + return 0x3b9; + case 0x1fd2: + return 0x3b9; + case 0x1fd3: + return 0x3b9; + case 0x1fd6: + return 0x3b9; + case 0x1fd7: + return 0x3b9; + case 0x1fd8: + return 0x399; + case 0x1fd9: + return 0x399; + case 0x1fda: + return 0x399; + case 0x1fdb: + return 0x399; + case 0x1fe0: + return 0x3c5; + case 0x1fe1: + return 0x3c5; + case 0x1fe2: + return 0x3c5; + case 0x1fe3: + return 0x3c5; + case 0x1fe4: + return 0x3c1; + case 0x1fe5: + return 0x3c1; + case 0x1fe6: + return 0x3c5; + case 0x1fe7: + return 0x3c5; + case 0x1fe8: + return 0x3a5; + case 0x1fe9: + return 0x3a5; + case 0x1fea: + return 0x3a5; + case 0x1feb: + return 0x3a5; + case 0x1fec: + return 0x3a1; + case 0x1ff2: + return 0x3c9; + case 0x1ff3: + return 0x3c9; + case 0x1ff4: + return 0x3c9; + case 0x1ff6: + return 0x3c9; + case 0x1ff7: + return 0x3c9; + case 0x1ff8: + return 0x39f; + case 0x1ff9: + return 0x39f; + case 0x1ffa: + return 0x3a9; + case 0x1ffb: + return 0x3a9; + case 0x1ffc: + return 0x3a9; + case 0x2000: + return 0x2002; + case 0x2001: + return 0x2003; + case 0x2126: + return 0x3a9; + case 0x212a: + return 0x4b; + case 0x212b: + return 0x41; + case 0x219a: + return 0x2190; + case 0x219b: + return 0x2192; + case 0x21ae: + return 0x2194; + case 0x21cd: + return 0x21d0; + case 0x21ce: + return 0x21d4; + case 0x21cf: + return 0x21d2; + case 0x2204: + return 0x2203; + case 0x2209: + return 0x2208; + case 0x220c: + return 0x220b; + case 0x2224: + return 0x2223; + case 0x2226: + return 0x2225; + case 0x2241: + return 0x223c; + case 0x2244: + return 0x2243; + case 0x2247: + return 0x2245; + case 0x2249: + return 0x2248; + case 0x2260: + return 0x3d; + case 0x2262: + return 0x2261; + case 0x226d: + return 0x224d; + case 0x226e: + return 0x3c; + case 0x226f: + return 0x3e; + case 0x2270: + return 0x2264; + case 0x2271: + return 0x2265; + case 0x2274: + return 0x2272; + case 0x2275: + return 0x2273; + case 0x2278: + return 0x2276; + case 0x2279: + return 0x2277; + case 0x2280: + return 0x227a; + case 0x2281: + return 0x227b; + case 0x2284: + return 0x2282; + case 0x2285: + return 0x2283; + case 0x2288: + return 0x2286; + case 0x2289: + return 0x2287; + case 0x22ac: + return 0x22a2; + case 0x22ad: + return 0x22a8; + case 0x22ae: + return 0x22a9; + case 0x22af: + return 0x22ab; + case 0x22e0: + return 0x227c; + case 0x22e1: + return 0x227d; + case 0x22e2: + return 0x2291; + case 0x22e3: + return 0x2292; + case 0x22ea: + return 0x22b2; + case 0x22eb: + return 0x22b3; + case 0x22ec: + return 0x22b4; + case 0x22ed: + return 0x22b5; + case 0x2329: + return 0x3008; + case 0x232a: + return 0x3009; + case 0x2adc: + return 0x2add; + case 0x304c: + return 0x304b; + case 0x304e: + return 0x304d; + case 0x3050: + return 0x304f; + case 0x3052: + return 0x3051; + case 0x3054: + return 0x3053; + case 0x3056: + return 0x3055; + case 0x3058: + return 0x3057; + case 0x305a: + return 0x3059; + case 0x305c: + return 0x305b; + case 0x305e: + return 0x305d; + case 0x3060: + return 0x305f; + case 0x3062: + return 0x3061; + case 0x3065: + return 0x3064; + case 0x3067: + return 0x3066; + case 0x3069: + return 0x3068; + case 0x3070: + return 0x306f; + case 0x3071: + return 0x306f; + case 0x3073: + return 0x3072; + case 0x3074: + return 0x3072; + case 0x3076: + return 0x3075; + case 0x3077: + return 0x3075; + case 0x3079: + return 0x3078; + case 0x307a: + return 0x3078; + case 0x307c: + return 0x307b; + case 0x307d: + return 0x307b; + case 0x3094: + return 0x3046; + case 0x309e: + return 0x309d; + case 0x30ac: + return 0x30ab; + case 0x30ae: + return 0x30ad; + case 0x30b0: + return 0x30af; + case 0x30b2: + return 0x30b1; + case 0x30b4: + return 0x30b3; + case 0x30b6: + return 0x30b5; + case 0x30b8: + return 0x30b7; + case 0x30ba: + return 0x30b9; + case 0x30bc: + return 0x30bb; + case 0x30be: + return 0x30bd; + case 0x30c0: + return 0x30bf; + case 0x30c2: + return 0x30c1; + case 0x30c5: + return 0x30c4; + case 0x30c7: + return 0x30c6; + case 0x30c9: + return 0x30c8; + case 0x30d0: + return 0x30cf; + case 0x30d1: + return 0x30cf; + case 0x30d3: + return 0x30d2; + case 0x30d4: + return 0x30d2; + case 0x30d6: + return 0x30d5; + case 0x30d7: + return 0x30d5; + case 0x30d9: + return 0x30d8; + case 0x30da: + return 0x30d8; + case 0x30dc: + return 0x30db; + case 0x30dd: + return 0x30db; + case 0x30f4: + return 0x30a6; + case 0x30f7: + return 0x30ef; + case 0x30f8: + return 0x30f0; + case 0x30f9: + return 0x30f1; + case 0x30fa: + return 0x30f2; + case 0x30fe: + return 0x30fd; + case 0xf900: + return 0x8c48; + case 0xf901: + return 0x66f4; + case 0xf902: + return 0x8eca; + case 0xf903: + return 0x8cc8; + case 0xf904: + return 0x6ed1; + case 0xf905: + return 0x4e32; + case 0xf906: + return 0x53e5; + case 0xf907: + return 0x9f9c; + case 0xf908: + return 0x9f9c; + case 0xf909: + return 0x5951; + case 0xf90a: + return 0x91d1; + case 0xf90b: + return 0x5587; + case 0xf90c: + return 0x5948; + case 0xf90d: + return 0x61f6; + case 0xf90e: + return 0x7669; + case 0xf90f: + return 0x7f85; + case 0xf910: + return 0x863f; + case 0xf911: + return 0x87ba; + case 0xf912: + return 0x88f8; + case 0xf913: + return 0x908f; + case 0xf914: + return 0x6a02; + case 0xf915: + return 0x6d1b; + case 0xf916: + return 0x70d9; + case 0xf917: + return 0x73de; + case 0xf918: + return 0x843d; + case 0xf919: + return 0x916a; + case 0xf91a: + return 0x99f1; + case 0xf91b: + return 0x4e82; + case 0xf91c: + return 0x5375; + case 0xf91d: + return 0x6b04; + case 0xf91e: + return 0x721b; + case 0xf91f: + return 0x862d; + case 0xf920: + return 0x9e1e; + case 0xf921: + return 0x5d50; + case 0xf922: + return 0x6feb; + case 0xf923: + return 0x85cd; + case 0xf924: + return 0x8964; + case 0xf925: + return 0x62c9; + case 0xf926: + return 0x81d8; + case 0xf927: + return 0x881f; + case 0xf928: + return 0x5eca; + case 0xf929: + return 0x6717; + case 0xf92a: + return 0x6d6a; + case 0xf92b: + return 0x72fc; + case 0xf92c: + return 0x90ce; + case 0xf92d: + return 0x4f86; + case 0xf92e: + return 0x51b7; + case 0xf92f: + return 0x52de; + case 0xf930: + return 0x64c4; + case 0xf931: + return 0x6ad3; + case 0xf932: + return 0x7210; + case 0xf933: + return 0x76e7; + case 0xf934: + return 0x8001; + case 0xf935: + return 0x8606; + case 0xf936: + return 0x865c; + case 0xf937: + return 0x8def; + case 0xf938: + return 0x9732; + case 0xf939: + return 0x9b6f; + case 0xf93a: + return 0x9dfa; + case 0xf93b: + return 0x788c; + case 0xf93c: + return 0x797f; + case 0xf93d: + return 0x7da0; + case 0xf93e: + return 0x83c9; + case 0xf93f: + return 0x9304; + case 0xf940: + return 0x9e7f; + case 0xf941: + return 0x8ad6; + case 0xf942: + return 0x58df; + case 0xf943: + return 0x5f04; + case 0xf944: + return 0x7c60; + case 0xf945: + return 0x807e; + case 0xf946: + return 0x7262; + case 0xf947: + return 0x78ca; + case 0xf948: + return 0x8cc2; + case 0xf949: + return 0x96f7; + case 0xf94a: + return 0x58d8; + case 0xf94b: + return 0x5c62; + case 0xf94c: + return 0x6a13; + case 0xf94d: + return 0x6dda; + case 0xf94e: + return 0x6f0f; + case 0xf94f: + return 0x7d2f; + case 0xf950: + return 0x7e37; + case 0xf951: + return 0x964b; + case 0xf952: + return 0x52d2; + case 0xf953: + return 0x808b; + case 0xf954: + return 0x51dc; + case 0xf955: + return 0x51cc; + case 0xf956: + return 0x7a1c; + case 0xf957: + return 0x7dbe; + case 0xf958: + return 0x83f1; + case 0xf959: + return 0x9675; + case 0xf95a: + return 0x8b80; + case 0xf95b: + return 0x62cf; + case 0xf95c: + return 0x6a02; + case 0xf95d: + return 0x8afe; + case 0xf95e: + return 0x4e39; + case 0xf95f: + return 0x5be7; + case 0xf960: + return 0x6012; + case 0xf961: + return 0x7387; + case 0xf962: + return 0x7570; + case 0xf963: + return 0x5317; + case 0xf964: + return 0x78fb; + case 0xf965: + return 0x4fbf; + case 0xf966: + return 0x5fa9; + case 0xf967: + return 0x4e0d; + case 0xf968: + return 0x6ccc; + case 0xf969: + return 0x6578; + case 0xf96a: + return 0x7d22; + case 0xf96b: + return 0x53c3; + case 0xf96c: + return 0x585e; + case 0xf96d: + return 0x7701; + case 0xf96e: + return 0x8449; + case 0xf96f: + return 0x8aaa; + case 0xf970: + return 0x6bba; + case 0xf971: + return 0x8fb0; + case 0xf972: + return 0x6c88; + case 0xf973: + return 0x62fe; + case 0xf974: + return 0x82e5; + case 0xf975: + return 0x63a0; + case 0xf976: + return 0x7565; + case 0xf977: + return 0x4eae; + case 0xf978: + return 0x5169; + case 0xf979: + return 0x51c9; + case 0xf97a: + return 0x6881; + case 0xf97b: + return 0x7ce7; + case 0xf97c: + return 0x826f; + case 0xf97d: + return 0x8ad2; + case 0xf97e: + return 0x91cf; + case 0xf97f: + return 0x52f5; + case 0xf980: + return 0x5442; + case 0xf981: + return 0x5973; + case 0xf982: + return 0x5eec; + case 0xf983: + return 0x65c5; + case 0xf984: + return 0x6ffe; + case 0xf985: + return 0x792a; + case 0xf986: + return 0x95ad; + case 0xf987: + return 0x9a6a; + case 0xf988: + return 0x9e97; + case 0xf989: + return 0x9ece; + case 0xf98a: + return 0x529b; + case 0xf98b: + return 0x66c6; + case 0xf98c: + return 0x6b77; + case 0xf98d: + return 0x8f62; + case 0xf98e: + return 0x5e74; + case 0xf98f: + return 0x6190; + case 0xf990: + return 0x6200; + case 0xf991: + return 0x649a; + case 0xf992: + return 0x6f23; + case 0xf993: + return 0x7149; + case 0xf994: + return 0x7489; + case 0xf995: + return 0x79ca; + case 0xf996: + return 0x7df4; + case 0xf997: + return 0x806f; + case 0xf998: + return 0x8f26; + case 0xf999: + return 0x84ee; + case 0xf99a: + return 0x9023; + case 0xf99b: + return 0x934a; + case 0xf99c: + return 0x5217; + case 0xf99d: + return 0x52a3; + case 0xf99e: + return 0x54bd; + case 0xf99f: + return 0x70c8; + case 0xf9a0: + return 0x88c2; + case 0xf9a1: + return 0x8aaa; + case 0xf9a2: + return 0x5ec9; + case 0xf9a3: + return 0x5ff5; + case 0xf9a4: + return 0x637b; + case 0xf9a5: + return 0x6bae; + case 0xf9a6: + return 0x7c3e; + case 0xf9a7: + return 0x7375; + case 0xf9a8: + return 0x4ee4; + case 0xf9a9: + return 0x56f9; + case 0xf9aa: + return 0x5be7; + case 0xf9ab: + return 0x5dba; + case 0xf9ac: + return 0x601c; + case 0xf9ad: + return 0x73b2; + case 0xf9ae: + return 0x7469; + case 0xf9af: + return 0x7f9a; + case 0xf9b0: + return 0x8046; + case 0xf9b1: + return 0x9234; + case 0xf9b2: + return 0x96f6; + case 0xf9b3: + return 0x9748; + case 0xf9b4: + return 0x9818; + case 0xf9b5: + return 0x4f8b; + case 0xf9b6: + return 0x79ae; + case 0xf9b7: + return 0x91b4; + case 0xf9b8: + return 0x96b8; + case 0xf9b9: + return 0x60e1; + case 0xf9ba: + return 0x4e86; + case 0xf9bb: + return 0x50da; + case 0xf9bc: + return 0x5bee; + case 0xf9bd: + return 0x5c3f; + case 0xf9be: + return 0x6599; + case 0xf9bf: + return 0x6a02; + case 0xf9c0: + return 0x71ce; + case 0xf9c1: + return 0x7642; + case 0xf9c2: + return 0x84fc; + case 0xf9c3: + return 0x907c; + case 0xf9c4: + return 0x9f8d; + case 0xf9c5: + return 0x6688; + case 0xf9c6: + return 0x962e; + case 0xf9c7: + return 0x5289; + case 0xf9c8: + return 0x677b; + case 0xf9c9: + return 0x67f3; + case 0xf9ca: + return 0x6d41; + case 0xf9cb: + return 0x6e9c; + case 0xf9cc: + return 0x7409; + case 0xf9cd: + return 0x7559; + case 0xf9ce: + return 0x786b; + case 0xf9cf: + return 0x7d10; + case 0xf9d0: + return 0x985e; + case 0xf9d1: + return 0x516d; + case 0xf9d2: + return 0x622e; + case 0xf9d3: + return 0x9678; + case 0xf9d4: + return 0x502b; + case 0xf9d5: + return 0x5d19; + case 0xf9d6: + return 0x6dea; + case 0xf9d7: + return 0x8f2a; + case 0xf9d8: + return 0x5f8b; + case 0xf9d9: + return 0x6144; + case 0xf9da: + return 0x6817; + case 0xf9db: + return 0x7387; + case 0xf9dc: + return 0x9686; + case 0xf9dd: + return 0x5229; + case 0xf9de: + return 0x540f; + case 0xf9df: + return 0x5c65; + case 0xf9e0: + return 0x6613; + case 0xf9e1: + return 0x674e; + case 0xf9e2: + return 0x68a8; + case 0xf9e3: + return 0x6ce5; + case 0xf9e4: + return 0x7406; + case 0xf9e5: + return 0x75e2; + case 0xf9e6: + return 0x7f79; + case 0xf9e7: + return 0x88cf; + case 0xf9e8: + return 0x88e1; + case 0xf9e9: + return 0x91cc; + case 0xf9ea: + return 0x96e2; + case 0xf9eb: + return 0x533f; + case 0xf9ec: + return 0x6eba; + case 0xf9ed: + return 0x541d; + case 0xf9ee: + return 0x71d0; + case 0xf9ef: + return 0x7498; + case 0xf9f0: + return 0x85fa; + case 0xf9f1: + return 0x96a3; + case 0xf9f2: + return 0x9c57; + case 0xf9f3: + return 0x9e9f; + case 0xf9f4: + return 0x6797; + case 0xf9f5: + return 0x6dcb; + case 0xf9f6: + return 0x81e8; + case 0xf9f7: + return 0x7acb; + case 0xf9f8: + return 0x7b20; + case 0xf9f9: + return 0x7c92; + case 0xf9fa: + return 0x72c0; + case 0xf9fb: + return 0x7099; + case 0xf9fc: + return 0x8b58; + case 0xf9fd: + return 0x4ec0; + case 0xf9fe: + return 0x8336; + case 0xf9ff: + return 0x523a; + case 0xfa00: + return 0x5207; + case 0xfa01: + return 0x5ea6; + case 0xfa02: + return 0x62d3; + case 0xfa03: + return 0x7cd6; + case 0xfa04: + return 0x5b85; + case 0xfa05: + return 0x6d1e; + case 0xfa06: + return 0x66b4; + case 0xfa07: + return 0x8f3b; + case 0xfa08: + return 0x884c; + case 0xfa09: + return 0x964d; + case 0xfa0a: + return 0x898b; + case 0xfa0b: + return 0x5ed3; + case 0xfa0c: + return 0x5140; + case 0xfa0d: + return 0x55c0; + case 0xfa10: + return 0x585a; + case 0xfa12: + return 0x6674; + case 0xfa15: + return 0x51de; + case 0xfa16: + return 0x732a; + case 0xfa17: + return 0x76ca; + case 0xfa18: + return 0x793c; + case 0xfa19: + return 0x795e; + case 0xfa1a: + return 0x7965; + case 0xfa1b: + return 0x798f; + case 0xfa1c: + return 0x9756; + case 0xfa1d: + return 0x7cbe; + case 0xfa1e: + return 0x7fbd; + case 0xfa20: + return 0x8612; + case 0xfa22: + return 0x8af8; + case 0xfa25: + return 0x9038; + case 0xfa26: + return 0x90fd; + case 0xfa2a: + return 0x98ef; + case 0xfa2b: + return 0x98fc; + case 0xfa2c: + return 0x9928; + case 0xfa2d: + return 0x9db4; + case 0xfa2e: + return 0x90de; + case 0xfa2f: + return 0x96b7; + case 0xfa30: + return 0x4fae; + case 0xfa31: + return 0x50e7; + case 0xfa32: + return 0x514d; + case 0xfa33: + return 0x52c9; + case 0xfa34: + return 0x52e4; + case 0xfa35: + return 0x5351; + case 0xfa36: + return 0x559d; + case 0xfa37: + return 0x5606; + case 0xfa38: + return 0x5668; + case 0xfa39: + return 0x5840; + case 0xfa3a: + return 0x58a8; + case 0xfa3b: + return 0x5c64; + case 0xfa3c: + return 0x5c6e; + case 0xfa3d: + return 0x6094; + case 0xfa3e: + return 0x6168; + case 0xfa3f: + return 0x618e; + case 0xfa40: + return 0x61f2; + case 0xfa41: + return 0x654f; + case 0xfa42: + return 0x65e2; + case 0xfa43: + return 0x6691; + case 0xfa44: + return 0x6885; + case 0xfa45: + return 0x6d77; + case 0xfa46: + return 0x6e1a; + case 0xfa47: + return 0x6f22; + case 0xfa48: + return 0x716e; + case 0xfa49: + return 0x722b; + case 0xfa4a: + return 0x7422; + case 0xfa4b: + return 0x7891; + case 0xfa4c: + return 0x793e; + case 0xfa4d: + return 0x7949; + case 0xfa4e: + return 0x7948; + case 0xfa4f: + return 0x7950; + case 0xfa50: + return 0x7956; + case 0xfa51: + return 0x795d; + case 0xfa52: + return 0x798d; + case 0xfa53: + return 0x798e; + case 0xfa54: + return 0x7a40; + case 0xfa55: + return 0x7a81; + case 0xfa56: + return 0x7bc0; + case 0xfa57: + return 0x7df4; + case 0xfa58: + return 0x7e09; + case 0xfa59: + return 0x7e41; + case 0xfa5a: + return 0x7f72; + case 0xfa5b: + return 0x8005; + case 0xfa5c: + return 0x81ed; + case 0xfa5d: + return 0x8279; + case 0xfa5e: + return 0x8279; + case 0xfa5f: + return 0x8457; + case 0xfa60: + return 0x8910; + case 0xfa61: + return 0x8996; + case 0xfa62: + return 0x8b01; + case 0xfa63: + return 0x8b39; + case 0xfa64: + return 0x8cd3; + case 0xfa65: + return 0x8d08; + case 0xfa66: + return 0x8fb6; + case 0xfa67: + return 0x9038; + case 0xfa68: + return 0x96e3; + case 0xfa69: + return 0x97ff; + case 0xfa6a: + return 0x983b; + case 0xfa6b: + return 0x6075; + case 0xfa6c: + return 0x242ee; + case 0xfa6d: + return 0x8218; + case 0xfa70: + return 0x4e26; + case 0xfa71: + return 0x51b5; + case 0xfa72: + return 0x5168; + case 0xfa73: + return 0x4f80; + case 0xfa74: + return 0x5145; + case 0xfa75: + return 0x5180; + case 0xfa76: + return 0x52c7; + case 0xfa77: + return 0x52fa; + case 0xfa78: + return 0x559d; + case 0xfa79: + return 0x5555; + case 0xfa7a: + return 0x5599; + case 0xfa7b: + return 0x55e2; + case 0xfa7c: + return 0x585a; + case 0xfa7d: + return 0x58b3; + case 0xfa7e: + return 0x5944; + case 0xfa7f: + return 0x5954; + case 0xfa80: + return 0x5a62; + case 0xfa81: + return 0x5b28; + case 0xfa82: + return 0x5ed2; + case 0xfa83: + return 0x5ed9; + case 0xfa84: + return 0x5f69; + case 0xfa85: + return 0x5fad; + case 0xfa86: + return 0x60d8; + case 0xfa87: + return 0x614e; + case 0xfa88: + return 0x6108; + case 0xfa89: + return 0x618e; + case 0xfa8a: + return 0x6160; + case 0xfa8b: + return 0x61f2; + case 0xfa8c: + return 0x6234; + case 0xfa8d: + return 0x63c4; + case 0xfa8e: + return 0x641c; + case 0xfa8f: + return 0x6452; + case 0xfa90: + return 0x6556; + case 0xfa91: + return 0x6674; + case 0xfa92: + return 0x6717; + case 0xfa93: + return 0x671b; + case 0xfa94: + return 0x6756; + case 0xfa95: + return 0x6b79; + case 0xfa96: + return 0x6bba; + case 0xfa97: + return 0x6d41; + case 0xfa98: + return 0x6edb; + case 0xfa99: + return 0x6ecb; + case 0xfa9a: + return 0x6f22; + case 0xfa9b: + return 0x701e; + case 0xfa9c: + return 0x716e; + case 0xfa9d: + return 0x77a7; + case 0xfa9e: + return 0x7235; + case 0xfa9f: + return 0x72af; + case 0xfaa0: + return 0x732a; + case 0xfaa1: + return 0x7471; + case 0xfaa2: + return 0x7506; + case 0xfaa3: + return 0x753b; + case 0xfaa4: + return 0x761d; + case 0xfaa5: + return 0x761f; + case 0xfaa6: + return 0x76ca; + case 0xfaa7: + return 0x76db; + case 0xfaa8: + return 0x76f4; + case 0xfaa9: + return 0x774a; + case 0xfaaa: + return 0x7740; + case 0xfaab: + return 0x78cc; + case 0xfaac: + return 0x7ab1; + case 0xfaad: + return 0x7bc0; + case 0xfaae: + return 0x7c7b; + case 0xfaaf: + return 0x7d5b; + case 0xfab0: + return 0x7df4; + case 0xfab1: + return 0x7f3e; + case 0xfab2: + return 0x8005; + case 0xfab3: + return 0x8352; + case 0xfab4: + return 0x83ef; + case 0xfab5: + return 0x8779; + case 0xfab6: + return 0x8941; + case 0xfab7: + return 0x8986; + case 0xfab8: + return 0x8996; + case 0xfab9: + return 0x8abf; + case 0xfaba: + return 0x8af8; + case 0xfabb: + return 0x8acb; + case 0xfabc: + return 0x8b01; + case 0xfabd: + return 0x8afe; + case 0xfabe: + return 0x8aed; + case 0xfabf: + return 0x8b39; + case 0xfac0: + return 0x8b8a; + case 0xfac1: + return 0x8d08; + case 0xfac2: + return 0x8f38; + case 0xfac3: + return 0x9072; + case 0xfac4: + return 0x9199; + case 0xfac5: + return 0x9276; + case 0xfac6: + return 0x967c; + case 0xfac7: + return 0x96e3; + case 0xfac8: + return 0x9756; + case 0xfac9: + return 0x97db; + case 0xfaca: + return 0x97ff; + case 0xfacb: + return 0x980b; + case 0xfacc: + return 0x983b; + case 0xfacd: + return 0x9b12; + case 0xface: + return 0x9f9c; + case 0xfacf: + return 0x2284a; + case 0xfad0: + return 0x22844; + case 0xfad1: + return 0x233d5; + case 0xfad2: + return 0x3b9d; + case 0xfad3: + return 0x4018; + case 0xfad4: + return 0x4039; + case 0xfad5: + return 0x25249; + case 0xfad6: + return 0x25cd0; + case 0xfad7: + return 0x27ed3; + case 0xfad8: + return 0x9f43; + case 0xfad9: + return 0x9f8e; + case 0xfb1d: + return 0x5d9; + case 0xfb1f: + return 0x5f2; + case 0xfb2a: + return 0x5e9; + case 0xfb2b: + return 0x5e9; + case 0xfb2c: + return 0x5e9; + case 0xfb2d: + return 0x5e9; + case 0xfb2e: + return 0x5d0; + case 0xfb2f: + return 0x5d0; + case 0xfb30: + return 0x5d0; + case 0xfb31: + return 0x5d1; + case 0xfb32: + return 0x5d2; + case 0xfb33: + return 0x5d3; + case 0xfb34: + return 0x5d4; + case 0xfb35: + return 0x5d5; + case 0xfb36: + return 0x5d6; + case 0xfb38: + return 0x5d8; + case 0xfb39: + return 0x5d9; + case 0xfb3a: + return 0x5da; + case 0xfb3b: + return 0x5db; + case 0xfb3c: + return 0x5dc; + case 0xfb3e: + return 0x5de; + case 0xfb40: + return 0x5e0; + case 0xfb41: + return 0x5e1; + case 0xfb43: + return 0x5e3; + case 0xfb44: + return 0x5e4; + case 0xfb46: + return 0x5e6; + case 0xfb47: + return 0x5e7; + case 0xfb48: + return 0x5e8; + case 0xfb49: + return 0x5e9; + case 0xfb4a: + return 0x5ea; + case 0xfb4b: + return 0x5d5; + case 0xfb4c: + return 0x5d1; + case 0xfb4d: + return 0x5db; + case 0xfb4e: + return 0x5e4; + case 0x1109a: + return 0x11099; + case 0x1109c: + return 0x1109b; + case 0x110ab: + return 0x110a5; + case 0x2f800: + return 0x4e3d; + case 0x2f801: + return 0x4e38; + case 0x2f802: + return 0x4e41; + case 0x2f803: + return 0x20122; + case 0x2f804: + return 0x4f60; + case 0x2f805: + return 0x4fae; + case 0x2f806: + return 0x4fbb; + case 0x2f807: + return 0x5002; + case 0x2f808: + return 0x507a; + case 0x2f809: + return 0x5099; + case 0x2f80a: + return 0x50e7; + case 0x2f80b: + return 0x50cf; + case 0x2f80c: + return 0x349e; + case 0x2f80d: + return 0x2063a; + case 0x2f80e: + return 0x514d; + case 0x2f80f: + return 0x5154; + case 0x2f810: + return 0x5164; + case 0x2f811: + return 0x5177; + case 0x2f812: + return 0x2051c; + case 0x2f813: + return 0x34b9; + case 0x2f814: + return 0x5167; + case 0x2f815: + return 0x518d; + case 0x2f816: + return 0x2054b; + case 0x2f817: + return 0x5197; + case 0x2f818: + return 0x51a4; + case 0x2f819: + return 0x4ecc; + case 0x2f81a: + return 0x51ac; + case 0x2f81b: + return 0x51b5; + case 0x2f81c: + return 0x291df; + case 0x2f81d: + return 0x51f5; + case 0x2f81e: + return 0x5203; + case 0x2f81f: + return 0x34df; + case 0x2f820: + return 0x523b; + case 0x2f821: + return 0x5246; + case 0x2f822: + return 0x5272; + case 0x2f823: + return 0x5277; + case 0x2f824: + return 0x3515; + case 0x2f825: + return 0x52c7; + case 0x2f826: + return 0x52c9; + case 0x2f827: + return 0x52e4; + case 0x2f828: + return 0x52fa; + case 0x2f829: + return 0x5305; + case 0x2f82a: + return 0x5306; + case 0x2f82b: + return 0x5317; + case 0x2f82c: + return 0x5349; + case 0x2f82d: + return 0x5351; + case 0x2f82e: + return 0x535a; + case 0x2f82f: + return 0x5373; + case 0x2f830: + return 0x537d; + case 0x2f831: + return 0x537f; + case 0x2f832: + return 0x537f; + case 0x2f833: + return 0x537f; + case 0x2f834: + return 0x20a2c; + case 0x2f835: + return 0x7070; + case 0x2f836: + return 0x53ca; + case 0x2f837: + return 0x53df; + case 0x2f838: + return 0x20b63; + case 0x2f839: + return 0x53eb; + case 0x2f83a: + return 0x53f1; + case 0x2f83b: + return 0x5406; + case 0x2f83c: + return 0x549e; + case 0x2f83d: + return 0x5438; + case 0x2f83e: + return 0x5448; + case 0x2f83f: + return 0x5468; + case 0x2f840: + return 0x54a2; + case 0x2f841: + return 0x54f6; + case 0x2f842: + return 0x5510; + case 0x2f843: + return 0x5553; + case 0x2f844: + return 0x5563; + case 0x2f845: + return 0x5584; + case 0x2f846: + return 0x5584; + case 0x2f847: + return 0x5599; + case 0x2f848: + return 0x55ab; + case 0x2f849: + return 0x55b3; + case 0x2f84a: + return 0x55c2; + case 0x2f84b: + return 0x5716; + case 0x2f84c: + return 0x5606; + case 0x2f84d: + return 0x5717; + case 0x2f84e: + return 0x5651; + case 0x2f84f: + return 0x5674; + case 0x2f850: + return 0x5207; + case 0x2f851: + return 0x58ee; + case 0x2f852: + return 0x57ce; + case 0x2f853: + return 0x57f4; + case 0x2f854: + return 0x580d; + case 0x2f855: + return 0x578b; + case 0x2f856: + return 0x5832; + case 0x2f857: + return 0x5831; + case 0x2f858: + return 0x58ac; + case 0x2f859: + return 0x214e4; + case 0x2f85a: + return 0x58f2; + case 0x2f85b: + return 0x58f7; + case 0x2f85c: + return 0x5906; + case 0x2f85d: + return 0x591a; + case 0x2f85e: + return 0x5922; + case 0x2f85f: + return 0x5962; + case 0x2f860: + return 0x216a8; + case 0x2f861: + return 0x216ea; + case 0x2f862: + return 0x59ec; + case 0x2f863: + return 0x5a1b; + case 0x2f864: + return 0x5a27; + case 0x2f865: + return 0x59d8; + case 0x2f866: + return 0x5a66; + case 0x2f867: + return 0x36ee; + case 0x2f868: + return 0x36fc; + case 0x2f869: + return 0x5b08; + case 0x2f86a: + return 0x5b3e; + case 0x2f86b: + return 0x5b3e; + case 0x2f86c: + return 0x219c8; + case 0x2f86d: + return 0x5bc3; + case 0x2f86e: + return 0x5bd8; + case 0x2f86f: + return 0x5be7; + case 0x2f870: + return 0x5bf3; + case 0x2f871: + return 0x21b18; + case 0x2f872: + return 0x5bff; + case 0x2f873: + return 0x5c06; + case 0x2f874: + return 0x5f53; + case 0x2f875: + return 0x5c22; + case 0x2f876: + return 0x3781; + case 0x2f877: + return 0x5c60; + case 0x2f878: + return 0x5c6e; + case 0x2f879: + return 0x5cc0; + case 0x2f87a: + return 0x5c8d; + case 0x2f87b: + return 0x21de4; + case 0x2f87c: + return 0x5d43; + case 0x2f87d: + return 0x21de6; + case 0x2f87e: + return 0x5d6e; + case 0x2f87f: + return 0x5d6b; + case 0x2f880: + return 0x5d7c; + case 0x2f881: + return 0x5de1; + case 0x2f882: + return 0x5de2; + case 0x2f883: + return 0x382f; + case 0x2f884: + return 0x5dfd; + case 0x2f885: + return 0x5e28; + case 0x2f886: + return 0x5e3d; + case 0x2f887: + return 0x5e69; + case 0x2f888: + return 0x3862; + case 0x2f889: + return 0x22183; + case 0x2f88a: + return 0x387c; + case 0x2f88b: + return 0x5eb0; + case 0x2f88c: + return 0x5eb3; + case 0x2f88d: + return 0x5eb6; + case 0x2f88e: + return 0x5eca; + case 0x2f88f: + return 0x2a392; + case 0x2f890: + return 0x5efe; + case 0x2f891: + return 0x22331; + case 0x2f892: + return 0x22331; + case 0x2f893: + return 0x8201; + case 0x2f894: + return 0x5f22; + case 0x2f895: + return 0x5f22; + case 0x2f896: + return 0x38c7; + case 0x2f897: + return 0x232b8; + case 0x2f898: + return 0x261da; + case 0x2f899: + return 0x5f62; + case 0x2f89a: + return 0x5f6b; + case 0x2f89b: + return 0x38e3; + case 0x2f89c: + return 0x5f9a; + case 0x2f89d: + return 0x5fcd; + case 0x2f89e: + return 0x5fd7; + case 0x2f89f: + return 0x5ff9; + case 0x2f8a0: + return 0x6081; + case 0x2f8a1: + return 0x393a; + case 0x2f8a2: + return 0x391c; + case 0x2f8a3: + return 0x6094; + case 0x2f8a4: + return 0x226d4; + case 0x2f8a5: + return 0x60c7; + case 0x2f8a6: + return 0x6148; + case 0x2f8a7: + return 0x614c; + case 0x2f8a8: + return 0x614e; + case 0x2f8a9: + return 0x614c; + case 0x2f8aa: + return 0x617a; + case 0x2f8ab: + return 0x618e; + case 0x2f8ac: + return 0x61b2; + case 0x2f8ad: + return 0x61a4; + case 0x2f8ae: + return 0x61af; + case 0x2f8af: + return 0x61de; + case 0x2f8b0: + return 0x61f2; + case 0x2f8b1: + return 0x61f6; + case 0x2f8b2: + return 0x6210; + case 0x2f8b3: + return 0x621b; + case 0x2f8b4: + return 0x625d; + case 0x2f8b5: + return 0x62b1; + case 0x2f8b6: + return 0x62d4; + case 0x2f8b7: + return 0x6350; + case 0x2f8b8: + return 0x22b0c; + case 0x2f8b9: + return 0x633d; + case 0x2f8ba: + return 0x62fc; + case 0x2f8bb: + return 0x6368; + case 0x2f8bc: + return 0x6383; + case 0x2f8bd: + return 0x63e4; + case 0x2f8be: + return 0x22bf1; + case 0x2f8bf: + return 0x6422; + case 0x2f8c0: + return 0x63c5; + case 0x2f8c1: + return 0x63a9; + case 0x2f8c2: + return 0x3a2e; + case 0x2f8c3: + return 0x6469; + case 0x2f8c4: + return 0x647e; + case 0x2f8c5: + return 0x649d; + case 0x2f8c6: + return 0x6477; + case 0x2f8c7: + return 0x3a6c; + case 0x2f8c8: + return 0x654f; + case 0x2f8c9: + return 0x656c; + case 0x2f8ca: + return 0x2300a; + case 0x2f8cb: + return 0x65e3; + case 0x2f8cc: + return 0x66f8; + case 0x2f8cd: + return 0x6649; + case 0x2f8ce: + return 0x3b19; + case 0x2f8cf: + return 0x6691; + case 0x2f8d0: + return 0x3b08; + case 0x2f8d1: + return 0x3ae4; + case 0x2f8d2: + return 0x5192; + case 0x2f8d3: + return 0x5195; + case 0x2f8d4: + return 0x6700; + case 0x2f8d5: + return 0x669c; + case 0x2f8d6: + return 0x80ad; + case 0x2f8d7: + return 0x43d9; + case 0x2f8d8: + return 0x6717; + case 0x2f8d9: + return 0x671b; + case 0x2f8da: + return 0x6721; + case 0x2f8db: + return 0x675e; + case 0x2f8dc: + return 0x6753; + case 0x2f8dd: + return 0x233c3; + case 0x2f8de: + return 0x3b49; + case 0x2f8df: + return 0x67fa; + case 0x2f8e0: + return 0x6785; + case 0x2f8e1: + return 0x6852; + case 0x2f8e2: + return 0x6885; + case 0x2f8e3: + return 0x2346d; + case 0x2f8e4: + return 0x688e; + case 0x2f8e5: + return 0x681f; + case 0x2f8e6: + return 0x6914; + case 0x2f8e7: + return 0x3b9d; + case 0x2f8e8: + return 0x6942; + case 0x2f8e9: + return 0x69a3; + case 0x2f8ea: + return 0x69ea; + case 0x2f8eb: + return 0x6aa8; + case 0x2f8ec: + return 0x236a3; + case 0x2f8ed: + return 0x6adb; + case 0x2f8ee: + return 0x3c18; + case 0x2f8ef: + return 0x6b21; + case 0x2f8f0: + return 0x238a7; + case 0x2f8f1: + return 0x6b54; + case 0x2f8f2: + return 0x3c4e; + case 0x2f8f3: + return 0x6b72; + case 0x2f8f4: + return 0x6b9f; + case 0x2f8f5: + return 0x6bba; + case 0x2f8f6: + return 0x6bbb; + case 0x2f8f7: + return 0x23a8d; + case 0x2f8f8: + return 0x21d0b; + case 0x2f8f9: + return 0x23afa; + case 0x2f8fa: + return 0x6c4e; + case 0x2f8fb: + return 0x23cbc; + case 0x2f8fc: + return 0x6cbf; + case 0x2f8fd: + return 0x6ccd; + case 0x2f8fe: + return 0x6c67; + case 0x2f8ff: + return 0x6d16; + case 0x2f900: + return 0x6d3e; + case 0x2f901: + return 0x6d77; + case 0x2f902: + return 0x6d41; + case 0x2f903: + return 0x6d69; + case 0x2f904: + return 0x6d78; + case 0x2f905: + return 0x6d85; + case 0x2f906: + return 0x23d1e; + case 0x2f907: + return 0x6d34; + case 0x2f908: + return 0x6e2f; + case 0x2f909: + return 0x6e6e; + case 0x2f90a: + return 0x3d33; + case 0x2f90b: + return 0x6ecb; + case 0x2f90c: + return 0x6ec7; + case 0x2f90d: + return 0x23ed1; + case 0x2f90e: + return 0x6df9; + case 0x2f90f: + return 0x6f6e; + case 0x2f910: + return 0x23f5e; + case 0x2f911: + return 0x23f8e; + case 0x2f912: + return 0x6fc6; + case 0x2f913: + return 0x7039; + case 0x2f914: + return 0x701e; + case 0x2f915: + return 0x701b; + case 0x2f916: + return 0x3d96; + case 0x2f917: + return 0x704a; + case 0x2f918: + return 0x707d; + case 0x2f919: + return 0x7077; + case 0x2f91a: + return 0x70ad; + case 0x2f91b: + return 0x20525; + case 0x2f91c: + return 0x7145; + case 0x2f91d: + return 0x24263; + case 0x2f91e: + return 0x719c; + case 0x2f91f: + return 0x243ab; + case 0x2f920: + return 0x7228; + case 0x2f921: + return 0x7235; + case 0x2f922: + return 0x7250; + case 0x2f923: + return 0x24608; + case 0x2f924: + return 0x7280; + case 0x2f925: + return 0x7295; + case 0x2f926: + return 0x24735; + case 0x2f927: + return 0x24814; + case 0x2f928: + return 0x737a; + case 0x2f929: + return 0x738b; + case 0x2f92a: + return 0x3eac; + case 0x2f92b: + return 0x73a5; + case 0x2f92c: + return 0x3eb8; + case 0x2f92d: + return 0x3eb8; + case 0x2f92e: + return 0x7447; + case 0x2f92f: + return 0x745c; + case 0x2f930: + return 0x7471; + case 0x2f931: + return 0x7485; + case 0x2f932: + return 0x74ca; + case 0x2f933: + return 0x3f1b; + case 0x2f934: + return 0x7524; + case 0x2f935: + return 0x24c36; + case 0x2f936: + return 0x753e; + case 0x2f937: + return 0x24c92; + case 0x2f938: + return 0x7570; + case 0x2f939: + return 0x2219f; + case 0x2f93a: + return 0x7610; + case 0x2f93b: + return 0x24fa1; + case 0x2f93c: + return 0x24fb8; + case 0x2f93d: + return 0x25044; + case 0x2f93e: + return 0x3ffc; + case 0x2f93f: + return 0x4008; + case 0x2f940: + return 0x76f4; + case 0x2f941: + return 0x250f3; + case 0x2f942: + return 0x250f2; + case 0x2f943: + return 0x25119; + case 0x2f944: + return 0x25133; + case 0x2f945: + return 0x771e; + case 0x2f946: + return 0x771f; + case 0x2f947: + return 0x771f; + case 0x2f948: + return 0x774a; + case 0x2f949: + return 0x4039; + case 0x2f94a: + return 0x778b; + case 0x2f94b: + return 0x4046; + case 0x2f94c: + return 0x4096; + case 0x2f94d: + return 0x2541d; + case 0x2f94e: + return 0x784e; + case 0x2f94f: + return 0x788c; + case 0x2f950: + return 0x78cc; + case 0x2f951: + return 0x40e3; + case 0x2f952: + return 0x25626; + case 0x2f953: + return 0x7956; + case 0x2f954: + return 0x2569a; + case 0x2f955: + return 0x256c5; + case 0x2f956: + return 0x798f; + case 0x2f957: + return 0x79eb; + case 0x2f958: + return 0x412f; + case 0x2f959: + return 0x7a40; + case 0x2f95a: + return 0x7a4a; + case 0x2f95b: + return 0x7a4f; + case 0x2f95c: + return 0x2597c; + case 0x2f95d: + return 0x25aa7; + case 0x2f95e: + return 0x25aa7; + case 0x2f95f: + return 0x7aee; + case 0x2f960: + return 0x4202; + case 0x2f961: + return 0x25bab; + case 0x2f962: + return 0x7bc6; + case 0x2f963: + return 0x7bc9; + case 0x2f964: + return 0x4227; + case 0x2f965: + return 0x25c80; + case 0x2f966: + return 0x7cd2; + case 0x2f967: + return 0x42a0; + case 0x2f968: + return 0x7ce8; + case 0x2f969: + return 0x7ce3; + case 0x2f96a: + return 0x7d00; + case 0x2f96b: + return 0x25f86; + case 0x2f96c: + return 0x7d63; + case 0x2f96d: + return 0x4301; + case 0x2f96e: + return 0x7dc7; + case 0x2f96f: + return 0x7e02; + case 0x2f970: + return 0x7e45; + case 0x2f971: + return 0x4334; + case 0x2f972: + return 0x26228; + case 0x2f973: + return 0x26247; + case 0x2f974: + return 0x4359; + case 0x2f975: + return 0x262d9; + case 0x2f976: + return 0x7f7a; + case 0x2f977: + return 0x2633e; + case 0x2f978: + return 0x7f95; + case 0x2f979: + return 0x7ffa; + case 0x2f97a: + return 0x8005; + case 0x2f97b: + return 0x264da; + case 0x2f97c: + return 0x26523; + case 0x2f97d: + return 0x8060; + case 0x2f97e: + return 0x265a8; + case 0x2f97f: + return 0x8070; + case 0x2f980: + return 0x2335f; + case 0x2f981: + return 0x43d5; + case 0x2f982: + return 0x80b2; + case 0x2f983: + return 0x8103; + case 0x2f984: + return 0x440b; + case 0x2f985: + return 0x813e; + case 0x2f986: + return 0x5ab5; + case 0x2f987: + return 0x267a7; + case 0x2f988: + return 0x267b5; + case 0x2f989: + return 0x23393; + case 0x2f98a: + return 0x2339c; + case 0x2f98b: + return 0x8201; + case 0x2f98c: + return 0x8204; + case 0x2f98d: + return 0x8f9e; + case 0x2f98e: + return 0x446b; + case 0x2f98f: + return 0x8291; + case 0x2f990: + return 0x828b; + case 0x2f991: + return 0x829d; + case 0x2f992: + return 0x52b3; + case 0x2f993: + return 0x82b1; + case 0x2f994: + return 0x82b3; + case 0x2f995: + return 0x82bd; + case 0x2f996: + return 0x82e6; + case 0x2f997: + return 0x26b3c; + case 0x2f998: + return 0x82e5; + case 0x2f999: + return 0x831d; + case 0x2f99a: + return 0x8363; + case 0x2f99b: + return 0x83ad; + case 0x2f99c: + return 0x8323; + case 0x2f99d: + return 0x83bd; + case 0x2f99e: + return 0x83e7; + case 0x2f99f: + return 0x8457; + case 0x2f9a0: + return 0x8353; + case 0x2f9a1: + return 0x83ca; + case 0x2f9a2: + return 0x83cc; + case 0x2f9a3: + return 0x83dc; + case 0x2f9a4: + return 0x26c36; + case 0x2f9a5: + return 0x26d6b; + case 0x2f9a6: + return 0x26cd5; + case 0x2f9a7: + return 0x452b; + case 0x2f9a8: + return 0x84f1; + case 0x2f9a9: + return 0x84f3; + case 0x2f9aa: + return 0x8516; + case 0x2f9ab: + return 0x273ca; + case 0x2f9ac: + return 0x8564; + case 0x2f9ad: + return 0x26f2c; + case 0x2f9ae: + return 0x455d; + case 0x2f9af: + return 0x4561; + case 0x2f9b0: + return 0x26fb1; + case 0x2f9b1: + return 0x270d2; + case 0x2f9b2: + return 0x456b; + case 0x2f9b3: + return 0x8650; + case 0x2f9b4: + return 0x865c; + case 0x2f9b5: + return 0x8667; + case 0x2f9b6: + return 0x8669; + case 0x2f9b7: + return 0x86a9; + case 0x2f9b8: + return 0x8688; + case 0x2f9b9: + return 0x870e; + case 0x2f9ba: + return 0x86e2; + case 0x2f9bb: + return 0x8779; + case 0x2f9bc: + return 0x8728; + case 0x2f9bd: + return 0x876b; + case 0x2f9be: + return 0x8786; + case 0x2f9bf: + return 0x45d7; + case 0x2f9c0: + return 0x87e1; + case 0x2f9c1: + return 0x8801; + case 0x2f9c2: + return 0x45f9; + case 0x2f9c3: + return 0x8860; + case 0x2f9c4: + return 0x8863; + case 0x2f9c5: + return 0x27667; + case 0x2f9c6: + return 0x88d7; + case 0x2f9c7: + return 0x88de; + case 0x2f9c8: + return 0x4635; + case 0x2f9c9: + return 0x88fa; + case 0x2f9ca: + return 0x34bb; + case 0x2f9cb: + return 0x278ae; + case 0x2f9cc: + return 0x27966; + case 0x2f9cd: + return 0x46be; + case 0x2f9ce: + return 0x46c7; + case 0x2f9cf: + return 0x8aa0; + case 0x2f9d0: + return 0x8aed; + case 0x2f9d1: + return 0x8b8a; + case 0x2f9d2: + return 0x8c55; + case 0x2f9d3: + return 0x27ca8; + case 0x2f9d4: + return 0x8cab; + case 0x2f9d5: + return 0x8cc1; + case 0x2f9d6: + return 0x8d1b; + case 0x2f9d7: + return 0x8d77; + case 0x2f9d8: + return 0x27f2f; + case 0x2f9d9: + return 0x20804; + case 0x2f9da: + return 0x8dcb; + case 0x2f9db: + return 0x8dbc; + case 0x2f9dc: + return 0x8df0; + case 0x2f9dd: + return 0x208de; + case 0x2f9de: + return 0x8ed4; + case 0x2f9df: + return 0x8f38; + case 0x2f9e0: + return 0x285d2; + case 0x2f9e1: + return 0x285ed; + case 0x2f9e2: + return 0x9094; + case 0x2f9e3: + return 0x90f1; + case 0x2f9e4: + return 0x9111; + case 0x2f9e5: + return 0x2872e; + case 0x2f9e6: + return 0x911b; + case 0x2f9e7: + return 0x9238; + case 0x2f9e8: + return 0x92d7; + case 0x2f9e9: + return 0x92d8; + case 0x2f9ea: + return 0x927c; + case 0x2f9eb: + return 0x93f9; + case 0x2f9ec: + return 0x9415; + case 0x2f9ed: + return 0x28bfa; + case 0x2f9ee: + return 0x958b; + case 0x2f9ef: + return 0x4995; + case 0x2f9f0: + return 0x95b7; + case 0x2f9f1: + return 0x28d77; + case 0x2f9f2: + return 0x49e6; + case 0x2f9f3: + return 0x96c3; + case 0x2f9f4: + return 0x5db2; + case 0x2f9f5: + return 0x9723; + case 0x2f9f6: + return 0x29145; + case 0x2f9f7: + return 0x2921a; + case 0x2f9f8: + return 0x4a6e; + case 0x2f9f9: + return 0x4a76; + case 0x2f9fa: + return 0x97e0; + case 0x2f9fb: + return 0x2940a; + case 0x2f9fc: + return 0x4ab2; + case 0x2f9fd: + return 0x29496; + case 0x2f9fe: + return 0x980b; + case 0x2f9ff: + return 0x980b; + case 0x2fa00: + return 0x9829; + case 0x2fa01: + return 0x295b6; + case 0x2fa02: + return 0x98e2; + case 0x2fa03: + return 0x4b33; + case 0x2fa04: + return 0x9929; + case 0x2fa05: + return 0x99a7; + case 0x2fa06: + return 0x99c2; + case 0x2fa07: + return 0x99fe; + case 0x2fa08: + return 0x4bce; + case 0x2fa09: + return 0x29b30; + case 0x2fa0a: + return 0x9b12; + case 0x2fa0b: + return 0x9c40; + case 0x2fa0c: + return 0x9cfd; + case 0x2fa0d: + return 0x4cce; + case 0x2fa0e: + return 0x4ced; + case 0x2fa0f: + return 0x9d67; + case 0x2fa10: + return 0x2a0ce; + case 0x2fa11: + return 0x4cf8; + case 0x2fa12: + return 0x2a105; + case 0x2fa13: + return 0x2a20e; + case 0x2fa14: + return 0x2a291; + case 0x2fa15: + return 0x9ebb; + case 0x2fa16: + return 0x4d56; + case 0x2fa17: + return 0x9ef9; + case 0x2fa18: + return 0x9efe; + case 0x2fa19: + return 0x9f05; + case 0x2fa1a: + return 0x9f0f; + case 0x2fa1b: + return 0x9f16; + case 0x2fa1c: + return 0x9f3b; + case 0x2fa1d: + return 0x2a600; + default: + return codepoint; + } +} +} // namespace unicode +} // namespace mongo diff --git a/src/mongo/db/fts/unicode/codepoints_test.cpp b/src/mongo/db/fts/unicode/codepoints_test.cpp new file mode 100644 index 00000000000..90510666cba --- /dev/null +++ b/src/mongo/db/fts/unicode/codepoints_test.cpp @@ -0,0 +1,94 @@ +/** + * Copyright (C) 2015 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/db/fts/unicode/codepoints.h" +#include "mongo/unittest/unittest.h" + +namespace mongo { +namespace unicode { + +/** + * Above most of the arrays in this class are the UTF-32 character literals that correspond to the + * codepoints in the array. + */ + +TEST(UnicodeCodepoints, Diacritics) { + // There are no character literals for combining marks. + const char32_t marks[] = {0x0301, 0x0339, 0x1AB4, 0x1DC5, 0xA69D}; + + // const char32_t not_marks[] = {U'-', U'.', U'\'', U'*', U'm'}; + const char32_t not_marks[] = {0x2D, 0x2E, 0x27, 0x2A, 0x6D}; + + for (auto i = 0; i < 5; ++i) { + ASSERT(codepointIsDiacritic(marks[i])); + ASSERT_FALSE(codepointIsDiacritic(not_marks[i])); + } +} + +TEST(UnicodeCodepoints, Delimiters) { + // const char32_t delimiters[] = {U'-', U'.', U'"', U'¿', U'«'}; + const char32_t delimiters[] = {0x2D, 0x2E, 0x22, 0xBF, 0xAB}; + // const char32_t not_delimiters[] = {U'a', U'ê', U'π', U'Ω', U'å'}; + const char32_t not_delimiters[] = {0x61, 0xEA, 0x3C0, 0x3A9, 0xE5}; + + for (auto i = 0; i < 5; ++i) { + ASSERT(codepointIsDelimiter(delimiters[i], DelimiterListLanguage::kEnglish)); + ASSERT(codepointIsDelimiter(delimiters[i], DelimiterListLanguage::kNotEnglish)); + ASSERT_FALSE(codepointIsDelimiter(not_delimiters[i], DelimiterListLanguage::kEnglish)); + ASSERT_FALSE(codepointIsDelimiter(not_delimiters[i], DelimiterListLanguage::kNotEnglish)); + } + + // Special case for English. + ASSERT(codepointIsDelimiter(0x27, DelimiterListLanguage::kNotEnglish)); + ASSERT_FALSE(codepointIsDelimiter(0x27, DelimiterListLanguage::kEnglish)); +} + +TEST(UnicodeCodepoints, RemoveDiacritics) { + // const char32_t originals[] = {U'á', U'ê', U'ñ', U'å', U'ç'}; + const char32_t originals[] = {0xE1, 0xEA, 0xF1, 0xE5, 0xE7}; + // const char32_t clean[] = {U'a', U'e', U'n', U'a', U'c'}; + const char32_t clean[] = {0x61, 0x65, 0x6E, 0x61, 0x63}; + + for (auto i = 0; i < 5; ++i) { + ASSERT_EQUALS(clean[i], codepointRemoveDiacritics(originals[i])); + } +} + +TEST(UnicodeCodepoints, ToLower) { + // const char32_t upper[] = {U'Á', U'Ê', U'Ñ', U'Å', U'Ç'}; + const char32_t upper[] = {0xC1, 0xCA, 0xD1, 0xC5, 0xC7}; + // const char32_t lower[] = {U'á', U'ê', U'ñ', U'å', U'ç'}; + const char32_t lower[] = {0xE1, 0xEA, 0xF1, 0xE5, 0xE7}; + + for (auto i = 0; i < 5; ++i) { + ASSERT_EQUALS(lower[i], codepointToLower(upper[i])); + } +} + +} // namespace unicode +} // namespace mongo diff --git a/src/mongo/db/fts/unicode/gen_casefold_map.py b/src/mongo/db/fts/unicode/gen_casefold_map.py new file mode 100644 index 00000000000..fc55cdd57c7 --- /dev/null +++ b/src/mongo/db/fts/unicode/gen_casefold_map.py @@ -0,0 +1,76 @@ + #!/usr/bin/python + # -*- coding: utf-8 -*- +import os +import sys + +from gen_helper import getCopyrightNotice, openNamespaces, closeNamespaces, \ + include + +def generate(unicode_casefold_file, target): + """Generates a C++ source file that contains a Unicode case folding + function. + + The case folding function contains a switch statement with cases for every + Unicode codepoint that has a case folding mapping. + """ + out = open(target, "w") + + out.write(getCopyrightNotice()) + out.write(include("mongo/db/fts/unicode/codepoints.h")) + out.write("\n") + out.write(openNamespaces()) + + case_mappings = {} + + cf_file = open(unicode_casefold_file, 'r') + + for line in cf_file: + # Filter out blank lines and lines that start with # + data = line[:line.find('#')] + if(data == ""): + continue + + # Parse the data on the line + values = data.split("; ") + assert(len(values) == 4) + + status = values[1] + if status == 'C' or status == 'S': + # We only include the "Common" and "Simple" mappings. "Full" case + # folding mappings expand certain letters to multiple codepoints, + # which we currently do not support. + original_codepoint = int(values[0], 16) + codepoint_mapping = int(values[2], 16) + case_mappings[original_codepoint] = codepoint_mapping + + out.write("""char32_t codepointToLower(char32_t codepoint, CaseFoldMode \ +mode) { + if (mode == CaseFoldMode::kTurkish) { + if (codepoint == 0x049) { // I -> ı + return 0x131; + } else if (codepoint == 0x130) { // İ -> i + return 0x069; + } + } + + switch (codepoint) {\n""") + + mappings_list = [] + + for mapping in case_mappings: + mappings_list.append((mapping, case_mappings[mapping])) + + sorted_mappings = sorted(mappings_list, key=lambda mapping: mapping[0]) + + for mapping in sorted_mappings: + out.write("\ + case " + str(hex(mapping[0])) + ": return " + \ + str(hex(mapping[1])) +";\n") + + out.write("\ + default: return codepoint;\n }\n}") + + out.write(closeNamespaces()) + +if __name__ == "__main__": + generate(sys.argv[1], sys.argv[2]) diff --git a/src/mongo/db/fts/unicode/gen_delimiter_list.py b/src/mongo/db/fts/unicode/gen_delimiter_list.py new file mode 100644 index 00000000000..52b79544c6b --- /dev/null +++ b/src/mongo/db/fts/unicode/gen_delimiter_list.py @@ -0,0 +1,80 @@ + #!/usr/bin/python + # -*- coding: utf-8 -*- +import sys + +from gen_helper import getCopyrightNotice, openNamespaces, closeNamespaces, \ + include + +def generate(unicode_proplist_file, target): + """Generates a C++ source file that contains a delimiter checking function. + + The delimiter checking function contains a switch statement with cases for + every delimiter in the Unicode Character Database with the properties + specified in delim_properties. + """ + out = open(target, "w") + + out.write(getCopyrightNotice()) + out.write(include("mongo/db/fts/unicode/codepoints.h")) + out.write("\n") + out.write(openNamespaces()) + + delim_codepoints = set() + + proplist_file = open(unicode_proplist_file, 'r') + + delim_properties = ["White_Space", + "Dash", + "Hyphen", + "Quotation_Mark", + "Terminal_Punctuation", + "Pattern_Syntax", + "STerm"] + + for line in proplist_file: + # Filter out blank lines and lines that start with # + data = line[:line.find('#')] + if(data == ""): + continue + + # Parse the data on the line + values = data.split("; ") + assert(len(values) == 2) + + uproperty = values[1].strip() + if uproperty in delim_properties: + if len(values[0].split('..')) == 2: + codepoint_range = values[0].split('..') + + start = int(codepoint_range[0], 16) + end = int(codepoint_range[1], 16) + 1 + + for i in range(start, end): + if i not in delim_codepoints: + delim_codepoints.add(i) + else: + if int(values[0], 16) not in delim_codepoints: + delim_codepoints.add(int(values[0], 16)) + + # As of Unicode 8.0.0, all of the delimiters we used for text index + # version 2 are also in the list. + + out.write("""bool codepointIsDelimiter(char32_t codepoint, \ +DelimiterListLanguage lang) { + if (lang == DelimiterListLanguage::kEnglish && codepoint == '\\'') { + return false; + } + + switch (codepoint) {\n""") + + for delim in sorted(delim_codepoints): + out.write("\ + case " + str(hex(delim)) + ": return true;\n") + + out.write("\ + default: return false;\n }\n}") + + out.write(closeNamespaces()) + +if __name__ == "__main__": + generate(sys.argv[1], sys.argv[2]) diff --git a/src/mongo/db/fts/unicode/gen_diacritic_list.py b/src/mongo/db/fts/unicode/gen_diacritic_list.py new file mode 100644 index 00000000000..260a85307af --- /dev/null +++ b/src/mongo/db/fts/unicode/gen_diacritic_list.py @@ -0,0 +1,63 @@ + #!/usr/bin/python + # -*- coding: utf-8 -*- +import sys + +from gen_helper import getCopyrightNotice, openNamespaces, closeNamespaces, \ + include + +def generate(unicode_proplist_file, target): + """Generates a C++ source file that contains a diacritic checking function. + + The diacritic checking function contains a switch statement with cases for + every diacritic in the Unicode Character Database. + """ + out = open(target, "w") + + out.write(getCopyrightNotice()) + out.write(include("mongo/db/fts/unicode/codepoints.h")) + out.write("\n") + out.write(openNamespaces()) + + diacritics = set() + + proplist_file = open(unicode_proplist_file, 'r') + + for line in proplist_file: + # Filter out blank lines and lines that start with # + data = line[:line.find('#')] + if(data == ""): + continue + + # Parse the data on the line + values = data.split("; ") + assert(len(values) == 2) + + uproperty = values[1].strip() + if uproperty in "Diacritic": + if len(values[0].split('..')) == 2: + codepoint_range = values[0].split('..') + + start = int(codepoint_range[0], 16) + end = int(codepoint_range[1], 16) + 1 + + for i in range(start, end): + if i not in diacritics: + diacritics.add(i) + else: + if int(values[0], 16) not in diacritics: + diacritics.add(int(values[0], 16)) + + out.write("""bool codepointIsDiacritic(char32_t codepoint) { + switch (codepoint) {\n""") + + for diacritic in sorted(diacritics): + out.write("\ + case " + str(hex(diacritic)) + ": return true;\n") + + out.write("\ + default: return false;\n }\n}") + + out.write(closeNamespaces()) + +if __name__ == "__main__": + generate(sys.argv[1], sys.argv[2]) diff --git a/src/mongo/db/fts/unicode/gen_diacritic_map.py b/src/mongo/db/fts/unicode/gen_diacritic_map.py new file mode 100644 index 00000000000..d002a1acbac --- /dev/null +++ b/src/mongo/db/fts/unicode/gen_diacritic_map.py @@ -0,0 +1,105 @@ + #!/usr/bin/python + # -*- coding: utf-8 -*- +import sys +from unicodedata import normalize, category, unidata_version + +from gen_helper import getCopyrightNotice, openNamespaces, closeNamespaces, \ + include + +diacritics = set() + +def load_diacritics(unicode_proplist_file): + proplist_file = open(unicode_proplist_file, 'r') + + for line in proplist_file: + # Filter out blank lines and lines that start with # + data = line[:line.find('#')] + if(data == ""): + continue + + # Parse the data on the line + values = data.split("; ") + assert(len(values) == 2) + + uproperty = values[1].strip() + if uproperty == "Diacritic": + if len(values[0].split('..')) == 2: + codepoint_range = values[0].split('..') + + start = int(codepoint_range[0], 16) + end = int(codepoint_range[1], 16) + 1 + + for i in range(start, end): + if i not in diacritics: + diacritics.add(i) + else: + if int(values[0], 16) not in diacritics: + diacritics.add(int(values[0], 16)) + +diacritic_mappings = {} + +def add_diacritic_mapping(codepoint): + # a : original unicode character + # d : decomposed unicode character + # r : decomposed unicode character with diacritics removed + # c : recomposed unicode character with diacritics removed + a = chr(codepoint) + d = normalize('NFD', a) + r = u'' + + for i in range(len(d)): + if ord(d[i]) not in diacritics: + r += d[i] + + c = normalize('NFC', r) + + # Only use mappings where the final recomposed form is a single codepoint + if (a != c and len(c) == 1): + diacritic_mappings[codepoint] = ord(c[0]) + +def add_diacritic_range(start, end): + for x in range(start, end + 1): + add_diacritic_mapping(x) + +def generate(target): + """Generates a C++ source file that contains a diacritic removal mapping + function. + + The delimiter checking function contains a switch statement with cases for + every character in Unicode that has a removable combining diacritical mark. + """ + out = open(target, "w") + + out.write(getCopyrightNotice()) + out.write(include("mongo/db/fts/unicode/codepoints.h")) + out.write("\n") + out.write(openNamespaces()) + + # Map diacritics from 0 to the maximum Unicode codepoint + add_diacritic_range(0x0000, 0x10FFFF) + + out.write("""char32_t codepointRemoveDiacritics(char32_t codepoint) { + switch (codepoint) {\n""") + + mappings_list = [] + + for mapping in diacritic_mappings: + mappings_list.append((mapping, diacritic_mappings[mapping])) + + sorted_mappings = sorted(mappings_list, key=lambda mapping: mapping[0]) + + for mapping in sorted_mappings: + out.write(" case " + str(hex(mapping[0])) + ": return " + \ + str(hex(mapping[1])) +";\n") + + out.write(" default: return codepoint;\n }\n}") + + out.write(closeNamespaces()) + +if __name__ == "__main__": + if(unidata_version != '8.0.0'): + print("""ERROR: This script must be run with a version of Python that \ + contains the Unicode 8.0.0 Character Database.""") + sys.exit(1) + load_diacritics(sys.argv[1]) + generate(sys.argv[2]) diff --git a/src/mongo/db/fts/unicode/gen_helper.py b/src/mongo/db/fts/unicode/gen_helper.py new file mode 100644 index 00000000000..d3698e7894e --- /dev/null +++ b/src/mongo/db/fts/unicode/gen_helper.py @@ -0,0 +1,39 @@ +def getCopyrightNotice(): + return """/** + * Copyright (C) 2015 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + * + * THIS IS A GENERATED FILE, DO NOT MODIFY. + */\n\n""" + +def openNamespaces(): + return "namespace mongo {\nnamespace unicode {\n\n" + +def closeNamespaces(): + return "\n} // namespace unicode\n} // namespace mongo\n" + +def include(header): + return '#include "' + header + '"\n' diff --git a/src/mongo/db/fts/unicode/string.cpp b/src/mongo/db/fts/unicode/string.cpp new file mode 100644 index 00000000000..24c6ff8027e --- /dev/null +++ b/src/mongo/db/fts/unicode/string.cpp @@ -0,0 +1,157 @@ +/** + * Copyright (C) 2015 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/db/fts/unicode/string.h" + +#include <algorithm> + +#include "mongo/shell/linenoise_utf8.h" +#include "mongo/util/assert_util.h" + +namespace mongo { +namespace unicode { + +using linenoise_utf8::copyString32to8; +using linenoise_utf8::copyString8to32; + +using std::u32string; + +String::String(const StringData utf8_src) { + // _data is the target, resize it so that it's guaranteed to fit all of the input characters, + // plus a null character if there isn't one. + _data.resize(utf8_src.size() + 1); + + int result = 0; + size_t resultSize = 0; + + // Although utf8_src.rawData() is not guaranteed to be null-terminated, copyString8to32 won't + // access bad memory because it is limited by the size of its output buffer, which is set to the + // size of utf8_src. + copyString8to32(&_data[0], + reinterpret_cast<const unsigned char*>(&utf8_src.rawData()[0]), + _data.size(), + resultSize, + result); + + uassert(28755, "text contains invalid UTF-8", result == 0); + + // Resize _data so it is only as big as what it contains. + _data.resize(resultSize); +} + +String::String(u32string&& src) : _data(std::move(src)) {} + +std::string String::toString() const { + // output is the target, resize it so that it's guaranteed to fit all of the input characters, + // plus a null character if there isn't one. + std::string output(_data.size() * 4 + 1, '\0'); + size_t resultSize = + copyString32to8(reinterpret_cast<unsigned char*>(&output[0]), &_data[0], output.size()); + + // Resize output so it is only as large as what it contains. + output.resize(resultSize); + return output; +} + +size_t String::size() const { + return _data.size(); +} + +const char32_t& String::operator[](int i) const { + return _data[i]; +} + +String String::substr(size_t pos, size_t len) const { + return String(_data.substr(pos, len)); +} + +String String::toLower(CaseFoldMode mode) const { + u32string newdata(_data.size(), 0); + auto index = 0; + for (auto codepoint : _data) { + newdata[index++] = codepointToLower(codepoint, mode); + } + + return String(std::move(newdata)); +} + +String String::removeDiacritics() const { + u32string newdata(_data.size(), 0); + auto index = 0; + for (auto codepoint : _data) { + if (!codepointIsDiacritic(codepoint)) { + newdata[index++] = codepointRemoveDiacritics(codepoint); + } + } + + newdata.resize(index); + return String(std::move(newdata)); +} + +bool String::substrMatch(const String& str, + const String& find, + SubstrMatchOptions options, + CaseFoldMode cfMode) { + // In Turkish, lowercasing needs to be applied first because the letter İ has a different case + // folding mapping than the letter I, but removing diacritics removes the dot from İ. + if (cfMode == CaseFoldMode::kTurkish) { + String cleanStr = str.toLower(cfMode); + String cleanFind = find.toLower(cfMode); + return substrMatch(cleanStr, cleanFind, options | kCaseSensitive, CaseFoldMode::kNormal); + } + + if (options & kDiacriticSensitive) { + if (options & kCaseSensitive) { + // Case sensitive and diacritic sensitive. + return std::search(str._data.cbegin(), + str._data.cend(), + find._data.cbegin(), + find._data.cend(), + [&](char32_t c1, char32_t c2) { return (c1 == c2); }) != + str._data.cend(); + } + + // Case insensitive and diacritic sensitive. + return std::search(str._data.cbegin(), + str._data.cend(), + find._data.cbegin(), + find._data.cend(), + [&](char32_t c1, char32_t c2) { + return (codepointToLower(c1, cfMode) == + codepointToLower(c2, cfMode)); + }) != str._data.cend(); + } + + String cleanStr = str.removeDiacritics(); + String cleanFind = find.removeDiacritics(); + + return substrMatch(cleanStr, cleanFind, options | kDiacriticSensitive, cfMode); +} + +} // namespace unicode +} // namespace mongo diff --git a/src/mongo/db/fts/unicode/string.h b/src/mongo/db/fts/unicode/string.h new file mode 100644 index 00000000000..1fa77af2f3f --- /dev/null +++ b/src/mongo/db/fts/unicode/string.h @@ -0,0 +1,139 @@ +/** + * Copyright (C) 2015 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include <cstdint> +#include <string> + +#include "mongo/base/string_data.h" +#include "mongo/db/fts/unicode/codepoints.h" + +namespace mongo { +namespace unicode { + +/** + * A string class that support basic Unicode functionality such as removing diacritic marks, and + * lowercasing. The String is constructed with UTF-8 source data, and is converted under the hood to + * a u32string (UTF-32) so operations can be easily done with individual Unicode code points. + */ +class String { +public: + String() = default; + +#if defined(_MSC_VER) && _MSC_VER < 1900 + String(String&& other) : _data(std::move(other._data)) {} + + String& operator=(String&& other) { + _data = std::move(other._data); + return *this; + } +#endif + + /** + * Construct a String with UTF-8 source data (supports standard C++ string literals, and + * std::strings). + */ + explicit String(StringData utf8_src); + + /** + * Return a lowercased version of the String instance using the Unicode data in u_data.h. + */ + String toLower(CaseFoldMode mode = CaseFoldMode::kNormal) const; + + /** + * Returns a version of the String instance with diacritics and combining marks removed. + */ + String removeDiacritics() const; + + /** + * Returns a substring of the String instance, using the same semantics as std::string::substr. + */ + String substr(size_t begin, size_t end) const; + + /** + * Returns a UTF-8 encoded std::string version of the String instance. + */ + std::string toString() const; + + /** + * Returns the number Unicode codepoints in the String. + */ + size_t size() const; + + /** + * Returns the Unicode codepoint at index i of the String. + */ + const char32_t& operator[](int i) const; + + /** + * Options for the substrMatch method. + */ + using SubstrMatchOptions = uint8_t; + + /** + * No options (case insensitive and diacritic insensitive). + */ + static const SubstrMatchOptions kNone = 0; + + /** + * Perform case sensitive substring match. + */ + static const SubstrMatchOptions kCaseSensitive = 1 << 0; + + /** + * Perform diacritic sensitive substring match. + */ + static const SubstrMatchOptions kDiacriticSensitive = 1 << 1; + + /** + * Search the string 'str' for the string 'find'. If 'find' exists in 'str', return true, else + * return false. Optionally searches can be made case sensitive and diacritic insensitive. If + * the search is case insensitive, non-Turkish case folding is used unless the + * CaseFoldMode::Turkish is passed to mode. + */ + static bool substrMatch(const String& str, + const String& find, + SubstrMatchOptions options, + CaseFoldMode mode = CaseFoldMode::kNormal); + +private: + /** + * Private constructor used by substr, toLower, and removeDiacritics to build a String from + * UTF-32 data. + */ + String(std::u32string&& src); + + /** + * The underlying UTF-32 data. + */ + std::u32string _data; +}; + +} // namespace unicode +} // namespace mongo diff --git a/src/mongo/db/fts/unicode/string_test.cpp b/src/mongo/db/fts/unicode/string_test.cpp new file mode 100644 index 00000000000..9354f7bf25c --- /dev/null +++ b/src/mongo/db/fts/unicode/string_test.cpp @@ -0,0 +1,187 @@ +/** + * Copyright (C) 2015 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/db/fts/unicode/string.h" +#include "mongo/shell/linenoise_utf8.h" +#include "mongo/unittest/unittest.h" +#include "mongo/util/text.h" + +#ifdef MSC_VER +// Microsoft VS 2013 does not handle UTF-8 strings in char literal strings, error C4566 +// The Microsoft compiler can be tricked into using UTF-8 strings as follows: +// 1. The file has a UTF-8 BOM +// 2. The string literal is a wide character string literal (ie, prefixed with L) +// at this point. +#define UTF8(x) toUtf8String(L##x) +#else +#define UTF8(x) x +#endif + +namespace mongo { +namespace unicode { + +using linenoise_utf8::copyString32to8; + +TEST(UnicodeString, RemoveDiacritics) { + // NFC Normalized Text. + String test1 = String(UTF8("¿CUÁNTOS AÑOS TIENES TÚ?")); + + // NFD Normalized Text ("Café"). + const char test2[] = {'C', 'a', 'f', 'e', static_cast<char>(0xcc), static_cast<char>(0x81), 0}; + + ASSERT_EQUALS(UTF8("¿CUANTOS ANOS TIENES TU?"), test1.removeDiacritics().toString()); + ASSERT_EQUALS(UTF8("Cafe"), String(test2).removeDiacritics().toString()); +} + +TEST(UnicodeString, CaseFolding) { + String test1 = String(UTF8("СКОЛЬКО ТЕБЕ ЛЕТ?")); + String test2 = String(UTF8("¿CUÁNTOS AÑOS TIENES TÚ?")); + + ASSERT_EQUALS(UTF8("сколько тебе лет?"), test1.toLower().toString()); + ASSERT_EQUALS(UTF8("¿cuántos años tienes tú?"), test2.toLower().toString()); +} + +TEST(UnicodeString, CaseFoldingTurkish) { + String test1 = String(UTF8("KAC YASINDASINIZ")); + String test2 = String(UTF8("KAC YASİNDASİNİZ")); + + ASSERT_EQUALS(UTF8("kac yasındasınız"), test1.toLower(CaseFoldMode::kTurkish).toString()); + ASSERT_EQUALS(UTF8("kac yasindasiniz"), test2.toLower(CaseFoldMode::kTurkish).toString()); +} + +TEST(UnicodeString, CaseFoldingAndRemoveDiacritics) { + // NFC Normalized Text. + String test1 = String(UTF8("Πόσο χρονών είσαι?")); + String test2 = String(UTF8("¿CUÁNTOS AÑOS TIENES TÚ?")); + + // NFD Normalized Text ("CAFÉ"). + const char test3[] = {'C', 'A', 'F', 'E', static_cast<char>(0xcc), static_cast<char>(0x81), 0}; + + ASSERT_EQUALS(UTF8("ποσο χρονων εισαι?"), test1.toLower().removeDiacritics().toString()); + ASSERT_EQUALS(UTF8("¿cuantos anos tienes tu?"), test2.toLower().removeDiacritics().toString()); + ASSERT_EQUALS(UTF8("cafe"), String(test3).toLower().removeDiacritics().toString()); +} + +TEST(UnicodeString, SubstringMatch) { + String str = String(UTF8("Одумайся! Престол свой сохрани; И ярость укроти.")); + + // Case insensitive & diacritic insensitive. + ASSERT(String::substrMatch(str, String(UTF8("ПРЁСТОЛ СВОИ")), String::kNone)); + ASSERT_FALSE(String::substrMatch(str, String(UTF8("Престол сохрани")), String::kNone)); + + // Case sensitive & diacritic insensitive. + ASSERT(String::substrMatch(str, String(UTF8("Одумаися!")), String::kCaseSensitive)); + ASSERT_FALSE(String::substrMatch(str, String(UTF8("одумайся!")), String::kCaseSensitive)); + + // Case insensitive & diacritic sensitive. + ASSERT(String::substrMatch(str, String(UTF8("одумайся!")), String::kDiacriticSensitive)); + ASSERT_FALSE(String::substrMatch(str, String(UTF8("Одумаися!")), String::kDiacriticSensitive)); + + // Case sensitive & diacritic sensitive. + ASSERT(String::substrMatch( + str, String(UTF8("Одумайся!")), String::kDiacriticSensitive | String::kCaseSensitive)); + ASSERT_FALSE(String::substrMatch( + str, String(UTF8("Одумаися!")), String::kDiacriticSensitive | String::kCaseSensitive)); +} + +TEST(UnicodeString, SubstringMatchTurkish) { + String str = String(UTF8("KAÇ YAŞINDASINIZ?")); + + // Case insensitive & diacritic insensitive. + ASSERT(String::substrMatch( + str, String(UTF8("yasındasınız")), String::kNone, CaseFoldMode::kTurkish)); + ASSERT_FALSE(String::substrMatch( + str, String(UTF8("yasindasiniz")), String::kNone, CaseFoldMode::kTurkish)); + + // Case insensitive & diacritic sensitive. + ASSERT(String::substrMatch( + str, String(UTF8("yaşındasınız")), String::kDiacriticSensitive, CaseFoldMode::kTurkish)); + ASSERT_FALSE(String::substrMatch( + str, String(UTF8("yaşindasiniz")), String::kDiacriticSensitive, CaseFoldMode::kTurkish)); +} + +TEST(UnicodeString, BadUTF8) { + // Overlong. + const char invalid1[] = {static_cast<char>(0xC0), static_cast<char>(0xAF), 0}; + + // Invalid code positions. + const char invalid2[] = { + static_cast<char>(0xED), static_cast<char>(0xA0), static_cast<char>(0x80), 0}; + const char invalid3[] = { + static_cast<char>(0xC2), static_cast<char>(0x41), static_cast<char>(0x42), 0}; + const char invalid4[] = {static_cast<char>(0x61), + static_cast<char>(0xF1), + static_cast<char>(0x80), + static_cast<char>(0x80), + static_cast<char>(0xE1), + static_cast<char>(0x80), + static_cast<char>(0xC2), + static_cast<char>(0x62), + static_cast<char>(0x80), + static_cast<char>(0x63), + static_cast<char>(0x80), + static_cast<char>(0xBF), + static_cast<char>(0x64), + 0}; + + ASSERT_THROWS(String test1(invalid1), AssertionException); + ASSERT_THROWS(String test2(invalid2), AssertionException); + ASSERT_THROWS(String test3(invalid3), AssertionException); + ASSERT_THROWS(String test4(invalid4), AssertionException); +} + +TEST(UnicodeString, UTF32ToUTF8) { + std::u32string original; + original.push_back(0x004D); + original.push_back(0x0430); + original.push_back(0x4E8C); + original.push_back(0x10302); + original.push_back(0); + + std::string expected_result; + expected_result.push_back(0x4D); + expected_result.push_back(0xD0); + expected_result.push_back(0xB0); + expected_result.push_back(0xE4); + expected_result.push_back(0xBA); + expected_result.push_back(0x8C); + expected_result.push_back(0xF0); + expected_result.push_back(0x90); + expected_result.push_back(0x8C); + expected_result.push_back(0x82); + expected_result.push_back(0); + + std::string result(11, '\0'); + + copyString32to8(reinterpret_cast<unsigned char*>(&result[0]), &original[0], 11); + + ASSERT_EQUALS(expected_result, result); +} + +} // namespace unicode +} // namespace mongo diff --git a/src/mongo/shell/linenoise_utf8.h b/src/mongo/shell/linenoise_utf8.h index 4bd4c2bdc7e..aab3e6f73f2 100644 --- a/src/mongo/shell/linenoise_utf8.h +++ b/src/mongo/shell/linenoise_utf8.h @@ -31,10 +31,14 @@ #include <memory> #include <string.h> +#if defined(_MSC_VER) && _MSC_VER < 1900 +#include <string> +#endif + namespace linenoise_utf8 { typedef unsigned char UChar8; // UTF-8 octet -typedef unsigned int UChar32; // Unicode code point +typedef char32_t UChar32; // Unicode code point // Error bits (or-ed together) returned from utf8toUChar32string // diff --git a/src/third_party/unicode-8.0.0/CaseFolding.txt b/src/third_party/unicode-8.0.0/CaseFolding.txt new file mode 100644 index 00000000000..0197a6c40fb --- /dev/null +++ b/src/third_party/unicode-8.0.0/CaseFolding.txt @@ -0,0 +1,1414 @@ +# CaseFolding-8.0.0.txt +# Date: 2015-01-13, 18:16:36 GMT [MD] +# +# Unicode Character Database +# Copyright (c) 1991-2015 Unicode, Inc. +# For terms of use, see http://www.unicode.org/terms_of_use.html +# For documentation, see http://www.unicode.org/reports/tr44/ +# +# Case Folding Properties +# +# This file is a supplement to the UnicodeData file. +# It provides a case folding mapping generated from the Unicode Character Database. +# If all characters are mapped according to the full mapping below, then +# case differences (according to UnicodeData.txt and SpecialCasing.txt) +# are eliminated. +# +# The data supports both implementations that require simple case foldings +# (where string lengths don't change), and implementations that allow full case folding +# (where string lengths may grow). Note that where they can be supported, the +# full case foldings are superior: for example, they allow "MASSE" and "Maße" to match. +# +# All code points not listed in this file map to themselves. +# +# NOTE: case folding does not preserve normalization formats! +# +# For information on case folding, including how to have case folding +# preserve normalization formats, see Section 3.13 Default Case Algorithms in +# The Unicode Standard. +# +# ================================================================================ +# Format +# ================================================================================ +# The entries in this file are in the following machine-readable format: +# +# <code>; <status>; <mapping>; # <name> +# +# The status field is: +# C: common case folding, common mappings shared by both simple and full mappings. +# F: full case folding, mappings that cause strings to grow in length. Multiple characters are separated by spaces. +# S: simple case folding, mappings to single characters where different from F. +# T: special case for uppercase I and dotted uppercase I +# - For non-Turkic languages, this mapping is normally not used. +# - For Turkic languages (tr, az), this mapping can be used instead of the normal mapping for these characters. +# Note that the Turkic mappings do not maintain canonical equivalence without additional processing. +# See the discussions of case mapping in the Unicode Standard for more information. +# +# Usage: +# A. To do a simple case folding, use the mappings with status C + S. +# B. To do a full case folding, use the mappings with status C + F. +# +# The mappings with status T can be used or omitted depending on the desired case-folding +# behavior. (The default option is to exclude them.) +# +# ================================================================= + +# Property: Case_Folding + +# All code points not explicitly listed for Case_Folding +# have the value C for the status field, and the code point itself for the mapping field. + +# ================================================================= +0041; C; 0061; # LATIN CAPITAL LETTER A +0042; C; 0062; # LATIN CAPITAL LETTER B +0043; C; 0063; # LATIN CAPITAL LETTER C +0044; C; 0064; # LATIN CAPITAL LETTER D +0045; C; 0065; # LATIN CAPITAL LETTER E +0046; C; 0066; # LATIN CAPITAL LETTER F +0047; C; 0067; # LATIN CAPITAL LETTER G +0048; C; 0068; # LATIN CAPITAL LETTER H +0049; C; 0069; # LATIN CAPITAL LETTER I +0049; T; 0131; # LATIN CAPITAL LETTER I +004A; C; 006A; # LATIN CAPITAL LETTER J +004B; C; 006B; # LATIN CAPITAL LETTER K +004C; C; 006C; # LATIN CAPITAL LETTER L +004D; C; 006D; # LATIN CAPITAL LETTER M +004E; C; 006E; # LATIN CAPITAL LETTER N +004F; C; 006F; # LATIN CAPITAL LETTER O +0050; C; 0070; # LATIN CAPITAL LETTER P +0051; C; 0071; # LATIN CAPITAL LETTER Q +0052; C; 0072; # LATIN CAPITAL LETTER R +0053; C; 0073; # LATIN CAPITAL LETTER S +0054; C; 0074; # LATIN CAPITAL LETTER T +0055; C; 0075; # LATIN CAPITAL LETTER U +0056; C; 0076; # LATIN CAPITAL LETTER V +0057; C; 0077; # LATIN CAPITAL LETTER W +0058; C; 0078; # LATIN CAPITAL LETTER X +0059; C; 0079; # LATIN CAPITAL LETTER Y +005A; C; 007A; # LATIN CAPITAL LETTER Z +00B5; C; 03BC; # MICRO SIGN +00C0; C; 00E0; # LATIN CAPITAL LETTER A WITH GRAVE +00C1; C; 00E1; # LATIN CAPITAL LETTER A WITH ACUTE +00C2; C; 00E2; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX +00C3; C; 00E3; # LATIN CAPITAL LETTER A WITH TILDE +00C4; C; 00E4; # LATIN CAPITAL LETTER A WITH DIAERESIS +00C5; C; 00E5; # LATIN CAPITAL LETTER A WITH RING ABOVE +00C6; C; 00E6; # LATIN CAPITAL LETTER AE +00C7; C; 00E7; # LATIN CAPITAL LETTER C WITH CEDILLA +00C8; C; 00E8; # LATIN CAPITAL LETTER E WITH GRAVE +00C9; C; 00E9; # LATIN CAPITAL LETTER E WITH ACUTE +00CA; C; 00EA; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX +00CB; C; 00EB; # LATIN CAPITAL LETTER E WITH DIAERESIS +00CC; C; 00EC; # LATIN CAPITAL LETTER I WITH GRAVE +00CD; C; 00ED; # LATIN CAPITAL LETTER I WITH ACUTE +00CE; C; 00EE; # LATIN CAPITAL LETTER I WITH CIRCUMFLEX +00CF; C; 00EF; # LATIN CAPITAL LETTER I WITH DIAERESIS +00D0; C; 00F0; # LATIN CAPITAL LETTER ETH +00D1; C; 00F1; # LATIN CAPITAL LETTER N WITH TILDE +00D2; C; 00F2; # LATIN CAPITAL LETTER O WITH GRAVE +00D3; C; 00F3; # LATIN CAPITAL LETTER O WITH ACUTE +00D4; C; 00F4; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX +00D5; C; 00F5; # LATIN CAPITAL LETTER O WITH TILDE +00D6; C; 00F6; # LATIN CAPITAL LETTER O WITH DIAERESIS +00D8; C; 00F8; # LATIN CAPITAL LETTER O WITH STROKE +00D9; C; 00F9; # LATIN CAPITAL LETTER U WITH GRAVE +00DA; C; 00FA; # LATIN CAPITAL LETTER U WITH ACUTE +00DB; C; 00FB; # LATIN CAPITAL LETTER U WITH CIRCUMFLEX +00DC; C; 00FC; # LATIN CAPITAL LETTER U WITH DIAERESIS +00DD; C; 00FD; # LATIN CAPITAL LETTER Y WITH ACUTE +00DE; C; 00FE; # LATIN CAPITAL LETTER THORN +00DF; F; 0073 0073; # LATIN SMALL LETTER SHARP S +0100; C; 0101; # LATIN CAPITAL LETTER A WITH MACRON +0102; C; 0103; # LATIN CAPITAL LETTER A WITH BREVE +0104; C; 0105; # LATIN CAPITAL LETTER A WITH OGONEK +0106; C; 0107; # LATIN CAPITAL LETTER C WITH ACUTE +0108; C; 0109; # LATIN CAPITAL LETTER C WITH CIRCUMFLEX +010A; C; 010B; # LATIN CAPITAL LETTER C WITH DOT ABOVE +010C; C; 010D; # LATIN CAPITAL LETTER C WITH CARON +010E; C; 010F; # LATIN CAPITAL LETTER D WITH CARON +0110; C; 0111; # LATIN CAPITAL LETTER D WITH STROKE +0112; C; 0113; # LATIN CAPITAL LETTER E WITH MACRON +0114; C; 0115; # LATIN CAPITAL LETTER E WITH BREVE +0116; C; 0117; # LATIN CAPITAL LETTER E WITH DOT ABOVE +0118; C; 0119; # LATIN CAPITAL LETTER E WITH OGONEK +011A; C; 011B; # LATIN CAPITAL LETTER E WITH CARON +011C; C; 011D; # LATIN CAPITAL LETTER G WITH CIRCUMFLEX +011E; C; 011F; # LATIN CAPITAL LETTER G WITH BREVE +0120; C; 0121; # LATIN CAPITAL LETTER G WITH DOT ABOVE +0122; C; 0123; # LATIN CAPITAL LETTER G WITH CEDILLA +0124; C; 0125; # LATIN CAPITAL LETTER H WITH CIRCUMFLEX +0126; C; 0127; # LATIN CAPITAL LETTER H WITH STROKE +0128; C; 0129; # LATIN CAPITAL LETTER I WITH TILDE +012A; C; 012B; # LATIN CAPITAL LETTER I WITH MACRON +012C; C; 012D; # LATIN CAPITAL LETTER I WITH BREVE +012E; C; 012F; # LATIN CAPITAL LETTER I WITH OGONEK +0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE +0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE +0132; C; 0133; # LATIN CAPITAL LIGATURE IJ +0134; C; 0135; # LATIN CAPITAL LETTER J WITH CIRCUMFLEX +0136; C; 0137; # LATIN CAPITAL LETTER K WITH CEDILLA +0139; C; 013A; # LATIN CAPITAL LETTER L WITH ACUTE +013B; C; 013C; # LATIN CAPITAL LETTER L WITH CEDILLA +013D; C; 013E; # LATIN CAPITAL LETTER L WITH CARON +013F; C; 0140; # LATIN CAPITAL LETTER L WITH MIDDLE DOT +0141; C; 0142; # LATIN CAPITAL LETTER L WITH STROKE +0143; C; 0144; # LATIN CAPITAL LETTER N WITH ACUTE +0145; C; 0146; # LATIN CAPITAL LETTER N WITH CEDILLA +0147; C; 0148; # LATIN CAPITAL LETTER N WITH CARON +0149; F; 02BC 006E; # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE +014A; C; 014B; # LATIN CAPITAL LETTER ENG +014C; C; 014D; # LATIN CAPITAL LETTER O WITH MACRON +014E; C; 014F; # LATIN CAPITAL LETTER O WITH BREVE +0150; C; 0151; # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE +0152; C; 0153; # LATIN CAPITAL LIGATURE OE +0154; C; 0155; # LATIN CAPITAL LETTER R WITH ACUTE +0156; C; 0157; # LATIN CAPITAL LETTER R WITH CEDILLA +0158; C; 0159; # LATIN CAPITAL LETTER R WITH CARON +015A; C; 015B; # LATIN CAPITAL LETTER S WITH ACUTE +015C; C; 015D; # LATIN CAPITAL LETTER S WITH CIRCUMFLEX +015E; C; 015F; # LATIN CAPITAL LETTER S WITH CEDILLA +0160; C; 0161; # LATIN CAPITAL LETTER S WITH CARON +0162; C; 0163; # LATIN CAPITAL LETTER T WITH CEDILLA +0164; C; 0165; # LATIN CAPITAL LETTER T WITH CARON +0166; C; 0167; # LATIN CAPITAL LETTER T WITH STROKE +0168; C; 0169; # LATIN CAPITAL LETTER U WITH TILDE +016A; C; 016B; # LATIN CAPITAL LETTER U WITH MACRON +016C; C; 016D; # LATIN CAPITAL LETTER U WITH BREVE +016E; C; 016F; # LATIN CAPITAL LETTER U WITH RING ABOVE +0170; C; 0171; # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE +0172; C; 0173; # LATIN CAPITAL LETTER U WITH OGONEK +0174; C; 0175; # LATIN CAPITAL LETTER W WITH CIRCUMFLEX +0176; C; 0177; # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX +0178; C; 00FF; # LATIN CAPITAL LETTER Y WITH DIAERESIS +0179; C; 017A; # LATIN CAPITAL LETTER Z WITH ACUTE +017B; C; 017C; # LATIN CAPITAL LETTER Z WITH DOT ABOVE +017D; C; 017E; # LATIN CAPITAL LETTER Z WITH CARON +017F; C; 0073; # LATIN SMALL LETTER LONG S +0181; C; 0253; # LATIN CAPITAL LETTER B WITH HOOK +0182; C; 0183; # LATIN CAPITAL LETTER B WITH TOPBAR +0184; C; 0185; # LATIN CAPITAL LETTER TONE SIX +0186; C; 0254; # LATIN CAPITAL LETTER OPEN O +0187; C; 0188; # LATIN CAPITAL LETTER C WITH HOOK +0189; C; 0256; # LATIN CAPITAL LETTER AFRICAN D +018A; C; 0257; # LATIN CAPITAL LETTER D WITH HOOK +018B; C; 018C; # LATIN CAPITAL LETTER D WITH TOPBAR +018E; C; 01DD; # LATIN CAPITAL LETTER REVERSED E +018F; C; 0259; # LATIN CAPITAL LETTER SCHWA +0190; C; 025B; # LATIN CAPITAL LETTER OPEN E +0191; C; 0192; # LATIN CAPITAL LETTER F WITH HOOK +0193; C; 0260; # LATIN CAPITAL LETTER G WITH HOOK +0194; C; 0263; # LATIN CAPITAL LETTER GAMMA +0196; C; 0269; # LATIN CAPITAL LETTER IOTA +0197; C; 0268; # LATIN CAPITAL LETTER I WITH STROKE +0198; C; 0199; # LATIN CAPITAL LETTER K WITH HOOK +019C; C; 026F; # LATIN CAPITAL LETTER TURNED M +019D; C; 0272; # LATIN CAPITAL LETTER N WITH LEFT HOOK +019F; C; 0275; # LATIN CAPITAL LETTER O WITH MIDDLE TILDE +01A0; C; 01A1; # LATIN CAPITAL LETTER O WITH HORN +01A2; C; 01A3; # LATIN CAPITAL LETTER OI +01A4; C; 01A5; # LATIN CAPITAL LETTER P WITH HOOK +01A6; C; 0280; # LATIN LETTER YR +01A7; C; 01A8; # LATIN CAPITAL LETTER TONE TWO +01A9; C; 0283; # LATIN CAPITAL LETTER ESH +01AC; C; 01AD; # LATIN CAPITAL LETTER T WITH HOOK +01AE; C; 0288; # LATIN CAPITAL LETTER T WITH RETROFLEX HOOK +01AF; C; 01B0; # LATIN CAPITAL LETTER U WITH HORN +01B1; C; 028A; # LATIN CAPITAL LETTER UPSILON +01B2; C; 028B; # LATIN CAPITAL LETTER V WITH HOOK +01B3; C; 01B4; # LATIN CAPITAL LETTER Y WITH HOOK +01B5; C; 01B6; # LATIN CAPITAL LETTER Z WITH STROKE +01B7; C; 0292; # LATIN CAPITAL LETTER EZH +01B8; C; 01B9; # LATIN CAPITAL LETTER EZH REVERSED +01BC; C; 01BD; # LATIN CAPITAL LETTER TONE FIVE +01C4; C; 01C6; # LATIN CAPITAL LETTER DZ WITH CARON +01C5; C; 01C6; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON +01C7; C; 01C9; # LATIN CAPITAL LETTER LJ +01C8; C; 01C9; # LATIN CAPITAL LETTER L WITH SMALL LETTER J +01CA; C; 01CC; # LATIN CAPITAL LETTER NJ +01CB; C; 01CC; # LATIN CAPITAL LETTER N WITH SMALL LETTER J +01CD; C; 01CE; # LATIN CAPITAL LETTER A WITH CARON +01CF; C; 01D0; # LATIN CAPITAL LETTER I WITH CARON +01D1; C; 01D2; # LATIN CAPITAL LETTER O WITH CARON +01D3; C; 01D4; # LATIN CAPITAL LETTER U WITH CARON +01D5; C; 01D6; # LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON +01D7; C; 01D8; # LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE +01D9; C; 01DA; # LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON +01DB; C; 01DC; # LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE +01DE; C; 01DF; # LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON +01E0; C; 01E1; # LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON +01E2; C; 01E3; # LATIN CAPITAL LETTER AE WITH MACRON +01E4; C; 01E5; # LATIN CAPITAL LETTER G WITH STROKE +01E6; C; 01E7; # LATIN CAPITAL LETTER G WITH CARON +01E8; C; 01E9; # LATIN CAPITAL LETTER K WITH CARON +01EA; C; 01EB; # LATIN CAPITAL LETTER O WITH OGONEK +01EC; C; 01ED; # LATIN CAPITAL LETTER O WITH OGONEK AND MACRON +01EE; C; 01EF; # LATIN CAPITAL LETTER EZH WITH CARON +01F0; F; 006A 030C; # LATIN SMALL LETTER J WITH CARON +01F1; C; 01F3; # LATIN CAPITAL LETTER DZ +01F2; C; 01F3; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z +01F4; C; 01F5; # LATIN CAPITAL LETTER G WITH ACUTE +01F6; C; 0195; # LATIN CAPITAL LETTER HWAIR +01F7; C; 01BF; # LATIN CAPITAL LETTER WYNN +01F8; C; 01F9; # LATIN CAPITAL LETTER N WITH GRAVE +01FA; C; 01FB; # LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE +01FC; C; 01FD; # LATIN CAPITAL LETTER AE WITH ACUTE +01FE; C; 01FF; # LATIN CAPITAL LETTER O WITH STROKE AND ACUTE +0200; C; 0201; # LATIN CAPITAL LETTER A WITH DOUBLE GRAVE +0202; C; 0203; # LATIN CAPITAL LETTER A WITH INVERTED BREVE +0204; C; 0205; # LATIN CAPITAL LETTER E WITH DOUBLE GRAVE +0206; C; 0207; # LATIN CAPITAL LETTER E WITH INVERTED BREVE +0208; C; 0209; # LATIN CAPITAL LETTER I WITH DOUBLE GRAVE +020A; C; 020B; # LATIN CAPITAL LETTER I WITH INVERTED BREVE +020C; C; 020D; # LATIN CAPITAL LETTER O WITH DOUBLE GRAVE +020E; C; 020F; # LATIN CAPITAL LETTER O WITH INVERTED BREVE +0210; C; 0211; # LATIN CAPITAL LETTER R WITH DOUBLE GRAVE +0212; C; 0213; # LATIN CAPITAL LETTER R WITH INVERTED BREVE +0214; C; 0215; # LATIN CAPITAL LETTER U WITH DOUBLE GRAVE +0216; C; 0217; # LATIN CAPITAL LETTER U WITH INVERTED BREVE +0218; C; 0219; # LATIN CAPITAL LETTER S WITH COMMA BELOW +021A; C; 021B; # LATIN CAPITAL LETTER T WITH COMMA BELOW +021C; C; 021D; # LATIN CAPITAL LETTER YOGH +021E; C; 021F; # LATIN CAPITAL LETTER H WITH CARON +0220; C; 019E; # LATIN CAPITAL LETTER N WITH LONG RIGHT LEG +0222; C; 0223; # LATIN CAPITAL LETTER OU +0224; C; 0225; # LATIN CAPITAL LETTER Z WITH HOOK +0226; C; 0227; # LATIN CAPITAL LETTER A WITH DOT ABOVE +0228; C; 0229; # LATIN CAPITAL LETTER E WITH CEDILLA +022A; C; 022B; # LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON +022C; C; 022D; # LATIN CAPITAL LETTER O WITH TILDE AND MACRON +022E; C; 022F; # LATIN CAPITAL LETTER O WITH DOT ABOVE +0230; C; 0231; # LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON +0232; C; 0233; # LATIN CAPITAL LETTER Y WITH MACRON +023A; C; 2C65; # LATIN CAPITAL LETTER A WITH STROKE +023B; C; 023C; # LATIN CAPITAL LETTER C WITH STROKE +023D; C; 019A; # LATIN CAPITAL LETTER L WITH BAR +023E; C; 2C66; # LATIN CAPITAL LETTER T WITH DIAGONAL STROKE +0241; C; 0242; # LATIN CAPITAL LETTER GLOTTAL STOP +0243; C; 0180; # LATIN CAPITAL LETTER B WITH STROKE +0244; C; 0289; # LATIN CAPITAL LETTER U BAR +0245; C; 028C; # LATIN CAPITAL LETTER TURNED V +0246; C; 0247; # LATIN CAPITAL LETTER E WITH STROKE +0248; C; 0249; # LATIN CAPITAL LETTER J WITH STROKE +024A; C; 024B; # LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL +024C; C; 024D; # LATIN CAPITAL LETTER R WITH STROKE +024E; C; 024F; # LATIN CAPITAL LETTER Y WITH STROKE +0345; C; 03B9; # COMBINING GREEK YPOGEGRAMMENI +0370; C; 0371; # GREEK CAPITAL LETTER HETA +0372; C; 0373; # GREEK CAPITAL LETTER ARCHAIC SAMPI +0376; C; 0377; # GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA +037F; C; 03F3; # GREEK CAPITAL LETTER YOT +0386; C; 03AC; # GREEK CAPITAL LETTER ALPHA WITH TONOS +0388; C; 03AD; # GREEK CAPITAL LETTER EPSILON WITH TONOS +0389; C; 03AE; # GREEK CAPITAL LETTER ETA WITH TONOS +038A; C; 03AF; # GREEK CAPITAL LETTER IOTA WITH TONOS +038C; C; 03CC; # GREEK CAPITAL LETTER OMICRON WITH TONOS +038E; C; 03CD; # GREEK CAPITAL LETTER UPSILON WITH TONOS +038F; C; 03CE; # GREEK CAPITAL LETTER OMEGA WITH TONOS +0390; F; 03B9 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS +0391; C; 03B1; # GREEK CAPITAL LETTER ALPHA +0392; C; 03B2; # GREEK CAPITAL LETTER BETA +0393; C; 03B3; # GREEK CAPITAL LETTER GAMMA +0394; C; 03B4; # GREEK CAPITAL LETTER DELTA +0395; C; 03B5; # GREEK CAPITAL LETTER EPSILON +0396; C; 03B6; # GREEK CAPITAL LETTER ZETA +0397; C; 03B7; # GREEK CAPITAL LETTER ETA +0398; C; 03B8; # GREEK CAPITAL LETTER THETA +0399; C; 03B9; # GREEK CAPITAL LETTER IOTA +039A; C; 03BA; # GREEK CAPITAL LETTER KAPPA +039B; C; 03BB; # GREEK CAPITAL LETTER LAMDA +039C; C; 03BC; # GREEK CAPITAL LETTER MU +039D; C; 03BD; # GREEK CAPITAL LETTER NU +039E; C; 03BE; # GREEK CAPITAL LETTER XI +039F; C; 03BF; # GREEK CAPITAL LETTER OMICRON +03A0; C; 03C0; # GREEK CAPITAL LETTER PI +03A1; C; 03C1; # GREEK CAPITAL LETTER RHO +03A3; C; 03C3; # GREEK CAPITAL LETTER SIGMA +03A4; C; 03C4; # GREEK CAPITAL LETTER TAU +03A5; C; 03C5; # GREEK CAPITAL LETTER UPSILON +03A6; C; 03C6; # GREEK CAPITAL LETTER PHI +03A7; C; 03C7; # GREEK CAPITAL LETTER CHI +03A8; C; 03C8; # GREEK CAPITAL LETTER PSI +03A9; C; 03C9; # GREEK CAPITAL LETTER OMEGA +03AA; C; 03CA; # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA +03AB; C; 03CB; # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA +03B0; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS +03C2; C; 03C3; # GREEK SMALL LETTER FINAL SIGMA +03CF; C; 03D7; # GREEK CAPITAL KAI SYMBOL +03D0; C; 03B2; # GREEK BETA SYMBOL +03D1; C; 03B8; # GREEK THETA SYMBOL +03D5; C; 03C6; # GREEK PHI SYMBOL +03D6; C; 03C0; # GREEK PI SYMBOL +03D8; C; 03D9; # GREEK LETTER ARCHAIC KOPPA +03DA; C; 03DB; # GREEK LETTER STIGMA +03DC; C; 03DD; # GREEK LETTER DIGAMMA +03DE; C; 03DF; # GREEK LETTER KOPPA +03E0; C; 03E1; # GREEK LETTER SAMPI +03E2; C; 03E3; # COPTIC CAPITAL LETTER SHEI +03E4; C; 03E5; # COPTIC CAPITAL LETTER FEI +03E6; C; 03E7; # COPTIC CAPITAL LETTER KHEI +03E8; C; 03E9; # COPTIC CAPITAL LETTER HORI +03EA; C; 03EB; # COPTIC CAPITAL LETTER GANGIA +03EC; C; 03ED; # COPTIC CAPITAL LETTER SHIMA +03EE; C; 03EF; # COPTIC CAPITAL LETTER DEI +03F0; C; 03BA; # GREEK KAPPA SYMBOL +03F1; C; 03C1; # GREEK RHO SYMBOL +03F4; C; 03B8; # GREEK CAPITAL THETA SYMBOL +03F5; C; 03B5; # GREEK LUNATE EPSILON SYMBOL +03F7; C; 03F8; # GREEK CAPITAL LETTER SHO +03F9; C; 03F2; # GREEK CAPITAL LUNATE SIGMA SYMBOL +03FA; C; 03FB; # GREEK CAPITAL LETTER SAN +03FD; C; 037B; # GREEK CAPITAL REVERSED LUNATE SIGMA SYMBOL +03FE; C; 037C; # GREEK CAPITAL DOTTED LUNATE SIGMA SYMBOL +03FF; C; 037D; # GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL +0400; C; 0450; # CYRILLIC CAPITAL LETTER IE WITH GRAVE +0401; C; 0451; # CYRILLIC CAPITAL LETTER IO +0402; C; 0452; # CYRILLIC CAPITAL LETTER DJE +0403; C; 0453; # CYRILLIC CAPITAL LETTER GJE +0404; C; 0454; # CYRILLIC CAPITAL LETTER UKRAINIAN IE +0405; C; 0455; # CYRILLIC CAPITAL LETTER DZE +0406; C; 0456; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I +0407; C; 0457; # CYRILLIC CAPITAL LETTER YI +0408; C; 0458; # CYRILLIC CAPITAL LETTER JE +0409; C; 0459; # CYRILLIC CAPITAL LETTER LJE +040A; C; 045A; # CYRILLIC CAPITAL LETTER NJE +040B; C; 045B; # CYRILLIC CAPITAL LETTER TSHE +040C; C; 045C; # CYRILLIC CAPITAL LETTER KJE +040D; C; 045D; # CYRILLIC CAPITAL LETTER I WITH GRAVE +040E; C; 045E; # CYRILLIC CAPITAL LETTER SHORT U +040F; C; 045F; # CYRILLIC CAPITAL LETTER DZHE +0410; C; 0430; # CYRILLIC CAPITAL LETTER A +0411; C; 0431; # CYRILLIC CAPITAL LETTER BE +0412; C; 0432; # CYRILLIC CAPITAL LETTER VE +0413; C; 0433; # CYRILLIC CAPITAL LETTER GHE +0414; C; 0434; # CYRILLIC CAPITAL LETTER DE +0415; C; 0435; # CYRILLIC CAPITAL LETTER IE +0416; C; 0436; # CYRILLIC CAPITAL LETTER ZHE +0417; C; 0437; # CYRILLIC CAPITAL LETTER ZE +0418; C; 0438; # CYRILLIC CAPITAL LETTER I +0419; C; 0439; # CYRILLIC CAPITAL LETTER SHORT I +041A; C; 043A; # CYRILLIC CAPITAL LETTER KA +041B; C; 043B; # CYRILLIC CAPITAL LETTER EL +041C; C; 043C; # CYRILLIC CAPITAL LETTER EM +041D; C; 043D; # CYRILLIC CAPITAL LETTER EN +041E; C; 043E; # CYRILLIC CAPITAL LETTER O +041F; C; 043F; # CYRILLIC CAPITAL LETTER PE +0420; C; 0440; # CYRILLIC CAPITAL LETTER ER +0421; C; 0441; # CYRILLIC CAPITAL LETTER ES +0422; C; 0442; # CYRILLIC CAPITAL LETTER TE +0423; C; 0443; # CYRILLIC CAPITAL LETTER U +0424; C; 0444; # CYRILLIC CAPITAL LETTER EF +0425; C; 0445; # CYRILLIC CAPITAL LETTER HA +0426; C; 0446; # CYRILLIC CAPITAL LETTER TSE +0427; C; 0447; # CYRILLIC CAPITAL LETTER CHE +0428; C; 0448; # CYRILLIC CAPITAL LETTER SHA +0429; C; 0449; # CYRILLIC CAPITAL LETTER SHCHA +042A; C; 044A; # CYRILLIC CAPITAL LETTER HARD SIGN +042B; C; 044B; # CYRILLIC CAPITAL LETTER YERU +042C; C; 044C; # CYRILLIC CAPITAL LETTER SOFT SIGN +042D; C; 044D; # CYRILLIC CAPITAL LETTER E +042E; C; 044E; # CYRILLIC CAPITAL LETTER YU +042F; C; 044F; # CYRILLIC CAPITAL LETTER YA +0460; C; 0461; # CYRILLIC CAPITAL LETTER OMEGA +0462; C; 0463; # CYRILLIC CAPITAL LETTER YAT +0464; C; 0465; # CYRILLIC CAPITAL LETTER IOTIFIED E +0466; C; 0467; # CYRILLIC CAPITAL LETTER LITTLE YUS +0468; C; 0469; # CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS +046A; C; 046B; # CYRILLIC CAPITAL LETTER BIG YUS +046C; C; 046D; # CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS +046E; C; 046F; # CYRILLIC CAPITAL LETTER KSI +0470; C; 0471; # CYRILLIC CAPITAL LETTER PSI +0472; C; 0473; # CYRILLIC CAPITAL LETTER FITA +0474; C; 0475; # CYRILLIC CAPITAL LETTER IZHITSA +0476; C; 0477; # CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT +0478; C; 0479; # CYRILLIC CAPITAL LETTER UK +047A; C; 047B; # CYRILLIC CAPITAL LETTER ROUND OMEGA +047C; C; 047D; # CYRILLIC CAPITAL LETTER OMEGA WITH TITLO +047E; C; 047F; # CYRILLIC CAPITAL LETTER OT +0480; C; 0481; # CYRILLIC CAPITAL LETTER KOPPA +048A; C; 048B; # CYRILLIC CAPITAL LETTER SHORT I WITH TAIL +048C; C; 048D; # CYRILLIC CAPITAL LETTER SEMISOFT SIGN +048E; C; 048F; # CYRILLIC CAPITAL LETTER ER WITH TICK +0490; C; 0491; # CYRILLIC CAPITAL LETTER GHE WITH UPTURN +0492; C; 0493; # CYRILLIC CAPITAL LETTER GHE WITH STROKE +0494; C; 0495; # CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK +0496; C; 0497; # CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER +0498; C; 0499; # CYRILLIC CAPITAL LETTER ZE WITH DESCENDER +049A; C; 049B; # CYRILLIC CAPITAL LETTER KA WITH DESCENDER +049C; C; 049D; # CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE +049E; C; 049F; # CYRILLIC CAPITAL LETTER KA WITH STROKE +04A0; C; 04A1; # CYRILLIC CAPITAL LETTER BASHKIR KA +04A2; C; 04A3; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER +04A4; C; 04A5; # CYRILLIC CAPITAL LIGATURE EN GHE +04A6; C; 04A7; # CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK +04A8; C; 04A9; # CYRILLIC CAPITAL LETTER ABKHASIAN HA +04AA; C; 04AB; # CYRILLIC CAPITAL LETTER ES WITH DESCENDER +04AC; C; 04AD; # CYRILLIC CAPITAL LETTER TE WITH DESCENDER +04AE; C; 04AF; # CYRILLIC CAPITAL LETTER STRAIGHT U +04B0; C; 04B1; # CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE +04B2; C; 04B3; # CYRILLIC CAPITAL LETTER HA WITH DESCENDER +04B4; C; 04B5; # CYRILLIC CAPITAL LIGATURE TE TSE +04B6; C; 04B7; # CYRILLIC CAPITAL LETTER CHE WITH DESCENDER +04B8; C; 04B9; # CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE +04BA; C; 04BB; # CYRILLIC CAPITAL LETTER SHHA +04BC; C; 04BD; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE +04BE; C; 04BF; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER +04C0; C; 04CF; # CYRILLIC LETTER PALOCHKA +04C1; C; 04C2; # CYRILLIC CAPITAL LETTER ZHE WITH BREVE +04C3; C; 04C4; # CYRILLIC CAPITAL LETTER KA WITH HOOK +04C5; C; 04C6; # CYRILLIC CAPITAL LETTER EL WITH TAIL +04C7; C; 04C8; # CYRILLIC CAPITAL LETTER EN WITH HOOK +04C9; C; 04CA; # CYRILLIC CAPITAL LETTER EN WITH TAIL +04CB; C; 04CC; # CYRILLIC CAPITAL LETTER KHAKASSIAN CHE +04CD; C; 04CE; # CYRILLIC CAPITAL LETTER EM WITH TAIL +04D0; C; 04D1; # CYRILLIC CAPITAL LETTER A WITH BREVE +04D2; C; 04D3; # CYRILLIC CAPITAL LETTER A WITH DIAERESIS +04D4; C; 04D5; # CYRILLIC CAPITAL LIGATURE A IE +04D6; C; 04D7; # CYRILLIC CAPITAL LETTER IE WITH BREVE +04D8; C; 04D9; # CYRILLIC CAPITAL LETTER SCHWA +04DA; C; 04DB; # CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS +04DC; C; 04DD; # CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS +04DE; C; 04DF; # CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS +04E0; C; 04E1; # CYRILLIC CAPITAL LETTER ABKHASIAN DZE +04E2; C; 04E3; # CYRILLIC CAPITAL LETTER I WITH MACRON +04E4; C; 04E5; # CYRILLIC CAPITAL LETTER I WITH DIAERESIS +04E6; C; 04E7; # CYRILLIC CAPITAL LETTER O WITH DIAERESIS +04E8; C; 04E9; # CYRILLIC CAPITAL LETTER BARRED O +04EA; C; 04EB; # CYRILLIC CAPITAL LETTER BARRED O WITH DIAERESIS +04EC; C; 04ED; # CYRILLIC CAPITAL LETTER E WITH DIAERESIS +04EE; C; 04EF; # CYRILLIC CAPITAL LETTER U WITH MACRON +04F0; C; 04F1; # CYRILLIC CAPITAL LETTER U WITH DIAERESIS +04F2; C; 04F3; # CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE +04F4; C; 04F5; # CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS +04F6; C; 04F7; # CYRILLIC CAPITAL LETTER GHE WITH DESCENDER +04F8; C; 04F9; # CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS +04FA; C; 04FB; # CYRILLIC CAPITAL LETTER GHE WITH STROKE AND HOOK +04FC; C; 04FD; # CYRILLIC CAPITAL LETTER HA WITH HOOK +04FE; C; 04FF; # CYRILLIC CAPITAL LETTER HA WITH STROKE +0500; C; 0501; # CYRILLIC CAPITAL LETTER KOMI DE +0502; C; 0503; # CYRILLIC CAPITAL LETTER KOMI DJE +0504; C; 0505; # CYRILLIC CAPITAL LETTER KOMI ZJE +0506; C; 0507; # CYRILLIC CAPITAL LETTER KOMI DZJE +0508; C; 0509; # CYRILLIC CAPITAL LETTER KOMI LJE +050A; C; 050B; # CYRILLIC CAPITAL LETTER KOMI NJE +050C; C; 050D; # CYRILLIC CAPITAL LETTER KOMI SJE +050E; C; 050F; # CYRILLIC CAPITAL LETTER KOMI TJE +0510; C; 0511; # CYRILLIC CAPITAL LETTER REVERSED ZE +0512; C; 0513; # CYRILLIC CAPITAL LETTER EL WITH HOOK +0514; C; 0515; # CYRILLIC CAPITAL LETTER LHA +0516; C; 0517; # CYRILLIC CAPITAL LETTER RHA +0518; C; 0519; # CYRILLIC CAPITAL LETTER YAE +051A; C; 051B; # CYRILLIC CAPITAL LETTER QA +051C; C; 051D; # CYRILLIC CAPITAL LETTER WE +051E; C; 051F; # CYRILLIC CAPITAL LETTER ALEUT KA +0520; C; 0521; # CYRILLIC CAPITAL LETTER EL WITH MIDDLE HOOK +0522; C; 0523; # CYRILLIC CAPITAL LETTER EN WITH MIDDLE HOOK +0524; C; 0525; # CYRILLIC CAPITAL LETTER PE WITH DESCENDER +0526; C; 0527; # CYRILLIC CAPITAL LETTER SHHA WITH DESCENDER +0528; C; 0529; # CYRILLIC CAPITAL LETTER EN WITH LEFT HOOK +052A; C; 052B; # CYRILLIC CAPITAL LETTER DZZHE +052C; C; 052D; # CYRILLIC CAPITAL LETTER DCHE +052E; C; 052F; # CYRILLIC CAPITAL LETTER EL WITH DESCENDER +0531; C; 0561; # ARMENIAN CAPITAL LETTER AYB +0532; C; 0562; # ARMENIAN CAPITAL LETTER BEN +0533; C; 0563; # ARMENIAN CAPITAL LETTER GIM +0534; C; 0564; # ARMENIAN CAPITAL LETTER DA +0535; C; 0565; # ARMENIAN CAPITAL LETTER ECH +0536; C; 0566; # ARMENIAN CAPITAL LETTER ZA +0537; C; 0567; # ARMENIAN CAPITAL LETTER EH +0538; C; 0568; # ARMENIAN CAPITAL LETTER ET +0539; C; 0569; # ARMENIAN CAPITAL LETTER TO +053A; C; 056A; # ARMENIAN CAPITAL LETTER ZHE +053B; C; 056B; # ARMENIAN CAPITAL LETTER INI +053C; C; 056C; # ARMENIAN CAPITAL LETTER LIWN +053D; C; 056D; # ARMENIAN CAPITAL LETTER XEH +053E; C; 056E; # ARMENIAN CAPITAL LETTER CA +053F; C; 056F; # ARMENIAN CAPITAL LETTER KEN +0540; C; 0570; # ARMENIAN CAPITAL LETTER HO +0541; C; 0571; # ARMENIAN CAPITAL LETTER JA +0542; C; 0572; # ARMENIAN CAPITAL LETTER GHAD +0543; C; 0573; # ARMENIAN CAPITAL LETTER CHEH +0544; C; 0574; # ARMENIAN CAPITAL LETTER MEN +0545; C; 0575; # ARMENIAN CAPITAL LETTER YI +0546; C; 0576; # ARMENIAN CAPITAL LETTER NOW +0547; C; 0577; # ARMENIAN CAPITAL LETTER SHA +0548; C; 0578; # ARMENIAN CAPITAL LETTER VO +0549; C; 0579; # ARMENIAN CAPITAL LETTER CHA +054A; C; 057A; # ARMENIAN CAPITAL LETTER PEH +054B; C; 057B; # ARMENIAN CAPITAL LETTER JHEH +054C; C; 057C; # ARMENIAN CAPITAL LETTER RA +054D; C; 057D; # ARMENIAN CAPITAL LETTER SEH +054E; C; 057E; # ARMENIAN CAPITAL LETTER VEW +054F; C; 057F; # ARMENIAN CAPITAL LETTER TIWN +0550; C; 0580; # ARMENIAN CAPITAL LETTER REH +0551; C; 0581; # ARMENIAN CAPITAL LETTER CO +0552; C; 0582; # ARMENIAN CAPITAL LETTER YIWN +0553; C; 0583; # ARMENIAN CAPITAL LETTER PIWR +0554; C; 0584; # ARMENIAN CAPITAL LETTER KEH +0555; C; 0585; # ARMENIAN CAPITAL LETTER OH +0556; C; 0586; # ARMENIAN CAPITAL LETTER FEH +0587; F; 0565 0582; # ARMENIAN SMALL LIGATURE ECH YIWN +10A0; C; 2D00; # GEORGIAN CAPITAL LETTER AN +10A1; C; 2D01; # GEORGIAN CAPITAL LETTER BAN +10A2; C; 2D02; # GEORGIAN CAPITAL LETTER GAN +10A3; C; 2D03; # GEORGIAN CAPITAL LETTER DON +10A4; C; 2D04; # GEORGIAN CAPITAL LETTER EN +10A5; C; 2D05; # GEORGIAN CAPITAL LETTER VIN +10A6; C; 2D06; # GEORGIAN CAPITAL LETTER ZEN +10A7; C; 2D07; # GEORGIAN CAPITAL LETTER TAN +10A8; C; 2D08; # GEORGIAN CAPITAL LETTER IN +10A9; C; 2D09; # GEORGIAN CAPITAL LETTER KAN +10AA; C; 2D0A; # GEORGIAN CAPITAL LETTER LAS +10AB; C; 2D0B; # GEORGIAN CAPITAL LETTER MAN +10AC; C; 2D0C; # GEORGIAN CAPITAL LETTER NAR +10AD; C; 2D0D; # GEORGIAN CAPITAL LETTER ON +10AE; C; 2D0E; # GEORGIAN CAPITAL LETTER PAR +10AF; C; 2D0F; # GEORGIAN CAPITAL LETTER ZHAR +10B0; C; 2D10; # GEORGIAN CAPITAL LETTER RAE +10B1; C; 2D11; # GEORGIAN CAPITAL LETTER SAN +10B2; C; 2D12; # GEORGIAN CAPITAL LETTER TAR +10B3; C; 2D13; # GEORGIAN CAPITAL LETTER UN +10B4; C; 2D14; # GEORGIAN CAPITAL LETTER PHAR +10B5; C; 2D15; # GEORGIAN CAPITAL LETTER KHAR +10B6; C; 2D16; # GEORGIAN CAPITAL LETTER GHAN +10B7; C; 2D17; # GEORGIAN CAPITAL LETTER QAR +10B8; C; 2D18; # GEORGIAN CAPITAL LETTER SHIN +10B9; C; 2D19; # GEORGIAN CAPITAL LETTER CHIN +10BA; C; 2D1A; # GEORGIAN CAPITAL LETTER CAN +10BB; C; 2D1B; # GEORGIAN CAPITAL LETTER JIL +10BC; C; 2D1C; # GEORGIAN CAPITAL LETTER CIL +10BD; C; 2D1D; # GEORGIAN CAPITAL LETTER CHAR +10BE; C; 2D1E; # GEORGIAN CAPITAL LETTER XAN +10BF; C; 2D1F; # GEORGIAN CAPITAL LETTER JHAN +10C0; C; 2D20; # GEORGIAN CAPITAL LETTER HAE +10C1; C; 2D21; # GEORGIAN CAPITAL LETTER HE +10C2; C; 2D22; # GEORGIAN CAPITAL LETTER HIE +10C3; C; 2D23; # GEORGIAN CAPITAL LETTER WE +10C4; C; 2D24; # GEORGIAN CAPITAL LETTER HAR +10C5; C; 2D25; # GEORGIAN CAPITAL LETTER HOE +10C7; C; 2D27; # GEORGIAN CAPITAL LETTER YN +10CD; C; 2D2D; # GEORGIAN CAPITAL LETTER AEN +13F8; C; 13F0; # CHEROKEE SMALL LETTER YE +13F9; C; 13F1; # CHEROKEE SMALL LETTER YI +13FA; C; 13F2; # CHEROKEE SMALL LETTER YO +13FB; C; 13F3; # CHEROKEE SMALL LETTER YU +13FC; C; 13F4; # CHEROKEE SMALL LETTER YV +13FD; C; 13F5; # CHEROKEE SMALL LETTER MV +1E00; C; 1E01; # LATIN CAPITAL LETTER A WITH RING BELOW +1E02; C; 1E03; # LATIN CAPITAL LETTER B WITH DOT ABOVE +1E04; C; 1E05; # LATIN CAPITAL LETTER B WITH DOT BELOW +1E06; C; 1E07; # LATIN CAPITAL LETTER B WITH LINE BELOW +1E08; C; 1E09; # LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE +1E0A; C; 1E0B; # LATIN CAPITAL LETTER D WITH DOT ABOVE +1E0C; C; 1E0D; # LATIN CAPITAL LETTER D WITH DOT BELOW +1E0E; C; 1E0F; # LATIN CAPITAL LETTER D WITH LINE BELOW +1E10; C; 1E11; # LATIN CAPITAL LETTER D WITH CEDILLA +1E12; C; 1E13; # LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW +1E14; C; 1E15; # LATIN CAPITAL LETTER E WITH MACRON AND GRAVE +1E16; C; 1E17; # LATIN CAPITAL LETTER E WITH MACRON AND ACUTE +1E18; C; 1E19; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW +1E1A; C; 1E1B; # LATIN CAPITAL LETTER E WITH TILDE BELOW +1E1C; C; 1E1D; # LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE +1E1E; C; 1E1F; # LATIN CAPITAL LETTER F WITH DOT ABOVE +1E20; C; 1E21; # LATIN CAPITAL LETTER G WITH MACRON +1E22; C; 1E23; # LATIN CAPITAL LETTER H WITH DOT ABOVE +1E24; C; 1E25; # LATIN CAPITAL LETTER H WITH DOT BELOW +1E26; C; 1E27; # LATIN CAPITAL LETTER H WITH DIAERESIS +1E28; C; 1E29; # LATIN CAPITAL LETTER H WITH CEDILLA +1E2A; C; 1E2B; # LATIN CAPITAL LETTER H WITH BREVE BELOW +1E2C; C; 1E2D; # LATIN CAPITAL LETTER I WITH TILDE BELOW +1E2E; C; 1E2F; # LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE +1E30; C; 1E31; # LATIN CAPITAL LETTER K WITH ACUTE +1E32; C; 1E33; # LATIN CAPITAL LETTER K WITH DOT BELOW +1E34; C; 1E35; # LATIN CAPITAL LETTER K WITH LINE BELOW +1E36; C; 1E37; # LATIN CAPITAL LETTER L WITH DOT BELOW +1E38; C; 1E39; # LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON +1E3A; C; 1E3B; # LATIN CAPITAL LETTER L WITH LINE BELOW +1E3C; C; 1E3D; # LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW +1E3E; C; 1E3F; # LATIN CAPITAL LETTER M WITH ACUTE +1E40; C; 1E41; # LATIN CAPITAL LETTER M WITH DOT ABOVE +1E42; C; 1E43; # LATIN CAPITAL LETTER M WITH DOT BELOW +1E44; C; 1E45; # LATIN CAPITAL LETTER N WITH DOT ABOVE +1E46; C; 1E47; # LATIN CAPITAL LETTER N WITH DOT BELOW +1E48; C; 1E49; # LATIN CAPITAL LETTER N WITH LINE BELOW +1E4A; C; 1E4B; # LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW +1E4C; C; 1E4D; # LATIN CAPITAL LETTER O WITH TILDE AND ACUTE +1E4E; C; 1E4F; # LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS +1E50; C; 1E51; # LATIN CAPITAL LETTER O WITH MACRON AND GRAVE +1E52; C; 1E53; # LATIN CAPITAL LETTER O WITH MACRON AND ACUTE +1E54; C; 1E55; # LATIN CAPITAL LETTER P WITH ACUTE +1E56; C; 1E57; # LATIN CAPITAL LETTER P WITH DOT ABOVE +1E58; C; 1E59; # LATIN CAPITAL LETTER R WITH DOT ABOVE +1E5A; C; 1E5B; # LATIN CAPITAL LETTER R WITH DOT BELOW +1E5C; C; 1E5D; # LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON +1E5E; C; 1E5F; # LATIN CAPITAL LETTER R WITH LINE BELOW +1E60; C; 1E61; # LATIN CAPITAL LETTER S WITH DOT ABOVE +1E62; C; 1E63; # LATIN CAPITAL LETTER S WITH DOT BELOW +1E64; C; 1E65; # LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE +1E66; C; 1E67; # LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE +1E68; C; 1E69; # LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE +1E6A; C; 1E6B; # LATIN CAPITAL LETTER T WITH DOT ABOVE +1E6C; C; 1E6D; # LATIN CAPITAL LETTER T WITH DOT BELOW +1E6E; C; 1E6F; # LATIN CAPITAL LETTER T WITH LINE BELOW +1E70; C; 1E71; # LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW +1E72; C; 1E73; # LATIN CAPITAL LETTER U WITH DIAERESIS BELOW +1E74; C; 1E75; # LATIN CAPITAL LETTER U WITH TILDE BELOW +1E76; C; 1E77; # LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW +1E78; C; 1E79; # LATIN CAPITAL LETTER U WITH TILDE AND ACUTE +1E7A; C; 1E7B; # LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS +1E7C; C; 1E7D; # LATIN CAPITAL LETTER V WITH TILDE +1E7E; C; 1E7F; # LATIN CAPITAL LETTER V WITH DOT BELOW +1E80; C; 1E81; # LATIN CAPITAL LETTER W WITH GRAVE +1E82; C; 1E83; # LATIN CAPITAL LETTER W WITH ACUTE +1E84; C; 1E85; # LATIN CAPITAL LETTER W WITH DIAERESIS +1E86; C; 1E87; # LATIN CAPITAL LETTER W WITH DOT ABOVE +1E88; C; 1E89; # LATIN CAPITAL LETTER W WITH DOT BELOW +1E8A; C; 1E8B; # LATIN CAPITAL LETTER X WITH DOT ABOVE +1E8C; C; 1E8D; # LATIN CAPITAL LETTER X WITH DIAERESIS +1E8E; C; 1E8F; # LATIN CAPITAL LETTER Y WITH DOT ABOVE +1E90; C; 1E91; # LATIN CAPITAL LETTER Z WITH CIRCUMFLEX +1E92; C; 1E93; # LATIN CAPITAL LETTER Z WITH DOT BELOW +1E94; C; 1E95; # LATIN CAPITAL LETTER Z WITH LINE BELOW +1E96; F; 0068 0331; # LATIN SMALL LETTER H WITH LINE BELOW +1E97; F; 0074 0308; # LATIN SMALL LETTER T WITH DIAERESIS +1E98; F; 0077 030A; # LATIN SMALL LETTER W WITH RING ABOVE +1E99; F; 0079 030A; # LATIN SMALL LETTER Y WITH RING ABOVE +1E9A; F; 0061 02BE; # LATIN SMALL LETTER A WITH RIGHT HALF RING +1E9B; C; 1E61; # LATIN SMALL LETTER LONG S WITH DOT ABOVE +1E9E; F; 0073 0073; # LATIN CAPITAL LETTER SHARP S +1E9E; S; 00DF; # LATIN CAPITAL LETTER SHARP S +1EA0; C; 1EA1; # LATIN CAPITAL LETTER A WITH DOT BELOW +1EA2; C; 1EA3; # LATIN CAPITAL LETTER A WITH HOOK ABOVE +1EA4; C; 1EA5; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE +1EA6; C; 1EA7; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE +1EA8; C; 1EA9; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE +1EAA; C; 1EAB; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE +1EAC; C; 1EAD; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW +1EAE; C; 1EAF; # LATIN CAPITAL LETTER A WITH BREVE AND ACUTE +1EB0; C; 1EB1; # LATIN CAPITAL LETTER A WITH BREVE AND GRAVE +1EB2; C; 1EB3; # LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE +1EB4; C; 1EB5; # LATIN CAPITAL LETTER A WITH BREVE AND TILDE +1EB6; C; 1EB7; # LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW +1EB8; C; 1EB9; # LATIN CAPITAL LETTER E WITH DOT BELOW +1EBA; C; 1EBB; # LATIN CAPITAL LETTER E WITH HOOK ABOVE +1EBC; C; 1EBD; # LATIN CAPITAL LETTER E WITH TILDE +1EBE; C; 1EBF; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE +1EC0; C; 1EC1; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE +1EC2; C; 1EC3; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE +1EC4; C; 1EC5; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE +1EC6; C; 1EC7; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW +1EC8; C; 1EC9; # LATIN CAPITAL LETTER I WITH HOOK ABOVE +1ECA; C; 1ECB; # LATIN CAPITAL LETTER I WITH DOT BELOW +1ECC; C; 1ECD; # LATIN CAPITAL LETTER O WITH DOT BELOW +1ECE; C; 1ECF; # LATIN CAPITAL LETTER O WITH HOOK ABOVE +1ED0; C; 1ED1; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE +1ED2; C; 1ED3; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE +1ED4; C; 1ED5; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE +1ED6; C; 1ED7; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE +1ED8; C; 1ED9; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW +1EDA; C; 1EDB; # LATIN CAPITAL LETTER O WITH HORN AND ACUTE +1EDC; C; 1EDD; # LATIN CAPITAL LETTER O WITH HORN AND GRAVE +1EDE; C; 1EDF; # LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE +1EE0; C; 1EE1; # LATIN CAPITAL LETTER O WITH HORN AND TILDE +1EE2; C; 1EE3; # LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW +1EE4; C; 1EE5; # LATIN CAPITAL LETTER U WITH DOT BELOW +1EE6; C; 1EE7; # LATIN CAPITAL LETTER U WITH HOOK ABOVE +1EE8; C; 1EE9; # LATIN CAPITAL LETTER U WITH HORN AND ACUTE +1EEA; C; 1EEB; # LATIN CAPITAL LETTER U WITH HORN AND GRAVE +1EEC; C; 1EED; # LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE +1EEE; C; 1EEF; # LATIN CAPITAL LETTER U WITH HORN AND TILDE +1EF0; C; 1EF1; # LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW +1EF2; C; 1EF3; # LATIN CAPITAL LETTER Y WITH GRAVE +1EF4; C; 1EF5; # LATIN CAPITAL LETTER Y WITH DOT BELOW +1EF6; C; 1EF7; # LATIN CAPITAL LETTER Y WITH HOOK ABOVE +1EF8; C; 1EF9; # LATIN CAPITAL LETTER Y WITH TILDE +1EFA; C; 1EFB; # LATIN CAPITAL LETTER MIDDLE-WELSH LL +1EFC; C; 1EFD; # LATIN CAPITAL LETTER MIDDLE-WELSH V +1EFE; C; 1EFF; # LATIN CAPITAL LETTER Y WITH LOOP +1F08; C; 1F00; # GREEK CAPITAL LETTER ALPHA WITH PSILI +1F09; C; 1F01; # GREEK CAPITAL LETTER ALPHA WITH DASIA +1F0A; C; 1F02; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA +1F0B; C; 1F03; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA +1F0C; C; 1F04; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA +1F0D; C; 1F05; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA +1F0E; C; 1F06; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI +1F0F; C; 1F07; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI +1F18; C; 1F10; # GREEK CAPITAL LETTER EPSILON WITH PSILI +1F19; C; 1F11; # GREEK CAPITAL LETTER EPSILON WITH DASIA +1F1A; C; 1F12; # GREEK CAPITAL LETTER EPSILON WITH PSILI AND VARIA +1F1B; C; 1F13; # GREEK CAPITAL LETTER EPSILON WITH DASIA AND VARIA +1F1C; C; 1F14; # GREEK CAPITAL LETTER EPSILON WITH PSILI AND OXIA +1F1D; C; 1F15; # GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA +1F28; C; 1F20; # GREEK CAPITAL LETTER ETA WITH PSILI +1F29; C; 1F21; # GREEK CAPITAL LETTER ETA WITH DASIA +1F2A; C; 1F22; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA +1F2B; C; 1F23; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA +1F2C; C; 1F24; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA +1F2D; C; 1F25; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA +1F2E; C; 1F26; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI +1F2F; C; 1F27; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI +1F38; C; 1F30; # GREEK CAPITAL LETTER IOTA WITH PSILI +1F39; C; 1F31; # GREEK CAPITAL LETTER IOTA WITH DASIA +1F3A; C; 1F32; # GREEK CAPITAL LETTER IOTA WITH PSILI AND VARIA +1F3B; C; 1F33; # GREEK CAPITAL LETTER IOTA WITH DASIA AND VARIA +1F3C; C; 1F34; # GREEK CAPITAL LETTER IOTA WITH PSILI AND OXIA +1F3D; C; 1F35; # GREEK CAPITAL LETTER IOTA WITH DASIA AND OXIA +1F3E; C; 1F36; # GREEK CAPITAL LETTER IOTA WITH PSILI AND PERISPOMENI +1F3F; C; 1F37; # GREEK CAPITAL LETTER IOTA WITH DASIA AND PERISPOMENI +1F48; C; 1F40; # GREEK CAPITAL LETTER OMICRON WITH PSILI +1F49; C; 1F41; # GREEK CAPITAL LETTER OMICRON WITH DASIA +1F4A; C; 1F42; # GREEK CAPITAL LETTER OMICRON WITH PSILI AND VARIA +1F4B; C; 1F43; # GREEK CAPITAL LETTER OMICRON WITH DASIA AND VARIA +1F4C; C; 1F44; # GREEK CAPITAL LETTER OMICRON WITH PSILI AND OXIA +1F4D; C; 1F45; # GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA +1F50; F; 03C5 0313; # GREEK SMALL LETTER UPSILON WITH PSILI +1F52; F; 03C5 0313 0300; # GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA +1F54; F; 03C5 0313 0301; # GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA +1F56; F; 03C5 0313 0342; # GREEK SMALL LETTER UPSILON WITH PSILI AND PERISPOMENI +1F59; C; 1F51; # GREEK CAPITAL LETTER UPSILON WITH DASIA +1F5B; C; 1F53; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA +1F5D; C; 1F55; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA +1F5F; C; 1F57; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI +1F68; C; 1F60; # GREEK CAPITAL LETTER OMEGA WITH PSILI +1F69; C; 1F61; # GREEK CAPITAL LETTER OMEGA WITH DASIA +1F6A; C; 1F62; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA +1F6B; C; 1F63; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA +1F6C; C; 1F64; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA +1F6D; C; 1F65; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA +1F6E; C; 1F66; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI +1F6F; C; 1F67; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI +1F80; F; 1F00 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI +1F81; F; 1F01 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND YPOGEGRAMMENI +1F82; F; 1F02 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA AND YPOGEGRAMMENI +1F83; F; 1F03 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA AND YPOGEGRAMMENI +1F84; F; 1F04 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA AND YPOGEGRAMMENI +1F85; F; 1F05 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA AND YPOGEGRAMMENI +1F86; F; 1F06 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI +1F87; F; 1F07 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI +1F88; F; 1F00 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI +1F88; S; 1F80; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI +1F89; F; 1F01 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI +1F89; S; 1F81; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI +1F8A; F; 1F02 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI +1F8A; S; 1F82; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI +1F8B; F; 1F03 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI +1F8B; S; 1F83; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI +1F8C; F; 1F04 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI +1F8C; S; 1F84; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI +1F8D; F; 1F05 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI +1F8D; S; 1F85; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI +1F8E; F; 1F06 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI +1F8E; S; 1F86; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI +1F8F; F; 1F07 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1F8F; S; 1F87; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1F90; F; 1F20 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI +1F91; F; 1F21 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND YPOGEGRAMMENI +1F92; F; 1F22 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND VARIA AND YPOGEGRAMMENI +1F93; F; 1F23 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND VARIA AND YPOGEGRAMMENI +1F94; F; 1F24 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND OXIA AND YPOGEGRAMMENI +1F95; F; 1F25 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI +1F96; F; 1F26 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI +1F97; F; 1F27 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI +1F98; F; 1F20 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI +1F98; S; 1F90; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI +1F99; F; 1F21 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI +1F99; S; 1F91; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI +1F9A; F; 1F22 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI +1F9A; S; 1F92; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI +1F9B; F; 1F23 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI +1F9B; S; 1F93; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI +1F9C; F; 1F24 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI +1F9C; S; 1F94; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI +1F9D; F; 1F25 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI +1F9D; S; 1F95; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI +1F9E; F; 1F26 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI +1F9E; S; 1F96; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI +1F9F; F; 1F27 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1F9F; S; 1F97; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1FA0; F; 1F60 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI +1FA1; F; 1F61 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND YPOGEGRAMMENI +1FA2; F; 1F62 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI +1FA3; F; 1F63 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA AND YPOGEGRAMMENI +1FA4; F; 1F64 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA AND YPOGEGRAMMENI +1FA5; F; 1F65 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA AND YPOGEGRAMMENI +1FA6; F; 1F66 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI +1FA7; F; 1F67 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI +1FA8; F; 1F60 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI +1FA8; S; 1FA0; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI +1FA9; F; 1F61 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI +1FA9; S; 1FA1; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI +1FAA; F; 1F62 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI +1FAA; S; 1FA2; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI +1FAB; F; 1F63 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI +1FAB; S; 1FA3; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI +1FAC; F; 1F64 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI +1FAC; S; 1FA4; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI +1FAD; F; 1F65 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI +1FAD; S; 1FA5; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI +1FAE; F; 1F66 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI +1FAE; S; 1FA6; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI +1FAF; F; 1F67 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1FAF; S; 1FA7; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1FB2; F; 1F70 03B9; # GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI +1FB3; F; 03B1 03B9; # GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI +1FB4; F; 03AC 03B9; # GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI +1FB6; F; 03B1 0342; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI +1FB7; F; 03B1 0342 03B9; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI +1FB8; C; 1FB0; # GREEK CAPITAL LETTER ALPHA WITH VRACHY +1FB9; C; 1FB1; # GREEK CAPITAL LETTER ALPHA WITH MACRON +1FBA; C; 1F70; # GREEK CAPITAL LETTER ALPHA WITH VARIA +1FBB; C; 1F71; # GREEK CAPITAL LETTER ALPHA WITH OXIA +1FBC; F; 03B1 03B9; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI +1FBC; S; 1FB3; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI +1FBE; C; 03B9; # GREEK PROSGEGRAMMENI +1FC2; F; 1F74 03B9; # GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI +1FC3; F; 03B7 03B9; # GREEK SMALL LETTER ETA WITH YPOGEGRAMMENI +1FC4; F; 03AE 03B9; # GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI +1FC6; F; 03B7 0342; # GREEK SMALL LETTER ETA WITH PERISPOMENI +1FC7; F; 03B7 0342 03B9; # GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI +1FC8; C; 1F72; # GREEK CAPITAL LETTER EPSILON WITH VARIA +1FC9; C; 1F73; # GREEK CAPITAL LETTER EPSILON WITH OXIA +1FCA; C; 1F74; # GREEK CAPITAL LETTER ETA WITH VARIA +1FCB; C; 1F75; # GREEK CAPITAL LETTER ETA WITH OXIA +1FCC; F; 03B7 03B9; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI +1FCC; S; 1FC3; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI +1FD2; F; 03B9 0308 0300; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA +1FD3; F; 03B9 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA +1FD6; F; 03B9 0342; # GREEK SMALL LETTER IOTA WITH PERISPOMENI +1FD7; F; 03B9 0308 0342; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI +1FD8; C; 1FD0; # GREEK CAPITAL LETTER IOTA WITH VRACHY +1FD9; C; 1FD1; # GREEK CAPITAL LETTER IOTA WITH MACRON +1FDA; C; 1F76; # GREEK CAPITAL LETTER IOTA WITH VARIA +1FDB; C; 1F77; # GREEK CAPITAL LETTER IOTA WITH OXIA +1FE2; F; 03C5 0308 0300; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA +1FE3; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA +1FE4; F; 03C1 0313; # GREEK SMALL LETTER RHO WITH PSILI +1FE6; F; 03C5 0342; # GREEK SMALL LETTER UPSILON WITH PERISPOMENI +1FE7; F; 03C5 0308 0342; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI +1FE8; C; 1FE0; # GREEK CAPITAL LETTER UPSILON WITH VRACHY +1FE9; C; 1FE1; # GREEK CAPITAL LETTER UPSILON WITH MACRON +1FEA; C; 1F7A; # GREEK CAPITAL LETTER UPSILON WITH VARIA +1FEB; C; 1F7B; # GREEK CAPITAL LETTER UPSILON WITH OXIA +1FEC; C; 1FE5; # GREEK CAPITAL LETTER RHO WITH DASIA +1FF2; F; 1F7C 03B9; # GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI +1FF3; F; 03C9 03B9; # GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI +1FF4; F; 03CE 03B9; # GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI +1FF6; F; 03C9 0342; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI +1FF7; F; 03C9 0342 03B9; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI +1FF8; C; 1F78; # GREEK CAPITAL LETTER OMICRON WITH VARIA +1FF9; C; 1F79; # GREEK CAPITAL LETTER OMICRON WITH OXIA +1FFA; C; 1F7C; # GREEK CAPITAL LETTER OMEGA WITH VARIA +1FFB; C; 1F7D; # GREEK CAPITAL LETTER OMEGA WITH OXIA +1FFC; F; 03C9 03B9; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI +1FFC; S; 1FF3; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI +2126; C; 03C9; # OHM SIGN +212A; C; 006B; # KELVIN SIGN +212B; C; 00E5; # ANGSTROM SIGN +2132; C; 214E; # TURNED CAPITAL F +2160; C; 2170; # ROMAN NUMERAL ONE +2161; C; 2171; # ROMAN NUMERAL TWO +2162; C; 2172; # ROMAN NUMERAL THREE +2163; C; 2173; # ROMAN NUMERAL FOUR +2164; C; 2174; # ROMAN NUMERAL FIVE +2165; C; 2175; # ROMAN NUMERAL SIX +2166; C; 2176; # ROMAN NUMERAL SEVEN +2167; C; 2177; # ROMAN NUMERAL EIGHT +2168; C; 2178; # ROMAN NUMERAL NINE +2169; C; 2179; # ROMAN NUMERAL TEN +216A; C; 217A; # ROMAN NUMERAL ELEVEN +216B; C; 217B; # ROMAN NUMERAL TWELVE +216C; C; 217C; # ROMAN NUMERAL FIFTY +216D; C; 217D; # ROMAN NUMERAL ONE HUNDRED +216E; C; 217E; # ROMAN NUMERAL FIVE HUNDRED +216F; C; 217F; # ROMAN NUMERAL ONE THOUSAND +2183; C; 2184; # ROMAN NUMERAL REVERSED ONE HUNDRED +24B6; C; 24D0; # CIRCLED LATIN CAPITAL LETTER A +24B7; C; 24D1; # CIRCLED LATIN CAPITAL LETTER B +24B8; C; 24D2; # CIRCLED LATIN CAPITAL LETTER C +24B9; C; 24D3; # CIRCLED LATIN CAPITAL LETTER D +24BA; C; 24D4; # CIRCLED LATIN CAPITAL LETTER E +24BB; C; 24D5; # CIRCLED LATIN CAPITAL LETTER F +24BC; C; 24D6; # CIRCLED LATIN CAPITAL LETTER G +24BD; C; 24D7; # CIRCLED LATIN CAPITAL LETTER H +24BE; C; 24D8; # CIRCLED LATIN CAPITAL LETTER I +24BF; C; 24D9; # CIRCLED LATIN CAPITAL LETTER J +24C0; C; 24DA; # CIRCLED LATIN CAPITAL LETTER K +24C1; C; 24DB; # CIRCLED LATIN CAPITAL LETTER L +24C2; C; 24DC; # CIRCLED LATIN CAPITAL LETTER M +24C3; C; 24DD; # CIRCLED LATIN CAPITAL LETTER N +24C4; C; 24DE; # CIRCLED LATIN CAPITAL LETTER O +24C5; C; 24DF; # CIRCLED LATIN CAPITAL LETTER P +24C6; C; 24E0; # CIRCLED LATIN CAPITAL LETTER Q +24C7; C; 24E1; # CIRCLED LATIN CAPITAL LETTER R +24C8; C; 24E2; # CIRCLED LATIN CAPITAL LETTER S +24C9; C; 24E3; # CIRCLED LATIN CAPITAL LETTER T +24CA; C; 24E4; # CIRCLED LATIN CAPITAL LETTER U +24CB; C; 24E5; # CIRCLED LATIN CAPITAL LETTER V +24CC; C; 24E6; # CIRCLED LATIN CAPITAL LETTER W +24CD; C; 24E7; # CIRCLED LATIN CAPITAL LETTER X +24CE; C; 24E8; # CIRCLED LATIN CAPITAL LETTER Y +24CF; C; 24E9; # CIRCLED LATIN CAPITAL LETTER Z +2C00; C; 2C30; # GLAGOLITIC CAPITAL LETTER AZU +2C01; C; 2C31; # GLAGOLITIC CAPITAL LETTER BUKY +2C02; C; 2C32; # GLAGOLITIC CAPITAL LETTER VEDE +2C03; C; 2C33; # GLAGOLITIC CAPITAL LETTER GLAGOLI +2C04; C; 2C34; # GLAGOLITIC CAPITAL LETTER DOBRO +2C05; C; 2C35; # GLAGOLITIC CAPITAL LETTER YESTU +2C06; C; 2C36; # GLAGOLITIC CAPITAL LETTER ZHIVETE +2C07; C; 2C37; # GLAGOLITIC CAPITAL LETTER DZELO +2C08; C; 2C38; # GLAGOLITIC CAPITAL LETTER ZEMLJA +2C09; C; 2C39; # GLAGOLITIC CAPITAL LETTER IZHE +2C0A; C; 2C3A; # GLAGOLITIC CAPITAL LETTER INITIAL IZHE +2C0B; C; 2C3B; # GLAGOLITIC CAPITAL LETTER I +2C0C; C; 2C3C; # GLAGOLITIC CAPITAL LETTER DJERVI +2C0D; C; 2C3D; # GLAGOLITIC CAPITAL LETTER KAKO +2C0E; C; 2C3E; # GLAGOLITIC CAPITAL LETTER LJUDIJE +2C0F; C; 2C3F; # GLAGOLITIC CAPITAL LETTER MYSLITE +2C10; C; 2C40; # GLAGOLITIC CAPITAL LETTER NASHI +2C11; C; 2C41; # GLAGOLITIC CAPITAL LETTER ONU +2C12; C; 2C42; # GLAGOLITIC CAPITAL LETTER POKOJI +2C13; C; 2C43; # GLAGOLITIC CAPITAL LETTER RITSI +2C14; C; 2C44; # GLAGOLITIC CAPITAL LETTER SLOVO +2C15; C; 2C45; # GLAGOLITIC CAPITAL LETTER TVRIDO +2C16; C; 2C46; # GLAGOLITIC CAPITAL LETTER UKU +2C17; C; 2C47; # GLAGOLITIC CAPITAL LETTER FRITU +2C18; C; 2C48; # GLAGOLITIC CAPITAL LETTER HERU +2C19; C; 2C49; # GLAGOLITIC CAPITAL LETTER OTU +2C1A; C; 2C4A; # GLAGOLITIC CAPITAL LETTER PE +2C1B; C; 2C4B; # GLAGOLITIC CAPITAL LETTER SHTA +2C1C; C; 2C4C; # GLAGOLITIC CAPITAL LETTER TSI +2C1D; C; 2C4D; # GLAGOLITIC CAPITAL LETTER CHRIVI +2C1E; C; 2C4E; # GLAGOLITIC CAPITAL LETTER SHA +2C1F; C; 2C4F; # GLAGOLITIC CAPITAL LETTER YERU +2C20; C; 2C50; # GLAGOLITIC CAPITAL LETTER YERI +2C21; C; 2C51; # GLAGOLITIC CAPITAL LETTER YATI +2C22; C; 2C52; # GLAGOLITIC CAPITAL LETTER SPIDERY HA +2C23; C; 2C53; # GLAGOLITIC CAPITAL LETTER YU +2C24; C; 2C54; # GLAGOLITIC CAPITAL LETTER SMALL YUS +2C25; C; 2C55; # GLAGOLITIC CAPITAL LETTER SMALL YUS WITH TAIL +2C26; C; 2C56; # GLAGOLITIC CAPITAL LETTER YO +2C27; C; 2C57; # GLAGOLITIC CAPITAL LETTER IOTATED SMALL YUS +2C28; C; 2C58; # GLAGOLITIC CAPITAL LETTER BIG YUS +2C29; C; 2C59; # GLAGOLITIC CAPITAL LETTER IOTATED BIG YUS +2C2A; C; 2C5A; # GLAGOLITIC CAPITAL LETTER FITA +2C2B; C; 2C5B; # GLAGOLITIC CAPITAL LETTER IZHITSA +2C2C; C; 2C5C; # GLAGOLITIC CAPITAL LETTER SHTAPIC +2C2D; C; 2C5D; # GLAGOLITIC CAPITAL LETTER TROKUTASTI A +2C2E; C; 2C5E; # GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE +2C60; C; 2C61; # LATIN CAPITAL LETTER L WITH DOUBLE BAR +2C62; C; 026B; # LATIN CAPITAL LETTER L WITH MIDDLE TILDE +2C63; C; 1D7D; # LATIN CAPITAL LETTER P WITH STROKE +2C64; C; 027D; # LATIN CAPITAL LETTER R WITH TAIL +2C67; C; 2C68; # LATIN CAPITAL LETTER H WITH DESCENDER +2C69; C; 2C6A; # LATIN CAPITAL LETTER K WITH DESCENDER +2C6B; C; 2C6C; # LATIN CAPITAL LETTER Z WITH DESCENDER +2C6D; C; 0251; # LATIN CAPITAL LETTER ALPHA +2C6E; C; 0271; # LATIN CAPITAL LETTER M WITH HOOK +2C6F; C; 0250; # LATIN CAPITAL LETTER TURNED A +2C70; C; 0252; # LATIN CAPITAL LETTER TURNED ALPHA +2C72; C; 2C73; # LATIN CAPITAL LETTER W WITH HOOK +2C75; C; 2C76; # LATIN CAPITAL LETTER HALF H +2C7E; C; 023F; # LATIN CAPITAL LETTER S WITH SWASH TAIL +2C7F; C; 0240; # LATIN CAPITAL LETTER Z WITH SWASH TAIL +2C80; C; 2C81; # COPTIC CAPITAL LETTER ALFA +2C82; C; 2C83; # COPTIC CAPITAL LETTER VIDA +2C84; C; 2C85; # COPTIC CAPITAL LETTER GAMMA +2C86; C; 2C87; # COPTIC CAPITAL LETTER DALDA +2C88; C; 2C89; # COPTIC CAPITAL LETTER EIE +2C8A; C; 2C8B; # COPTIC CAPITAL LETTER SOU +2C8C; C; 2C8D; # COPTIC CAPITAL LETTER ZATA +2C8E; C; 2C8F; # COPTIC CAPITAL LETTER HATE +2C90; C; 2C91; # COPTIC CAPITAL LETTER THETHE +2C92; C; 2C93; # COPTIC CAPITAL LETTER IAUDA +2C94; C; 2C95; # COPTIC CAPITAL LETTER KAPA +2C96; C; 2C97; # COPTIC CAPITAL LETTER LAULA +2C98; C; 2C99; # COPTIC CAPITAL LETTER MI +2C9A; C; 2C9B; # COPTIC CAPITAL LETTER NI +2C9C; C; 2C9D; # COPTIC CAPITAL LETTER KSI +2C9E; C; 2C9F; # COPTIC CAPITAL LETTER O +2CA0; C; 2CA1; # COPTIC CAPITAL LETTER PI +2CA2; C; 2CA3; # COPTIC CAPITAL LETTER RO +2CA4; C; 2CA5; # COPTIC CAPITAL LETTER SIMA +2CA6; C; 2CA7; # COPTIC CAPITAL LETTER TAU +2CA8; C; 2CA9; # COPTIC CAPITAL LETTER UA +2CAA; C; 2CAB; # COPTIC CAPITAL LETTER FI +2CAC; C; 2CAD; # COPTIC CAPITAL LETTER KHI +2CAE; C; 2CAF; # COPTIC CAPITAL LETTER PSI +2CB0; C; 2CB1; # COPTIC CAPITAL LETTER OOU +2CB2; C; 2CB3; # COPTIC CAPITAL LETTER DIALECT-P ALEF +2CB4; C; 2CB5; # COPTIC CAPITAL LETTER OLD COPTIC AIN +2CB6; C; 2CB7; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC EIE +2CB8; C; 2CB9; # COPTIC CAPITAL LETTER DIALECT-P KAPA +2CBA; C; 2CBB; # COPTIC CAPITAL LETTER DIALECT-P NI +2CBC; C; 2CBD; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC NI +2CBE; C; 2CBF; # COPTIC CAPITAL LETTER OLD COPTIC OOU +2CC0; C; 2CC1; # COPTIC CAPITAL LETTER SAMPI +2CC2; C; 2CC3; # COPTIC CAPITAL LETTER CROSSED SHEI +2CC4; C; 2CC5; # COPTIC CAPITAL LETTER OLD COPTIC SHEI +2CC6; C; 2CC7; # COPTIC CAPITAL LETTER OLD COPTIC ESH +2CC8; C; 2CC9; # COPTIC CAPITAL LETTER AKHMIMIC KHEI +2CCA; C; 2CCB; # COPTIC CAPITAL LETTER DIALECT-P HORI +2CCC; C; 2CCD; # COPTIC CAPITAL LETTER OLD COPTIC HORI +2CCE; C; 2CCF; # COPTIC CAPITAL LETTER OLD COPTIC HA +2CD0; C; 2CD1; # COPTIC CAPITAL LETTER L-SHAPED HA +2CD2; C; 2CD3; # COPTIC CAPITAL LETTER OLD COPTIC HEI +2CD4; C; 2CD5; # COPTIC CAPITAL LETTER OLD COPTIC HAT +2CD6; C; 2CD7; # COPTIC CAPITAL LETTER OLD COPTIC GANGIA +2CD8; C; 2CD9; # COPTIC CAPITAL LETTER OLD COPTIC DJA +2CDA; C; 2CDB; # COPTIC CAPITAL LETTER OLD COPTIC SHIMA +2CDC; C; 2CDD; # COPTIC CAPITAL LETTER OLD NUBIAN SHIMA +2CDE; C; 2CDF; # COPTIC CAPITAL LETTER OLD NUBIAN NGI +2CE0; C; 2CE1; # COPTIC CAPITAL LETTER OLD NUBIAN NYI +2CE2; C; 2CE3; # COPTIC CAPITAL LETTER OLD NUBIAN WAU +2CEB; C; 2CEC; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI +2CED; C; 2CEE; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA +2CF2; C; 2CF3; # COPTIC CAPITAL LETTER BOHAIRIC KHEI +A640; C; A641; # CYRILLIC CAPITAL LETTER ZEMLYA +A642; C; A643; # CYRILLIC CAPITAL LETTER DZELO +A644; C; A645; # CYRILLIC CAPITAL LETTER REVERSED DZE +A646; C; A647; # CYRILLIC CAPITAL LETTER IOTA +A648; C; A649; # CYRILLIC CAPITAL LETTER DJERV +A64A; C; A64B; # CYRILLIC CAPITAL LETTER MONOGRAPH UK +A64C; C; A64D; # CYRILLIC CAPITAL LETTER BROAD OMEGA +A64E; C; A64F; # CYRILLIC CAPITAL LETTER NEUTRAL YER +A650; C; A651; # CYRILLIC CAPITAL LETTER YERU WITH BACK YER +A652; C; A653; # CYRILLIC CAPITAL LETTER IOTIFIED YAT +A654; C; A655; # CYRILLIC CAPITAL LETTER REVERSED YU +A656; C; A657; # CYRILLIC CAPITAL LETTER IOTIFIED A +A658; C; A659; # CYRILLIC CAPITAL LETTER CLOSED LITTLE YUS +A65A; C; A65B; # CYRILLIC CAPITAL LETTER BLENDED YUS +A65C; C; A65D; # CYRILLIC CAPITAL LETTER IOTIFIED CLOSED LITTLE YUS +A65E; C; A65F; # CYRILLIC CAPITAL LETTER YN +A660; C; A661; # CYRILLIC CAPITAL LETTER REVERSED TSE +A662; C; A663; # CYRILLIC CAPITAL LETTER SOFT DE +A664; C; A665; # CYRILLIC CAPITAL LETTER SOFT EL +A666; C; A667; # CYRILLIC CAPITAL LETTER SOFT EM +A668; C; A669; # CYRILLIC CAPITAL LETTER MONOCULAR O +A66A; C; A66B; # CYRILLIC CAPITAL LETTER BINOCULAR O +A66C; C; A66D; # CYRILLIC CAPITAL LETTER DOUBLE MONOCULAR O +A680; C; A681; # CYRILLIC CAPITAL LETTER DWE +A682; C; A683; # CYRILLIC CAPITAL LETTER DZWE +A684; C; A685; # CYRILLIC CAPITAL LETTER ZHWE +A686; C; A687; # CYRILLIC CAPITAL LETTER CCHE +A688; C; A689; # CYRILLIC CAPITAL LETTER DZZE +A68A; C; A68B; # CYRILLIC CAPITAL LETTER TE WITH MIDDLE HOOK +A68C; C; A68D; # CYRILLIC CAPITAL LETTER TWE +A68E; C; A68F; # CYRILLIC CAPITAL LETTER TSWE +A690; C; A691; # CYRILLIC CAPITAL LETTER TSSE +A692; C; A693; # CYRILLIC CAPITAL LETTER TCHE +A694; C; A695; # CYRILLIC CAPITAL LETTER HWE +A696; C; A697; # CYRILLIC CAPITAL LETTER SHWE +A698; C; A699; # CYRILLIC CAPITAL LETTER DOUBLE O +A69A; C; A69B; # CYRILLIC CAPITAL LETTER CROSSED O +A722; C; A723; # LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF +A724; C; A725; # LATIN CAPITAL LETTER EGYPTOLOGICAL AIN +A726; C; A727; # LATIN CAPITAL LETTER HENG +A728; C; A729; # LATIN CAPITAL LETTER TZ +A72A; C; A72B; # LATIN CAPITAL LETTER TRESILLO +A72C; C; A72D; # LATIN CAPITAL LETTER CUATRILLO +A72E; C; A72F; # LATIN CAPITAL LETTER CUATRILLO WITH COMMA +A732; C; A733; # LATIN CAPITAL LETTER AA +A734; C; A735; # LATIN CAPITAL LETTER AO +A736; C; A737; # LATIN CAPITAL LETTER AU +A738; C; A739; # LATIN CAPITAL LETTER AV +A73A; C; A73B; # LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR +A73C; C; A73D; # LATIN CAPITAL LETTER AY +A73E; C; A73F; # LATIN CAPITAL LETTER REVERSED C WITH DOT +A740; C; A741; # LATIN CAPITAL LETTER K WITH STROKE +A742; C; A743; # LATIN CAPITAL LETTER K WITH DIAGONAL STROKE +A744; C; A745; # LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE +A746; C; A747; # LATIN CAPITAL LETTER BROKEN L +A748; C; A749; # LATIN CAPITAL LETTER L WITH HIGH STROKE +A74A; C; A74B; # LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY +A74C; C; A74D; # LATIN CAPITAL LETTER O WITH LOOP +A74E; C; A74F; # LATIN CAPITAL LETTER OO +A750; C; A751; # LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER +A752; C; A753; # LATIN CAPITAL LETTER P WITH FLOURISH +A754; C; A755; # LATIN CAPITAL LETTER P WITH SQUIRREL TAIL +A756; C; A757; # LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER +A758; C; A759; # LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE +A75A; C; A75B; # LATIN CAPITAL LETTER R ROTUNDA +A75C; C; A75D; # LATIN CAPITAL LETTER RUM ROTUNDA +A75E; C; A75F; # LATIN CAPITAL LETTER V WITH DIAGONAL STROKE +A760; C; A761; # LATIN CAPITAL LETTER VY +A762; C; A763; # LATIN CAPITAL LETTER VISIGOTHIC Z +A764; C; A765; # LATIN CAPITAL LETTER THORN WITH STROKE +A766; C; A767; # LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER +A768; C; A769; # LATIN CAPITAL LETTER VEND +A76A; C; A76B; # LATIN CAPITAL LETTER ET +A76C; C; A76D; # LATIN CAPITAL LETTER IS +A76E; C; A76F; # LATIN CAPITAL LETTER CON +A779; C; A77A; # LATIN CAPITAL LETTER INSULAR D +A77B; C; A77C; # LATIN CAPITAL LETTER INSULAR F +A77D; C; 1D79; # LATIN CAPITAL LETTER INSULAR G +A77E; C; A77F; # LATIN CAPITAL LETTER TURNED INSULAR G +A780; C; A781; # LATIN CAPITAL LETTER TURNED L +A782; C; A783; # LATIN CAPITAL LETTER INSULAR R +A784; C; A785; # LATIN CAPITAL LETTER INSULAR S +A786; C; A787; # LATIN CAPITAL LETTER INSULAR T +A78B; C; A78C; # LATIN CAPITAL LETTER SALTILLO +A78D; C; 0265; # LATIN CAPITAL LETTER TURNED H +A790; C; A791; # LATIN CAPITAL LETTER N WITH DESCENDER +A792; C; A793; # LATIN CAPITAL LETTER C WITH BAR +A796; C; A797; # LATIN CAPITAL LETTER B WITH FLOURISH +A798; C; A799; # LATIN CAPITAL LETTER F WITH STROKE +A79A; C; A79B; # LATIN CAPITAL LETTER VOLAPUK AE +A79C; C; A79D; # LATIN CAPITAL LETTER VOLAPUK OE +A79E; C; A79F; # LATIN CAPITAL LETTER VOLAPUK UE +A7A0; C; A7A1; # LATIN CAPITAL LETTER G WITH OBLIQUE STROKE +A7A2; C; A7A3; # LATIN CAPITAL LETTER K WITH OBLIQUE STROKE +A7A4; C; A7A5; # LATIN CAPITAL LETTER N WITH OBLIQUE STROKE +A7A6; C; A7A7; # LATIN CAPITAL LETTER R WITH OBLIQUE STROKE +A7A8; C; A7A9; # LATIN CAPITAL LETTER S WITH OBLIQUE STROKE +A7AA; C; 0266; # LATIN CAPITAL LETTER H WITH HOOK +A7AB; C; 025C; # LATIN CAPITAL LETTER REVERSED OPEN E +A7AC; C; 0261; # LATIN CAPITAL LETTER SCRIPT G +A7AD; C; 026C; # LATIN CAPITAL LETTER L WITH BELT +A7B0; C; 029E; # LATIN CAPITAL LETTER TURNED K +A7B1; C; 0287; # LATIN CAPITAL LETTER TURNED T +A7B2; C; 029D; # LATIN CAPITAL LETTER J WITH CROSSED-TAIL +A7B3; C; AB53; # LATIN CAPITAL LETTER CHI +A7B4; C; A7B5; # LATIN CAPITAL LETTER BETA +A7B6; C; A7B7; # LATIN CAPITAL LETTER OMEGA +AB70; C; 13A0; # CHEROKEE SMALL LETTER A +AB71; C; 13A1; # CHEROKEE SMALL LETTER E +AB72; C; 13A2; # CHEROKEE SMALL LETTER I +AB73; C; 13A3; # CHEROKEE SMALL LETTER O +AB74; C; 13A4; # CHEROKEE SMALL LETTER U +AB75; C; 13A5; # CHEROKEE SMALL LETTER V +AB76; C; 13A6; # CHEROKEE SMALL LETTER GA +AB77; C; 13A7; # CHEROKEE SMALL LETTER KA +AB78; C; 13A8; # CHEROKEE SMALL LETTER GE +AB79; C; 13A9; # CHEROKEE SMALL LETTER GI +AB7A; C; 13AA; # CHEROKEE SMALL LETTER GO +AB7B; C; 13AB; # CHEROKEE SMALL LETTER GU +AB7C; C; 13AC; # CHEROKEE SMALL LETTER GV +AB7D; C; 13AD; # CHEROKEE SMALL LETTER HA +AB7E; C; 13AE; # CHEROKEE SMALL LETTER HE +AB7F; C; 13AF; # CHEROKEE SMALL LETTER HI +AB80; C; 13B0; # CHEROKEE SMALL LETTER HO +AB81; C; 13B1; # CHEROKEE SMALL LETTER HU +AB82; C; 13B2; # CHEROKEE SMALL LETTER HV +AB83; C; 13B3; # CHEROKEE SMALL LETTER LA +AB84; C; 13B4; # CHEROKEE SMALL LETTER LE +AB85; C; 13B5; # CHEROKEE SMALL LETTER LI +AB86; C; 13B6; # CHEROKEE SMALL LETTER LO +AB87; C; 13B7; # CHEROKEE SMALL LETTER LU +AB88; C; 13B8; # CHEROKEE SMALL LETTER LV +AB89; C; 13B9; # CHEROKEE SMALL LETTER MA +AB8A; C; 13BA; # CHEROKEE SMALL LETTER ME +AB8B; C; 13BB; # CHEROKEE SMALL LETTER MI +AB8C; C; 13BC; # CHEROKEE SMALL LETTER MO +AB8D; C; 13BD; # CHEROKEE SMALL LETTER MU +AB8E; C; 13BE; # CHEROKEE SMALL LETTER NA +AB8F; C; 13BF; # CHEROKEE SMALL LETTER HNA +AB90; C; 13C0; # CHEROKEE SMALL LETTER NAH +AB91; C; 13C1; # CHEROKEE SMALL LETTER NE +AB92; C; 13C2; # CHEROKEE SMALL LETTER NI +AB93; C; 13C3; # CHEROKEE SMALL LETTER NO +AB94; C; 13C4; # CHEROKEE SMALL LETTER NU +AB95; C; 13C5; # CHEROKEE SMALL LETTER NV +AB96; C; 13C6; # CHEROKEE SMALL LETTER QUA +AB97; C; 13C7; # CHEROKEE SMALL LETTER QUE +AB98; C; 13C8; # CHEROKEE SMALL LETTER QUI +AB99; C; 13C9; # CHEROKEE SMALL LETTER QUO +AB9A; C; 13CA; # CHEROKEE SMALL LETTER QUU +AB9B; C; 13CB; # CHEROKEE SMALL LETTER QUV +AB9C; C; 13CC; # CHEROKEE SMALL LETTER SA +AB9D; C; 13CD; # CHEROKEE SMALL LETTER S +AB9E; C; 13CE; # CHEROKEE SMALL LETTER SE +AB9F; C; 13CF; # CHEROKEE SMALL LETTER SI +ABA0; C; 13D0; # CHEROKEE SMALL LETTER SO +ABA1; C; 13D1; # CHEROKEE SMALL LETTER SU +ABA2; C; 13D2; # CHEROKEE SMALL LETTER SV +ABA3; C; 13D3; # CHEROKEE SMALL LETTER DA +ABA4; C; 13D4; # CHEROKEE SMALL LETTER TA +ABA5; C; 13D5; # CHEROKEE SMALL LETTER DE +ABA6; C; 13D6; # CHEROKEE SMALL LETTER TE +ABA7; C; 13D7; # CHEROKEE SMALL LETTER DI +ABA8; C; 13D8; # CHEROKEE SMALL LETTER TI +ABA9; C; 13D9; # CHEROKEE SMALL LETTER DO +ABAA; C; 13DA; # CHEROKEE SMALL LETTER DU +ABAB; C; 13DB; # CHEROKEE SMALL LETTER DV +ABAC; C; 13DC; # CHEROKEE SMALL LETTER DLA +ABAD; C; 13DD; # CHEROKEE SMALL LETTER TLA +ABAE; C; 13DE; # CHEROKEE SMALL LETTER TLE +ABAF; C; 13DF; # CHEROKEE SMALL LETTER TLI +ABB0; C; 13E0; # CHEROKEE SMALL LETTER TLO +ABB1; C; 13E1; # CHEROKEE SMALL LETTER TLU +ABB2; C; 13E2; # CHEROKEE SMALL LETTER TLV +ABB3; C; 13E3; # CHEROKEE SMALL LETTER TSA +ABB4; C; 13E4; # CHEROKEE SMALL LETTER TSE +ABB5; C; 13E5; # CHEROKEE SMALL LETTER TSI +ABB6; C; 13E6; # CHEROKEE SMALL LETTER TSO +ABB7; C; 13E7; # CHEROKEE SMALL LETTER TSU +ABB8; C; 13E8; # CHEROKEE SMALL LETTER TSV +ABB9; C; 13E9; # CHEROKEE SMALL LETTER WA +ABBA; C; 13EA; # CHEROKEE SMALL LETTER WE +ABBB; C; 13EB; # CHEROKEE SMALL LETTER WI +ABBC; C; 13EC; # CHEROKEE SMALL LETTER WO +ABBD; C; 13ED; # CHEROKEE SMALL LETTER WU +ABBE; C; 13EE; # CHEROKEE SMALL LETTER WV +ABBF; C; 13EF; # CHEROKEE SMALL LETTER YA +FB00; F; 0066 0066; # LATIN SMALL LIGATURE FF +FB01; F; 0066 0069; # LATIN SMALL LIGATURE FI +FB02; F; 0066 006C; # LATIN SMALL LIGATURE FL +FB03; F; 0066 0066 0069; # LATIN SMALL LIGATURE FFI +FB04; F; 0066 0066 006C; # LATIN SMALL LIGATURE FFL +FB05; F; 0073 0074; # LATIN SMALL LIGATURE LONG S T +FB06; F; 0073 0074; # LATIN SMALL LIGATURE ST +FB13; F; 0574 0576; # ARMENIAN SMALL LIGATURE MEN NOW +FB14; F; 0574 0565; # ARMENIAN SMALL LIGATURE MEN ECH +FB15; F; 0574 056B; # ARMENIAN SMALL LIGATURE MEN INI +FB16; F; 057E 0576; # ARMENIAN SMALL LIGATURE VEW NOW +FB17; F; 0574 056D; # ARMENIAN SMALL LIGATURE MEN XEH +FF21; C; FF41; # FULLWIDTH LATIN CAPITAL LETTER A +FF22; C; FF42; # FULLWIDTH LATIN CAPITAL LETTER B +FF23; C; FF43; # FULLWIDTH LATIN CAPITAL LETTER C +FF24; C; FF44; # FULLWIDTH LATIN CAPITAL LETTER D +FF25; C; FF45; # FULLWIDTH LATIN CAPITAL LETTER E +FF26; C; FF46; # FULLWIDTH LATIN CAPITAL LETTER F +FF27; C; FF47; # FULLWIDTH LATIN CAPITAL LETTER G +FF28; C; FF48; # FULLWIDTH LATIN CAPITAL LETTER H +FF29; C; FF49; # FULLWIDTH LATIN CAPITAL LETTER I +FF2A; C; FF4A; # FULLWIDTH LATIN CAPITAL LETTER J +FF2B; C; FF4B; # FULLWIDTH LATIN CAPITAL LETTER K +FF2C; C; FF4C; # FULLWIDTH LATIN CAPITAL LETTER L +FF2D; C; FF4D; # FULLWIDTH LATIN CAPITAL LETTER M +FF2E; C; FF4E; # FULLWIDTH LATIN CAPITAL LETTER N +FF2F; C; FF4F; # FULLWIDTH LATIN CAPITAL LETTER O +FF30; C; FF50; # FULLWIDTH LATIN CAPITAL LETTER P +FF31; C; FF51; # FULLWIDTH LATIN CAPITAL LETTER Q +FF32; C; FF52; # FULLWIDTH LATIN CAPITAL LETTER R +FF33; C; FF53; # FULLWIDTH LATIN CAPITAL LETTER S +FF34; C; FF54; # FULLWIDTH LATIN CAPITAL LETTER T +FF35; C; FF55; # FULLWIDTH LATIN CAPITAL LETTER U +FF36; C; FF56; # FULLWIDTH LATIN CAPITAL LETTER V +FF37; C; FF57; # FULLWIDTH LATIN CAPITAL LETTER W +FF38; C; FF58; # FULLWIDTH LATIN CAPITAL LETTER X +FF39; C; FF59; # FULLWIDTH LATIN CAPITAL LETTER Y +FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z +10400; C; 10428; # DESERET CAPITAL LETTER LONG I +10401; C; 10429; # DESERET CAPITAL LETTER LONG E +10402; C; 1042A; # DESERET CAPITAL LETTER LONG A +10403; C; 1042B; # DESERET CAPITAL LETTER LONG AH +10404; C; 1042C; # DESERET CAPITAL LETTER LONG O +10405; C; 1042D; # DESERET CAPITAL LETTER LONG OO +10406; C; 1042E; # DESERET CAPITAL LETTER SHORT I +10407; C; 1042F; # DESERET CAPITAL LETTER SHORT E +10408; C; 10430; # DESERET CAPITAL LETTER SHORT A +10409; C; 10431; # DESERET CAPITAL LETTER SHORT AH +1040A; C; 10432; # DESERET CAPITAL LETTER SHORT O +1040B; C; 10433; # DESERET CAPITAL LETTER SHORT OO +1040C; C; 10434; # DESERET CAPITAL LETTER AY +1040D; C; 10435; # DESERET CAPITAL LETTER OW +1040E; C; 10436; # DESERET CAPITAL LETTER WU +1040F; C; 10437; # DESERET CAPITAL LETTER YEE +10410; C; 10438; # DESERET CAPITAL LETTER H +10411; C; 10439; # DESERET CAPITAL LETTER PEE +10412; C; 1043A; # DESERET CAPITAL LETTER BEE +10413; C; 1043B; # DESERET CAPITAL LETTER TEE +10414; C; 1043C; # DESERET CAPITAL LETTER DEE +10415; C; 1043D; # DESERET CAPITAL LETTER CHEE +10416; C; 1043E; # DESERET CAPITAL LETTER JEE +10417; C; 1043F; # DESERET CAPITAL LETTER KAY +10418; C; 10440; # DESERET CAPITAL LETTER GAY +10419; C; 10441; # DESERET CAPITAL LETTER EF +1041A; C; 10442; # DESERET CAPITAL LETTER VEE +1041B; C; 10443; # DESERET CAPITAL LETTER ETH +1041C; C; 10444; # DESERET CAPITAL LETTER THEE +1041D; C; 10445; # DESERET CAPITAL LETTER ES +1041E; C; 10446; # DESERET CAPITAL LETTER ZEE +1041F; C; 10447; # DESERET CAPITAL LETTER ESH +10420; C; 10448; # DESERET CAPITAL LETTER ZHEE +10421; C; 10449; # DESERET CAPITAL LETTER ER +10422; C; 1044A; # DESERET CAPITAL LETTER EL +10423; C; 1044B; # DESERET CAPITAL LETTER EM +10424; C; 1044C; # DESERET CAPITAL LETTER EN +10425; C; 1044D; # DESERET CAPITAL LETTER ENG +10426; C; 1044E; # DESERET CAPITAL LETTER OI +10427; C; 1044F; # DESERET CAPITAL LETTER EW +10C80; C; 10CC0; # OLD HUNGARIAN CAPITAL LETTER A +10C81; C; 10CC1; # OLD HUNGARIAN CAPITAL LETTER AA +10C82; C; 10CC2; # OLD HUNGARIAN CAPITAL LETTER EB +10C83; C; 10CC3; # OLD HUNGARIAN CAPITAL LETTER AMB +10C84; C; 10CC4; # OLD HUNGARIAN CAPITAL LETTER EC +10C85; C; 10CC5; # OLD HUNGARIAN CAPITAL LETTER ENC +10C86; C; 10CC6; # OLD HUNGARIAN CAPITAL LETTER ECS +10C87; C; 10CC7; # OLD HUNGARIAN CAPITAL LETTER ED +10C88; C; 10CC8; # OLD HUNGARIAN CAPITAL LETTER AND +10C89; C; 10CC9; # OLD HUNGARIAN CAPITAL LETTER E +10C8A; C; 10CCA; # OLD HUNGARIAN CAPITAL LETTER CLOSE E +10C8B; C; 10CCB; # OLD HUNGARIAN CAPITAL LETTER EE +10C8C; C; 10CCC; # OLD HUNGARIAN CAPITAL LETTER EF +10C8D; C; 10CCD; # OLD HUNGARIAN CAPITAL LETTER EG +10C8E; C; 10CCE; # OLD HUNGARIAN CAPITAL LETTER EGY +10C8F; C; 10CCF; # OLD HUNGARIAN CAPITAL LETTER EH +10C90; C; 10CD0; # OLD HUNGARIAN CAPITAL LETTER I +10C91; C; 10CD1; # OLD HUNGARIAN CAPITAL LETTER II +10C92; C; 10CD2; # OLD HUNGARIAN CAPITAL LETTER EJ +10C93; C; 10CD3; # OLD HUNGARIAN CAPITAL LETTER EK +10C94; C; 10CD4; # OLD HUNGARIAN CAPITAL LETTER AK +10C95; C; 10CD5; # OLD HUNGARIAN CAPITAL LETTER UNK +10C96; C; 10CD6; # OLD HUNGARIAN CAPITAL LETTER EL +10C97; C; 10CD7; # OLD HUNGARIAN CAPITAL LETTER ELY +10C98; C; 10CD8; # OLD HUNGARIAN CAPITAL LETTER EM +10C99; C; 10CD9; # OLD HUNGARIAN CAPITAL LETTER EN +10C9A; C; 10CDA; # OLD HUNGARIAN CAPITAL LETTER ENY +10C9B; C; 10CDB; # OLD HUNGARIAN CAPITAL LETTER O +10C9C; C; 10CDC; # OLD HUNGARIAN CAPITAL LETTER OO +10C9D; C; 10CDD; # OLD HUNGARIAN CAPITAL LETTER NIKOLSBURG OE +10C9E; C; 10CDE; # OLD HUNGARIAN CAPITAL LETTER RUDIMENTA OE +10C9F; C; 10CDF; # OLD HUNGARIAN CAPITAL LETTER OEE +10CA0; C; 10CE0; # OLD HUNGARIAN CAPITAL LETTER EP +10CA1; C; 10CE1; # OLD HUNGARIAN CAPITAL LETTER EMP +10CA2; C; 10CE2; # OLD HUNGARIAN CAPITAL LETTER ER +10CA3; C; 10CE3; # OLD HUNGARIAN CAPITAL LETTER SHORT ER +10CA4; C; 10CE4; # OLD HUNGARIAN CAPITAL LETTER ES +10CA5; C; 10CE5; # OLD HUNGARIAN CAPITAL LETTER ESZ +10CA6; C; 10CE6; # OLD HUNGARIAN CAPITAL LETTER ET +10CA7; C; 10CE7; # OLD HUNGARIAN CAPITAL LETTER ENT +10CA8; C; 10CE8; # OLD HUNGARIAN CAPITAL LETTER ETY +10CA9; C; 10CE9; # OLD HUNGARIAN CAPITAL LETTER ECH +10CAA; C; 10CEA; # OLD HUNGARIAN CAPITAL LETTER U +10CAB; C; 10CEB; # OLD HUNGARIAN CAPITAL LETTER UU +10CAC; C; 10CEC; # OLD HUNGARIAN CAPITAL LETTER NIKOLSBURG UE +10CAD; C; 10CED; # OLD HUNGARIAN CAPITAL LETTER RUDIMENTA UE +10CAE; C; 10CEE; # OLD HUNGARIAN CAPITAL LETTER EV +10CAF; C; 10CEF; # OLD HUNGARIAN CAPITAL LETTER EZ +10CB0; C; 10CF0; # OLD HUNGARIAN CAPITAL LETTER EZS +10CB1; C; 10CF1; # OLD HUNGARIAN CAPITAL LETTER ENT-SHAPED SIGN +10CB2; C; 10CF2; # OLD HUNGARIAN CAPITAL LETTER US +118A0; C; 118C0; # WARANG CITI CAPITAL LETTER NGAA +118A1; C; 118C1; # WARANG CITI CAPITAL LETTER A +118A2; C; 118C2; # WARANG CITI CAPITAL LETTER WI +118A3; C; 118C3; # WARANG CITI CAPITAL LETTER YU +118A4; C; 118C4; # WARANG CITI CAPITAL LETTER YA +118A5; C; 118C5; # WARANG CITI CAPITAL LETTER YO +118A6; C; 118C6; # WARANG CITI CAPITAL LETTER II +118A7; C; 118C7; # WARANG CITI CAPITAL LETTER UU +118A8; C; 118C8; # WARANG CITI CAPITAL LETTER E +118A9; C; 118C9; # WARANG CITI CAPITAL LETTER O +118AA; C; 118CA; # WARANG CITI CAPITAL LETTER ANG +118AB; C; 118CB; # WARANG CITI CAPITAL LETTER GA +118AC; C; 118CC; # WARANG CITI CAPITAL LETTER KO +118AD; C; 118CD; # WARANG CITI CAPITAL LETTER ENY +118AE; C; 118CE; # WARANG CITI CAPITAL LETTER YUJ +118AF; C; 118CF; # WARANG CITI CAPITAL LETTER UC +118B0; C; 118D0; # WARANG CITI CAPITAL LETTER ENN +118B1; C; 118D1; # WARANG CITI CAPITAL LETTER ODD +118B2; C; 118D2; # WARANG CITI CAPITAL LETTER TTE +118B3; C; 118D3; # WARANG CITI CAPITAL LETTER NUNG +118B4; C; 118D4; # WARANG CITI CAPITAL LETTER DA +118B5; C; 118D5; # WARANG CITI CAPITAL LETTER AT +118B6; C; 118D6; # WARANG CITI CAPITAL LETTER AM +118B7; C; 118D7; # WARANG CITI CAPITAL LETTER BU +118B8; C; 118D8; # WARANG CITI CAPITAL LETTER PU +118B9; C; 118D9; # WARANG CITI CAPITAL LETTER HIYO +118BA; C; 118DA; # WARANG CITI CAPITAL LETTER HOLO +118BB; C; 118DB; # WARANG CITI CAPITAL LETTER HORR +118BC; C; 118DC; # WARANG CITI CAPITAL LETTER HAR +118BD; C; 118DD; # WARANG CITI CAPITAL LETTER SSUU +118BE; C; 118DE; # WARANG CITI CAPITAL LETTER SII +118BF; C; 118DF; # WARANG CITI CAPITAL LETTER VIYO +# +# EOF diff --git a/src/third_party/unicode-8.0.0/PropList.txt b/src/third_party/unicode-8.0.0/PropList.txt new file mode 100644 index 00000000000..2eb2926e072 --- /dev/null +++ b/src/third_party/unicode-8.0.0/PropList.txt @@ -0,0 +1,1525 @@ +# PropList-8.0.0.txt +# Date: 2015-05-16, 17:50:38 GMT [MD] +# +# Unicode Character Database +# Copyright (c) 1991-2015 Unicode, Inc. +# For terms of use, see http://www.unicode.org/terms_of_use.html +# For documentation, see http://www.unicode.org/reports/tr44/ + +# ================================================ + +0009..000D ; White_Space # Cc [5] <control-0009>..<control-000D> +0020 ; White_Space # Zs SPACE +0085 ; White_Space # Cc <control-0085> +00A0 ; White_Space # Zs NO-BREAK SPACE +1680 ; White_Space # Zs OGHAM SPACE MARK +2000..200A ; White_Space # Zs [11] EN QUAD..HAIR SPACE +2028 ; White_Space # Zl LINE SEPARATOR +2029 ; White_Space # Zp PARAGRAPH SEPARATOR +202F ; White_Space # Zs NARROW NO-BREAK SPACE +205F ; White_Space # Zs MEDIUM MATHEMATICAL SPACE +3000 ; White_Space # Zs IDEOGRAPHIC SPACE + +# Total code points: 25 + +# ================================================ + +061C ; Bidi_Control # Cf ARABIC LETTER MARK +200E..200F ; Bidi_Control # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK +202A..202E ; Bidi_Control # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE +2066..2069 ; Bidi_Control # Cf [4] LEFT-TO-RIGHT ISOLATE..POP DIRECTIONAL ISOLATE + +# Total code points: 12 + +# ================================================ + +200C..200D ; Join_Control # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER + +# Total code points: 2 + +# ================================================ + +002D ; Dash # Pd HYPHEN-MINUS +058A ; Dash # Pd ARMENIAN HYPHEN +05BE ; Dash # Pd HEBREW PUNCTUATION MAQAF +1400 ; Dash # Pd CANADIAN SYLLABICS HYPHEN +1806 ; Dash # Pd MONGOLIAN TODO SOFT HYPHEN +2010..2015 ; Dash # Pd [6] HYPHEN..HORIZONTAL BAR +2053 ; Dash # Po SWUNG DASH +207B ; Dash # Sm SUPERSCRIPT MINUS +208B ; Dash # Sm SUBSCRIPT MINUS +2212 ; Dash # Sm MINUS SIGN +2E17 ; Dash # Pd DOUBLE OBLIQUE HYPHEN +2E1A ; Dash # Pd HYPHEN WITH DIAERESIS +2E3A..2E3B ; Dash # Pd [2] TWO-EM DASH..THREE-EM DASH +2E40 ; Dash # Pd DOUBLE HYPHEN +301C ; Dash # Pd WAVE DASH +3030 ; Dash # Pd WAVY DASH +30A0 ; Dash # Pd KATAKANA-HIRAGANA DOUBLE HYPHEN +FE31..FE32 ; Dash # Pd [2] PRESENTATION FORM FOR VERTICAL EM DASH..PRESENTATION FORM FOR VERTICAL EN DASH +FE58 ; Dash # Pd SMALL EM DASH +FE63 ; Dash # Pd SMALL HYPHEN-MINUS +FF0D ; Dash # Pd FULLWIDTH HYPHEN-MINUS + +# Total code points: 28 + +# ================================================ + +002D ; Hyphen # Pd HYPHEN-MINUS +00AD ; Hyphen # Cf SOFT HYPHEN +058A ; Hyphen # Pd ARMENIAN HYPHEN +1806 ; Hyphen # Pd MONGOLIAN TODO SOFT HYPHEN +2010..2011 ; Hyphen # Pd [2] HYPHEN..NON-BREAKING HYPHEN +2E17 ; Hyphen # Pd DOUBLE OBLIQUE HYPHEN +30FB ; Hyphen # Po KATAKANA MIDDLE DOT +FE63 ; Hyphen # Pd SMALL HYPHEN-MINUS +FF0D ; Hyphen # Pd FULLWIDTH HYPHEN-MINUS +FF65 ; Hyphen # Po HALFWIDTH KATAKANA MIDDLE DOT + +# Total code points: 11 + +# ================================================ + +0022 ; Quotation_Mark # Po QUOTATION MARK +0027 ; Quotation_Mark # Po APOSTROPHE +00AB ; Quotation_Mark # Pi LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +00BB ; Quotation_Mark # Pf RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +2018 ; Quotation_Mark # Pi LEFT SINGLE QUOTATION MARK +2019 ; Quotation_Mark # Pf RIGHT SINGLE QUOTATION MARK +201A ; Quotation_Mark # Ps SINGLE LOW-9 QUOTATION MARK +201B..201C ; Quotation_Mark # Pi [2] SINGLE HIGH-REVERSED-9 QUOTATION MARK..LEFT DOUBLE QUOTATION MARK +201D ; Quotation_Mark # Pf RIGHT DOUBLE QUOTATION MARK +201E ; Quotation_Mark # Ps DOUBLE LOW-9 QUOTATION MARK +201F ; Quotation_Mark # Pi DOUBLE HIGH-REVERSED-9 QUOTATION MARK +2039 ; Quotation_Mark # Pi SINGLE LEFT-POINTING ANGLE QUOTATION MARK +203A ; Quotation_Mark # Pf SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +2E42 ; Quotation_Mark # Ps DOUBLE LOW-REVERSED-9 QUOTATION MARK +300C ; Quotation_Mark # Ps LEFT CORNER BRACKET +300D ; Quotation_Mark # Pe RIGHT CORNER BRACKET +300E ; Quotation_Mark # Ps LEFT WHITE CORNER BRACKET +300F ; Quotation_Mark # Pe RIGHT WHITE CORNER BRACKET +301D ; Quotation_Mark # Ps REVERSED DOUBLE PRIME QUOTATION MARK +301E..301F ; Quotation_Mark # Pe [2] DOUBLE PRIME QUOTATION MARK..LOW DOUBLE PRIME QUOTATION MARK +FE41 ; Quotation_Mark # Ps PRESENTATION FORM FOR VERTICAL LEFT CORNER BRACKET +FE42 ; Quotation_Mark # Pe PRESENTATION FORM FOR VERTICAL RIGHT CORNER BRACKET +FE43 ; Quotation_Mark # Ps PRESENTATION FORM FOR VERTICAL LEFT WHITE CORNER BRACKET +FE44 ; Quotation_Mark # Pe PRESENTATION FORM FOR VERTICAL RIGHT WHITE CORNER BRACKET +FF02 ; Quotation_Mark # Po FULLWIDTH QUOTATION MARK +FF07 ; Quotation_Mark # Po FULLWIDTH APOSTROPHE +FF62 ; Quotation_Mark # Ps HALFWIDTH LEFT CORNER BRACKET +FF63 ; Quotation_Mark # Pe HALFWIDTH RIGHT CORNER BRACKET + +# Total code points: 30 + +# ================================================ + +0021 ; Terminal_Punctuation # Po EXCLAMATION MARK +002C ; Terminal_Punctuation # Po COMMA +002E ; Terminal_Punctuation # Po FULL STOP +003A..003B ; Terminal_Punctuation # Po [2] COLON..SEMICOLON +003F ; Terminal_Punctuation # Po QUESTION MARK +037E ; Terminal_Punctuation # Po GREEK QUESTION MARK +0387 ; Terminal_Punctuation # Po GREEK ANO TELEIA +0589 ; Terminal_Punctuation # Po ARMENIAN FULL STOP +05C3 ; Terminal_Punctuation # Po HEBREW PUNCTUATION SOF PASUQ +060C ; Terminal_Punctuation # Po ARABIC COMMA +061B ; Terminal_Punctuation # Po ARABIC SEMICOLON +061F ; Terminal_Punctuation # Po ARABIC QUESTION MARK +06D4 ; Terminal_Punctuation # Po ARABIC FULL STOP +0700..070A ; Terminal_Punctuation # Po [11] SYRIAC END OF PARAGRAPH..SYRIAC CONTRACTION +070C ; Terminal_Punctuation # Po SYRIAC HARKLEAN METOBELUS +07F8..07F9 ; Terminal_Punctuation # Po [2] NKO COMMA..NKO EXCLAMATION MARK +0830..083E ; Terminal_Punctuation # Po [15] SAMARITAN PUNCTUATION NEQUDAA..SAMARITAN PUNCTUATION ANNAAU +085E ; Terminal_Punctuation # Po MANDAIC PUNCTUATION +0964..0965 ; Terminal_Punctuation # Po [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA +0E5A..0E5B ; Terminal_Punctuation # Po [2] THAI CHARACTER ANGKHANKHU..THAI CHARACTER KHOMUT +0F08 ; Terminal_Punctuation # Po TIBETAN MARK SBRUL SHAD +0F0D..0F12 ; Terminal_Punctuation # Po [6] TIBETAN MARK SHAD..TIBETAN MARK RGYA GRAM SHAD +104A..104B ; Terminal_Punctuation # Po [2] MYANMAR SIGN LITTLE SECTION..MYANMAR SIGN SECTION +1361..1368 ; Terminal_Punctuation # Po [8] ETHIOPIC WORDSPACE..ETHIOPIC PARAGRAPH SEPARATOR +166D..166E ; Terminal_Punctuation # Po [2] CANADIAN SYLLABICS CHI SIGN..CANADIAN SYLLABICS FULL STOP +16EB..16ED ; Terminal_Punctuation # Po [3] RUNIC SINGLE PUNCTUATION..RUNIC CROSS PUNCTUATION +1735..1736 ; Terminal_Punctuation # Po [2] PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DOUBLE PUNCTUATION +17D4..17D6 ; Terminal_Punctuation # Po [3] KHMER SIGN KHAN..KHMER SIGN CAMNUC PII KUUH +17DA ; Terminal_Punctuation # Po KHMER SIGN KOOMUUT +1802..1805 ; Terminal_Punctuation # Po [4] MONGOLIAN COMMA..MONGOLIAN FOUR DOTS +1808..1809 ; Terminal_Punctuation # Po [2] MONGOLIAN MANCHU COMMA..MONGOLIAN MANCHU FULL STOP +1944..1945 ; Terminal_Punctuation # Po [2] LIMBU EXCLAMATION MARK..LIMBU QUESTION MARK +1AA8..1AAB ; Terminal_Punctuation # Po [4] TAI THAM SIGN KAAN..TAI THAM SIGN SATKAANKUU +1B5A..1B5B ; Terminal_Punctuation # Po [2] BALINESE PANTI..BALINESE PAMADA +1B5D..1B5F ; Terminal_Punctuation # Po [3] BALINESE CARIK PAMUNGKAH..BALINESE CARIK PAREREN +1C3B..1C3F ; Terminal_Punctuation # Po [5] LEPCHA PUNCTUATION TA-ROL..LEPCHA PUNCTUATION TSHOOK +1C7E..1C7F ; Terminal_Punctuation # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD +203C..203D ; Terminal_Punctuation # Po [2] DOUBLE EXCLAMATION MARK..INTERROBANG +2047..2049 ; Terminal_Punctuation # Po [3] DOUBLE QUESTION MARK..EXCLAMATION QUESTION MARK +2E2E ; Terminal_Punctuation # Po REVERSED QUESTION MARK +2E3C ; Terminal_Punctuation # Po STENOGRAPHIC FULL STOP +2E41 ; Terminal_Punctuation # Po REVERSED COMMA +3001..3002 ; Terminal_Punctuation # Po [2] IDEOGRAPHIC COMMA..IDEOGRAPHIC FULL STOP +A4FE..A4FF ; Terminal_Punctuation # Po [2] LISU PUNCTUATION COMMA..LISU PUNCTUATION FULL STOP +A60D..A60F ; Terminal_Punctuation # Po [3] VAI COMMA..VAI QUESTION MARK +A6F3..A6F7 ; Terminal_Punctuation # Po [5] BAMUM FULL STOP..BAMUM QUESTION MARK +A876..A877 ; Terminal_Punctuation # Po [2] PHAGS-PA MARK SHAD..PHAGS-PA MARK DOUBLE SHAD +A8CE..A8CF ; Terminal_Punctuation # Po [2] SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA +A92F ; Terminal_Punctuation # Po KAYAH LI SIGN SHYA +A9C7..A9C9 ; Terminal_Punctuation # Po [3] JAVANESE PADA PANGKAT..JAVANESE PADA LUNGSI +AA5D..AA5F ; Terminal_Punctuation # Po [3] CHAM PUNCTUATION DANDA..CHAM PUNCTUATION TRIPLE DANDA +AADF ; Terminal_Punctuation # Po TAI VIET SYMBOL KOI KOI +AAF0..AAF1 ; Terminal_Punctuation # Po [2] MEETEI MAYEK CHEIKHAN..MEETEI MAYEK AHANG KHUDAM +ABEB ; Terminal_Punctuation # Po MEETEI MAYEK CHEIKHEI +FE50..FE52 ; Terminal_Punctuation # Po [3] SMALL COMMA..SMALL FULL STOP +FE54..FE57 ; Terminal_Punctuation # Po [4] SMALL SEMICOLON..SMALL EXCLAMATION MARK +FF01 ; Terminal_Punctuation # Po FULLWIDTH EXCLAMATION MARK +FF0C ; Terminal_Punctuation # Po FULLWIDTH COMMA +FF0E ; Terminal_Punctuation # Po FULLWIDTH FULL STOP +FF1A..FF1B ; Terminal_Punctuation # Po [2] FULLWIDTH COLON..FULLWIDTH SEMICOLON +FF1F ; Terminal_Punctuation # Po FULLWIDTH QUESTION MARK +FF61 ; Terminal_Punctuation # Po HALFWIDTH IDEOGRAPHIC FULL STOP +FF64 ; Terminal_Punctuation # Po HALFWIDTH IDEOGRAPHIC COMMA +1039F ; Terminal_Punctuation # Po UGARITIC WORD DIVIDER +103D0 ; Terminal_Punctuation # Po OLD PERSIAN WORD DIVIDER +10857 ; Terminal_Punctuation # Po IMPERIAL ARAMAIC SECTION SIGN +1091F ; Terminal_Punctuation # Po PHOENICIAN WORD SEPARATOR +10A56..10A57 ; Terminal_Punctuation # Po [2] KHAROSHTHI PUNCTUATION DANDA..KHAROSHTHI PUNCTUATION DOUBLE DANDA +10AF0..10AF5 ; Terminal_Punctuation # Po [6] MANICHAEAN PUNCTUATION STAR..MANICHAEAN PUNCTUATION TWO DOTS +10B3A..10B3F ; Terminal_Punctuation # Po [6] TINY TWO DOTS OVER ONE DOT PUNCTUATION..LARGE ONE RING OVER TWO RINGS PUNCTUATION +10B99..10B9C ; Terminal_Punctuation # Po [4] PSALTER PAHLAVI SECTION MARK..PSALTER PAHLAVI FOUR DOTS WITH DOT +11047..1104D ; Terminal_Punctuation # Po [7] BRAHMI DANDA..BRAHMI PUNCTUATION LOTUS +110BE..110C1 ; Terminal_Punctuation # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA +11141..11143 ; Terminal_Punctuation # Po [3] CHAKMA DANDA..CHAKMA QUESTION MARK +111C5..111C6 ; Terminal_Punctuation # Po [2] SHARADA DANDA..SHARADA DOUBLE DANDA +111CD ; Terminal_Punctuation # Po SHARADA SUTRA MARK +111DE..111DF ; Terminal_Punctuation # Po [2] SHARADA SECTION MARK-1..SHARADA SECTION MARK-2 +11238..1123C ; Terminal_Punctuation # Po [5] KHOJKI DANDA..KHOJKI DOUBLE SECTION MARK +112A9 ; Terminal_Punctuation # Po MULTANI SECTION MARK +115C2..115C5 ; Terminal_Punctuation # Po [4] SIDDHAM DANDA..SIDDHAM SEPARATOR BAR +115C9..115D7 ; Terminal_Punctuation # Po [15] SIDDHAM END OF TEXT MARK..SIDDHAM SECTION MARK WITH CIRCLES AND FOUR ENCLOSURES +11641..11642 ; Terminal_Punctuation # Po [2] MODI DANDA..MODI DOUBLE DANDA +1173C..1173E ; Terminal_Punctuation # Po [3] AHOM SIGN SMALL SECTION..AHOM SIGN RULAI +12470..12474 ; Terminal_Punctuation # Po [5] CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER..CUNEIFORM PUNCTUATION SIGN DIAGONAL QUADCOLON +16A6E..16A6F ; Terminal_Punctuation # Po [2] MRO DANDA..MRO DOUBLE DANDA +16AF5 ; Terminal_Punctuation # Po BASSA VAH FULL STOP +16B37..16B39 ; Terminal_Punctuation # Po [3] PAHAWH HMONG SIGN VOS THOM..PAHAWH HMONG SIGN CIM CHEEM +16B44 ; Terminal_Punctuation # Po PAHAWH HMONG SIGN XAUS +1BC9F ; Terminal_Punctuation # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP +1DA87..1DA8A ; Terminal_Punctuation # Po [4] SIGNWRITING COMMA..SIGNWRITING COLON + +# Total code points: 238 + +# ================================================ + +005E ; Other_Math # Sk CIRCUMFLEX ACCENT +03D0..03D2 ; Other_Math # L& [3] GREEK BETA SYMBOL..GREEK UPSILON WITH HOOK SYMBOL +03D5 ; Other_Math # L& GREEK PHI SYMBOL +03F0..03F1 ; Other_Math # L& [2] GREEK KAPPA SYMBOL..GREEK RHO SYMBOL +03F4..03F5 ; Other_Math # L& [2] GREEK CAPITAL THETA SYMBOL..GREEK LUNATE EPSILON SYMBOL +2016 ; Other_Math # Po DOUBLE VERTICAL LINE +2032..2034 ; Other_Math # Po [3] PRIME..TRIPLE PRIME +2040 ; Other_Math # Pc CHARACTER TIE +2061..2064 ; Other_Math # Cf [4] FUNCTION APPLICATION..INVISIBLE PLUS +207D ; Other_Math # Ps SUPERSCRIPT LEFT PARENTHESIS +207E ; Other_Math # Pe SUPERSCRIPT RIGHT PARENTHESIS +208D ; Other_Math # Ps SUBSCRIPT LEFT PARENTHESIS +208E ; Other_Math # Pe SUBSCRIPT RIGHT PARENTHESIS +20D0..20DC ; Other_Math # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE +20E1 ; Other_Math # Mn COMBINING LEFT RIGHT ARROW ABOVE +20E5..20E6 ; Other_Math # Mn [2] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING DOUBLE VERTICAL STROKE OVERLAY +20EB..20EF ; Other_Math # Mn [5] COMBINING LONG DOUBLE SOLIDUS OVERLAY..COMBINING RIGHT ARROW BELOW +2102 ; Other_Math # L& DOUBLE-STRUCK CAPITAL C +2107 ; Other_Math # L& EULER CONSTANT +210A..2113 ; Other_Math # L& [10] SCRIPT SMALL G..SCRIPT SMALL L +2115 ; Other_Math # L& DOUBLE-STRUCK CAPITAL N +2119..211D ; Other_Math # L& [5] DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R +2124 ; Other_Math # L& DOUBLE-STRUCK CAPITAL Z +2128 ; Other_Math # L& BLACK-LETTER CAPITAL Z +2129 ; Other_Math # So TURNED GREEK SMALL LETTER IOTA +212C..212D ; Other_Math # L& [2] SCRIPT CAPITAL B..BLACK-LETTER CAPITAL C +212F..2131 ; Other_Math # L& [3] SCRIPT SMALL E..SCRIPT CAPITAL F +2133..2134 ; Other_Math # L& [2] SCRIPT CAPITAL M..SCRIPT SMALL O +2135..2138 ; Other_Math # Lo [4] ALEF SYMBOL..DALET SYMBOL +213C..213F ; Other_Math # L& [4] DOUBLE-STRUCK SMALL PI..DOUBLE-STRUCK CAPITAL PI +2145..2149 ; Other_Math # L& [5] DOUBLE-STRUCK ITALIC CAPITAL D..DOUBLE-STRUCK ITALIC SMALL J +2195..2199 ; Other_Math # So [5] UP DOWN ARROW..SOUTH WEST ARROW +219C..219F ; Other_Math # So [4] LEFTWARDS WAVE ARROW..UPWARDS TWO HEADED ARROW +21A1..21A2 ; Other_Math # So [2] DOWNWARDS TWO HEADED ARROW..LEFTWARDS ARROW WITH TAIL +21A4..21A5 ; Other_Math # So [2] LEFTWARDS ARROW FROM BAR..UPWARDS ARROW FROM BAR +21A7 ; Other_Math # So DOWNWARDS ARROW FROM BAR +21A9..21AD ; Other_Math # So [5] LEFTWARDS ARROW WITH HOOK..LEFT RIGHT WAVE ARROW +21B0..21B1 ; Other_Math # So [2] UPWARDS ARROW WITH TIP LEFTWARDS..UPWARDS ARROW WITH TIP RIGHTWARDS +21B6..21B7 ; Other_Math # So [2] ANTICLOCKWISE TOP SEMICIRCLE ARROW..CLOCKWISE TOP SEMICIRCLE ARROW +21BC..21CD ; Other_Math # So [18] LEFTWARDS HARPOON WITH BARB UPWARDS..LEFTWARDS DOUBLE ARROW WITH STROKE +21D0..21D1 ; Other_Math # So [2] LEFTWARDS DOUBLE ARROW..UPWARDS DOUBLE ARROW +21D3 ; Other_Math # So DOWNWARDS DOUBLE ARROW +21D5..21DB ; Other_Math # So [7] UP DOWN DOUBLE ARROW..RIGHTWARDS TRIPLE ARROW +21DD ; Other_Math # So RIGHTWARDS SQUIGGLE ARROW +21E4..21E5 ; Other_Math # So [2] LEFTWARDS ARROW TO BAR..RIGHTWARDS ARROW TO BAR +2308 ; Other_Math # Ps LEFT CEILING +2309 ; Other_Math # Pe RIGHT CEILING +230A ; Other_Math # Ps LEFT FLOOR +230B ; Other_Math # Pe RIGHT FLOOR +23B4..23B5 ; Other_Math # So [2] TOP SQUARE BRACKET..BOTTOM SQUARE BRACKET +23B7 ; Other_Math # So RADICAL SYMBOL BOTTOM +23D0 ; Other_Math # So VERTICAL LINE EXTENSION +23E2 ; Other_Math # So WHITE TRAPEZIUM +25A0..25A1 ; Other_Math # So [2] BLACK SQUARE..WHITE SQUARE +25AE..25B6 ; Other_Math # So [9] BLACK VERTICAL RECTANGLE..BLACK RIGHT-POINTING TRIANGLE +25BC..25C0 ; Other_Math # So [5] BLACK DOWN-POINTING TRIANGLE..BLACK LEFT-POINTING TRIANGLE +25C6..25C7 ; Other_Math # So [2] BLACK DIAMOND..WHITE DIAMOND +25CA..25CB ; Other_Math # So [2] LOZENGE..WHITE CIRCLE +25CF..25D3 ; Other_Math # So [5] BLACK CIRCLE..CIRCLE WITH UPPER HALF BLACK +25E2 ; Other_Math # So BLACK LOWER RIGHT TRIANGLE +25E4 ; Other_Math # So BLACK UPPER LEFT TRIANGLE +25E7..25EC ; Other_Math # So [6] SQUARE WITH LEFT HALF BLACK..WHITE UP-POINTING TRIANGLE WITH DOT +2605..2606 ; Other_Math # So [2] BLACK STAR..WHITE STAR +2640 ; Other_Math # So FEMALE SIGN +2642 ; Other_Math # So MALE SIGN +2660..2663 ; Other_Math # So [4] BLACK SPADE SUIT..BLACK CLUB SUIT +266D..266E ; Other_Math # So [2] MUSIC FLAT SIGN..MUSIC NATURAL SIGN +27C5 ; Other_Math # Ps LEFT S-SHAPED BAG DELIMITER +27C6 ; Other_Math # Pe RIGHT S-SHAPED BAG DELIMITER +27E6 ; Other_Math # Ps MATHEMATICAL LEFT WHITE SQUARE BRACKET +27E7 ; Other_Math # Pe MATHEMATICAL RIGHT WHITE SQUARE BRACKET +27E8 ; Other_Math # Ps MATHEMATICAL LEFT ANGLE BRACKET +27E9 ; Other_Math # Pe MATHEMATICAL RIGHT ANGLE BRACKET +27EA ; Other_Math # Ps MATHEMATICAL LEFT DOUBLE ANGLE BRACKET +27EB ; Other_Math # Pe MATHEMATICAL RIGHT DOUBLE ANGLE BRACKET +27EC ; Other_Math # Ps MATHEMATICAL LEFT WHITE TORTOISE SHELL BRACKET +27ED ; Other_Math # Pe MATHEMATICAL RIGHT WHITE TORTOISE SHELL BRACKET +27EE ; Other_Math # Ps MATHEMATICAL LEFT FLATTENED PARENTHESIS +27EF ; Other_Math # Pe MATHEMATICAL RIGHT FLATTENED PARENTHESIS +2983 ; Other_Math # Ps LEFT WHITE CURLY BRACKET +2984 ; Other_Math # Pe RIGHT WHITE CURLY BRACKET +2985 ; Other_Math # Ps LEFT WHITE PARENTHESIS +2986 ; Other_Math # Pe RIGHT WHITE PARENTHESIS +2987 ; Other_Math # Ps Z NOTATION LEFT IMAGE BRACKET +2988 ; Other_Math # Pe Z NOTATION RIGHT IMAGE BRACKET +2989 ; Other_Math # Ps Z NOTATION LEFT BINDING BRACKET +298A ; Other_Math # Pe Z NOTATION RIGHT BINDING BRACKET +298B ; Other_Math # Ps LEFT SQUARE BRACKET WITH UNDERBAR +298C ; Other_Math # Pe RIGHT SQUARE BRACKET WITH UNDERBAR +298D ; Other_Math # Ps LEFT SQUARE BRACKET WITH TICK IN TOP CORNER +298E ; Other_Math # Pe RIGHT SQUARE BRACKET WITH TICK IN BOTTOM CORNER +298F ; Other_Math # Ps LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER +2990 ; Other_Math # Pe RIGHT SQUARE BRACKET WITH TICK IN TOP CORNER +2991 ; Other_Math # Ps LEFT ANGLE BRACKET WITH DOT +2992 ; Other_Math # Pe RIGHT ANGLE BRACKET WITH DOT +2993 ; Other_Math # Ps LEFT ARC LESS-THAN BRACKET +2994 ; Other_Math # Pe RIGHT ARC GREATER-THAN BRACKET +2995 ; Other_Math # Ps DOUBLE LEFT ARC GREATER-THAN BRACKET +2996 ; Other_Math # Pe DOUBLE RIGHT ARC LESS-THAN BRACKET +2997 ; Other_Math # Ps LEFT BLACK TORTOISE SHELL BRACKET +2998 ; Other_Math # Pe RIGHT BLACK TORTOISE SHELL BRACKET +29D8 ; Other_Math # Ps LEFT WIGGLY FENCE +29D9 ; Other_Math # Pe RIGHT WIGGLY FENCE +29DA ; Other_Math # Ps LEFT DOUBLE WIGGLY FENCE +29DB ; Other_Math # Pe RIGHT DOUBLE WIGGLY FENCE +29FC ; Other_Math # Ps LEFT-POINTING CURVED ANGLE BRACKET +29FD ; Other_Math # Pe RIGHT-POINTING CURVED ANGLE BRACKET +FE61 ; Other_Math # Po SMALL ASTERISK +FE63 ; Other_Math # Pd SMALL HYPHEN-MINUS +FE68 ; Other_Math # Po SMALL REVERSE SOLIDUS +FF3C ; Other_Math # Po FULLWIDTH REVERSE SOLIDUS +FF3E ; Other_Math # Sk FULLWIDTH CIRCUMFLEX ACCENT +1D400..1D454 ; Other_Math # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G +1D456..1D49C ; Other_Math # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A +1D49E..1D49F ; Other_Math # L& [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D +1D4A2 ; Other_Math # L& MATHEMATICAL SCRIPT CAPITAL G +1D4A5..1D4A6 ; Other_Math # L& [2] MATHEMATICAL SCRIPT CAPITAL J..MATHEMATICAL SCRIPT CAPITAL K +1D4A9..1D4AC ; Other_Math # L& [4] MATHEMATICAL SCRIPT CAPITAL N..MATHEMATICAL SCRIPT CAPITAL Q +1D4AE..1D4B9 ; Other_Math # L& [12] MATHEMATICAL SCRIPT CAPITAL S..MATHEMATICAL SCRIPT SMALL D +1D4BB ; Other_Math # L& MATHEMATICAL SCRIPT SMALL F +1D4BD..1D4C3 ; Other_Math # L& [7] MATHEMATICAL SCRIPT SMALL H..MATHEMATICAL SCRIPT SMALL N +1D4C5..1D505 ; Other_Math # L& [65] MATHEMATICAL SCRIPT SMALL P..MATHEMATICAL FRAKTUR CAPITAL B +1D507..1D50A ; Other_Math # L& [4] MATHEMATICAL FRAKTUR CAPITAL D..MATHEMATICAL FRAKTUR CAPITAL G +1D50D..1D514 ; Other_Math # L& [8] MATHEMATICAL FRAKTUR CAPITAL J..MATHEMATICAL FRAKTUR CAPITAL Q +1D516..1D51C ; Other_Math # L& [7] MATHEMATICAL FRAKTUR CAPITAL S..MATHEMATICAL FRAKTUR CAPITAL Y +1D51E..1D539 ; Other_Math # L& [28] MATHEMATICAL FRAKTUR SMALL A..MATHEMATICAL DOUBLE-STRUCK CAPITAL B +1D53B..1D53E ; Other_Math # L& [4] MATHEMATICAL DOUBLE-STRUCK CAPITAL D..MATHEMATICAL DOUBLE-STRUCK CAPITAL G +1D540..1D544 ; Other_Math # L& [5] MATHEMATICAL DOUBLE-STRUCK CAPITAL I..MATHEMATICAL DOUBLE-STRUCK CAPITAL M +1D546 ; Other_Math # L& MATHEMATICAL DOUBLE-STRUCK CAPITAL O +1D54A..1D550 ; Other_Math # L& [7] MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEMATICAL DOUBLE-STRUCK CAPITAL Y +1D552..1D6A5 ; Other_Math # L& [340] MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMATICAL ITALIC SMALL DOTLESS J +1D6A8..1D6C0 ; Other_Math # L& [25] MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICAL BOLD CAPITAL OMEGA +1D6C2..1D6DA ; Other_Math # L& [25] MATHEMATICAL BOLD SMALL ALPHA..MATHEMATICAL BOLD SMALL OMEGA +1D6DC..1D6FA ; Other_Math # L& [31] MATHEMATICAL BOLD EPSILON SYMBOL..MATHEMATICAL ITALIC CAPITAL OMEGA +1D6FC..1D714 ; Other_Math # L& [25] MATHEMATICAL ITALIC SMALL ALPHA..MATHEMATICAL ITALIC SMALL OMEGA +1D716..1D734 ; Other_Math # L& [31] MATHEMATICAL ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD ITALIC CAPITAL OMEGA +1D736..1D74E ; Other_Math # L& [25] MATHEMATICAL BOLD ITALIC SMALL ALPHA..MATHEMATICAL BOLD ITALIC SMALL OMEGA +1D750..1D76E ; Other_Math # L& [31] MATHEMATICAL BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA +1D770..1D788 ; Other_Math # L& [25] MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA +1D78A..1D7A8 ; Other_Math # L& [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA +1D7AA..1D7C2 ; Other_Math # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA +1D7C4..1D7CB ; Other_Math # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA +1D7CE..1D7FF ; Other_Math # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE +1EE00..1EE03 ; Other_Math # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; Other_Math # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; Other_Math # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; Other_Math # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; Other_Math # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; Other_Math # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; Other_Math # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; Other_Math # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; Other_Math # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; Other_Math # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; Other_Math # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; Other_Math # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; Other_Math # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; Other_Math # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; Other_Math # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; Other_Math # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; Other_Math # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; Other_Math # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; Other_Math # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; Other_Math # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; Other_Math # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; Other_Math # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; Other_Math # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; Other_Math # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; Other_Math # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; Other_Math # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; Other_Math # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; Other_Math # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; Other_Math # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; Other_Math # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; Other_Math # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; Other_Math # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; Other_Math # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN + +# Total code points: 1362 + +# ================================================ + +0030..0039 ; Hex_Digit # Nd [10] DIGIT ZERO..DIGIT NINE +0041..0046 ; Hex_Digit # L& [6] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER F +0061..0066 ; Hex_Digit # L& [6] LATIN SMALL LETTER A..LATIN SMALL LETTER F +FF10..FF19 ; Hex_Digit # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE +FF21..FF26 ; Hex_Digit # L& [6] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER F +FF41..FF46 ; Hex_Digit # L& [6] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER F + +# Total code points: 44 + +# ================================================ + +0030..0039 ; ASCII_Hex_Digit # Nd [10] DIGIT ZERO..DIGIT NINE +0041..0046 ; ASCII_Hex_Digit # L& [6] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER F +0061..0066 ; ASCII_Hex_Digit # L& [6] LATIN SMALL LETTER A..LATIN SMALL LETTER F + +# Total code points: 22 + +# ================================================ + +0345 ; Other_Alphabetic # Mn COMBINING GREEK YPOGEGRAMMENI +05B0..05BD ; Other_Alphabetic # Mn [14] HEBREW POINT SHEVA..HEBREW POINT METEG +05BF ; Other_Alphabetic # Mn HEBREW POINT RAFE +05C1..05C2 ; Other_Alphabetic # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT +05C4..05C5 ; Other_Alphabetic # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT +05C7 ; Other_Alphabetic # Mn HEBREW POINT QAMATS QATAN +0610..061A ; Other_Alphabetic # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA +064B..0657 ; Other_Alphabetic # Mn [13] ARABIC FATHATAN..ARABIC INVERTED DAMMA +0659..065F ; Other_Alphabetic # Mn [7] ARABIC ZWARAKAY..ARABIC WAVY HAMZA BELOW +0670 ; Other_Alphabetic # Mn ARABIC LETTER SUPERSCRIPT ALEF +06D6..06DC ; Other_Alphabetic # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN +06E1..06E4 ; Other_Alphabetic # Mn [4] ARABIC SMALL HIGH DOTLESS HEAD OF KHAH..ARABIC SMALL HIGH MADDA +06E7..06E8 ; Other_Alphabetic # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON +06ED ; Other_Alphabetic # Mn ARABIC SMALL LOW MEEM +0711 ; Other_Alphabetic # Mn SYRIAC LETTER SUPERSCRIPT ALAPH +0730..073F ; Other_Alphabetic # Mn [16] SYRIAC PTHAHA ABOVE..SYRIAC RWAHA +07A6..07B0 ; Other_Alphabetic # Mn [11] THAANA ABAFILI..THAANA SUKUN +0816..0817 ; Other_Alphabetic # Mn [2] SAMARITAN MARK IN..SAMARITAN MARK IN-ALAF +081B..0823 ; Other_Alphabetic # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A +0825..0827 ; Other_Alphabetic # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U +0829..082C ; Other_Alphabetic # Mn [4] SAMARITAN VOWEL SIGN LONG I..SAMARITAN VOWEL SIGN SUKUN +08E3..08E9 ; Other_Alphabetic # Mn [7] ARABIC TURNED DAMMA BELOW..ARABIC CURLY KASRATAN +08F0..0902 ; Other_Alphabetic # Mn [19] ARABIC OPEN FATHATAN..DEVANAGARI SIGN ANUSVARA +0903 ; Other_Alphabetic # Mc DEVANAGARI SIGN VISARGA +093A ; Other_Alphabetic # Mn DEVANAGARI VOWEL SIGN OE +093B ; Other_Alphabetic # Mc DEVANAGARI VOWEL SIGN OOE +093E..0940 ; Other_Alphabetic # Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II +0941..0948 ; Other_Alphabetic # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI +0949..094C ; Other_Alphabetic # Mc [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU +094E..094F ; Other_Alphabetic # Mc [2] DEVANAGARI VOWEL SIGN PRISHTHAMATRA E..DEVANAGARI VOWEL SIGN AW +0955..0957 ; Other_Alphabetic # Mn [3] DEVANAGARI VOWEL SIGN CANDRA LONG E..DEVANAGARI VOWEL SIGN UUE +0962..0963 ; Other_Alphabetic # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL +0981 ; Other_Alphabetic # Mn BENGALI SIGN CANDRABINDU +0982..0983 ; Other_Alphabetic # Mc [2] BENGALI SIGN ANUSVARA..BENGALI SIGN VISARGA +09BE..09C0 ; Other_Alphabetic # Mc [3] BENGALI VOWEL SIGN AA..BENGALI VOWEL SIGN II +09C1..09C4 ; Other_Alphabetic # Mn [4] BENGALI VOWEL SIGN U..BENGALI VOWEL SIGN VOCALIC RR +09C7..09C8 ; Other_Alphabetic # Mc [2] BENGALI VOWEL SIGN E..BENGALI VOWEL SIGN AI +09CB..09CC ; Other_Alphabetic # Mc [2] BENGALI VOWEL SIGN O..BENGALI VOWEL SIGN AU +09D7 ; Other_Alphabetic # Mc BENGALI AU LENGTH MARK +09E2..09E3 ; Other_Alphabetic # Mn [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL +0A01..0A02 ; Other_Alphabetic # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI +0A03 ; Other_Alphabetic # Mc GURMUKHI SIGN VISARGA +0A3E..0A40 ; Other_Alphabetic # Mc [3] GURMUKHI VOWEL SIGN AA..GURMUKHI VOWEL SIGN II +0A41..0A42 ; Other_Alphabetic # Mn [2] GURMUKHI VOWEL SIGN U..GURMUKHI VOWEL SIGN UU +0A47..0A48 ; Other_Alphabetic # Mn [2] GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI +0A4B..0A4C ; Other_Alphabetic # Mn [2] GURMUKHI VOWEL SIGN OO..GURMUKHI VOWEL SIGN AU +0A51 ; Other_Alphabetic # Mn GURMUKHI SIGN UDAAT +0A70..0A71 ; Other_Alphabetic # Mn [2] GURMUKHI TIPPI..GURMUKHI ADDAK +0A75 ; Other_Alphabetic # Mn GURMUKHI SIGN YAKASH +0A81..0A82 ; Other_Alphabetic # Mn [2] GUJARATI SIGN CANDRABINDU..GUJARATI SIGN ANUSVARA +0A83 ; Other_Alphabetic # Mc GUJARATI SIGN VISARGA +0ABE..0AC0 ; Other_Alphabetic # Mc [3] GUJARATI VOWEL SIGN AA..GUJARATI VOWEL SIGN II +0AC1..0AC5 ; Other_Alphabetic # Mn [5] GUJARATI VOWEL SIGN U..GUJARATI VOWEL SIGN CANDRA E +0AC7..0AC8 ; Other_Alphabetic # Mn [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI +0AC9 ; Other_Alphabetic # Mc GUJARATI VOWEL SIGN CANDRA O +0ACB..0ACC ; Other_Alphabetic # Mc [2] GUJARATI VOWEL SIGN O..GUJARATI VOWEL SIGN AU +0AE2..0AE3 ; Other_Alphabetic # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL +0B01 ; Other_Alphabetic # Mn ORIYA SIGN CANDRABINDU +0B02..0B03 ; Other_Alphabetic # Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA +0B3E ; Other_Alphabetic # Mc ORIYA VOWEL SIGN AA +0B3F ; Other_Alphabetic # Mn ORIYA VOWEL SIGN I +0B40 ; Other_Alphabetic # Mc ORIYA VOWEL SIGN II +0B41..0B44 ; Other_Alphabetic # Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR +0B47..0B48 ; Other_Alphabetic # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI +0B4B..0B4C ; Other_Alphabetic # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU +0B56 ; Other_Alphabetic # Mn ORIYA AI LENGTH MARK +0B57 ; Other_Alphabetic # Mc ORIYA AU LENGTH MARK +0B62..0B63 ; Other_Alphabetic # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL +0B82 ; Other_Alphabetic # Mn TAMIL SIGN ANUSVARA +0BBE..0BBF ; Other_Alphabetic # Mc [2] TAMIL VOWEL SIGN AA..TAMIL VOWEL SIGN I +0BC0 ; Other_Alphabetic # Mn TAMIL VOWEL SIGN II +0BC1..0BC2 ; Other_Alphabetic # Mc [2] TAMIL VOWEL SIGN U..TAMIL VOWEL SIGN UU +0BC6..0BC8 ; Other_Alphabetic # Mc [3] TAMIL VOWEL SIGN E..TAMIL VOWEL SIGN AI +0BCA..0BCC ; Other_Alphabetic # Mc [3] TAMIL VOWEL SIGN O..TAMIL VOWEL SIGN AU +0BD7 ; Other_Alphabetic # Mc TAMIL AU LENGTH MARK +0C00 ; Other_Alphabetic # Mn TELUGU SIGN COMBINING CANDRABINDU ABOVE +0C01..0C03 ; Other_Alphabetic # Mc [3] TELUGU SIGN CANDRABINDU..TELUGU SIGN VISARGA +0C3E..0C40 ; Other_Alphabetic # Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II +0C41..0C44 ; Other_Alphabetic # Mc [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR +0C46..0C48 ; Other_Alphabetic # Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI +0C4A..0C4C ; Other_Alphabetic # Mn [3] TELUGU VOWEL SIGN O..TELUGU VOWEL SIGN AU +0C55..0C56 ; Other_Alphabetic # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK +0C62..0C63 ; Other_Alphabetic # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL +0C81 ; Other_Alphabetic # Mn KANNADA SIGN CANDRABINDU +0C82..0C83 ; Other_Alphabetic # Mc [2] KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA +0CBE ; Other_Alphabetic # Mc KANNADA VOWEL SIGN AA +0CBF ; Other_Alphabetic # Mn KANNADA VOWEL SIGN I +0CC0..0CC4 ; Other_Alphabetic # Mc [5] KANNADA VOWEL SIGN II..KANNADA VOWEL SIGN VOCALIC RR +0CC6 ; Other_Alphabetic # Mn KANNADA VOWEL SIGN E +0CC7..0CC8 ; Other_Alphabetic # Mc [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI +0CCA..0CCB ; Other_Alphabetic # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO +0CCC ; Other_Alphabetic # Mn KANNADA VOWEL SIGN AU +0CD5..0CD6 ; Other_Alphabetic # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK +0CE2..0CE3 ; Other_Alphabetic # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL +0D01 ; Other_Alphabetic # Mn MALAYALAM SIGN CANDRABINDU +0D02..0D03 ; Other_Alphabetic # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA +0D3E..0D40 ; Other_Alphabetic # Mc [3] MALAYALAM VOWEL SIGN AA..MALAYALAM VOWEL SIGN II +0D41..0D44 ; Other_Alphabetic # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR +0D46..0D48 ; Other_Alphabetic # Mc [3] MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI +0D4A..0D4C ; Other_Alphabetic # Mc [3] MALAYALAM VOWEL SIGN O..MALAYALAM VOWEL SIGN AU +0D57 ; Other_Alphabetic # Mc MALAYALAM AU LENGTH MARK +0D62..0D63 ; Other_Alphabetic # Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL +0D82..0D83 ; Other_Alphabetic # Mc [2] SINHALA SIGN ANUSVARAYA..SINHALA SIGN VISARGAYA +0DCF..0DD1 ; Other_Alphabetic # Mc [3] SINHALA VOWEL SIGN AELA-PILLA..SINHALA VOWEL SIGN DIGA AEDA-PILLA +0DD2..0DD4 ; Other_Alphabetic # Mn [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA +0DD6 ; Other_Alphabetic # Mn SINHALA VOWEL SIGN DIGA PAA-PILLA +0DD8..0DDF ; Other_Alphabetic # Mc [8] SINHALA VOWEL SIGN GAETTA-PILLA..SINHALA VOWEL SIGN GAYANUKITTA +0DF2..0DF3 ; Other_Alphabetic # Mc [2] SINHALA VOWEL SIGN DIGA GAETTA-PILLA..SINHALA VOWEL SIGN DIGA GAYANUKITTA +0E31 ; Other_Alphabetic # Mn THAI CHARACTER MAI HAN-AKAT +0E34..0E3A ; Other_Alphabetic # Mn [7] THAI CHARACTER SARA I..THAI CHARACTER PHINTHU +0E4D ; Other_Alphabetic # Mn THAI CHARACTER NIKHAHIT +0EB1 ; Other_Alphabetic # Mn LAO VOWEL SIGN MAI KAN +0EB4..0EB9 ; Other_Alphabetic # Mn [6] LAO VOWEL SIGN I..LAO VOWEL SIGN UU +0EBB..0EBC ; Other_Alphabetic # Mn [2] LAO VOWEL SIGN MAI KON..LAO SEMIVOWEL SIGN LO +0ECD ; Other_Alphabetic # Mn LAO NIGGAHITA +0F71..0F7E ; Other_Alphabetic # Mn [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO +0F7F ; Other_Alphabetic # Mc TIBETAN SIGN RNAM BCAD +0F80..0F81 ; Other_Alphabetic # Mn [2] TIBETAN VOWEL SIGN REVERSED I..TIBETAN VOWEL SIGN REVERSED II +0F8D..0F97 ; Other_Alphabetic # Mn [11] TIBETAN SUBJOINED SIGN LCE TSA CAN..TIBETAN SUBJOINED LETTER JA +0F99..0FBC ; Other_Alphabetic # Mn [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA +102B..102C ; Other_Alphabetic # Mc [2] MYANMAR VOWEL SIGN TALL AA..MYANMAR VOWEL SIGN AA +102D..1030 ; Other_Alphabetic # Mn [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU +1031 ; Other_Alphabetic # Mc MYANMAR VOWEL SIGN E +1032..1036 ; Other_Alphabetic # Mn [5] MYANMAR VOWEL SIGN AI..MYANMAR SIGN ANUSVARA +1038 ; Other_Alphabetic # Mc MYANMAR SIGN VISARGA +103B..103C ; Other_Alphabetic # Mc [2] MYANMAR CONSONANT SIGN MEDIAL YA..MYANMAR CONSONANT SIGN MEDIAL RA +103D..103E ; Other_Alphabetic # Mn [2] MYANMAR CONSONANT SIGN MEDIAL WA..MYANMAR CONSONANT SIGN MEDIAL HA +1056..1057 ; Other_Alphabetic # Mc [2] MYANMAR VOWEL SIGN VOCALIC R..MYANMAR VOWEL SIGN VOCALIC RR +1058..1059 ; Other_Alphabetic # Mn [2] MYANMAR VOWEL SIGN VOCALIC L..MYANMAR VOWEL SIGN VOCALIC LL +105E..1060 ; Other_Alphabetic # Mn [3] MYANMAR CONSONANT SIGN MON MEDIAL NA..MYANMAR CONSONANT SIGN MON MEDIAL LA +1062 ; Other_Alphabetic # Mc MYANMAR VOWEL SIGN SGAW KAREN EU +1067..1068 ; Other_Alphabetic # Mc [2] MYANMAR VOWEL SIGN WESTERN PWO KAREN EU..MYANMAR VOWEL SIGN WESTERN PWO KAREN UE +1071..1074 ; Other_Alphabetic # Mn [4] MYANMAR VOWEL SIGN GEBA KAREN I..MYANMAR VOWEL SIGN KAYAH EE +1082 ; Other_Alphabetic # Mn MYANMAR CONSONANT SIGN SHAN MEDIAL WA +1083..1084 ; Other_Alphabetic # Mc [2] MYANMAR VOWEL SIGN SHAN AA..MYANMAR VOWEL SIGN SHAN E +1085..1086 ; Other_Alphabetic # Mn [2] MYANMAR VOWEL SIGN SHAN E ABOVE..MYANMAR VOWEL SIGN SHAN FINAL Y +109C ; Other_Alphabetic # Mc MYANMAR VOWEL SIGN AITON A +109D ; Other_Alphabetic # Mn MYANMAR VOWEL SIGN AITON AI +135F ; Other_Alphabetic # Mn ETHIOPIC COMBINING GEMINATION MARK +1712..1713 ; Other_Alphabetic # Mn [2] TAGALOG VOWEL SIGN I..TAGALOG VOWEL SIGN U +1732..1733 ; Other_Alphabetic # Mn [2] HANUNOO VOWEL SIGN I..HANUNOO VOWEL SIGN U +1752..1753 ; Other_Alphabetic # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U +1772..1773 ; Other_Alphabetic # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U +17B6 ; Other_Alphabetic # Mc KHMER VOWEL SIGN AA +17B7..17BD ; Other_Alphabetic # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA +17BE..17C5 ; Other_Alphabetic # Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU +17C6 ; Other_Alphabetic # Mn KHMER SIGN NIKAHIT +17C7..17C8 ; Other_Alphabetic # Mc [2] KHMER SIGN REAHMUK..KHMER SIGN YUUKALEAPINTU +18A9 ; Other_Alphabetic # Mn MONGOLIAN LETTER ALI GALI DAGALGA +1920..1922 ; Other_Alphabetic # Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U +1923..1926 ; Other_Alphabetic # Mc [4] LIMBU VOWEL SIGN EE..LIMBU VOWEL SIGN AU +1927..1928 ; Other_Alphabetic # Mn [2] LIMBU VOWEL SIGN E..LIMBU VOWEL SIGN O +1929..192B ; Other_Alphabetic # Mc [3] LIMBU SUBJOINED LETTER YA..LIMBU SUBJOINED LETTER WA +1930..1931 ; Other_Alphabetic # Mc [2] LIMBU SMALL LETTER KA..LIMBU SMALL LETTER NGA +1932 ; Other_Alphabetic # Mn LIMBU SMALL LETTER ANUSVARA +1933..1938 ; Other_Alphabetic # Mc [6] LIMBU SMALL LETTER TA..LIMBU SMALL LETTER LA +1A17..1A18 ; Other_Alphabetic # Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U +1A19..1A1A ; Other_Alphabetic # Mc [2] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN O +1A1B ; Other_Alphabetic # Mn BUGINESE VOWEL SIGN AE +1A55 ; Other_Alphabetic # Mc TAI THAM CONSONANT SIGN MEDIAL RA +1A56 ; Other_Alphabetic # Mn TAI THAM CONSONANT SIGN MEDIAL LA +1A57 ; Other_Alphabetic # Mc TAI THAM CONSONANT SIGN LA TANG LAI +1A58..1A5E ; Other_Alphabetic # Mn [7] TAI THAM SIGN MAI KANG LAI..TAI THAM CONSONANT SIGN SA +1A61 ; Other_Alphabetic # Mc TAI THAM VOWEL SIGN A +1A62 ; Other_Alphabetic # Mn TAI THAM VOWEL SIGN MAI SAT +1A63..1A64 ; Other_Alphabetic # Mc [2] TAI THAM VOWEL SIGN AA..TAI THAM VOWEL SIGN TALL AA +1A65..1A6C ; Other_Alphabetic # Mn [8] TAI THAM VOWEL SIGN I..TAI THAM VOWEL SIGN OA BELOW +1A6D..1A72 ; Other_Alphabetic # Mc [6] TAI THAM VOWEL SIGN OY..TAI THAM VOWEL SIGN THAM AI +1A73..1A74 ; Other_Alphabetic # Mn [2] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN MAI KANG +1B00..1B03 ; Other_Alphabetic # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG +1B04 ; Other_Alphabetic # Mc BALINESE SIGN BISAH +1B35 ; Other_Alphabetic # Mc BALINESE VOWEL SIGN TEDUNG +1B36..1B3A ; Other_Alphabetic # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA +1B3B ; Other_Alphabetic # Mc BALINESE VOWEL SIGN RA REPA TEDUNG +1B3C ; Other_Alphabetic # Mn BALINESE VOWEL SIGN LA LENGA +1B3D..1B41 ; Other_Alphabetic # Mc [5] BALINESE VOWEL SIGN LA LENGA TEDUNG..BALINESE VOWEL SIGN TALING REPA TEDUNG +1B42 ; Other_Alphabetic # Mn BALINESE VOWEL SIGN PEPET +1B43 ; Other_Alphabetic # Mc BALINESE VOWEL SIGN PEPET TEDUNG +1B80..1B81 ; Other_Alphabetic # Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR +1B82 ; Other_Alphabetic # Mc SUNDANESE SIGN PANGWISAD +1BA1 ; Other_Alphabetic # Mc SUNDANESE CONSONANT SIGN PAMINGKAL +1BA2..1BA5 ; Other_Alphabetic # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU +1BA6..1BA7 ; Other_Alphabetic # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG +1BA8..1BA9 ; Other_Alphabetic # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG +1BAC..1BAD ; Other_Alphabetic # Mn [2] SUNDANESE CONSONANT SIGN PASANGAN MA..SUNDANESE CONSONANT SIGN PASANGAN WA +1BE7 ; Other_Alphabetic # Mc BATAK VOWEL SIGN E +1BE8..1BE9 ; Other_Alphabetic # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE +1BEA..1BEC ; Other_Alphabetic # Mc [3] BATAK VOWEL SIGN I..BATAK VOWEL SIGN O +1BED ; Other_Alphabetic # Mn BATAK VOWEL SIGN KARO O +1BEE ; Other_Alphabetic # Mc BATAK VOWEL SIGN U +1BEF..1BF1 ; Other_Alphabetic # Mn [3] BATAK VOWEL SIGN U FOR SIMALUNGUN SA..BATAK CONSONANT SIGN H +1C24..1C2B ; Other_Alphabetic # Mc [8] LEPCHA SUBJOINED LETTER YA..LEPCHA VOWEL SIGN UU +1C2C..1C33 ; Other_Alphabetic # Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T +1C34..1C35 ; Other_Alphabetic # Mc [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG +1CF2..1CF3 ; Other_Alphabetic # Mc [2] VEDIC SIGN ARDHAVISARGA..VEDIC SIGN ROTATED ARDHAVISARGA +1DE7..1DF4 ; Other_Alphabetic # Mn [14] COMBINING LATIN SMALL LETTER ALPHA..COMBINING LATIN SMALL LETTER U WITH DIAERESIS +24B6..24E9 ; Other_Alphabetic # So [52] CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN SMALL LETTER Z +2DE0..2DFF ; Other_Alphabetic # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS +A674..A67B ; Other_Alphabetic # Mn [8] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC LETTER OMEGA +A69E..A69F ; Other_Alphabetic # Mn [2] COMBINING CYRILLIC LETTER EF..COMBINING CYRILLIC LETTER IOTIFIED E +A823..A824 ; Other_Alphabetic # Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I +A825..A826 ; Other_Alphabetic # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E +A827 ; Other_Alphabetic # Mc SYLOTI NAGRI VOWEL SIGN OO +A880..A881 ; Other_Alphabetic # Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA +A8B4..A8C3 ; Other_Alphabetic # Mc [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN AU +A926..A92A ; Other_Alphabetic # Mn [5] KAYAH LI VOWEL UE..KAYAH LI VOWEL O +A947..A951 ; Other_Alphabetic # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R +A952 ; Other_Alphabetic # Mc REJANG CONSONANT SIGN H +A980..A982 ; Other_Alphabetic # Mn [3] JAVANESE SIGN PANYANGGA..JAVANESE SIGN LAYAR +A983 ; Other_Alphabetic # Mc JAVANESE SIGN WIGNYAN +A9B4..A9B5 ; Other_Alphabetic # Mc [2] JAVANESE VOWEL SIGN TARUNG..JAVANESE VOWEL SIGN TOLONG +A9B6..A9B9 ; Other_Alphabetic # Mn [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT +A9BA..A9BB ; Other_Alphabetic # Mc [2] JAVANESE VOWEL SIGN TALING..JAVANESE VOWEL SIGN DIRGA MURE +A9BC ; Other_Alphabetic # Mn JAVANESE VOWEL SIGN PEPET +A9BD..A9BF ; Other_Alphabetic # Mc [3] JAVANESE CONSONANT SIGN KERET..JAVANESE CONSONANT SIGN CAKRA +AA29..AA2E ; Other_Alphabetic # Mn [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE +AA2F..AA30 ; Other_Alphabetic # Mc [2] CHAM VOWEL SIGN O..CHAM VOWEL SIGN AI +AA31..AA32 ; Other_Alphabetic # Mn [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE +AA33..AA34 ; Other_Alphabetic # Mc [2] CHAM CONSONANT SIGN YA..CHAM CONSONANT SIGN RA +AA35..AA36 ; Other_Alphabetic # Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONANT SIGN WA +AA43 ; Other_Alphabetic # Mn CHAM CONSONANT SIGN FINAL NG +AA4C ; Other_Alphabetic # Mn CHAM CONSONANT SIGN FINAL M +AA4D ; Other_Alphabetic # Mc CHAM CONSONANT SIGN FINAL H +AAB0 ; Other_Alphabetic # Mn TAI VIET MAI KANG +AAB2..AAB4 ; Other_Alphabetic # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U +AAB7..AAB8 ; Other_Alphabetic # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA +AABE ; Other_Alphabetic # Mn TAI VIET VOWEL AM +AAEB ; Other_Alphabetic # Mc MEETEI MAYEK VOWEL SIGN II +AAEC..AAED ; Other_Alphabetic # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI +AAEE..AAEF ; Other_Alphabetic # Mc [2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU +AAF5 ; Other_Alphabetic # Mc MEETEI MAYEK VOWEL SIGN VISARGA +ABE3..ABE4 ; Other_Alphabetic # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP +ABE5 ; Other_Alphabetic # Mn MEETEI MAYEK VOWEL SIGN ANAP +ABE6..ABE7 ; Other_Alphabetic # Mc [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP +ABE8 ; Other_Alphabetic # Mn MEETEI MAYEK VOWEL SIGN UNAP +ABE9..ABEA ; Other_Alphabetic # Mc [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEETEI MAYEK VOWEL SIGN NUNG +FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA +10376..1037A ; Other_Alphabetic # Mn [5] COMBINING OLD PERMIC LETTER AN..COMBINING OLD PERMIC LETTER SII +10A01..10A03 ; Other_Alphabetic # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R +10A05..10A06 ; Other_Alphabetic # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O +10A0C..10A0F ; Other_Alphabetic # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA +11000 ; Other_Alphabetic # Mc BRAHMI SIGN CANDRABINDU +11001 ; Other_Alphabetic # Mn BRAHMI SIGN ANUSVARA +11002 ; Other_Alphabetic # Mc BRAHMI SIGN VISARGA +11038..11045 ; Other_Alphabetic # Mn [14] BRAHMI VOWEL SIGN AA..BRAHMI VOWEL SIGN AU +11082 ; Other_Alphabetic # Mc KAITHI SIGN VISARGA +110B0..110B2 ; Other_Alphabetic # Mc [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II +110B3..110B6 ; Other_Alphabetic # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI +110B7..110B8 ; Other_Alphabetic # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU +11100..11102 ; Other_Alphabetic # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11127..1112B ; Other_Alphabetic # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112C ; Other_Alphabetic # Mc CHAKMA VOWEL SIGN E +1112D..11132 ; Other_Alphabetic # Mn [6] CHAKMA VOWEL SIGN AI..CHAKMA AU MARK +11180..11181 ; Other_Alphabetic # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +11182 ; Other_Alphabetic # Mc SHARADA SIGN VISARGA +111B3..111B5 ; Other_Alphabetic # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II +111B6..111BE ; Other_Alphabetic # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +111BF ; Other_Alphabetic # Mc SHARADA VOWEL SIGN AU +1122C..1122E ; Other_Alphabetic # Mc [3] KHOJKI VOWEL SIGN AA..KHOJKI VOWEL SIGN II +1122F..11231 ; Other_Alphabetic # Mn [3] KHOJKI VOWEL SIGN U..KHOJKI VOWEL SIGN AI +11232..11233 ; Other_Alphabetic # Mc [2] KHOJKI VOWEL SIGN O..KHOJKI VOWEL SIGN AU +11234 ; Other_Alphabetic # Mn KHOJKI SIGN ANUSVARA +11237 ; Other_Alphabetic # Mn KHOJKI SIGN SHADDA +112DF ; Other_Alphabetic # Mn KHUDAWADI SIGN ANUSVARA +112E0..112E2 ; Other_Alphabetic # Mc [3] KHUDAWADI VOWEL SIGN AA..KHUDAWADI VOWEL SIGN II +112E3..112E8 ; Other_Alphabetic # Mn [6] KHUDAWADI VOWEL SIGN U..KHUDAWADI VOWEL SIGN AU +11300..11301 ; Other_Alphabetic # Mn [2] GRANTHA SIGN COMBINING ANUSVARA ABOVE..GRANTHA SIGN CANDRABINDU +11302..11303 ; Other_Alphabetic # Mc [2] GRANTHA SIGN ANUSVARA..GRANTHA SIGN VISARGA +1133E..1133F ; Other_Alphabetic # Mc [2] GRANTHA VOWEL SIGN AA..GRANTHA VOWEL SIGN I +11340 ; Other_Alphabetic # Mn GRANTHA VOWEL SIGN II +11341..11344 ; Other_Alphabetic # Mc [4] GRANTHA VOWEL SIGN U..GRANTHA VOWEL SIGN VOCALIC RR +11347..11348 ; Other_Alphabetic # Mc [2] GRANTHA VOWEL SIGN EE..GRANTHA VOWEL SIGN AI +1134B..1134C ; Other_Alphabetic # Mc [2] GRANTHA VOWEL SIGN OO..GRANTHA VOWEL SIGN AU +11357 ; Other_Alphabetic # Mc GRANTHA AU LENGTH MARK +11362..11363 ; Other_Alphabetic # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL +114B0..114B2 ; Other_Alphabetic # Mc [3] TIRHUTA VOWEL SIGN AA..TIRHUTA VOWEL SIGN II +114B3..114B8 ; Other_Alphabetic # Mn [6] TIRHUTA VOWEL SIGN U..TIRHUTA VOWEL SIGN VOCALIC LL +114B9 ; Other_Alphabetic # Mc TIRHUTA VOWEL SIGN E +114BA ; Other_Alphabetic # Mn TIRHUTA VOWEL SIGN SHORT E +114BB..114BE ; Other_Alphabetic # Mc [4] TIRHUTA VOWEL SIGN AI..TIRHUTA VOWEL SIGN AU +114BF..114C0 ; Other_Alphabetic # Mn [2] TIRHUTA SIGN CANDRABINDU..TIRHUTA SIGN ANUSVARA +114C1 ; Other_Alphabetic # Mc TIRHUTA SIGN VISARGA +115AF..115B1 ; Other_Alphabetic # Mc [3] SIDDHAM VOWEL SIGN AA..SIDDHAM VOWEL SIGN II +115B2..115B5 ; Other_Alphabetic # Mn [4] SIDDHAM VOWEL SIGN U..SIDDHAM VOWEL SIGN VOCALIC RR +115B8..115BB ; Other_Alphabetic # Mc [4] SIDDHAM VOWEL SIGN E..SIDDHAM VOWEL SIGN AU +115BC..115BD ; Other_Alphabetic # Mn [2] SIDDHAM SIGN CANDRABINDU..SIDDHAM SIGN ANUSVARA +115BE ; Other_Alphabetic # Mc SIDDHAM SIGN VISARGA +115DC..115DD ; Other_Alphabetic # Mn [2] SIDDHAM VOWEL SIGN ALTERNATE U..SIDDHAM VOWEL SIGN ALTERNATE UU +11630..11632 ; Other_Alphabetic # Mc [3] MODI VOWEL SIGN AA..MODI VOWEL SIGN II +11633..1163A ; Other_Alphabetic # Mn [8] MODI VOWEL SIGN U..MODI VOWEL SIGN AI +1163B..1163C ; Other_Alphabetic # Mc [2] MODI VOWEL SIGN O..MODI VOWEL SIGN AU +1163D ; Other_Alphabetic # Mn MODI SIGN ANUSVARA +1163E ; Other_Alphabetic # Mc MODI SIGN VISARGA +11640 ; Other_Alphabetic # Mn MODI SIGN ARDHACANDRA +116AB ; Other_Alphabetic # Mn TAKRI SIGN ANUSVARA +116AC ; Other_Alphabetic # Mc TAKRI SIGN VISARGA +116AD ; Other_Alphabetic # Mn TAKRI VOWEL SIGN AA +116AE..116AF ; Other_Alphabetic # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II +116B0..116B5 ; Other_Alphabetic # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +1171D..1171F ; Other_Alphabetic # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA +11720..11721 ; Other_Alphabetic # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA +11722..11725 ; Other_Alphabetic # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU +11726 ; Other_Alphabetic # Mc AHOM VOWEL SIGN E +11727..1172A ; Other_Alphabetic # Mn [4] AHOM VOWEL SIGN AW..AHOM VOWEL SIGN AM +16B30..16B36 ; Other_Alphabetic # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM +16F51..16F7E ; Other_Alphabetic # Mc [46] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN NG +1BC9E ; Other_Alphabetic # Mn DUPLOYAN DOUBLE MARK +1F130..1F149 ; Other_Alphabetic # So [26] SQUARED LATIN CAPITAL LETTER A..SQUARED LATIN CAPITAL LETTER Z +1F150..1F169 ; Other_Alphabetic # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z +1F170..1F189 ; Other_Alphabetic # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z + +# Total code points: 1116 + +# ================================================ + +3006 ; Ideographic # Lo IDEOGRAPHIC CLOSING MARK +3007 ; Ideographic # Nl IDEOGRAPHIC NUMBER ZERO +3021..3029 ; Ideographic # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE +3038..303A ; Ideographic # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY +3400..4DB5 ; Ideographic # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5 +4E00..9FD5 ; Ideographic # Lo [20950] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FD5 +F900..FA6D ; Ideographic # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D +FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 +20000..2A6D6 ; Ideographic # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6 +2A700..2B734 ; Ideographic # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734 +2B740..2B81D ; Ideographic # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D +2B820..2CEA1 ; Ideographic # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 +2F800..2FA1D ; Ideographic # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D + +# Total code points: 81404 + +# ================================================ + +005E ; Diacritic # Sk CIRCUMFLEX ACCENT +0060 ; Diacritic # Sk GRAVE ACCENT +00A8 ; Diacritic # Sk DIAERESIS +00AF ; Diacritic # Sk MACRON +00B4 ; Diacritic # Sk ACUTE ACCENT +00B7 ; Diacritic # Po MIDDLE DOT +00B8 ; Diacritic # Sk CEDILLA +02B0..02C1 ; Diacritic # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP +02C2..02C5 ; Diacritic # Sk [4] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER DOWN ARROWHEAD +02C6..02D1 ; Diacritic # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON +02D2..02DF ; Diacritic # Sk [14] MODIFIER LETTER CENTRED RIGHT HALF RING..MODIFIER LETTER CROSS ACCENT +02E0..02E4 ; Diacritic # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP +02E5..02EB ; Diacritic # Sk [7] MODIFIER LETTER EXTRA-HIGH TONE BAR..MODIFIER LETTER YANG DEPARTING TONE MARK +02EC ; Diacritic # Lm MODIFIER LETTER VOICING +02ED ; Diacritic # Sk MODIFIER LETTER UNASPIRATED +02EE ; Diacritic # Lm MODIFIER LETTER DOUBLE APOSTROPHE +02EF..02FF ; Diacritic # Sk [17] MODIFIER LETTER LOW DOWN ARROWHEAD..MODIFIER LETTER LOW LEFT ARROW +0300..034E ; Diacritic # Mn [79] COMBINING GRAVE ACCENT..COMBINING UPWARDS ARROW BELOW +0350..0357 ; Diacritic # Mn [8] COMBINING RIGHT ARROWHEAD ABOVE..COMBINING RIGHT HALF RING ABOVE +035D..0362 ; Diacritic # Mn [6] COMBINING DOUBLE BREVE..COMBINING DOUBLE RIGHTWARDS ARROW BELOW +0374 ; Diacritic # Lm GREEK NUMERAL SIGN +0375 ; Diacritic # Sk GREEK LOWER NUMERAL SIGN +037A ; Diacritic # Lm GREEK YPOGEGRAMMENI +0384..0385 ; Diacritic # Sk [2] GREEK TONOS..GREEK DIALYTIKA TONOS +0483..0487 ; Diacritic # Mn [5] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC POKRYTIE +0559 ; Diacritic # Lm ARMENIAN MODIFIER LETTER LEFT HALF RING +0591..05A1 ; Diacritic # Mn [17] HEBREW ACCENT ETNAHTA..HEBREW ACCENT PAZER +05A3..05BD ; Diacritic # Mn [27] HEBREW ACCENT MUNAH..HEBREW POINT METEG +05BF ; Diacritic # Mn HEBREW POINT RAFE +05C1..05C2 ; Diacritic # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT +05C4 ; Diacritic # Mn HEBREW MARK UPPER DOT +064B..0652 ; Diacritic # Mn [8] ARABIC FATHATAN..ARABIC SUKUN +0657..0658 ; Diacritic # Mn [2] ARABIC INVERTED DAMMA..ARABIC MARK NOON GHUNNA +06DF..06E0 ; Diacritic # Mn [2] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH UPRIGHT RECTANGULAR ZERO +06E5..06E6 ; Diacritic # Lm [2] ARABIC SMALL WAW..ARABIC SMALL YEH +06EA..06EC ; Diacritic # Mn [3] ARABIC EMPTY CENTRE LOW STOP..ARABIC ROUNDED HIGH STOP WITH FILLED CENTRE +0730..074A ; Diacritic # Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH +07A6..07B0 ; Diacritic # Mn [11] THAANA ABAFILI..THAANA SUKUN +07EB..07F3 ; Diacritic # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE +07F4..07F5 ; Diacritic # Lm [2] NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE +0818..0819 ; Diacritic # Mn [2] SAMARITAN MARK OCCLUSION..SAMARITAN MARK DAGESH +08E3..08FE ; Diacritic # Mn [28] ARABIC TURNED DAMMA BELOW..ARABIC DAMMA WITH DOT +093C ; Diacritic # Mn DEVANAGARI SIGN NUKTA +094D ; Diacritic # Mn DEVANAGARI SIGN VIRAMA +0951..0954 ; Diacritic # Mn [4] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI ACUTE ACCENT +0971 ; Diacritic # Lm DEVANAGARI SIGN HIGH SPACING DOT +09BC ; Diacritic # Mn BENGALI SIGN NUKTA +09CD ; Diacritic # Mn BENGALI SIGN VIRAMA +0A3C ; Diacritic # Mn GURMUKHI SIGN NUKTA +0A4D ; Diacritic # Mn GURMUKHI SIGN VIRAMA +0ABC ; Diacritic # Mn GUJARATI SIGN NUKTA +0ACD ; Diacritic # Mn GUJARATI SIGN VIRAMA +0B3C ; Diacritic # Mn ORIYA SIGN NUKTA +0B4D ; Diacritic # Mn ORIYA SIGN VIRAMA +0BCD ; Diacritic # Mn TAMIL SIGN VIRAMA +0C4D ; Diacritic # Mn TELUGU SIGN VIRAMA +0CBC ; Diacritic # Mn KANNADA SIGN NUKTA +0CCD ; Diacritic # Mn KANNADA SIGN VIRAMA +0D4D ; Diacritic # Mn MALAYALAM SIGN VIRAMA +0DCA ; Diacritic # Mn SINHALA SIGN AL-LAKUNA +0E47..0E4C ; Diacritic # Mn [6] THAI CHARACTER MAITAIKHU..THAI CHARACTER THANTHAKHAT +0E4E ; Diacritic # Mn THAI CHARACTER YAMAKKAN +0EC8..0ECC ; Diacritic # Mn [5] LAO TONE MAI EK..LAO CANCELLATION MARK +0F18..0F19 ; Diacritic # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS +0F35 ; Diacritic # Mn TIBETAN MARK NGAS BZUNG NYI ZLA +0F37 ; Diacritic # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS +0F39 ; Diacritic # Mn TIBETAN MARK TSA -PHRU +0F3E..0F3F ; Diacritic # Mc [2] TIBETAN SIGN YAR TSHES..TIBETAN SIGN MAR TSHES +0F82..0F84 ; Diacritic # Mn [3] TIBETAN SIGN NYI ZLA NAA DA..TIBETAN MARK HALANTA +0F86..0F87 ; Diacritic # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS +0FC6 ; Diacritic # Mn TIBETAN SYMBOL PADMA GDAN +1037 ; Diacritic # Mn MYANMAR SIGN DOT BELOW +1039..103A ; Diacritic # Mn [2] MYANMAR SIGN VIRAMA..MYANMAR SIGN ASAT +1087..108C ; Diacritic # Mc [6] MYANMAR SIGN SHAN TONE-2..MYANMAR SIGN SHAN COUNCIL TONE-3 +108D ; Diacritic # Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE +108F ; Diacritic # Mc MYANMAR SIGN RUMAI PALAUNG TONE-5 +109A..109B ; Diacritic # Mc [2] MYANMAR SIGN KHAMTI TONE-1..MYANMAR SIGN KHAMTI TONE-3 +17C9..17D3 ; Diacritic # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT +17DD ; Diacritic # Mn KHMER SIGN ATTHACAN +1939..193B ; Diacritic # Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I +1A75..1A7C ; Diacritic # Mn [8] TAI THAM SIGN TONE-1..TAI THAM SIGN KHUEN-LUE KARAN +1A7F ; Diacritic # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT +1AB0..1ABD ; Diacritic # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW +1B34 ; Diacritic # Mn BALINESE SIGN REREKAN +1B44 ; Diacritic # Mc BALINESE ADEG ADEG +1B6B..1B73 ; Diacritic # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG +1BAA ; Diacritic # Mc SUNDANESE SIGN PAMAAEH +1BAB ; Diacritic # Mn SUNDANESE SIGN VIRAMA +1C36..1C37 ; Diacritic # Mn [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA +1C78..1C7D ; Diacritic # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD +1CD0..1CD2 ; Diacritic # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA +1CD3 ; Diacritic # Po VEDIC SIGN NIHSHVASA +1CD4..1CE0 ; Diacritic # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA +1CE1 ; Diacritic # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA +1CE2..1CE8 ; Diacritic # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL +1CED ; Diacritic # Mn VEDIC SIGN TIRYAK +1CF4 ; Diacritic # Mn VEDIC TONE CANDRA ABOVE +1CF8..1CF9 ; Diacritic # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE +1D2C..1D6A ; Diacritic # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1DC4..1DCF ; Diacritic # Mn [12] COMBINING MACRON-ACUTE..COMBINING ZIGZAG BELOW +1DF5 ; Diacritic # Mn COMBINING UP TACK ABOVE +1DFD..1DFF ; Diacritic # Mn [3] COMBINING ALMOST EQUAL TO BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW +1FBD ; Diacritic # Sk GREEK KORONIS +1FBF..1FC1 ; Diacritic # Sk [3] GREEK PSILI..GREEK DIALYTIKA AND PERISPOMENI +1FCD..1FCF ; Diacritic # Sk [3] GREEK PSILI AND VARIA..GREEK PSILI AND PERISPOMENI +1FDD..1FDF ; Diacritic # Sk [3] GREEK DASIA AND VARIA..GREEK DASIA AND PERISPOMENI +1FED..1FEF ; Diacritic # Sk [3] GREEK DIALYTIKA AND VARIA..GREEK VARIA +1FFD..1FFE ; Diacritic # Sk [2] GREEK OXIA..GREEK DASIA +2CEF..2CF1 ; Diacritic # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS +2E2F ; Diacritic # Lm VERTICAL TILDE +302A..302D ; Diacritic # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK +302E..302F ; Diacritic # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK +3099..309A ; Diacritic # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +309B..309C ; Diacritic # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +30FC ; Diacritic # Lm KATAKANA-HIRAGANA PROLONGED SOUND MARK +A66F ; Diacritic # Mn COMBINING CYRILLIC VZMET +A67C..A67D ; Diacritic # Mn [2] COMBINING CYRILLIC KAVYKA..COMBINING CYRILLIC PAYEROK +A67F ; Diacritic # Lm CYRILLIC PAYEROK +A69C..A69D ; Diacritic # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN +A6F0..A6F1 ; Diacritic # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS +A717..A71F ; Diacritic # Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK +A720..A721 ; Diacritic # Sk [2] MODIFIER LETTER STRESS AND HIGH TONE..MODIFIER LETTER STRESS AND LOW TONE +A788 ; Diacritic # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT +A7F8..A7F9 ; Diacritic # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE +A8C4 ; Diacritic # Mn SAURASHTRA SIGN VIRAMA +A8E0..A8F1 ; Diacritic # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA +A92B..A92D ; Diacritic # Mn [3] KAYAH LI TONE PLOPHU..KAYAH LI TONE CALYA PLOPHU +A92E ; Diacritic # Po KAYAH LI SIGN CWI +A953 ; Diacritic # Mc REJANG VIRAMA +A9B3 ; Diacritic # Mn JAVANESE SIGN CECAK TELU +A9C0 ; Diacritic # Mc JAVANESE PANGKON +A9E5 ; Diacritic # Mn MYANMAR SIGN SHAN SAW +AA7B ; Diacritic # Mc MYANMAR SIGN PAO KAREN TONE +AA7C ; Diacritic # Mn MYANMAR SIGN TAI LAING TONE-2 +AA7D ; Diacritic # Mc MYANMAR SIGN TAI LAING TONE-5 +AABF ; Diacritic # Mn TAI VIET TONE MAI EK +AAC0 ; Diacritic # Lo TAI VIET TONE MAI NUENG +AAC1 ; Diacritic # Mn TAI VIET TONE MAI THO +AAC2 ; Diacritic # Lo TAI VIET TONE MAI SONG +AAF6 ; Diacritic # Mn MEETEI MAYEK VIRAMA +AB5B ; Diacritic # Sk MODIFIER BREVE WITH INVERTED BREVE +AB5C..AB5F ; Diacritic # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK +ABEC ; Diacritic # Mc MEETEI MAYEK LUM IYEK +ABED ; Diacritic # Mn MEETEI MAYEK APUN IYEK +FB1E ; Diacritic # Mn HEBREW POINT JUDEO-SPANISH VARIKA +FE20..FE2F ; Diacritic # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITLO RIGHT HALF +FF3E ; Diacritic # Sk FULLWIDTH CIRCUMFLEX ACCENT +FF40 ; Diacritic # Sk FULLWIDTH GRAVE ACCENT +FF70 ; Diacritic # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK +FF9E..FF9F ; Diacritic # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK +FFE3 ; Diacritic # Sk FULLWIDTH MACRON +102E0 ; Diacritic # Mn COPTIC EPACT THOUSANDS MARK +10AE5..10AE6 ; Diacritic # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW +110B9..110BA ; Diacritic # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA +11133..11134 ; Diacritic # Mn [2] CHAKMA VIRAMA..CHAKMA MAAYYAA +11173 ; Diacritic # Mn MAHAJANI SIGN NUKTA +111C0 ; Diacritic # Mc SHARADA SIGN VIRAMA +111CA..111CC ; Diacritic # Mn [3] SHARADA SIGN NUKTA..SHARADA EXTRA SHORT VOWEL MARK +11235 ; Diacritic # Mc KHOJKI SIGN VIRAMA +11236 ; Diacritic # Mn KHOJKI SIGN NUKTA +112E9..112EA ; Diacritic # Mn [2] KHUDAWADI SIGN NUKTA..KHUDAWADI SIGN VIRAMA +1133C ; Diacritic # Mn GRANTHA SIGN NUKTA +1134D ; Diacritic # Mc GRANTHA SIGN VIRAMA +11366..1136C ; Diacritic # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX +11370..11374 ; Diacritic # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA +114C2..114C3 ; Diacritic # Mn [2] TIRHUTA SIGN VIRAMA..TIRHUTA SIGN NUKTA +115BF..115C0 ; Diacritic # Mn [2] SIDDHAM SIGN VIRAMA..SIDDHAM SIGN NUKTA +1163F ; Diacritic # Mn MODI SIGN VIRAMA +116B6 ; Diacritic # Mc TAKRI SIGN VIRAMA +116B7 ; Diacritic # Mn TAKRI SIGN NUKTA +1172B ; Diacritic # Mn AHOM SIGN KILLER +16AF0..16AF4 ; Diacritic # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE +16F8F..16F92 ; Diacritic # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW +16F93..16F9F ; Diacritic # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 +1D167..1D169 ; Diacritic # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 +1D16D..1D172 ; Diacritic # Mc [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5 +1D17B..1D182 ; Diacritic # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE +1D185..1D18B ; Diacritic # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE +1D1AA..1D1AD ; Diacritic # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO +1E8D0..1E8D6 ; Diacritic # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS + +# Total code points: 773 + +# ================================================ + +00B7 ; Extender # Po MIDDLE DOT +02D0..02D1 ; Extender # Lm [2] MODIFIER LETTER TRIANGULAR COLON..MODIFIER LETTER HALF TRIANGULAR COLON +0640 ; Extender # Lm ARABIC TATWEEL +07FA ; Extender # Lm NKO LAJANYALAN +0E46 ; Extender # Lm THAI CHARACTER MAIYAMOK +0EC6 ; Extender # Lm LAO KO LA +180A ; Extender # Po MONGOLIAN NIRUGU +1843 ; Extender # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN +1AA7 ; Extender # Lm TAI THAM SIGN MAI YAMOK +1C36 ; Extender # Mn LEPCHA SIGN RAN +1C7B ; Extender # Lm OL CHIKI RELAA +3005 ; Extender # Lm IDEOGRAPHIC ITERATION MARK +3031..3035 ; Extender # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF +309D..309E ; Extender # Lm [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK +30FC..30FE ; Extender # Lm [3] KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA VOICED ITERATION MARK +A015 ; Extender # Lm YI SYLLABLE WU +A60C ; Extender # Lm VAI SYLLABLE LENGTHENER +A9CF ; Extender # Lm JAVANESE PANGRANGKEP +A9E6 ; Extender # Lm MYANMAR MODIFIER LETTER SHAN REDUPLICATION +AA70 ; Extender # Lm MYANMAR MODIFIER LETTER KHAMTI REDUPLICATION +AADD ; Extender # Lm TAI VIET SYMBOL SAM +AAF3..AAF4 ; Extender # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK +FF70 ; Extender # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK +1135D ; Extender # Lo GRANTHA SIGN PLUTA +115C6..115C8 ; Extender # Po [3] SIDDHAM REPETITION MARK-1..SIDDHAM REPETITION MARK-3 +16B42..16B43 ; Extender # Lm [2] PAHAWH HMONG SIGN VOS NRUA..PAHAWH HMONG SIGN IB YAM + +# Total code points: 38 + +# ================================================ + +00AA ; Other_Lowercase # Lo FEMININE ORDINAL INDICATOR +00BA ; Other_Lowercase # Lo MASCULINE ORDINAL INDICATOR +02B0..02B8 ; Other_Lowercase # Lm [9] MODIFIER LETTER SMALL H..MODIFIER LETTER SMALL Y +02C0..02C1 ; Other_Lowercase # Lm [2] MODIFIER LETTER GLOTTAL STOP..MODIFIER LETTER REVERSED GLOTTAL STOP +02E0..02E4 ; Other_Lowercase # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP +0345 ; Other_Lowercase # Mn COMBINING GREEK YPOGEGRAMMENI +037A ; Other_Lowercase # Lm GREEK YPOGEGRAMMENI +1D2C..1D6A ; Other_Lowercase # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1D78 ; Other_Lowercase # Lm MODIFIER LETTER CYRILLIC EN +1D9B..1DBF ; Other_Lowercase # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA +2071 ; Other_Lowercase # Lm SUPERSCRIPT LATIN SMALL LETTER I +207F ; Other_Lowercase # Lm SUPERSCRIPT LATIN SMALL LETTER N +2090..209C ; Other_Lowercase # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +2170..217F ; Other_Lowercase # Nl [16] SMALL ROMAN NUMERAL ONE..SMALL ROMAN NUMERAL ONE THOUSAND +24D0..24E9 ; Other_Lowercase # So [26] CIRCLED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z +2C7C..2C7D ; Other_Lowercase # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V +A69C..A69D ; Other_Lowercase # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN +A770 ; Other_Lowercase # Lm MODIFIER LETTER US +A7F8..A7F9 ; Other_Lowercase # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE +AB5C..AB5F ; Other_Lowercase # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK + +# Total code points: 189 + +# ================================================ + +2160..216F ; Other_Uppercase # Nl [16] ROMAN NUMERAL ONE..ROMAN NUMERAL ONE THOUSAND +24B6..24CF ; Other_Uppercase # So [26] CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN CAPITAL LETTER Z +1F130..1F149 ; Other_Uppercase # So [26] SQUARED LATIN CAPITAL LETTER A..SQUARED LATIN CAPITAL LETTER Z +1F150..1F169 ; Other_Uppercase # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z +1F170..1F189 ; Other_Uppercase # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z + +# Total code points: 120 + +# ================================================ + +FDD0..FDEF ; Noncharacter_Code_Point # Cn [32] <noncharacter-FDD0>..<noncharacter-FDEF> +FFFE..FFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-FFFE>..<noncharacter-FFFF> +1FFFE..1FFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-1FFFE>..<noncharacter-1FFFF> +2FFFE..2FFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-2FFFE>..<noncharacter-2FFFF> +3FFFE..3FFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-3FFFE>..<noncharacter-3FFFF> +4FFFE..4FFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-4FFFE>..<noncharacter-4FFFF> +5FFFE..5FFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-5FFFE>..<noncharacter-5FFFF> +6FFFE..6FFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-6FFFE>..<noncharacter-6FFFF> +7FFFE..7FFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-7FFFE>..<noncharacter-7FFFF> +8FFFE..8FFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-8FFFE>..<noncharacter-8FFFF> +9FFFE..9FFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-9FFFE>..<noncharacter-9FFFF> +AFFFE..AFFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-AFFFE>..<noncharacter-AFFFF> +BFFFE..BFFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-BFFFE>..<noncharacter-BFFFF> +CFFFE..CFFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-CFFFE>..<noncharacter-CFFFF> +DFFFE..DFFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-DFFFE>..<noncharacter-DFFFF> +EFFFE..EFFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-EFFFE>..<noncharacter-EFFFF> +FFFFE..FFFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-FFFFE>..<noncharacter-FFFFF> +10FFFE..10FFFF; Noncharacter_Code_Point # Cn [2] <noncharacter-10FFFE>..<noncharacter-10FFFF> + +# Total code points: 66 + +# ================================================ + +09BE ; Other_Grapheme_Extend # Mc BENGALI VOWEL SIGN AA +09D7 ; Other_Grapheme_Extend # Mc BENGALI AU LENGTH MARK +0B3E ; Other_Grapheme_Extend # Mc ORIYA VOWEL SIGN AA +0B57 ; Other_Grapheme_Extend # Mc ORIYA AU LENGTH MARK +0BBE ; Other_Grapheme_Extend # Mc TAMIL VOWEL SIGN AA +0BD7 ; Other_Grapheme_Extend # Mc TAMIL AU LENGTH MARK +0CC2 ; Other_Grapheme_Extend # Mc KANNADA VOWEL SIGN UU +0CD5..0CD6 ; Other_Grapheme_Extend # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK +0D3E ; Other_Grapheme_Extend # Mc MALAYALAM VOWEL SIGN AA +0D57 ; Other_Grapheme_Extend # Mc MALAYALAM AU LENGTH MARK +0DCF ; Other_Grapheme_Extend # Mc SINHALA VOWEL SIGN AELA-PILLA +0DDF ; Other_Grapheme_Extend # Mc SINHALA VOWEL SIGN GAYANUKITTA +200C..200D ; Other_Grapheme_Extend # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER +302E..302F ; Other_Grapheme_Extend # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK +FF9E..FF9F ; Other_Grapheme_Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK +1133E ; Other_Grapheme_Extend # Mc GRANTHA VOWEL SIGN AA +11357 ; Other_Grapheme_Extend # Mc GRANTHA AU LENGTH MARK +114B0 ; Other_Grapheme_Extend # Mc TIRHUTA VOWEL SIGN AA +114BD ; Other_Grapheme_Extend # Mc TIRHUTA VOWEL SIGN SHORT O +115AF ; Other_Grapheme_Extend # Mc SIDDHAM VOWEL SIGN AA +1D165 ; Other_Grapheme_Extend # Mc MUSICAL SYMBOL COMBINING STEM +1D16E..1D172 ; Other_Grapheme_Extend # Mc [5] MUSICAL SYMBOL COMBINING FLAG-1..MUSICAL SYMBOL COMBINING FLAG-5 + +# Total code points: 30 + +# ================================================ + +2FF0..2FF1 ; IDS_Binary_Operator # So [2] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER ABOVE TO BELOW +2FF4..2FFB ; IDS_Binary_Operator # So [8] IDEOGRAPHIC DESCRIPTION CHARACTER FULL SURROUND..IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID + +# Total code points: 10 + +# ================================================ + +2FF2..2FF3 ; IDS_Trinary_Operator # So [2] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO MIDDLE AND RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER ABOVE TO MIDDLE AND BELOW + +# Total code points: 2 + +# ================================================ + +2E80..2E99 ; Radical # So [26] CJK RADICAL REPEAT..CJK RADICAL RAP +2E9B..2EF3 ; Radical # So [89] CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED TURTLE +2F00..2FD5 ; Radical # So [214] KANGXI RADICAL ONE..KANGXI RADICAL FLUTE + +# Total code points: 329 + +# ================================================ + +3400..4DB5 ; Unified_Ideograph # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5 +4E00..9FD5 ; Unified_Ideograph # Lo [20950] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FD5 +FA0E..FA0F ; Unified_Ideograph # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA0E..CJK COMPATIBILITY IDEOGRAPH-FA0F +FA11 ; Unified_Ideograph # Lo CJK COMPATIBILITY IDEOGRAPH-FA11 +FA13..FA14 ; Unified_Ideograph # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA13..CJK COMPATIBILITY IDEOGRAPH-FA14 +FA1F ; Unified_Ideograph # Lo CJK COMPATIBILITY IDEOGRAPH-FA1F +FA21 ; Unified_Ideograph # Lo CJK COMPATIBILITY IDEOGRAPH-FA21 +FA23..FA24 ; Unified_Ideograph # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA23..CJK COMPATIBILITY IDEOGRAPH-FA24 +FA27..FA29 ; Unified_Ideograph # Lo [3] CJK COMPATIBILITY IDEOGRAPH-FA27..CJK COMPATIBILITY IDEOGRAPH-FA29 +20000..2A6D6 ; Unified_Ideograph # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6 +2A700..2B734 ; Unified_Ideograph # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734 +2B740..2B81D ; Unified_Ideograph # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D +2B820..2CEA1 ; Unified_Ideograph # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 + +# Total code points: 80388 + +# ================================================ + +034F ; Other_Default_Ignorable_Code_Point # Mn COMBINING GRAPHEME JOINER +115F..1160 ; Other_Default_Ignorable_Code_Point # Lo [2] HANGUL CHOSEONG FILLER..HANGUL JUNGSEONG FILLER +17B4..17B5 ; Other_Default_Ignorable_Code_Point # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA +2065 ; Other_Default_Ignorable_Code_Point # Cn <reserved-2065> +3164 ; Other_Default_Ignorable_Code_Point # Lo HANGUL FILLER +FFA0 ; Other_Default_Ignorable_Code_Point # Lo HALFWIDTH HANGUL FILLER +FFF0..FFF8 ; Other_Default_Ignorable_Code_Point # Cn [9] <reserved-FFF0>..<reserved-FFF8> +E0000 ; Other_Default_Ignorable_Code_Point # Cn <reserved-E0000> +E0002..E001F ; Other_Default_Ignorable_Code_Point # Cn [30] <reserved-E0002>..<reserved-E001F> +E0080..E00FF ; Other_Default_Ignorable_Code_Point # Cn [128] <reserved-E0080>..<reserved-E00FF> +E01F0..E0FFF ; Other_Default_Ignorable_Code_Point # Cn [3600] <reserved-E01F0>..<reserved-E0FFF> + +# Total code points: 3776 + +# ================================================ + +0149 ; Deprecated # L& LATIN SMALL LETTER N PRECEDED BY APOSTROPHE +0673 ; Deprecated # Lo ARABIC LETTER ALEF WITH WAVY HAMZA BELOW +0F77 ; Deprecated # Mn TIBETAN VOWEL SIGN VOCALIC RR +0F79 ; Deprecated # Mn TIBETAN VOWEL SIGN VOCALIC LL +17A3..17A4 ; Deprecated # Lo [2] KHMER INDEPENDENT VOWEL QAQ..KHMER INDEPENDENT VOWEL QAA +206A..206F ; Deprecated # Cf [6] INHIBIT SYMMETRIC SWAPPING..NOMINAL DIGIT SHAPES +2329 ; Deprecated # Ps LEFT-POINTING ANGLE BRACKET +232A ; Deprecated # Pe RIGHT-POINTING ANGLE BRACKET +E0001 ; Deprecated # Cf LANGUAGE TAG +E007F ; Deprecated # Cf CANCEL TAG + +# Total code points: 16 + +# ================================================ + +0069..006A ; Soft_Dotted # L& [2] LATIN SMALL LETTER I..LATIN SMALL LETTER J +012F ; Soft_Dotted # L& LATIN SMALL LETTER I WITH OGONEK +0249 ; Soft_Dotted # L& LATIN SMALL LETTER J WITH STROKE +0268 ; Soft_Dotted # L& LATIN SMALL LETTER I WITH STROKE +029D ; Soft_Dotted # L& LATIN SMALL LETTER J WITH CROSSED-TAIL +02B2 ; Soft_Dotted # Lm MODIFIER LETTER SMALL J +03F3 ; Soft_Dotted # L& GREEK LETTER YOT +0456 ; Soft_Dotted # L& CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I +0458 ; Soft_Dotted # L& CYRILLIC SMALL LETTER JE +1D62 ; Soft_Dotted # Lm LATIN SUBSCRIPT SMALL LETTER I +1D96 ; Soft_Dotted # L& LATIN SMALL LETTER I WITH RETROFLEX HOOK +1DA4 ; Soft_Dotted # Lm MODIFIER LETTER SMALL I WITH STROKE +1DA8 ; Soft_Dotted # Lm MODIFIER LETTER SMALL J WITH CROSSED-TAIL +1E2D ; Soft_Dotted # L& LATIN SMALL LETTER I WITH TILDE BELOW +1ECB ; Soft_Dotted # L& LATIN SMALL LETTER I WITH DOT BELOW +2071 ; Soft_Dotted # Lm SUPERSCRIPT LATIN SMALL LETTER I +2148..2149 ; Soft_Dotted # L& [2] DOUBLE-STRUCK ITALIC SMALL I..DOUBLE-STRUCK ITALIC SMALL J +2C7C ; Soft_Dotted # Lm LATIN SUBSCRIPT SMALL LETTER J +1D422..1D423 ; Soft_Dotted # L& [2] MATHEMATICAL BOLD SMALL I..MATHEMATICAL BOLD SMALL J +1D456..1D457 ; Soft_Dotted # L& [2] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL ITALIC SMALL J +1D48A..1D48B ; Soft_Dotted # L& [2] MATHEMATICAL BOLD ITALIC SMALL I..MATHEMATICAL BOLD ITALIC SMALL J +1D4BE..1D4BF ; Soft_Dotted # L& [2] MATHEMATICAL SCRIPT SMALL I..MATHEMATICAL SCRIPT SMALL J +1D4F2..1D4F3 ; Soft_Dotted # L& [2] MATHEMATICAL BOLD SCRIPT SMALL I..MATHEMATICAL BOLD SCRIPT SMALL J +1D526..1D527 ; Soft_Dotted # L& [2] MATHEMATICAL FRAKTUR SMALL I..MATHEMATICAL FRAKTUR SMALL J +1D55A..1D55B ; Soft_Dotted # L& [2] MATHEMATICAL DOUBLE-STRUCK SMALL I..MATHEMATICAL DOUBLE-STRUCK SMALL J +1D58E..1D58F ; Soft_Dotted # L& [2] MATHEMATICAL BOLD FRAKTUR SMALL I..MATHEMATICAL BOLD FRAKTUR SMALL J +1D5C2..1D5C3 ; Soft_Dotted # L& [2] MATHEMATICAL SANS-SERIF SMALL I..MATHEMATICAL SANS-SERIF SMALL J +1D5F6..1D5F7 ; Soft_Dotted # L& [2] MATHEMATICAL SANS-SERIF BOLD SMALL I..MATHEMATICAL SANS-SERIF BOLD SMALL J +1D62A..1D62B ; Soft_Dotted # L& [2] MATHEMATICAL SANS-SERIF ITALIC SMALL I..MATHEMATICAL SANS-SERIF ITALIC SMALL J +1D65E..1D65F ; Soft_Dotted # L& [2] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL I..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL J +1D692..1D693 ; Soft_Dotted # L& [2] MATHEMATICAL MONOSPACE SMALL I..MATHEMATICAL MONOSPACE SMALL J + +# Total code points: 46 + +# ================================================ + +0E40..0E44 ; Logical_Order_Exception # Lo [5] THAI CHARACTER SARA E..THAI CHARACTER SARA AI MAIMALAI +0EC0..0EC4 ; Logical_Order_Exception # Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI +19B5..19B7 ; Logical_Order_Exception # Lo [3] NEW TAI LUE VOWEL SIGN E..NEW TAI LUE VOWEL SIGN O +19BA ; Logical_Order_Exception # Lo NEW TAI LUE VOWEL SIGN AY +AAB5..AAB6 ; Logical_Order_Exception # Lo [2] TAI VIET VOWEL E..TAI VIET VOWEL O +AAB9 ; Logical_Order_Exception # Lo TAI VIET VOWEL UEA +AABB..AABC ; Logical_Order_Exception # Lo [2] TAI VIET VOWEL AUE..TAI VIET VOWEL AY + +# Total code points: 19 + +# ================================================ + +2118 ; Other_ID_Start # Sm SCRIPT CAPITAL P +212E ; Other_ID_Start # So ESTIMATED SYMBOL +309B..309C ; Other_ID_Start # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK + +# Total code points: 4 + +# ================================================ + +00B7 ; Other_ID_Continue # Po MIDDLE DOT +0387 ; Other_ID_Continue # Po GREEK ANO TELEIA +1369..1371 ; Other_ID_Continue # No [9] ETHIOPIC DIGIT ONE..ETHIOPIC DIGIT NINE +19DA ; Other_ID_Continue # No NEW TAI LUE THAM DIGIT ONE + +# Total code points: 12 + +# ================================================ + +0021 ; STerm # Po EXCLAMATION MARK +002E ; STerm # Po FULL STOP +003F ; STerm # Po QUESTION MARK +0589 ; STerm # Po ARMENIAN FULL STOP +061F ; STerm # Po ARABIC QUESTION MARK +06D4 ; STerm # Po ARABIC FULL STOP +0700..0702 ; STerm # Po [3] SYRIAC END OF PARAGRAPH..SYRIAC SUBLINEAR FULL STOP +07F9 ; STerm # Po NKO EXCLAMATION MARK +0964..0965 ; STerm # Po [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA +104A..104B ; STerm # Po [2] MYANMAR SIGN LITTLE SECTION..MYANMAR SIGN SECTION +1362 ; STerm # Po ETHIOPIC FULL STOP +1367..1368 ; STerm # Po [2] ETHIOPIC QUESTION MARK..ETHIOPIC PARAGRAPH SEPARATOR +166E ; STerm # Po CANADIAN SYLLABICS FULL STOP +1735..1736 ; STerm # Po [2] PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DOUBLE PUNCTUATION +1803 ; STerm # Po MONGOLIAN FULL STOP +1809 ; STerm # Po MONGOLIAN MANCHU FULL STOP +1944..1945 ; STerm # Po [2] LIMBU EXCLAMATION MARK..LIMBU QUESTION MARK +1AA8..1AAB ; STerm # Po [4] TAI THAM SIGN KAAN..TAI THAM SIGN SATKAANKUU +1B5A..1B5B ; STerm # Po [2] BALINESE PANTI..BALINESE PAMADA +1B5E..1B5F ; STerm # Po [2] BALINESE CARIK SIKI..BALINESE CARIK PAREREN +1C3B..1C3C ; STerm # Po [2] LEPCHA PUNCTUATION TA-ROL..LEPCHA PUNCTUATION NYET THYOOM TA-ROL +1C7E..1C7F ; STerm # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD +203C..203D ; STerm # Po [2] DOUBLE EXCLAMATION MARK..INTERROBANG +2047..2049 ; STerm # Po [3] DOUBLE QUESTION MARK..EXCLAMATION QUESTION MARK +2E2E ; STerm # Po REVERSED QUESTION MARK +2E3C ; STerm # Po STENOGRAPHIC FULL STOP +3002 ; STerm # Po IDEOGRAPHIC FULL STOP +A4FF ; STerm # Po LISU PUNCTUATION FULL STOP +A60E..A60F ; STerm # Po [2] VAI FULL STOP..VAI QUESTION MARK +A6F3 ; STerm # Po BAMUM FULL STOP +A6F7 ; STerm # Po BAMUM QUESTION MARK +A876..A877 ; STerm # Po [2] PHAGS-PA MARK SHAD..PHAGS-PA MARK DOUBLE SHAD +A8CE..A8CF ; STerm # Po [2] SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA +A92F ; STerm # Po KAYAH LI SIGN SHYA +A9C8..A9C9 ; STerm # Po [2] JAVANESE PADA LINGSA..JAVANESE PADA LUNGSI +AA5D..AA5F ; STerm # Po [3] CHAM PUNCTUATION DANDA..CHAM PUNCTUATION TRIPLE DANDA +AAF0..AAF1 ; STerm # Po [2] MEETEI MAYEK CHEIKHAN..MEETEI MAYEK AHANG KHUDAM +ABEB ; STerm # Po MEETEI MAYEK CHEIKHEI +FE52 ; STerm # Po SMALL FULL STOP +FE56..FE57 ; STerm # Po [2] SMALL QUESTION MARK..SMALL EXCLAMATION MARK +FF01 ; STerm # Po FULLWIDTH EXCLAMATION MARK +FF0E ; STerm # Po FULLWIDTH FULL STOP +FF1F ; STerm # Po FULLWIDTH QUESTION MARK +FF61 ; STerm # Po HALFWIDTH IDEOGRAPHIC FULL STOP +10A56..10A57 ; STerm # Po [2] KHAROSHTHI PUNCTUATION DANDA..KHAROSHTHI PUNCTUATION DOUBLE DANDA +11047..11048 ; STerm # Po [2] BRAHMI DANDA..BRAHMI DOUBLE DANDA +110BE..110C1 ; STerm # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA +11141..11143 ; STerm # Po [3] CHAKMA DANDA..CHAKMA QUESTION MARK +111C5..111C6 ; STerm # Po [2] SHARADA DANDA..SHARADA DOUBLE DANDA +111CD ; STerm # Po SHARADA SUTRA MARK +111DE..111DF ; STerm # Po [2] SHARADA SECTION MARK-1..SHARADA SECTION MARK-2 +11238..11239 ; STerm # Po [2] KHOJKI DANDA..KHOJKI DOUBLE DANDA +1123B..1123C ; STerm # Po [2] KHOJKI SECTION MARK..KHOJKI DOUBLE SECTION MARK +112A9 ; STerm # Po MULTANI SECTION MARK +115C2..115C3 ; STerm # Po [2] SIDDHAM DANDA..SIDDHAM DOUBLE DANDA +115C9..115D7 ; STerm # Po [15] SIDDHAM END OF TEXT MARK..SIDDHAM SECTION MARK WITH CIRCLES AND FOUR ENCLOSURES +11641..11642 ; STerm # Po [2] MODI DANDA..MODI DOUBLE DANDA +1173C..1173E ; STerm # Po [3] AHOM SIGN SMALL SECTION..AHOM SIGN RULAI +16A6E..16A6F ; STerm # Po [2] MRO DANDA..MRO DOUBLE DANDA +16AF5 ; STerm # Po BASSA VAH FULL STOP +16B37..16B38 ; STerm # Po [2] PAHAWH HMONG SIGN VOS THOM..PAHAWH HMONG SIGN VOS TSHAB CEEB +16B44 ; STerm # Po PAHAWH HMONG SIGN XAUS +1BC9F ; STerm # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP +1DA88 ; STerm # Po SIGNWRITING FULL STOP + +# Total code points: 120 + +# ================================================ + +180B..180D ; Variation_Selector # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE +FE00..FE0F ; Variation_Selector # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16 +E0100..E01EF ; Variation_Selector # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 + +# Total code points: 259 + +# ================================================ + +0009..000D ; Pattern_White_Space # Cc [5] <control-0009>..<control-000D> +0020 ; Pattern_White_Space # Zs SPACE +0085 ; Pattern_White_Space # Cc <control-0085> +200E..200F ; Pattern_White_Space # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK +2028 ; Pattern_White_Space # Zl LINE SEPARATOR +2029 ; Pattern_White_Space # Zp PARAGRAPH SEPARATOR + +# Total code points: 11 + +# ================================================ + +0021..0023 ; Pattern_Syntax # Po [3] EXCLAMATION MARK..NUMBER SIGN +0024 ; Pattern_Syntax # Sc DOLLAR SIGN +0025..0027 ; Pattern_Syntax # Po [3] PERCENT SIGN..APOSTROPHE +0028 ; Pattern_Syntax # Ps LEFT PARENTHESIS +0029 ; Pattern_Syntax # Pe RIGHT PARENTHESIS +002A ; Pattern_Syntax # Po ASTERISK +002B ; Pattern_Syntax # Sm PLUS SIGN +002C ; Pattern_Syntax # Po COMMA +002D ; Pattern_Syntax # Pd HYPHEN-MINUS +002E..002F ; Pattern_Syntax # Po [2] FULL STOP..SOLIDUS +003A..003B ; Pattern_Syntax # Po [2] COLON..SEMICOLON +003C..003E ; Pattern_Syntax # Sm [3] LESS-THAN SIGN..GREATER-THAN SIGN +003F..0040 ; Pattern_Syntax # Po [2] QUESTION MARK..COMMERCIAL AT +005B ; Pattern_Syntax # Ps LEFT SQUARE BRACKET +005C ; Pattern_Syntax # Po REVERSE SOLIDUS +005D ; Pattern_Syntax # Pe RIGHT SQUARE BRACKET +005E ; Pattern_Syntax # Sk CIRCUMFLEX ACCENT +0060 ; Pattern_Syntax # Sk GRAVE ACCENT +007B ; Pattern_Syntax # Ps LEFT CURLY BRACKET +007C ; Pattern_Syntax # Sm VERTICAL LINE +007D ; Pattern_Syntax # Pe RIGHT CURLY BRACKET +007E ; Pattern_Syntax # Sm TILDE +00A1 ; Pattern_Syntax # Po INVERTED EXCLAMATION MARK +00A2..00A5 ; Pattern_Syntax # Sc [4] CENT SIGN..YEN SIGN +00A6 ; Pattern_Syntax # So BROKEN BAR +00A7 ; Pattern_Syntax # Po SECTION SIGN +00A9 ; Pattern_Syntax # So COPYRIGHT SIGN +00AB ; Pattern_Syntax # Pi LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +00AC ; Pattern_Syntax # Sm NOT SIGN +00AE ; Pattern_Syntax # So REGISTERED SIGN +00B0 ; Pattern_Syntax # So DEGREE SIGN +00B1 ; Pattern_Syntax # Sm PLUS-MINUS SIGN +00B6 ; Pattern_Syntax # Po PILCROW SIGN +00BB ; Pattern_Syntax # Pf RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +00BF ; Pattern_Syntax # Po INVERTED QUESTION MARK +00D7 ; Pattern_Syntax # Sm MULTIPLICATION SIGN +00F7 ; Pattern_Syntax # Sm DIVISION SIGN +2010..2015 ; Pattern_Syntax # Pd [6] HYPHEN..HORIZONTAL BAR +2016..2017 ; Pattern_Syntax # Po [2] DOUBLE VERTICAL LINE..DOUBLE LOW LINE +2018 ; Pattern_Syntax # Pi LEFT SINGLE QUOTATION MARK +2019 ; Pattern_Syntax # Pf RIGHT SINGLE QUOTATION MARK +201A ; Pattern_Syntax # Ps SINGLE LOW-9 QUOTATION MARK +201B..201C ; Pattern_Syntax # Pi [2] SINGLE HIGH-REVERSED-9 QUOTATION MARK..LEFT DOUBLE QUOTATION MARK +201D ; Pattern_Syntax # Pf RIGHT DOUBLE QUOTATION MARK +201E ; Pattern_Syntax # Ps DOUBLE LOW-9 QUOTATION MARK +201F ; Pattern_Syntax # Pi DOUBLE HIGH-REVERSED-9 QUOTATION MARK +2020..2027 ; Pattern_Syntax # Po [8] DAGGER..HYPHENATION POINT +2030..2038 ; Pattern_Syntax # Po [9] PER MILLE SIGN..CARET +2039 ; Pattern_Syntax # Pi SINGLE LEFT-POINTING ANGLE QUOTATION MARK +203A ; Pattern_Syntax # Pf SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +203B..203E ; Pattern_Syntax # Po [4] REFERENCE MARK..OVERLINE +2041..2043 ; Pattern_Syntax # Po [3] CARET INSERTION POINT..HYPHEN BULLET +2044 ; Pattern_Syntax # Sm FRACTION SLASH +2045 ; Pattern_Syntax # Ps LEFT SQUARE BRACKET WITH QUILL +2046 ; Pattern_Syntax # Pe RIGHT SQUARE BRACKET WITH QUILL +2047..2051 ; Pattern_Syntax # Po [11] DOUBLE QUESTION MARK..TWO ASTERISKS ALIGNED VERTICALLY +2052 ; Pattern_Syntax # Sm COMMERCIAL MINUS SIGN +2053 ; Pattern_Syntax # Po SWUNG DASH +2055..205E ; Pattern_Syntax # Po [10] FLOWER PUNCTUATION MARK..VERTICAL FOUR DOTS +2190..2194 ; Pattern_Syntax # Sm [5] LEFTWARDS ARROW..LEFT RIGHT ARROW +2195..2199 ; Pattern_Syntax # So [5] UP DOWN ARROW..SOUTH WEST ARROW +219A..219B ; Pattern_Syntax # Sm [2] LEFTWARDS ARROW WITH STROKE..RIGHTWARDS ARROW WITH STROKE +219C..219F ; Pattern_Syntax # So [4] LEFTWARDS WAVE ARROW..UPWARDS TWO HEADED ARROW +21A0 ; Pattern_Syntax # Sm RIGHTWARDS TWO HEADED ARROW +21A1..21A2 ; Pattern_Syntax # So [2] DOWNWARDS TWO HEADED ARROW..LEFTWARDS ARROW WITH TAIL +21A3 ; Pattern_Syntax # Sm RIGHTWARDS ARROW WITH TAIL +21A4..21A5 ; Pattern_Syntax # So [2] LEFTWARDS ARROW FROM BAR..UPWARDS ARROW FROM BAR +21A6 ; Pattern_Syntax # Sm RIGHTWARDS ARROW FROM BAR +21A7..21AD ; Pattern_Syntax # So [7] DOWNWARDS ARROW FROM BAR..LEFT RIGHT WAVE ARROW +21AE ; Pattern_Syntax # Sm LEFT RIGHT ARROW WITH STROKE +21AF..21CD ; Pattern_Syntax # So [31] DOWNWARDS ZIGZAG ARROW..LEFTWARDS DOUBLE ARROW WITH STROKE +21CE..21CF ; Pattern_Syntax # Sm [2] LEFT RIGHT DOUBLE ARROW WITH STROKE..RIGHTWARDS DOUBLE ARROW WITH STROKE +21D0..21D1 ; Pattern_Syntax # So [2] LEFTWARDS DOUBLE ARROW..UPWARDS DOUBLE ARROW +21D2 ; Pattern_Syntax # Sm RIGHTWARDS DOUBLE ARROW +21D3 ; Pattern_Syntax # So DOWNWARDS DOUBLE ARROW +21D4 ; Pattern_Syntax # Sm LEFT RIGHT DOUBLE ARROW +21D5..21F3 ; Pattern_Syntax # So [31] UP DOWN DOUBLE ARROW..UP DOWN WHITE ARROW +21F4..22FF ; Pattern_Syntax # Sm [268] RIGHT ARROW WITH SMALL CIRCLE..Z NOTATION BAG MEMBERSHIP +2300..2307 ; Pattern_Syntax # So [8] DIAMETER SIGN..WAVY LINE +2308 ; Pattern_Syntax # Ps LEFT CEILING +2309 ; Pattern_Syntax # Pe RIGHT CEILING +230A ; Pattern_Syntax # Ps LEFT FLOOR +230B ; Pattern_Syntax # Pe RIGHT FLOOR +230C..231F ; Pattern_Syntax # So [20] BOTTOM RIGHT CROP..BOTTOM RIGHT CORNER +2320..2321 ; Pattern_Syntax # Sm [2] TOP HALF INTEGRAL..BOTTOM HALF INTEGRAL +2322..2328 ; Pattern_Syntax # So [7] FROWN..KEYBOARD +2329 ; Pattern_Syntax # Ps LEFT-POINTING ANGLE BRACKET +232A ; Pattern_Syntax # Pe RIGHT-POINTING ANGLE BRACKET +232B..237B ; Pattern_Syntax # So [81] ERASE TO THE LEFT..NOT CHECK MARK +237C ; Pattern_Syntax # Sm RIGHT ANGLE WITH DOWNWARDS ZIGZAG ARROW +237D..239A ; Pattern_Syntax # So [30] SHOULDERED OPEN BOX..CLEAR SCREEN SYMBOL +239B..23B3 ; Pattern_Syntax # Sm [25] LEFT PARENTHESIS UPPER HOOK..SUMMATION BOTTOM +23B4..23DB ; Pattern_Syntax # So [40] TOP SQUARE BRACKET..FUSE +23DC..23E1 ; Pattern_Syntax # Sm [6] TOP PARENTHESIS..BOTTOM TORTOISE SHELL BRACKET +23E2..23FA ; Pattern_Syntax # So [25] WHITE TRAPEZIUM..BLACK CIRCLE FOR RECORD +23FB..23FF ; Pattern_Syntax # Cn [5] <reserved-23FB>..<reserved-23FF> +2400..2426 ; Pattern_Syntax # So [39] SYMBOL FOR NULL..SYMBOL FOR SUBSTITUTE FORM TWO +2427..243F ; Pattern_Syntax # Cn [25] <reserved-2427>..<reserved-243F> +2440..244A ; Pattern_Syntax # So [11] OCR HOOK..OCR DOUBLE BACKSLASH +244B..245F ; Pattern_Syntax # Cn [21] <reserved-244B>..<reserved-245F> +2500..25B6 ; Pattern_Syntax # So [183] BOX DRAWINGS LIGHT HORIZONTAL..BLACK RIGHT-POINTING TRIANGLE +25B7 ; Pattern_Syntax # Sm WHITE RIGHT-POINTING TRIANGLE +25B8..25C0 ; Pattern_Syntax # So [9] BLACK RIGHT-POINTING SMALL TRIANGLE..BLACK LEFT-POINTING TRIANGLE +25C1 ; Pattern_Syntax # Sm WHITE LEFT-POINTING TRIANGLE +25C2..25F7 ; Pattern_Syntax # So [54] BLACK LEFT-POINTING SMALL TRIANGLE..WHITE CIRCLE WITH UPPER RIGHT QUADRANT +25F8..25FF ; Pattern_Syntax # Sm [8] UPPER LEFT TRIANGLE..LOWER RIGHT TRIANGLE +2600..266E ; Pattern_Syntax # So [111] BLACK SUN WITH RAYS..MUSIC NATURAL SIGN +266F ; Pattern_Syntax # Sm MUSIC SHARP SIGN +2670..2767 ; Pattern_Syntax # So [248] WEST SYRIAC CROSS..ROTATED FLORAL HEART BULLET +2768 ; Pattern_Syntax # Ps MEDIUM LEFT PARENTHESIS ORNAMENT +2769 ; Pattern_Syntax # Pe MEDIUM RIGHT PARENTHESIS ORNAMENT +276A ; Pattern_Syntax # Ps MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT +276B ; Pattern_Syntax # Pe MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT +276C ; Pattern_Syntax # Ps MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT +276D ; Pattern_Syntax # Pe MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT +276E ; Pattern_Syntax # Ps HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT +276F ; Pattern_Syntax # Pe HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT +2770 ; Pattern_Syntax # Ps HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT +2771 ; Pattern_Syntax # Pe HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT +2772 ; Pattern_Syntax # Ps LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT +2773 ; Pattern_Syntax # Pe LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT +2774 ; Pattern_Syntax # Ps MEDIUM LEFT CURLY BRACKET ORNAMENT +2775 ; Pattern_Syntax # Pe MEDIUM RIGHT CURLY BRACKET ORNAMENT +2794..27BF ; Pattern_Syntax # So [44] HEAVY WIDE-HEADED RIGHTWARDS ARROW..DOUBLE CURLY LOOP +27C0..27C4 ; Pattern_Syntax # Sm [5] THREE DIMENSIONAL ANGLE..OPEN SUPERSET +27C5 ; Pattern_Syntax # Ps LEFT S-SHAPED BAG DELIMITER +27C6 ; Pattern_Syntax # Pe RIGHT S-SHAPED BAG DELIMITER +27C7..27E5 ; Pattern_Syntax # Sm [31] OR WITH DOT INSIDE..WHITE SQUARE WITH RIGHTWARDS TICK +27E6 ; Pattern_Syntax # Ps MATHEMATICAL LEFT WHITE SQUARE BRACKET +27E7 ; Pattern_Syntax # Pe MATHEMATICAL RIGHT WHITE SQUARE BRACKET +27E8 ; Pattern_Syntax # Ps MATHEMATICAL LEFT ANGLE BRACKET +27E9 ; Pattern_Syntax # Pe MATHEMATICAL RIGHT ANGLE BRACKET +27EA ; Pattern_Syntax # Ps MATHEMATICAL LEFT DOUBLE ANGLE BRACKET +27EB ; Pattern_Syntax # Pe MATHEMATICAL RIGHT DOUBLE ANGLE BRACKET +27EC ; Pattern_Syntax # Ps MATHEMATICAL LEFT WHITE TORTOISE SHELL BRACKET +27ED ; Pattern_Syntax # Pe MATHEMATICAL RIGHT WHITE TORTOISE SHELL BRACKET +27EE ; Pattern_Syntax # Ps MATHEMATICAL LEFT FLATTENED PARENTHESIS +27EF ; Pattern_Syntax # Pe MATHEMATICAL RIGHT FLATTENED PARENTHESIS +27F0..27FF ; Pattern_Syntax # Sm [16] UPWARDS QUADRUPLE ARROW..LONG RIGHTWARDS SQUIGGLE ARROW +2800..28FF ; Pattern_Syntax # So [256] BRAILLE PATTERN BLANK..BRAILLE PATTERN DOTS-12345678 +2900..2982 ; Pattern_Syntax # Sm [131] RIGHTWARDS TWO-HEADED ARROW WITH VERTICAL STROKE..Z NOTATION TYPE COLON +2983 ; Pattern_Syntax # Ps LEFT WHITE CURLY BRACKET +2984 ; Pattern_Syntax # Pe RIGHT WHITE CURLY BRACKET +2985 ; Pattern_Syntax # Ps LEFT WHITE PARENTHESIS +2986 ; Pattern_Syntax # Pe RIGHT WHITE PARENTHESIS +2987 ; Pattern_Syntax # Ps Z NOTATION LEFT IMAGE BRACKET +2988 ; Pattern_Syntax # Pe Z NOTATION RIGHT IMAGE BRACKET +2989 ; Pattern_Syntax # Ps Z NOTATION LEFT BINDING BRACKET +298A ; Pattern_Syntax # Pe Z NOTATION RIGHT BINDING BRACKET +298B ; Pattern_Syntax # Ps LEFT SQUARE BRACKET WITH UNDERBAR +298C ; Pattern_Syntax # Pe RIGHT SQUARE BRACKET WITH UNDERBAR +298D ; Pattern_Syntax # Ps LEFT SQUARE BRACKET WITH TICK IN TOP CORNER +298E ; Pattern_Syntax # Pe RIGHT SQUARE BRACKET WITH TICK IN BOTTOM CORNER +298F ; Pattern_Syntax # Ps LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER +2990 ; Pattern_Syntax # Pe RIGHT SQUARE BRACKET WITH TICK IN TOP CORNER +2991 ; Pattern_Syntax # Ps LEFT ANGLE BRACKET WITH DOT +2992 ; Pattern_Syntax # Pe RIGHT ANGLE BRACKET WITH DOT +2993 ; Pattern_Syntax # Ps LEFT ARC LESS-THAN BRACKET +2994 ; Pattern_Syntax # Pe RIGHT ARC GREATER-THAN BRACKET +2995 ; Pattern_Syntax # Ps DOUBLE LEFT ARC GREATER-THAN BRACKET +2996 ; Pattern_Syntax # Pe DOUBLE RIGHT ARC LESS-THAN BRACKET +2997 ; Pattern_Syntax # Ps LEFT BLACK TORTOISE SHELL BRACKET +2998 ; Pattern_Syntax # Pe RIGHT BLACK TORTOISE SHELL BRACKET +2999..29D7 ; Pattern_Syntax # Sm [63] DOTTED FENCE..BLACK HOURGLASS +29D8 ; Pattern_Syntax # Ps LEFT WIGGLY FENCE +29D9 ; Pattern_Syntax # Pe RIGHT WIGGLY FENCE +29DA ; Pattern_Syntax # Ps LEFT DOUBLE WIGGLY FENCE +29DB ; Pattern_Syntax # Pe RIGHT DOUBLE WIGGLY FENCE +29DC..29FB ; Pattern_Syntax # Sm [32] INCOMPLETE INFINITY..TRIPLE PLUS +29FC ; Pattern_Syntax # Ps LEFT-POINTING CURVED ANGLE BRACKET +29FD ; Pattern_Syntax # Pe RIGHT-POINTING CURVED ANGLE BRACKET +29FE..2AFF ; Pattern_Syntax # Sm [258] TINY..N-ARY WHITE VERTICAL BAR +2B00..2B2F ; Pattern_Syntax # So [48] NORTH EAST WHITE ARROW..WHITE VERTICAL ELLIPSE +2B30..2B44 ; Pattern_Syntax # Sm [21] LEFT ARROW WITH SMALL CIRCLE..RIGHTWARDS ARROW THROUGH SUPERSET +2B45..2B46 ; Pattern_Syntax # So [2] LEFTWARDS QUADRUPLE ARROW..RIGHTWARDS QUADRUPLE ARROW +2B47..2B4C ; Pattern_Syntax # Sm [6] REVERSE TILDE OPERATOR ABOVE RIGHTWARDS ARROW..RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR +2B4D..2B73 ; Pattern_Syntax # So [39] DOWNWARDS TRIANGLE-HEADED ZIGZAG ARROW..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR +2B74..2B75 ; Pattern_Syntax # Cn [2] <reserved-2B74>..<reserved-2B75> +2B76..2B95 ; Pattern_Syntax # So [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW +2B96..2B97 ; Pattern_Syntax # Cn [2] <reserved-2B96>..<reserved-2B97> +2B98..2BB9 ; Pattern_Syntax # So [34] THREE-D TOP-LIGHTED LEFTWARDS EQUILATERAL ARROWHEAD..UP ARROWHEAD IN A RECTANGLE BOX +2BBA..2BBC ; Pattern_Syntax # Cn [3] <reserved-2BBA>..<reserved-2BBC> +2BBD..2BC8 ; Pattern_Syntax # So [12] BALLOT BOX WITH LIGHT X..BLACK MEDIUM RIGHT-POINTING TRIANGLE CENTRED +2BC9 ; Pattern_Syntax # Cn <reserved-2BC9> +2BCA..2BD1 ; Pattern_Syntax # So [8] TOP HALF BLACK CIRCLE..UNCERTAINTY SIGN +2BD2..2BEB ; Pattern_Syntax # Cn [26] <reserved-2BD2>..<reserved-2BEB> +2BEC..2BEF ; Pattern_Syntax # So [4] LEFTWARDS TWO-HEADED ARROW WITH TRIANGLE ARROWHEADS..DOWNWARDS TWO-HEADED ARROW WITH TRIANGLE ARROWHEADS +2BF0..2BFF ; Pattern_Syntax # Cn [16] <reserved-2BF0>..<reserved-2BFF> +2E00..2E01 ; Pattern_Syntax # Po [2] RIGHT ANGLE SUBSTITUTION MARKER..RIGHT ANGLE DOTTED SUBSTITUTION MARKER +2E02 ; Pattern_Syntax # Pi LEFT SUBSTITUTION BRACKET +2E03 ; Pattern_Syntax # Pf RIGHT SUBSTITUTION BRACKET +2E04 ; Pattern_Syntax # Pi LEFT DOTTED SUBSTITUTION BRACKET +2E05 ; Pattern_Syntax # Pf RIGHT DOTTED SUBSTITUTION BRACKET +2E06..2E08 ; Pattern_Syntax # Po [3] RAISED INTERPOLATION MARKER..DOTTED TRANSPOSITION MARKER +2E09 ; Pattern_Syntax # Pi LEFT TRANSPOSITION BRACKET +2E0A ; Pattern_Syntax # Pf RIGHT TRANSPOSITION BRACKET +2E0B ; Pattern_Syntax # Po RAISED SQUARE +2E0C ; Pattern_Syntax # Pi LEFT RAISED OMISSION BRACKET +2E0D ; Pattern_Syntax # Pf RIGHT RAISED OMISSION BRACKET +2E0E..2E16 ; Pattern_Syntax # Po [9] EDITORIAL CORONIS..DOTTED RIGHT-POINTING ANGLE +2E17 ; Pattern_Syntax # Pd DOUBLE OBLIQUE HYPHEN +2E18..2E19 ; Pattern_Syntax # Po [2] INVERTED INTERROBANG..PALM BRANCH +2E1A ; Pattern_Syntax # Pd HYPHEN WITH DIAERESIS +2E1B ; Pattern_Syntax # Po TILDE WITH RING ABOVE +2E1C ; Pattern_Syntax # Pi LEFT LOW PARAPHRASE BRACKET +2E1D ; Pattern_Syntax # Pf RIGHT LOW PARAPHRASE BRACKET +2E1E..2E1F ; Pattern_Syntax # Po [2] TILDE WITH DOT ABOVE..TILDE WITH DOT BELOW +2E20 ; Pattern_Syntax # Pi LEFT VERTICAL BAR WITH QUILL +2E21 ; Pattern_Syntax # Pf RIGHT VERTICAL BAR WITH QUILL +2E22 ; Pattern_Syntax # Ps TOP LEFT HALF BRACKET +2E23 ; Pattern_Syntax # Pe TOP RIGHT HALF BRACKET +2E24 ; Pattern_Syntax # Ps BOTTOM LEFT HALF BRACKET +2E25 ; Pattern_Syntax # Pe BOTTOM RIGHT HALF BRACKET +2E26 ; Pattern_Syntax # Ps LEFT SIDEWAYS U BRACKET +2E27 ; Pattern_Syntax # Pe RIGHT SIDEWAYS U BRACKET +2E28 ; Pattern_Syntax # Ps LEFT DOUBLE PARENTHESIS +2E29 ; Pattern_Syntax # Pe RIGHT DOUBLE PARENTHESIS +2E2A..2E2E ; Pattern_Syntax # Po [5] TWO DOTS OVER ONE DOT PUNCTUATION..REVERSED QUESTION MARK +2E2F ; Pattern_Syntax # Lm VERTICAL TILDE +2E30..2E39 ; Pattern_Syntax # Po [10] RING POINT..TOP HALF SECTION SIGN +2E3A..2E3B ; Pattern_Syntax # Pd [2] TWO-EM DASH..THREE-EM DASH +2E3C..2E3F ; Pattern_Syntax # Po [4] STENOGRAPHIC FULL STOP..CAPITULUM +2E40 ; Pattern_Syntax # Pd DOUBLE HYPHEN +2E41 ; Pattern_Syntax # Po REVERSED COMMA +2E42 ; Pattern_Syntax # Ps DOUBLE LOW-REVERSED-9 QUOTATION MARK +2E43..2E7F ; Pattern_Syntax # Cn [61] <reserved-2E43>..<reserved-2E7F> +3001..3003 ; Pattern_Syntax # Po [3] IDEOGRAPHIC COMMA..DITTO MARK +3008 ; Pattern_Syntax # Ps LEFT ANGLE BRACKET +3009 ; Pattern_Syntax # Pe RIGHT ANGLE BRACKET +300A ; Pattern_Syntax # Ps LEFT DOUBLE ANGLE BRACKET +300B ; Pattern_Syntax # Pe RIGHT DOUBLE ANGLE BRACKET +300C ; Pattern_Syntax # Ps LEFT CORNER BRACKET +300D ; Pattern_Syntax # Pe RIGHT CORNER BRACKET +300E ; Pattern_Syntax # Ps LEFT WHITE CORNER BRACKET +300F ; Pattern_Syntax # Pe RIGHT WHITE CORNER BRACKET +3010 ; Pattern_Syntax # Ps LEFT BLACK LENTICULAR BRACKET +3011 ; Pattern_Syntax # Pe RIGHT BLACK LENTICULAR BRACKET +3012..3013 ; Pattern_Syntax # So [2] POSTAL MARK..GETA MARK +3014 ; Pattern_Syntax # Ps LEFT TORTOISE SHELL BRACKET +3015 ; Pattern_Syntax # Pe RIGHT TORTOISE SHELL BRACKET +3016 ; Pattern_Syntax # Ps LEFT WHITE LENTICULAR BRACKET +3017 ; Pattern_Syntax # Pe RIGHT WHITE LENTICULAR BRACKET +3018 ; Pattern_Syntax # Ps LEFT WHITE TORTOISE SHELL BRACKET +3019 ; Pattern_Syntax # Pe RIGHT WHITE TORTOISE SHELL BRACKET +301A ; Pattern_Syntax # Ps LEFT WHITE SQUARE BRACKET +301B ; Pattern_Syntax # Pe RIGHT WHITE SQUARE BRACKET +301C ; Pattern_Syntax # Pd WAVE DASH +301D ; Pattern_Syntax # Ps REVERSED DOUBLE PRIME QUOTATION MARK +301E..301F ; Pattern_Syntax # Pe [2] DOUBLE PRIME QUOTATION MARK..LOW DOUBLE PRIME QUOTATION MARK +3020 ; Pattern_Syntax # So POSTAL MARK FACE +3030 ; Pattern_Syntax # Pd WAVY DASH +FD3E ; Pattern_Syntax # Pe ORNATE LEFT PARENTHESIS +FD3F ; Pattern_Syntax # Ps ORNATE RIGHT PARENTHESIS +FE45..FE46 ; Pattern_Syntax # Po [2] SESAME DOT..WHITE SESAME DOT + +# Total code points: 2760 + +# EOF diff --git a/src/third_party/unicode-8.0.0/ReadMe.txt b/src/third_party/unicode-8.0.0/ReadMe.txt new file mode 100644 index 00000000000..fc4a9044b7e --- /dev/null +++ b/src/third_party/unicode-8.0.0/ReadMe.txt @@ -0,0 +1,17 @@ +# Date: 2015-06-16, 20:24:00 GMT [KW] +# +# Unicode Character Database +# Copyright (c) 1991-2015 Unicode, Inc. +# For terms of use, see http://www.unicode.org/terms_of_use.html +# +# For documentation, see the following: +# NamesList.html +# UAX #38, "Unicode Han Database (Unihan)" +# UAX #44, "Unicode Character Database." +# +# The UAXes can be accessed at http://www.unicode.org/versions/Unicode8.0.0/ + +This directory contains the final data files +for the Unicode Character Database, for Version 8.0.0 of the Unicode +Standard. + |