diff options
-rw-r--r-- | distsrc/THIRD-PARTY-NOTICES | 44 | ||||
-rw-r--r-- | src/mongo/SConscript | 7 | ||||
-rw-r--r-- | src/mongo/db/fts/SConscript | 7 | ||||
-rw-r--r-- | src/mongo/db/fts/unicode/SConscript | 65 | ||||
-rw-r--r-- | src/mongo/db/fts/unicode/codepoints.h | 86 | ||||
-rw-r--r-- | src/mongo/db/fts/unicode/codepoints_diacritic_map.cpp | 3969 | ||||
-rw-r--r-- | src/mongo/db/fts/unicode/codepoints_test.cpp | 94 | ||||
-rw-r--r-- | src/mongo/db/fts/unicode/gen_casefold_map.py | 76 | ||||
-rw-r--r-- | src/mongo/db/fts/unicode/gen_delimiter_list.py | 80 | ||||
-rw-r--r-- | src/mongo/db/fts/unicode/gen_diacritic_list.py | 63 | ||||
-rw-r--r-- | src/mongo/db/fts/unicode/gen_diacritic_map.py | 105 | ||||
-rw-r--r-- | src/mongo/db/fts/unicode/gen_helper.py | 39 | ||||
-rw-r--r-- | src/mongo/db/fts/unicode/string.cpp | 157 | ||||
-rw-r--r-- | src/mongo/db/fts/unicode/string.h | 139 | ||||
-rw-r--r-- | src/mongo/db/fts/unicode/string_test.cpp | 187 | ||||
-rw-r--r-- | src/mongo/shell/linenoise_utf8.h | 6 | ||||
-rw-r--r-- | src/third_party/unicode-8.0.0/CaseFolding.txt | 1414 | ||||
-rw-r--r-- | src/third_party/unicode-8.0.0/PropList.txt | 1525 | ||||
-rw-r--r-- | src/third_party/unicode-8.0.0/ReadMe.txt | 17 |
19 files changed, 8076 insertions, 4 deletions
diff --git a/distsrc/THIRD-PARTY-NOTICES b/distsrc/THIRD-PARTY-NOTICES index 040be4680fd..37349340d22 100644 --- a/distsrc/THIRD-PARTY-NOTICES +++ b/distsrc/THIRD-PARTY-NOTICES @@ -501,7 +501,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -13) License Notice for SpiderMonkey +14) License Notice for SpiderMonkey ----------------------------------- |------------------------------------------------|------------------|---------------| @@ -698,7 +698,7 @@ You can contact the author at : - LZ4 source repository : http://code.google.com/p/lz4/ - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c -14) License Notice for Intel DFP Math Library +15) License Notice for Intel DFP Math Library --------------------------------------------- Copyright (c) 2011, Intel Corp. @@ -730,4 +730,44 @@ OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +16) License Notice for Unicode Data +----------------------------------- + +Copyright © 1991-2015 Unicode, Inc. All rights reserved. +Distributed under the Terms of Use in +http://www.unicode.org/copyright.html. 
+ +Permission is hereby granted, free of charge, to any person obtaining +a copy of the Unicode data files and any associated documentation +(the "Data Files") or Unicode software and any associated documentation +(the "Software") to deal in the Data Files or Software +without restriction, including without limitation the rights to use, +copy, modify, merge, publish, distribute, and/or sell copies of +the Data Files or Software, and to permit persons to whom the Data Files +or Software are furnished to do so, provided that +(a) this copyright and permission notice appear with all copies +of the Data Files or Software, +(b) this copyright and permission notice appear in associated +documentation, and +(c) there is clear notice in each modified Data File or in the Software +as well as in the documentation associated with the Data File(s) or +Software that the data or software has been modified. + +THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT OF THIRD PARTY RIGHTS. +IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS +NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL +DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, +DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER +TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +PERFORMANCE OF THE DATA FILES OR SOFTWARE. + +Except as contained in this notice, the name of a copyright holder +shall not be used in advertising or otherwise to promote the sale, +use or other dealings in these Data Files or Software without prior +written authorization of the copyright holder. 
+ End diff --git a/src/mongo/SConscript b/src/mongo/SConscript index bd6407bbff8..f6e42077bfa 100644 --- a/src/mongo/SConscript +++ b/src/mongo/SConscript @@ -312,6 +312,11 @@ env.Install( 'util/options_parser/options_parser_init', ])) +env.Library("linenoise_utf8", + source=[ + "shell/linenoise_utf8.cpp", + ]) + # --- sniffer --- mongosniff_built = False if env.TargetOSIs('osx') or env["_HAVEPCAP"]: @@ -341,7 +346,6 @@ if not has_option('noshell') and (usev8 or usemozjs): "shell/bench.cpp", "shell/clientAndShell.cpp", "shell/linenoise.cpp", - "shell/linenoise_utf8.cpp", "shell/mk_wcwidth.cpp", "shell/mongo-server.cpp", "shell/shell_utils.cpp", @@ -355,6 +359,7 @@ if not has_option('noshell') and (usev8 or usemozjs): 'scripting/scripting', 'util/processinfo', 'util/signal_handlers', + 'linenoise_utf8', 'shell/mongojs', ]) diff --git a/src/mongo/db/fts/SConscript b/src/mongo/db/fts/SConscript index 25f3b467c57..3a769baca98 100644 --- a/src/mongo/db/fts/SConscript +++ b/src/mongo/db/fts/SConscript @@ -2,6 +2,12 @@ Import("env") +env.SConscript( + dirs=[ + 'unicode', + ], +) + stop_word_languages = [ 'danish', 'dutch', @@ -44,6 +50,7 @@ baseEnv.Library('base', [ 'tokenizer.cpp', ], LIBDEPS=["$BUILD_DIR/mongo/base", "$BUILD_DIR/mongo/db/common", + "$BUILD_DIR/mongo/db/fts/unicode/unicode", "$BUILD_DIR/mongo/platform/platform", "$BUILD_DIR/third_party/shim_stemmer" ]) diff --git a/src/mongo/db/fts/unicode/SConscript b/src/mongo/db/fts/unicode/SConscript new file mode 100644 index 00000000000..dc01c5b618c --- /dev/null +++ b/src/mongo/db/fts/unicode/SConscript @@ -0,0 +1,65 @@ +# -*- mode: python -*- + +Import("env") + +env.Command( + target="codepoints_casefold.cpp", + source=[ + "gen_casefold_map.py", + "#/src/third_party/unicode-8.0.0/CaseFolding.txt", + "gen_helper.py", + ], + action="$PYTHON ${SOURCES[0]} ${SOURCES[1]} $TARGETS") + +env.Command( + target="codepoints_delimiter_list.cpp", + source=[ + "gen_delimiter_list.py", + 
"#/src/third_party/unicode-8.0.0/PropList.txt", + "gen_helper.py", + ], + action="$PYTHON ${SOURCES[0]} ${SOURCES[1]} $TARGETS") + +env.Command( + target="codepoints_diacritic_list.cpp", + source=[ + "gen_diacritic_list.py", + "#/src/third_party/unicode-8.0.0/PropList.txt", + "gen_helper.py", + ], + action="$PYTHON ${SOURCES[0]} ${SOURCES[1]} $TARGETS") + +env.Library( + target='unicode', + source=[ + 'codepoints_casefold.cpp', + 'codepoints_delimiter_list.cpp', + 'codepoints_diacritic_list.cpp', + 'codepoints_diacritic_map.cpp', + 'string.cpp', + ], + LIBDEPS=[ + '$BUILD_DIR/mongo/base', + '$BUILD_DIR/mongo/linenoise_utf8', + ] +) + +env.CppUnitTest( + target='string_test', + source=[ + 'string_test.cpp' + ], + LIBDEPS=[ + 'unicode', + ] +) + +env.CppUnitTest( + target='codepoints_test', + source=[ + 'codepoints_test.cpp' + ], + LIBDEPS=[ + 'unicode', + ] +) diff --git a/src/mongo/db/fts/unicode/codepoints.h b/src/mongo/db/fts/unicode/codepoints.h new file mode 100644 index 00000000000..5b1e8e2b2b5 --- /dev/null +++ b/src/mongo/db/fts/unicode/codepoints.h @@ -0,0 +1,86 @@ +/** + * Copyright (C) 2015 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include <string> + +namespace mongo { +namespace unicode { + +/** + * There are currently two options supported for the delimiter list. The only difference between the + * English and NotEnglish modes is that in English, the apostrophe is considered a delimiter, while + * in NotEnglish, it is not. + */ +enum class DelimiterListLanguage { + kEnglish, + kNotEnglish, +}; + +/** + * There are currently two case folding modes supported. The only difference between the Normal mode + * and the Turkish mode is that in the Turkish mode, the letter I is lowercased to ı, and the letter + * İ is lowercased to i. In the normal mode, the letter I is lowercased to i, and there is no + * mapping for İ. + */ +enum class CaseFoldMode { + kNormal, + kTurkish, +}; + +/** + * Returns whether or not the given codepoint is a diacritic. In 'D' normalized Unicode text, + * diacritics are removed by removing characters with these codepoints. + */ +bool codepointIsDiacritic(char32_t codepoint); + +/** + * Returns whether or not the given codepoint is considered a delimiter in the language 'lang'. 
+ * Currently, there is only a difference between English and non-English languages (the apostrophe). + * To see which Unicode character categories were considered delimiters, see gen_delimiter_list.py. + */ +bool codepointIsDelimiter(char32_t codepoint, DelimiterListLanguage lang); + +/** + * Returns a version of the given codepoint without any diacritics. These mappings are generated by + * taking all of the characters within a set of Unicode code blocks (see gen_diacritic_map.py to see + * which code blocks are used), decomposing them to the NFD normalization form, removing any + * combining marks, and renormalizing them to the NFC form. The result is a mapping from original + * codepoint to a codepoint with no diacritics. + */ +char32_t codepointRemoveDiacritics(char32_t codepoint); + +/** + * Returns the lowercased version of the given codepoint, applying the special Turkish version of + * case folding if specified. + */ +char32_t codepointToLower(char32_t codepoint, CaseFoldMode mode = CaseFoldMode::kNormal); + +} // namespace unicode +} // namespace mongo diff --git a/src/mongo/db/fts/unicode/codepoints_diacritic_map.cpp b/src/mongo/db/fts/unicode/codepoints_diacritic_map.cpp new file mode 100644 index 00000000000..c6b39d328b8 --- /dev/null +++ b/src/mongo/db/fts/unicode/codepoints_diacritic_map.cpp @@ -0,0 +1,3969 @@ +/** + * Copyright (C) 2015 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. 
If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + * + * THIS IS A GENERATED FILE, DO NOT MODIFY. + */ + +#include "mongo/db/fts/unicode/codepoints.h" + +namespace mongo { +namespace unicode { + +char32_t codepointRemoveDiacritics(char32_t codepoint) { + switch (codepoint) { + case 0xc0: + return 0x41; + case 0xc1: + return 0x41; + case 0xc2: + return 0x41; + case 0xc3: + return 0x41; + case 0xc4: + return 0x41; + case 0xc5: + return 0x41; + case 0xc7: + return 0x43; + case 0xc8: + return 0x45; + case 0xc9: + return 0x45; + case 0xca: + return 0x45; + case 0xcb: + return 0x45; + case 0xcc: + return 0x49; + case 0xcd: + return 0x49; + case 0xce: + return 0x49; + case 0xcf: + return 0x49; + case 0xd1: + return 0x4e; + case 0xd2: + return 0x4f; + case 0xd3: + return 0x4f; + case 0xd4: + return 0x4f; + case 0xd5: + return 0x4f; + case 0xd6: + return 0x4f; + case 0xd9: + return 0x55; + case 0xda: + return 0x55; + case 0xdb: + return 0x55; + case 0xdc: + return 0x55; + case 0xdd: + return 0x59; + case 0xe0: + return 0x61; + case 0xe1: + return 0x61; + case 0xe2: + return 0x61; + case 0xe3: + return 0x61; + case 0xe4: + return 0x61; + case 0xe5: + return 0x61; + case 0xe7: + return 0x63; + 
case 0xe8: + return 0x65; + case 0xe9: + return 0x65; + case 0xea: + return 0x65; + case 0xeb: + return 0x65; + case 0xec: + return 0x69; + case 0xed: + return 0x69; + case 0xee: + return 0x69; + case 0xef: + return 0x69; + case 0xf1: + return 0x6e; + case 0xf2: + return 0x6f; + case 0xf3: + return 0x6f; + case 0xf4: + return 0x6f; + case 0xf5: + return 0x6f; + case 0xf6: + return 0x6f; + case 0xf9: + return 0x75; + case 0xfa: + return 0x75; + case 0xfb: + return 0x75; + case 0xfc: + return 0x75; + case 0xfd: + return 0x79; + case 0xff: + return 0x79; + case 0x100: + return 0x41; + case 0x101: + return 0x61; + case 0x102: + return 0x41; + case 0x103: + return 0x61; + case 0x104: + return 0x41; + case 0x105: + return 0x61; + case 0x106: + return 0x43; + case 0x107: + return 0x63; + case 0x108: + return 0x43; + case 0x109: + return 0x63; + case 0x10a: + return 0x43; + case 0x10b: + return 0x63; + case 0x10c: + return 0x43; + case 0x10d: + return 0x63; + case 0x10e: + return 0x44; + case 0x10f: + return 0x64; + case 0x112: + return 0x45; + case 0x113: + return 0x65; + case 0x114: + return 0x45; + case 0x115: + return 0x65; + case 0x116: + return 0x45; + case 0x117: + return 0x65; + case 0x118: + return 0x45; + case 0x119: + return 0x65; + case 0x11a: + return 0x45; + case 0x11b: + return 0x65; + case 0x11c: + return 0x47; + case 0x11d: + return 0x67; + case 0x11e: + return 0x47; + case 0x11f: + return 0x67; + case 0x120: + return 0x47; + case 0x121: + return 0x67; + case 0x122: + return 0x47; + case 0x123: + return 0x67; + case 0x124: + return 0x48; + case 0x125: + return 0x68; + case 0x128: + return 0x49; + case 0x129: + return 0x69; + case 0x12a: + return 0x49; + case 0x12b: + return 0x69; + case 0x12c: + return 0x49; + case 0x12d: + return 0x69; + case 0x12e: + return 0x49; + case 0x12f: + return 0x69; + case 0x130: + return 0x49; + case 0x134: + return 0x4a; + case 0x135: + return 0x6a; + case 0x136: + return 0x4b; + case 0x137: + return 0x6b; + case 0x139: + 
return 0x4c; + case 0x13a: + return 0x6c; + case 0x13b: + return 0x4c; + case 0x13c: + return 0x6c; + case 0x13d: + return 0x4c; + case 0x13e: + return 0x6c; + case 0x143: + return 0x4e; + case 0x144: + return 0x6e; + case 0x145: + return 0x4e; + case 0x146: + return 0x6e; + case 0x147: + return 0x4e; + case 0x148: + return 0x6e; + case 0x14c: + return 0x4f; + case 0x14d: + return 0x6f; + case 0x14e: + return 0x4f; + case 0x14f: + return 0x6f; + case 0x150: + return 0x4f; + case 0x151: + return 0x6f; + case 0x154: + return 0x52; + case 0x155: + return 0x72; + case 0x156: + return 0x52; + case 0x157: + return 0x72; + case 0x158: + return 0x52; + case 0x159: + return 0x72; + case 0x15a: + return 0x53; + case 0x15b: + return 0x73; + case 0x15c: + return 0x53; + case 0x15d: + return 0x73; + case 0x15e: + return 0x53; + case 0x15f: + return 0x73; + case 0x160: + return 0x53; + case 0x161: + return 0x73; + case 0x162: + return 0x54; + case 0x163: + return 0x74; + case 0x164: + return 0x54; + case 0x165: + return 0x74; + case 0x168: + return 0x55; + case 0x169: + return 0x75; + case 0x16a: + return 0x55; + case 0x16b: + return 0x75; + case 0x16c: + return 0x55; + case 0x16d: + return 0x75; + case 0x16e: + return 0x55; + case 0x16f: + return 0x75; + case 0x170: + return 0x55; + case 0x171: + return 0x75; + case 0x172: + return 0x55; + case 0x173: + return 0x75; + case 0x174: + return 0x57; + case 0x175: + return 0x77; + case 0x176: + return 0x59; + case 0x177: + return 0x79; + case 0x178: + return 0x59; + case 0x179: + return 0x5a; + case 0x17a: + return 0x7a; + case 0x17b: + return 0x5a; + case 0x17c: + return 0x7a; + case 0x17d: + return 0x5a; + case 0x17e: + return 0x7a; + case 0x1a0: + return 0x4f; + case 0x1a1: + return 0x6f; + case 0x1af: + return 0x55; + case 0x1b0: + return 0x75; + case 0x1cd: + return 0x41; + case 0x1ce: + return 0x61; + case 0x1cf: + return 0x49; + case 0x1d0: + return 0x69; + case 0x1d1: + return 0x4f; + case 0x1d2: + return 0x6f; + case 0x1d3: 
+ return 0x55; + case 0x1d4: + return 0x75; + case 0x1d5: + return 0x55; + case 0x1d6: + return 0x75; + case 0x1d7: + return 0x55; + case 0x1d8: + return 0x75; + case 0x1d9: + return 0x55; + case 0x1da: + return 0x75; + case 0x1db: + return 0x55; + case 0x1dc: + return 0x75; + case 0x1de: + return 0x41; + case 0x1df: + return 0x61; + case 0x1e0: + return 0x41; + case 0x1e1: + return 0x61; + case 0x1e2: + return 0xc6; + case 0x1e3: + return 0xe6; + case 0x1e6: + return 0x47; + case 0x1e7: + return 0x67; + case 0x1e8: + return 0x4b; + case 0x1e9: + return 0x6b; + case 0x1ea: + return 0x4f; + case 0x1eb: + return 0x6f; + case 0x1ec: + return 0x4f; + case 0x1ed: + return 0x6f; + case 0x1ee: + return 0x1b7; + case 0x1ef: + return 0x292; + case 0x1f0: + return 0x6a; + case 0x1f4: + return 0x47; + case 0x1f5: + return 0x67; + case 0x1f8: + return 0x4e; + case 0x1f9: + return 0x6e; + case 0x1fa: + return 0x41; + case 0x1fb: + return 0x61; + case 0x1fc: + return 0xc6; + case 0x1fd: + return 0xe6; + case 0x1fe: + return 0xd8; + case 0x1ff: + return 0xf8; + case 0x200: + return 0x41; + case 0x201: + return 0x61; + case 0x202: + return 0x41; + case 0x203: + return 0x61; + case 0x204: + return 0x45; + case 0x205: + return 0x65; + case 0x206: + return 0x45; + case 0x207: + return 0x65; + case 0x208: + return 0x49; + case 0x209: + return 0x69; + case 0x20a: + return 0x49; + case 0x20b: + return 0x69; + case 0x20c: + return 0x4f; + case 0x20d: + return 0x6f; + case 0x20e: + return 0x4f; + case 0x20f: + return 0x6f; + case 0x210: + return 0x52; + case 0x211: + return 0x72; + case 0x212: + return 0x52; + case 0x213: + return 0x72; + case 0x214: + return 0x55; + case 0x215: + return 0x75; + case 0x216: + return 0x55; + case 0x217: + return 0x75; + case 0x218: + return 0x53; + case 0x219: + return 0x73; + case 0x21a: + return 0x54; + case 0x21b: + return 0x74; + case 0x21e: + return 0x48; + case 0x21f: + return 0x68; + case 0x226: + return 0x41; + case 0x227: + return 0x61; + case 
0x228: + return 0x45; + case 0x229: + return 0x65; + case 0x22a: + return 0x4f; + case 0x22b: + return 0x6f; + case 0x22c: + return 0x4f; + case 0x22d: + return 0x6f; + case 0x22e: + return 0x4f; + case 0x22f: + return 0x6f; + case 0x230: + return 0x4f; + case 0x231: + return 0x6f; + case 0x232: + return 0x59; + case 0x233: + return 0x79; + case 0x37e: + return 0x3b; + case 0x386: + return 0x391; + case 0x388: + return 0x395; + case 0x389: + return 0x397; + case 0x38a: + return 0x399; + case 0x38c: + return 0x39f; + case 0x38e: + return 0x3a5; + case 0x38f: + return 0x3a9; + case 0x390: + return 0x3b9; + case 0x3aa: + return 0x399; + case 0x3ab: + return 0x3a5; + case 0x3ac: + return 0x3b1; + case 0x3ad: + return 0x3b5; + case 0x3ae: + return 0x3b7; + case 0x3af: + return 0x3b9; + case 0x3b0: + return 0x3c5; + case 0x3ca: + return 0x3b9; + case 0x3cb: + return 0x3c5; + case 0x3cc: + return 0x3bf; + case 0x3cd: + return 0x3c5; + case 0x3ce: + return 0x3c9; + case 0x3d3: + return 0x3d2; + case 0x3d4: + return 0x3d2; + case 0x400: + return 0x415; + case 0x401: + return 0x415; + case 0x403: + return 0x413; + case 0x407: + return 0x406; + case 0x40c: + return 0x41a; + case 0x40d: + return 0x418; + case 0x40e: + return 0x423; + case 0x419: + return 0x418; + case 0x439: + return 0x438; + case 0x450: + return 0x435; + case 0x451: + return 0x435; + case 0x453: + return 0x433; + case 0x457: + return 0x456; + case 0x45c: + return 0x43a; + case 0x45d: + return 0x438; + case 0x45e: + return 0x443; + case 0x476: + return 0x474; + case 0x477: + return 0x475; + case 0x4c1: + return 0x416; + case 0x4c2: + return 0x436; + case 0x4d0: + return 0x410; + case 0x4d1: + return 0x430; + case 0x4d2: + return 0x410; + case 0x4d3: + return 0x430; + case 0x4d6: + return 0x415; + case 0x4d7: + return 0x435; + case 0x4da: + return 0x4d8; + case 0x4db: + return 0x4d9; + case 0x4dc: + return 0x416; + case 0x4dd: + return 0x436; + case 0x4de: + return 0x417; + case 0x4df: + return 0x437; + case 
0x4e2: + return 0x418; + case 0x4e3: + return 0x438; + case 0x4e4: + return 0x418; + case 0x4e5: + return 0x438; + case 0x4e6: + return 0x41e; + case 0x4e7: + return 0x43e; + case 0x4ea: + return 0x4e8; + case 0x4eb: + return 0x4e9; + case 0x4ec: + return 0x42d; + case 0x4ed: + return 0x44d; + case 0x4ee: + return 0x423; + case 0x4ef: + return 0x443; + case 0x4f0: + return 0x423; + case 0x4f1: + return 0x443; + case 0x4f2: + return 0x423; + case 0x4f3: + return 0x443; + case 0x4f4: + return 0x427; + case 0x4f5: + return 0x447; + case 0x4f8: + return 0x42b; + case 0x4f9: + return 0x44b; + case 0x929: + return 0x928; + case 0x931: + return 0x930; + case 0x934: + return 0x933; + case 0x958: + return 0x915; + case 0x959: + return 0x916; + case 0x95a: + return 0x917; + case 0x95b: + return 0x91c; + case 0x95c: + return 0x921; + case 0x95d: + return 0x922; + case 0x95e: + return 0x92b; + case 0x95f: + return 0x92f; + case 0x9dc: + return 0x9a1; + case 0x9dd: + return 0x9a2; + case 0x9df: + return 0x9af; + case 0xa33: + return 0xa32; + case 0xa36: + return 0xa38; + case 0xa59: + return 0xa16; + case 0xa5a: + return 0xa17; + case 0xa5b: + return 0xa1c; + case 0xa5e: + return 0xa2b; + case 0xb5c: + return 0xb21; + case 0xb5d: + return 0xb22; + case 0xdda: + return 0xdd9; + case 0xddd: + return 0xddc; + case 0x1e00: + return 0x41; + case 0x1e01: + return 0x61; + case 0x1e02: + return 0x42; + case 0x1e03: + return 0x62; + case 0x1e04: + return 0x42; + case 0x1e05: + return 0x62; + case 0x1e06: + return 0x42; + case 0x1e07: + return 0x62; + case 0x1e08: + return 0x43; + case 0x1e09: + return 0x63; + case 0x1e0a: + return 0x44; + case 0x1e0b: + return 0x64; + case 0x1e0c: + return 0x44; + case 0x1e0d: + return 0x64; + case 0x1e0e: + return 0x44; + case 0x1e0f: + return 0x64; + case 0x1e10: + return 0x44; + case 0x1e11: + return 0x64; + case 0x1e12: + return 0x44; + case 0x1e13: + return 0x64; + case 0x1e14: + return 0x45; + case 0x1e15: + return 0x65; + case 0x1e16: + return 
0x45; + case 0x1e17: + return 0x65; + case 0x1e18: + return 0x45; + case 0x1e19: + return 0x65; + case 0x1e1a: + return 0x45; + case 0x1e1b: + return 0x65; + case 0x1e1c: + return 0x45; + case 0x1e1d: + return 0x65; + case 0x1e1e: + return 0x46; + case 0x1e1f: + return 0x66; + case 0x1e20: + return 0x47; + case 0x1e21: + return 0x67; + case 0x1e22: + return 0x48; + case 0x1e23: + return 0x68; + case 0x1e24: + return 0x48; + case 0x1e25: + return 0x68; + case 0x1e26: + return 0x48; + case 0x1e27: + return 0x68; + case 0x1e28: + return 0x48; + case 0x1e29: + return 0x68; + case 0x1e2a: + return 0x48; + case 0x1e2b: + return 0x68; + case 0x1e2c: + return 0x49; + case 0x1e2d: + return 0x69; + case 0x1e2e: + return 0x49; + case 0x1e2f: + return 0x69; + case 0x1e30: + return 0x4b; + case 0x1e31: + return 0x6b; + case 0x1e32: + return 0x4b; + case 0x1e33: + return 0x6b; + case 0x1e34: + return 0x4b; + case 0x1e35: + return 0x6b; + case 0x1e36: + return 0x4c; + case 0x1e37: + return 0x6c; + case 0x1e38: + return 0x4c; + case 0x1e39: + return 0x6c; + case 0x1e3a: + return 0x4c; + case 0x1e3b: + return 0x6c; + case 0x1e3c: + return 0x4c; + case 0x1e3d: + return 0x6c; + case 0x1e3e: + return 0x4d; + case 0x1e3f: + return 0x6d; + case 0x1e40: + return 0x4d; + case 0x1e41: + return 0x6d; + case 0x1e42: + return 0x4d; + case 0x1e43: + return 0x6d; + case 0x1e44: + return 0x4e; + case 0x1e45: + return 0x6e; + case 0x1e46: + return 0x4e; + case 0x1e47: + return 0x6e; + case 0x1e48: + return 0x4e; + case 0x1e49: + return 0x6e; + case 0x1e4a: + return 0x4e; + case 0x1e4b: + return 0x6e; + case 0x1e4c: + return 0x4f; + case 0x1e4d: + return 0x6f; + case 0x1e4e: + return 0x4f; + case 0x1e4f: + return 0x6f; + case 0x1e50: + return 0x4f; + case 0x1e51: + return 0x6f; + case 0x1e52: + return 0x4f; + case 0x1e53: + return 0x6f; + case 0x1e54: + return 0x50; + case 0x1e55: + return 0x70; + case 0x1e56: + return 0x50; + case 0x1e57: + return 0x70; + case 0x1e58: + return 0x52; + case 
0x1e59: + return 0x72; + case 0x1e5a: + return 0x52; + case 0x1e5b: + return 0x72; + case 0x1e5c: + return 0x52; + case 0x1e5d: + return 0x72; + case 0x1e5e: + return 0x52; + case 0x1e5f: + return 0x72; + case 0x1e60: + return 0x53; + case 0x1e61: + return 0x73; + case 0x1e62: + return 0x53; + case 0x1e63: + return 0x73; + case 0x1e64: + return 0x53; + case 0x1e65: + return 0x73; + case 0x1e66: + return 0x53; + case 0x1e67: + return 0x73; + case 0x1e68: + return 0x53; + case 0x1e69: + return 0x73; + case 0x1e6a: + return 0x54; + case 0x1e6b: + return 0x74; + case 0x1e6c: + return 0x54; + case 0x1e6d: + return 0x74; + case 0x1e6e: + return 0x54; + case 0x1e6f: + return 0x74; + case 0x1e70: + return 0x54; + case 0x1e71: + return 0x74; + case 0x1e72: + return 0x55; + case 0x1e73: + return 0x75; + case 0x1e74: + return 0x55; + case 0x1e75: + return 0x75; + case 0x1e76: + return 0x55; + case 0x1e77: + return 0x75; + case 0x1e78: + return 0x55; + case 0x1e79: + return 0x75; + case 0x1e7a: + return 0x55; + case 0x1e7b: + return 0x75; + case 0x1e7c: + return 0x56; + case 0x1e7d: + return 0x76; + case 0x1e7e: + return 0x56; + case 0x1e7f: + return 0x76; + case 0x1e80: + return 0x57; + case 0x1e81: + return 0x77; + case 0x1e82: + return 0x57; + case 0x1e83: + return 0x77; + case 0x1e84: + return 0x57; + case 0x1e85: + return 0x77; + case 0x1e86: + return 0x57; + case 0x1e87: + return 0x77; + case 0x1e88: + return 0x57; + case 0x1e89: + return 0x77; + case 0x1e8a: + return 0x58; + case 0x1e8b: + return 0x78; + case 0x1e8c: + return 0x58; + case 0x1e8d: + return 0x78; + case 0x1e8e: + return 0x59; + case 0x1e8f: + return 0x79; + case 0x1e90: + return 0x5a; + case 0x1e91: + return 0x7a; + case 0x1e92: + return 0x5a; + case 0x1e93: + return 0x7a; + case 0x1e94: + return 0x5a; + case 0x1e95: + return 0x7a; + case 0x1e96: + return 0x68; + case 0x1e97: + return 0x74; + case 0x1e98: + return 0x77; + case 0x1e99: + return 0x79; + case 0x1e9b: + return 0x17f; + case 0x1ea0: + return 
0x41; + case 0x1ea1: + return 0x61; + case 0x1ea2: + return 0x41; + case 0x1ea3: + return 0x61; + case 0x1ea4: + return 0x41; + case 0x1ea5: + return 0x61; + case 0x1ea6: + return 0x41; + case 0x1ea7: + return 0x61; + case 0x1ea8: + return 0x41; + case 0x1ea9: + return 0x61; + case 0x1eaa: + return 0x41; + case 0x1eab: + return 0x61; + case 0x1eac: + return 0x41; + case 0x1ead: + return 0x61; + case 0x1eae: + return 0x41; + case 0x1eaf: + return 0x61; + case 0x1eb0: + return 0x41; + case 0x1eb1: + return 0x61; + case 0x1eb2: + return 0x41; + case 0x1eb3: + return 0x61; + case 0x1eb4: + return 0x41; + case 0x1eb5: + return 0x61; + case 0x1eb6: + return 0x41; + case 0x1eb7: + return 0x61; + case 0x1eb8: + return 0x45; + case 0x1eb9: + return 0x65; + case 0x1eba: + return 0x45; + case 0x1ebb: + return 0x65; + case 0x1ebc: + return 0x45; + case 0x1ebd: + return 0x65; + case 0x1ebe: + return 0x45; + case 0x1ebf: + return 0x65; + case 0x1ec0: + return 0x45; + case 0x1ec1: + return 0x65; + case 0x1ec2: + return 0x45; + case 0x1ec3: + return 0x65; + case 0x1ec4: + return 0x45; + case 0x1ec5: + return 0x65; + case 0x1ec6: + return 0x45; + case 0x1ec7: + return 0x65; + case 0x1ec8: + return 0x49; + case 0x1ec9: + return 0x69; + case 0x1eca: + return 0x49; + case 0x1ecb: + return 0x69; + case 0x1ecc: + return 0x4f; + case 0x1ecd: + return 0x6f; + case 0x1ece: + return 0x4f; + case 0x1ecf: + return 0x6f; + case 0x1ed0: + return 0x4f; + case 0x1ed1: + return 0x6f; + case 0x1ed2: + return 0x4f; + case 0x1ed3: + return 0x6f; + case 0x1ed4: + return 0x4f; + case 0x1ed5: + return 0x6f; + case 0x1ed6: + return 0x4f; + case 0x1ed7: + return 0x6f; + case 0x1ed8: + return 0x4f; + case 0x1ed9: + return 0x6f; + case 0x1eda: + return 0x4f; + case 0x1edb: + return 0x6f; + case 0x1edc: + return 0x4f; + case 0x1edd: + return 0x6f; + case 0x1ede: + return 0x4f; + case 0x1edf: + return 0x6f; + case 0x1ee0: + return 0x4f; + case 0x1ee1: + return 0x6f; + case 0x1ee2: + return 0x4f; + case 
0x1ee3: + return 0x6f; + case 0x1ee4: + return 0x55; + case 0x1ee5: + return 0x75; + case 0x1ee6: + return 0x55; + case 0x1ee7: + return 0x75; + case 0x1ee8: + return 0x55; + case 0x1ee9: + return 0x75; + case 0x1eea: + return 0x55; + case 0x1eeb: + return 0x75; + case 0x1eec: + return 0x55; + case 0x1eed: + return 0x75; + case 0x1eee: + return 0x55; + case 0x1eef: + return 0x75; + case 0x1ef0: + return 0x55; + case 0x1ef1: + return 0x75; + case 0x1ef2: + return 0x59; + case 0x1ef3: + return 0x79; + case 0x1ef4: + return 0x59; + case 0x1ef5: + return 0x79; + case 0x1ef6: + return 0x59; + case 0x1ef7: + return 0x79; + case 0x1ef8: + return 0x59; + case 0x1ef9: + return 0x79; + case 0x1f00: + return 0x3b1; + case 0x1f01: + return 0x3b1; + case 0x1f02: + return 0x3b1; + case 0x1f03: + return 0x3b1; + case 0x1f04: + return 0x3b1; + case 0x1f05: + return 0x3b1; + case 0x1f06: + return 0x3b1; + case 0x1f07: + return 0x3b1; + case 0x1f08: + return 0x391; + case 0x1f09: + return 0x391; + case 0x1f0a: + return 0x391; + case 0x1f0b: + return 0x391; + case 0x1f0c: + return 0x391; + case 0x1f0d: + return 0x391; + case 0x1f0e: + return 0x391; + case 0x1f0f: + return 0x391; + case 0x1f10: + return 0x3b5; + case 0x1f11: + return 0x3b5; + case 0x1f12: + return 0x3b5; + case 0x1f13: + return 0x3b5; + case 0x1f14: + return 0x3b5; + case 0x1f15: + return 0x3b5; + case 0x1f18: + return 0x395; + case 0x1f19: + return 0x395; + case 0x1f1a: + return 0x395; + case 0x1f1b: + return 0x395; + case 0x1f1c: + return 0x395; + case 0x1f1d: + return 0x395; + case 0x1f20: + return 0x3b7; + case 0x1f21: + return 0x3b7; + case 0x1f22: + return 0x3b7; + case 0x1f23: + return 0x3b7; + case 0x1f24: + return 0x3b7; + case 0x1f25: + return 0x3b7; + case 0x1f26: + return 0x3b7; + case 0x1f27: + return 0x3b7; + case 0x1f28: + return 0x397; + case 0x1f29: + return 0x397; + case 0x1f2a: + return 0x397; + case 0x1f2b: + return 0x397; + case 0x1f2c: + return 0x397; + case 0x1f2d: + return 0x397; + case 0x1f2e: 
+ return 0x397; + case 0x1f2f: + return 0x397; + case 0x1f30: + return 0x3b9; + case 0x1f31: + return 0x3b9; + case 0x1f32: + return 0x3b9; + case 0x1f33: + return 0x3b9; + case 0x1f34: + return 0x3b9; + case 0x1f35: + return 0x3b9; + case 0x1f36: + return 0x3b9; + case 0x1f37: + return 0x3b9; + case 0x1f38: + return 0x399; + case 0x1f39: + return 0x399; + case 0x1f3a: + return 0x399; + case 0x1f3b: + return 0x399; + case 0x1f3c: + return 0x399; + case 0x1f3d: + return 0x399; + case 0x1f3e: + return 0x399; + case 0x1f3f: + return 0x399; + case 0x1f40: + return 0x3bf; + case 0x1f41: + return 0x3bf; + case 0x1f42: + return 0x3bf; + case 0x1f43: + return 0x3bf; + case 0x1f44: + return 0x3bf; + case 0x1f45: + return 0x3bf; + case 0x1f48: + return 0x39f; + case 0x1f49: + return 0x39f; + case 0x1f4a: + return 0x39f; + case 0x1f4b: + return 0x39f; + case 0x1f4c: + return 0x39f; + case 0x1f4d: + return 0x39f; + case 0x1f50: + return 0x3c5; + case 0x1f51: + return 0x3c5; + case 0x1f52: + return 0x3c5; + case 0x1f53: + return 0x3c5; + case 0x1f54: + return 0x3c5; + case 0x1f55: + return 0x3c5; + case 0x1f56: + return 0x3c5; + case 0x1f57: + return 0x3c5; + case 0x1f59: + return 0x3a5; + case 0x1f5b: + return 0x3a5; + case 0x1f5d: + return 0x3a5; + case 0x1f5f: + return 0x3a5; + case 0x1f60: + return 0x3c9; + case 0x1f61: + return 0x3c9; + case 0x1f62: + return 0x3c9; + case 0x1f63: + return 0x3c9; + case 0x1f64: + return 0x3c9; + case 0x1f65: + return 0x3c9; + case 0x1f66: + return 0x3c9; + case 0x1f67: + return 0x3c9; + case 0x1f68: + return 0x3a9; + case 0x1f69: + return 0x3a9; + case 0x1f6a: + return 0x3a9; + case 0x1f6b: + return 0x3a9; + case 0x1f6c: + return 0x3a9; + case 0x1f6d: + return 0x3a9; + case 0x1f6e: + return 0x3a9; + case 0x1f6f: + return 0x3a9; + case 0x1f70: + return 0x3b1; + case 0x1f71: + return 0x3b1; + case 0x1f72: + return 0x3b5; + case 0x1f73: + return 0x3b5; + case 0x1f74: + return 0x3b7; + case 0x1f75: + return 0x3b7; + case 0x1f76: + return 0x3b9; 
+ case 0x1f77: + return 0x3b9; + case 0x1f78: + return 0x3bf; + case 0x1f79: + return 0x3bf; + case 0x1f7a: + return 0x3c5; + case 0x1f7b: + return 0x3c5; + case 0x1f7c: + return 0x3c9; + case 0x1f7d: + return 0x3c9; + case 0x1f80: + return 0x3b1; + case 0x1f81: + return 0x3b1; + case 0x1f82: + return 0x3b1; + case 0x1f83: + return 0x3b1; + case 0x1f84: + return 0x3b1; + case 0x1f85: + return 0x3b1; + case 0x1f86: + return 0x3b1; + case 0x1f87: + return 0x3b1; + case 0x1f88: + return 0x391; + case 0x1f89: + return 0x391; + case 0x1f8a: + return 0x391; + case 0x1f8b: + return 0x391; + case 0x1f8c: + return 0x391; + case 0x1f8d: + return 0x391; + case 0x1f8e: + return 0x391; + case 0x1f8f: + return 0x391; + case 0x1f90: + return 0x3b7; + case 0x1f91: + return 0x3b7; + case 0x1f92: + return 0x3b7; + case 0x1f93: + return 0x3b7; + case 0x1f94: + return 0x3b7; + case 0x1f95: + return 0x3b7; + case 0x1f96: + return 0x3b7; + case 0x1f97: + return 0x3b7; + case 0x1f98: + return 0x397; + case 0x1f99: + return 0x397; + case 0x1f9a: + return 0x397; + case 0x1f9b: + return 0x397; + case 0x1f9c: + return 0x397; + case 0x1f9d: + return 0x397; + case 0x1f9e: + return 0x397; + case 0x1f9f: + return 0x397; + case 0x1fa0: + return 0x3c9; + case 0x1fa1: + return 0x3c9; + case 0x1fa2: + return 0x3c9; + case 0x1fa3: + return 0x3c9; + case 0x1fa4: + return 0x3c9; + case 0x1fa5: + return 0x3c9; + case 0x1fa6: + return 0x3c9; + case 0x1fa7: + return 0x3c9; + case 0x1fa8: + return 0x3a9; + case 0x1fa9: + return 0x3a9; + case 0x1faa: + return 0x3a9; + case 0x1fab: + return 0x3a9; + case 0x1fac: + return 0x3a9; + case 0x1fad: + return 0x3a9; + case 0x1fae: + return 0x3a9; + case 0x1faf: + return 0x3a9; + case 0x1fb0: + return 0x3b1; + case 0x1fb1: + return 0x3b1; + case 0x1fb2: + return 0x3b1; + case 0x1fb3: + return 0x3b1; + case 0x1fb4: + return 0x3b1; + case 0x1fb6: + return 0x3b1; + case 0x1fb7: + return 0x3b1; + case 0x1fb8: + return 0x391; + case 0x1fb9: + return 0x391; + case 0x1fba: 
+ return 0x391; + case 0x1fbb: + return 0x391; + case 0x1fbc: + return 0x391; + case 0x1fbe: + return 0x3b9; + case 0x1fc2: + return 0x3b7; + case 0x1fc3: + return 0x3b7; + case 0x1fc4: + return 0x3b7; + case 0x1fc6: + return 0x3b7; + case 0x1fc7: + return 0x3b7; + case 0x1fc8: + return 0x395; + case 0x1fc9: + return 0x395; + case 0x1fca: + return 0x397; + case 0x1fcb: + return 0x397; + case 0x1fcc: + return 0x397; + case 0x1fd0: + return 0x3b9; + case 0x1fd1: + return 0x3b9; + case 0x1fd2: + return 0x3b9; + case 0x1fd3: + return 0x3b9; + case 0x1fd6: + return 0x3b9; + case 0x1fd7: + return 0x3b9; + case 0x1fd8: + return 0x399; + case 0x1fd9: + return 0x399; + case 0x1fda: + return 0x399; + case 0x1fdb: + return 0x399; + case 0x1fe0: + return 0x3c5; + case 0x1fe1: + return 0x3c5; + case 0x1fe2: + return 0x3c5; + case 0x1fe3: + return 0x3c5; + case 0x1fe4: + return 0x3c1; + case 0x1fe5: + return 0x3c1; + case 0x1fe6: + return 0x3c5; + case 0x1fe7: + return 0x3c5; + case 0x1fe8: + return 0x3a5; + case 0x1fe9: + return 0x3a5; + case 0x1fea: + return 0x3a5; + case 0x1feb: + return 0x3a5; + case 0x1fec: + return 0x3a1; + case 0x1ff2: + return 0x3c9; + case 0x1ff3: + return 0x3c9; + case 0x1ff4: + return 0x3c9; + case 0x1ff6: + return 0x3c9; + case 0x1ff7: + return 0x3c9; + case 0x1ff8: + return 0x39f; + case 0x1ff9: + return 0x39f; + case 0x1ffa: + return 0x3a9; + case 0x1ffb: + return 0x3a9; + case 0x1ffc: + return 0x3a9; + case 0x2000: + return 0x2002; + case 0x2001: + return 0x2003; + case 0x2126: + return 0x3a9; + case 0x212a: + return 0x4b; + case 0x212b: + return 0x41; + case 0x219a: + return 0x2190; + case 0x219b: + return 0x2192; + case 0x21ae: + return 0x2194; + case 0x21cd: + return 0x21d0; + case 0x21ce: + return 0x21d4; + case 0x21cf: + return 0x21d2; + case 0x2204: + return 0x2203; + case 0x2209: + return 0x2208; + case 0x220c: + return 0x220b; + case 0x2224: + return 0x2223; + case 0x2226: + return 0x2225; + case 0x2241: + return 0x223c; + case 0x2244: + 
return 0x2243; + case 0x2247: + return 0x2245; + case 0x2249: + return 0x2248; + case 0x2260: + return 0x3d; + case 0x2262: + return 0x2261; + case 0x226d: + return 0x224d; + case 0x226e: + return 0x3c; + case 0x226f: + return 0x3e; + case 0x2270: + return 0x2264; + case 0x2271: + return 0x2265; + case 0x2274: + return 0x2272; + case 0x2275: + return 0x2273; + case 0x2278: + return 0x2276; + case 0x2279: + return 0x2277; + case 0x2280: + return 0x227a; + case 0x2281: + return 0x227b; + case 0x2284: + return 0x2282; + case 0x2285: + return 0x2283; + case 0x2288: + return 0x2286; + case 0x2289: + return 0x2287; + case 0x22ac: + return 0x22a2; + case 0x22ad: + return 0x22a8; + case 0x22ae: + return 0x22a9; + case 0x22af: + return 0x22ab; + case 0x22e0: + return 0x227c; + case 0x22e1: + return 0x227d; + case 0x22e2: + return 0x2291; + case 0x22e3: + return 0x2292; + case 0x22ea: + return 0x22b2; + case 0x22eb: + return 0x22b3; + case 0x22ec: + return 0x22b4; + case 0x22ed: + return 0x22b5; + case 0x2329: + return 0x3008; + case 0x232a: + return 0x3009; + case 0x2adc: + return 0x2add; + case 0x304c: + return 0x304b; + case 0x304e: + return 0x304d; + case 0x3050: + return 0x304f; + case 0x3052: + return 0x3051; + case 0x3054: + return 0x3053; + case 0x3056: + return 0x3055; + case 0x3058: + return 0x3057; + case 0x305a: + return 0x3059; + case 0x305c: + return 0x305b; + case 0x305e: + return 0x305d; + case 0x3060: + return 0x305f; + case 0x3062: + return 0x3061; + case 0x3065: + return 0x3064; + case 0x3067: + return 0x3066; + case 0x3069: + return 0x3068; + case 0x3070: + return 0x306f; + case 0x3071: + return 0x306f; + case 0x3073: + return 0x3072; + case 0x3074: + return 0x3072; + case 0x3076: + return 0x3075; + case 0x3077: + return 0x3075; + case 0x3079: + return 0x3078; + case 0x307a: + return 0x3078; + case 0x307c: + return 0x307b; + case 0x307d: + return 0x307b; + case 0x3094: + return 0x3046; + case 0x309e: + return 0x309d; + case 0x30ac: + return 0x30ab; + case 
0x30ae: + return 0x30ad; + case 0x30b0: + return 0x30af; + case 0x30b2: + return 0x30b1; + case 0x30b4: + return 0x30b3; + case 0x30b6: + return 0x30b5; + case 0x30b8: + return 0x30b7; + case 0x30ba: + return 0x30b9; + case 0x30bc: + return 0x30bb; + case 0x30be: + return 0x30bd; + case 0x30c0: + return 0x30bf; + case 0x30c2: + return 0x30c1; + case 0x30c5: + return 0x30c4; + case 0x30c7: + return 0x30c6; + case 0x30c9: + return 0x30c8; + case 0x30d0: + return 0x30cf; + case 0x30d1: + return 0x30cf; + case 0x30d3: + return 0x30d2; + case 0x30d4: + return 0x30d2; + case 0x30d6: + return 0x30d5; + case 0x30d7: + return 0x30d5; + case 0x30d9: + return 0x30d8; + case 0x30da: + return 0x30d8; + case 0x30dc: + return 0x30db; + case 0x30dd: + return 0x30db; + case 0x30f4: + return 0x30a6; + case 0x30f7: + return 0x30ef; + case 0x30f8: + return 0x30f0; + case 0x30f9: + return 0x30f1; + case 0x30fa: + return 0x30f2; + case 0x30fe: + return 0x30fd; + case 0xf900: + return 0x8c48; + case 0xf901: + return 0x66f4; + case 0xf902: + return 0x8eca; + case 0xf903: + return 0x8cc8; + case 0xf904: + return 0x6ed1; + case 0xf905: + return 0x4e32; + case 0xf906: + return 0x53e5; + case 0xf907: + return 0x9f9c; + case 0xf908: + return 0x9f9c; + case 0xf909: + return 0x5951; + case 0xf90a: + return 0x91d1; + case 0xf90b: + return 0x5587; + case 0xf90c: + return 0x5948; + case 0xf90d: + return 0x61f6; + case 0xf90e: + return 0x7669; + case 0xf90f: + return 0x7f85; + case 0xf910: + return 0x863f; + case 0xf911: + return 0x87ba; + case 0xf912: + return 0x88f8; + case 0xf913: + return 0x908f; + case 0xf914: + return 0x6a02; + case 0xf915: + return 0x6d1b; + case 0xf916: + return 0x70d9; + case 0xf917: + return 0x73de; + case 0xf918: + return 0x843d; + case 0xf919: + return 0x916a; + case 0xf91a: + return 0x99f1; + case 0xf91b: + return 0x4e82; + case 0xf91c: + return 0x5375; + case 0xf91d: + return 0x6b04; + case 0xf91e: + return 0x721b; + case 0xf91f: + return 0x862d; + case 0xf920: + 
return 0x9e1e; + case 0xf921: + return 0x5d50; + case 0xf922: + return 0x6feb; + case 0xf923: + return 0x85cd; + case 0xf924: + return 0x8964; + case 0xf925: + return 0x62c9; + case 0xf926: + return 0x81d8; + case 0xf927: + return 0x881f; + case 0xf928: + return 0x5eca; + case 0xf929: + return 0x6717; + case 0xf92a: + return 0x6d6a; + case 0xf92b: + return 0x72fc; + case 0xf92c: + return 0x90ce; + case 0xf92d: + return 0x4f86; + case 0xf92e: + return 0x51b7; + case 0xf92f: + return 0x52de; + case 0xf930: + return 0x64c4; + case 0xf931: + return 0x6ad3; + case 0xf932: + return 0x7210; + case 0xf933: + return 0x76e7; + case 0xf934: + return 0x8001; + case 0xf935: + return 0x8606; + case 0xf936: + return 0x865c; + case 0xf937: + return 0x8def; + case 0xf938: + return 0x9732; + case 0xf939: + return 0x9b6f; + case 0xf93a: + return 0x9dfa; + case 0xf93b: + return 0x788c; + case 0xf93c: + return 0x797f; + case 0xf93d: + return 0x7da0; + case 0xf93e: + return 0x83c9; + case 0xf93f: + return 0x9304; + case 0xf940: + return 0x9e7f; + case 0xf941: + return 0x8ad6; + case 0xf942: + return 0x58df; + case 0xf943: + return 0x5f04; + case 0xf944: + return 0x7c60; + case 0xf945: + return 0x807e; + case 0xf946: + return 0x7262; + case 0xf947: + return 0x78ca; + case 0xf948: + return 0x8cc2; + case 0xf949: + return 0x96f7; + case 0xf94a: + return 0x58d8; + case 0xf94b: + return 0x5c62; + case 0xf94c: + return 0x6a13; + case 0xf94d: + return 0x6dda; + case 0xf94e: + return 0x6f0f; + case 0xf94f: + return 0x7d2f; + case 0xf950: + return 0x7e37; + case 0xf951: + return 0x964b; + case 0xf952: + return 0x52d2; + case 0xf953: + return 0x808b; + case 0xf954: + return 0x51dc; + case 0xf955: + return 0x51cc; + case 0xf956: + return 0x7a1c; + case 0xf957: + return 0x7dbe; + case 0xf958: + return 0x83f1; + case 0xf959: + return 0x9675; + case 0xf95a: + return 0x8b80; + case 0xf95b: + return 0x62cf; + case 0xf95c: + return 0x6a02; + case 0xf95d: + return 0x8afe; + case 0xf95e: + return 0x4e39; 
+ case 0xf95f: + return 0x5be7; + case 0xf960: + return 0x6012; + case 0xf961: + return 0x7387; + case 0xf962: + return 0x7570; + case 0xf963: + return 0x5317; + case 0xf964: + return 0x78fb; + case 0xf965: + return 0x4fbf; + case 0xf966: + return 0x5fa9; + case 0xf967: + return 0x4e0d; + case 0xf968: + return 0x6ccc; + case 0xf969: + return 0x6578; + case 0xf96a: + return 0x7d22; + case 0xf96b: + return 0x53c3; + case 0xf96c: + return 0x585e; + case 0xf96d: + return 0x7701; + case 0xf96e: + return 0x8449; + case 0xf96f: + return 0x8aaa; + case 0xf970: + return 0x6bba; + case 0xf971: + return 0x8fb0; + case 0xf972: + return 0x6c88; + case 0xf973: + return 0x62fe; + case 0xf974: + return 0x82e5; + case 0xf975: + return 0x63a0; + case 0xf976: + return 0x7565; + case 0xf977: + return 0x4eae; + case 0xf978: + return 0x5169; + case 0xf979: + return 0x51c9; + case 0xf97a: + return 0x6881; + case 0xf97b: + return 0x7ce7; + case 0xf97c: + return 0x826f; + case 0xf97d: + return 0x8ad2; + case 0xf97e: + return 0x91cf; + case 0xf97f: + return 0x52f5; + case 0xf980: + return 0x5442; + case 0xf981: + return 0x5973; + case 0xf982: + return 0x5eec; + case 0xf983: + return 0x65c5; + case 0xf984: + return 0x6ffe; + case 0xf985: + return 0x792a; + case 0xf986: + return 0x95ad; + case 0xf987: + return 0x9a6a; + case 0xf988: + return 0x9e97; + case 0xf989: + return 0x9ece; + case 0xf98a: + return 0x529b; + case 0xf98b: + return 0x66c6; + case 0xf98c: + return 0x6b77; + case 0xf98d: + return 0x8f62; + case 0xf98e: + return 0x5e74; + case 0xf98f: + return 0x6190; + case 0xf990: + return 0x6200; + case 0xf991: + return 0x649a; + case 0xf992: + return 0x6f23; + case 0xf993: + return 0x7149; + case 0xf994: + return 0x7489; + case 0xf995: + return 0x79ca; + case 0xf996: + return 0x7df4; + case 0xf997: + return 0x806f; + case 0xf998: + return 0x8f26; + case 0xf999: + return 0x84ee; + case 0xf99a: + return 0x9023; + case 0xf99b: + return 0x934a; + case 0xf99c: + return 0x5217; + case 0xf99d: 
+ return 0x52a3; + case 0xf99e: + return 0x54bd; + case 0xf99f: + return 0x70c8; + case 0xf9a0: + return 0x88c2; + case 0xf9a1: + return 0x8aaa; + case 0xf9a2: + return 0x5ec9; + case 0xf9a3: + return 0x5ff5; + case 0xf9a4: + return 0x637b; + case 0xf9a5: + return 0x6bae; + case 0xf9a6: + return 0x7c3e; + case 0xf9a7: + return 0x7375; + case 0xf9a8: + return 0x4ee4; + case 0xf9a9: + return 0x56f9; + case 0xf9aa: + return 0x5be7; + case 0xf9ab: + return 0x5dba; + case 0xf9ac: + return 0x601c; + case 0xf9ad: + return 0x73b2; + case 0xf9ae: + return 0x7469; + case 0xf9af: + return 0x7f9a; + case 0xf9b0: + return 0x8046; + case 0xf9b1: + return 0x9234; + case 0xf9b2: + return 0x96f6; + case 0xf9b3: + return 0x9748; + case 0xf9b4: + return 0x9818; + case 0xf9b5: + return 0x4f8b; + case 0xf9b6: + return 0x79ae; + case 0xf9b7: + return 0x91b4; + case 0xf9b8: + return 0x96b8; + case 0xf9b9: + return 0x60e1; + case 0xf9ba: + return 0x4e86; + case 0xf9bb: + return 0x50da; + case 0xf9bc: + return 0x5bee; + case 0xf9bd: + return 0x5c3f; + case 0xf9be: + return 0x6599; + case 0xf9bf: + return 0x6a02; + case 0xf9c0: + return 0x71ce; + case 0xf9c1: + return 0x7642; + case 0xf9c2: + return 0x84fc; + case 0xf9c3: + return 0x907c; + case 0xf9c4: + return 0x9f8d; + case 0xf9c5: + return 0x6688; + case 0xf9c6: + return 0x962e; + case 0xf9c7: + return 0x5289; + case 0xf9c8: + return 0x677b; + case 0xf9c9: + return 0x67f3; + case 0xf9ca: + return 0x6d41; + case 0xf9cb: + return 0x6e9c; + case 0xf9cc: + return 0x7409; + case 0xf9cd: + return 0x7559; + case 0xf9ce: + return 0x786b; + case 0xf9cf: + return 0x7d10; + case 0xf9d0: + return 0x985e; + case 0xf9d1: + return 0x516d; + case 0xf9d2: + return 0x622e; + case 0xf9d3: + return 0x9678; + case 0xf9d4: + return 0x502b; + case 0xf9d5: + return 0x5d19; + case 0xf9d6: + return 0x6dea; + case 0xf9d7: + return 0x8f2a; + case 0xf9d8: + return 0x5f8b; + case 0xf9d9: + return 0x6144; + case 0xf9da: + return 0x6817; + case 0xf9db: + return 
0x7387; + case 0xf9dc: + return 0x9686; + case 0xf9dd: + return 0x5229; + case 0xf9de: + return 0x540f; + case 0xf9df: + return 0x5c65; + case 0xf9e0: + return 0x6613; + case 0xf9e1: + return 0x674e; + case 0xf9e2: + return 0x68a8; + case 0xf9e3: + return 0x6ce5; + case 0xf9e4: + return 0x7406; + case 0xf9e5: + return 0x75e2; + case 0xf9e6: + return 0x7f79; + case 0xf9e7: + return 0x88cf; + case 0xf9e8: + return 0x88e1; + case 0xf9e9: + return 0x91cc; + case 0xf9ea: + return 0x96e2; + case 0xf9eb: + return 0x533f; + case 0xf9ec: + return 0x6eba; + case 0xf9ed: + return 0x541d; + case 0xf9ee: + return 0x71d0; + case 0xf9ef: + return 0x7498; + case 0xf9f0: + return 0x85fa; + case 0xf9f1: + return 0x96a3; + case 0xf9f2: + return 0x9c57; + case 0xf9f3: + return 0x9e9f; + case 0xf9f4: + return 0x6797; + case 0xf9f5: + return 0x6dcb; + case 0xf9f6: + return 0x81e8; + case 0xf9f7: + return 0x7acb; + case 0xf9f8: + return 0x7b20; + case 0xf9f9: + return 0x7c92; + case 0xf9fa: + return 0x72c0; + case 0xf9fb: + return 0x7099; + case 0xf9fc: + return 0x8b58; + case 0xf9fd: + return 0x4ec0; + case 0xf9fe: + return 0x8336; + case 0xf9ff: + return 0x523a; + case 0xfa00: + return 0x5207; + case 0xfa01: + return 0x5ea6; + case 0xfa02: + return 0x62d3; + case 0xfa03: + return 0x7cd6; + case 0xfa04: + return 0x5b85; + case 0xfa05: + return 0x6d1e; + case 0xfa06: + return 0x66b4; + case 0xfa07: + return 0x8f3b; + case 0xfa08: + return 0x884c; + case 0xfa09: + return 0x964d; + case 0xfa0a: + return 0x898b; + case 0xfa0b: + return 0x5ed3; + case 0xfa0c: + return 0x5140; + case 0xfa0d: + return 0x55c0; + case 0xfa10: + return 0x585a; + case 0xfa12: + return 0x6674; + case 0xfa15: + return 0x51de; + case 0xfa16: + return 0x732a; + case 0xfa17: + return 0x76ca; + case 0xfa18: + return 0x793c; + case 0xfa19: + return 0x795e; + case 0xfa1a: + return 0x7965; + case 0xfa1b: + return 0x798f; + case 0xfa1c: + return 0x9756; + case 0xfa1d: + return 0x7cbe; + case 0xfa1e: + return 0x7fbd; + case 
0xfa20: + return 0x8612; + case 0xfa22: + return 0x8af8; + case 0xfa25: + return 0x9038; + case 0xfa26: + return 0x90fd; + case 0xfa2a: + return 0x98ef; + case 0xfa2b: + return 0x98fc; + case 0xfa2c: + return 0x9928; + case 0xfa2d: + return 0x9db4; + case 0xfa2e: + return 0x90de; + case 0xfa2f: + return 0x96b7; + case 0xfa30: + return 0x4fae; + case 0xfa31: + return 0x50e7; + case 0xfa32: + return 0x514d; + case 0xfa33: + return 0x52c9; + case 0xfa34: + return 0x52e4; + case 0xfa35: + return 0x5351; + case 0xfa36: + return 0x559d; + case 0xfa37: + return 0x5606; + case 0xfa38: + return 0x5668; + case 0xfa39: + return 0x5840; + case 0xfa3a: + return 0x58a8; + case 0xfa3b: + return 0x5c64; + case 0xfa3c: + return 0x5c6e; + case 0xfa3d: + return 0x6094; + case 0xfa3e: + return 0x6168; + case 0xfa3f: + return 0x618e; + case 0xfa40: + return 0x61f2; + case 0xfa41: + return 0x654f; + case 0xfa42: + return 0x65e2; + case 0xfa43: + return 0x6691; + case 0xfa44: + return 0x6885; + case 0xfa45: + return 0x6d77; + case 0xfa46: + return 0x6e1a; + case 0xfa47: + return 0x6f22; + case 0xfa48: + return 0x716e; + case 0xfa49: + return 0x722b; + case 0xfa4a: + return 0x7422; + case 0xfa4b: + return 0x7891; + case 0xfa4c: + return 0x793e; + case 0xfa4d: + return 0x7949; + case 0xfa4e: + return 0x7948; + case 0xfa4f: + return 0x7950; + case 0xfa50: + return 0x7956; + case 0xfa51: + return 0x795d; + case 0xfa52: + return 0x798d; + case 0xfa53: + return 0x798e; + case 0xfa54: + return 0x7a40; + case 0xfa55: + return 0x7a81; + case 0xfa56: + return 0x7bc0; + case 0xfa57: + return 0x7df4; + case 0xfa58: + return 0x7e09; + case 0xfa59: + return 0x7e41; + case 0xfa5a: + return 0x7f72; + case 0xfa5b: + return 0x8005; + case 0xfa5c: + return 0x81ed; + case 0xfa5d: + return 0x8279; + case 0xfa5e: + return 0x8279; + case 0xfa5f: + return 0x8457; + case 0xfa60: + return 0x8910; + case 0xfa61: + return 0x8996; + case 0xfa62: + return 0x8b01; + case 0xfa63: + return 0x8b39; + case 0xfa64: + 
return 0x8cd3; + case 0xfa65: + return 0x8d08; + case 0xfa66: + return 0x8fb6; + case 0xfa67: + return 0x9038; + case 0xfa68: + return 0x96e3; + case 0xfa69: + return 0x97ff; + case 0xfa6a: + return 0x983b; + case 0xfa6b: + return 0x6075; + case 0xfa6c: + return 0x242ee; + case 0xfa6d: + return 0x8218; + case 0xfa70: + return 0x4e26; + case 0xfa71: + return 0x51b5; + case 0xfa72: + return 0x5168; + case 0xfa73: + return 0x4f80; + case 0xfa74: + return 0x5145; + case 0xfa75: + return 0x5180; + case 0xfa76: + return 0x52c7; + case 0xfa77: + return 0x52fa; + case 0xfa78: + return 0x559d; + case 0xfa79: + return 0x5555; + case 0xfa7a: + return 0x5599; + case 0xfa7b: + return 0x55e2; + case 0xfa7c: + return 0x585a; + case 0xfa7d: + return 0x58b3; + case 0xfa7e: + return 0x5944; + case 0xfa7f: + return 0x5954; + case 0xfa80: + return 0x5a62; + case 0xfa81: + return 0x5b28; + case 0xfa82: + return 0x5ed2; + case 0xfa83: + return 0x5ed9; + case 0xfa84: + return 0x5f69; + case 0xfa85: + return 0x5fad; + case 0xfa86: + return 0x60d8; + case 0xfa87: + return 0x614e; + case 0xfa88: + return 0x6108; + case 0xfa89: + return 0x618e; + case 0xfa8a: + return 0x6160; + case 0xfa8b: + return 0x61f2; + case 0xfa8c: + return 0x6234; + case 0xfa8d: + return 0x63c4; + case 0xfa8e: + return 0x641c; + case 0xfa8f: + return 0x6452; + case 0xfa90: + return 0x6556; + case 0xfa91: + return 0x6674; + case 0xfa92: + return 0x6717; + case 0xfa93: + return 0x671b; + case 0xfa94: + return 0x6756; + case 0xfa95: + return 0x6b79; + case 0xfa96: + return 0x6bba; + case 0xfa97: + return 0x6d41; + case 0xfa98: + return 0x6edb; + case 0xfa99: + return 0x6ecb; + case 0xfa9a: + return 0x6f22; + case 0xfa9b: + return 0x701e; + case 0xfa9c: + return 0x716e; + case 0xfa9d: + return 0x77a7; + case 0xfa9e: + return 0x7235; + case 0xfa9f: + return 0x72af; + case 0xfaa0: + return 0x732a; + case 0xfaa1: + return 0x7471; + case 0xfaa2: + return 0x7506; + case 0xfaa3: + return 0x753b; + case 0xfaa4: + return 0x761d; 
+ case 0xfaa5: + return 0x761f; + case 0xfaa6: + return 0x76ca; + case 0xfaa7: + return 0x76db; + case 0xfaa8: + return 0x76f4; + case 0xfaa9: + return 0x774a; + case 0xfaaa: + return 0x7740; + case 0xfaab: + return 0x78cc; + case 0xfaac: + return 0x7ab1; + case 0xfaad: + return 0x7bc0; + case 0xfaae: + return 0x7c7b; + case 0xfaaf: + return 0x7d5b; + case 0xfab0: + return 0x7df4; + case 0xfab1: + return 0x7f3e; + case 0xfab2: + return 0x8005; + case 0xfab3: + return 0x8352; + case 0xfab4: + return 0x83ef; + case 0xfab5: + return 0x8779; + case 0xfab6: + return 0x8941; + case 0xfab7: + return 0x8986; + case 0xfab8: + return 0x8996; + case 0xfab9: + return 0x8abf; + case 0xfaba: + return 0x8af8; + case 0xfabb: + return 0x8acb; + case 0xfabc: + return 0x8b01; + case 0xfabd: + return 0x8afe; + case 0xfabe: + return 0x8aed; + case 0xfabf: + return 0x8b39; + case 0xfac0: + return 0x8b8a; + case 0xfac1: + return 0x8d08; + case 0xfac2: + return 0x8f38; + case 0xfac3: + return 0x9072; + case 0xfac4: + return 0x9199; + case 0xfac5: + return 0x9276; + case 0xfac6: + return 0x967c; + case 0xfac7: + return 0x96e3; + case 0xfac8: + return 0x9756; + case 0xfac9: + return 0x97db; + case 0xfaca: + return 0x97ff; + case 0xfacb: + return 0x980b; + case 0xfacc: + return 0x983b; + case 0xfacd: + return 0x9b12; + case 0xface: + return 0x9f9c; + case 0xfacf: + return 0x2284a; + case 0xfad0: + return 0x22844; + case 0xfad1: + return 0x233d5; + case 0xfad2: + return 0x3b9d; + case 0xfad3: + return 0x4018; + case 0xfad4: + return 0x4039; + case 0xfad5: + return 0x25249; + case 0xfad6: + return 0x25cd0; + case 0xfad7: + return 0x27ed3; + case 0xfad8: + return 0x9f43; + case 0xfad9: + return 0x9f8e; + case 0xfb1d: + return 0x5d9; + case 0xfb1f: + return 0x5f2; + case 0xfb2a: + return 0x5e9; + case 0xfb2b: + return 0x5e9; + case 0xfb2c: + return 0x5e9; + case 0xfb2d: + return 0x5e9; + case 0xfb2e: + return 0x5d0; + case 0xfb2f: + return 0x5d0; + case 0xfb30: + return 0x5d0; + case 0xfb31: + 
return 0x5d1; + case 0xfb32: + return 0x5d2; + case 0xfb33: + return 0x5d3; + case 0xfb34: + return 0x5d4; + case 0xfb35: + return 0x5d5; + case 0xfb36: + return 0x5d6; + case 0xfb38: + return 0x5d8; + case 0xfb39: + return 0x5d9; + case 0xfb3a: + return 0x5da; + case 0xfb3b: + return 0x5db; + case 0xfb3c: + return 0x5dc; + case 0xfb3e: + return 0x5de; + case 0xfb40: + return 0x5e0; + case 0xfb41: + return 0x5e1; + case 0xfb43: + return 0x5e3; + case 0xfb44: + return 0x5e4; + case 0xfb46: + return 0x5e6; + case 0xfb47: + return 0x5e7; + case 0xfb48: + return 0x5e8; + case 0xfb49: + return 0x5e9; + case 0xfb4a: + return 0x5ea; + case 0xfb4b: + return 0x5d5; + case 0xfb4c: + return 0x5d1; + case 0xfb4d: + return 0x5db; + case 0xfb4e: + return 0x5e4; + case 0x1109a: + return 0x11099; + case 0x1109c: + return 0x1109b; + case 0x110ab: + return 0x110a5; + case 0x2f800: + return 0x4e3d; + case 0x2f801: + return 0x4e38; + case 0x2f802: + return 0x4e41; + case 0x2f803: + return 0x20122; + case 0x2f804: + return 0x4f60; + case 0x2f805: + return 0x4fae; + case 0x2f806: + return 0x4fbb; + case 0x2f807: + return 0x5002; + case 0x2f808: + return 0x507a; + case 0x2f809: + return 0x5099; + case 0x2f80a: + return 0x50e7; + case 0x2f80b: + return 0x50cf; + case 0x2f80c: + return 0x349e; + case 0x2f80d: + return 0x2063a; + case 0x2f80e: + return 0x514d; + case 0x2f80f: + return 0x5154; + case 0x2f810: + return 0x5164; + case 0x2f811: + return 0x5177; + case 0x2f812: + return 0x2051c; + case 0x2f813: + return 0x34b9; + case 0x2f814: + return 0x5167; + case 0x2f815: + return 0x518d; + case 0x2f816: + return 0x2054b; + case 0x2f817: + return 0x5197; + case 0x2f818: + return 0x51a4; + case 0x2f819: + return 0x4ecc; + case 0x2f81a: + return 0x51ac; + case 0x2f81b: + return 0x51b5; + case 0x2f81c: + return 0x291df; + case 0x2f81d: + return 0x51f5; + case 0x2f81e: + return 0x5203; + case 0x2f81f: + return 0x34df; + case 0x2f820: + return 0x523b; + case 0x2f821: + return 0x5246; + case 
0x2f822: + return 0x5272; + case 0x2f823: + return 0x5277; + case 0x2f824: + return 0x3515; + case 0x2f825: + return 0x52c7; + case 0x2f826: + return 0x52c9; + case 0x2f827: + return 0x52e4; + case 0x2f828: + return 0x52fa; + case 0x2f829: + return 0x5305; + case 0x2f82a: + return 0x5306; + case 0x2f82b: + return 0x5317; + case 0x2f82c: + return 0x5349; + case 0x2f82d: + return 0x5351; + case 0x2f82e: + return 0x535a; + case 0x2f82f: + return 0x5373; + case 0x2f830: + return 0x537d; + case 0x2f831: + return 0x537f; + case 0x2f832: + return 0x537f; + case 0x2f833: + return 0x537f; + case 0x2f834: + return 0x20a2c; + case 0x2f835: + return 0x7070; + case 0x2f836: + return 0x53ca; + case 0x2f837: + return 0x53df; + case 0x2f838: + return 0x20b63; + case 0x2f839: + return 0x53eb; + case 0x2f83a: + return 0x53f1; + case 0x2f83b: + return 0x5406; + case 0x2f83c: + return 0x549e; + case 0x2f83d: + return 0x5438; + case 0x2f83e: + return 0x5448; + case 0x2f83f: + return 0x5468; + case 0x2f840: + return 0x54a2; + case 0x2f841: + return 0x54f6; + case 0x2f842: + return 0x5510; + case 0x2f843: + return 0x5553; + case 0x2f844: + return 0x5563; + case 0x2f845: + return 0x5584; + case 0x2f846: + return 0x5584; + case 0x2f847: + return 0x5599; + case 0x2f848: + return 0x55ab; + case 0x2f849: + return 0x55b3; + case 0x2f84a: + return 0x55c2; + case 0x2f84b: + return 0x5716; + case 0x2f84c: + return 0x5606; + case 0x2f84d: + return 0x5717; + case 0x2f84e: + return 0x5651; + case 0x2f84f: + return 0x5674; + case 0x2f850: + return 0x5207; + case 0x2f851: + return 0x58ee; + case 0x2f852: + return 0x57ce; + case 0x2f853: + return 0x57f4; + case 0x2f854: + return 0x580d; + case 0x2f855: + return 0x578b; + case 0x2f856: + return 0x5832; + case 0x2f857: + return 0x5831; + case 0x2f858: + return 0x58ac; + case 0x2f859: + return 0x214e4; + case 0x2f85a: + return 0x58f2; + case 0x2f85b: + return 0x58f7; + case 0x2f85c: + return 0x5906; + case 0x2f85d: + return 0x591a; + case 0x2f85e: + 
return 0x5922; + case 0x2f85f: + return 0x5962; + case 0x2f860: + return 0x216a8; + case 0x2f861: + return 0x216ea; + case 0x2f862: + return 0x59ec; + case 0x2f863: + return 0x5a1b; + case 0x2f864: + return 0x5a27; + case 0x2f865: + return 0x59d8; + case 0x2f866: + return 0x5a66; + case 0x2f867: + return 0x36ee; + case 0x2f868: + return 0x36fc; + case 0x2f869: + return 0x5b08; + case 0x2f86a: + return 0x5b3e; + case 0x2f86b: + return 0x5b3e; + case 0x2f86c: + return 0x219c8; + case 0x2f86d: + return 0x5bc3; + case 0x2f86e: + return 0x5bd8; + case 0x2f86f: + return 0x5be7; + case 0x2f870: + return 0x5bf3; + case 0x2f871: + return 0x21b18; + case 0x2f872: + return 0x5bff; + case 0x2f873: + return 0x5c06; + case 0x2f874: + return 0x5f53; + case 0x2f875: + return 0x5c22; + case 0x2f876: + return 0x3781; + case 0x2f877: + return 0x5c60; + case 0x2f878: + return 0x5c6e; + case 0x2f879: + return 0x5cc0; + case 0x2f87a: + return 0x5c8d; + case 0x2f87b: + return 0x21de4; + case 0x2f87c: + return 0x5d43; + case 0x2f87d: + return 0x21de6; + case 0x2f87e: + return 0x5d6e; + case 0x2f87f: + return 0x5d6b; + case 0x2f880: + return 0x5d7c; + case 0x2f881: + return 0x5de1; + case 0x2f882: + return 0x5de2; + case 0x2f883: + return 0x382f; + case 0x2f884: + return 0x5dfd; + case 0x2f885: + return 0x5e28; + case 0x2f886: + return 0x5e3d; + case 0x2f887: + return 0x5e69; + case 0x2f888: + return 0x3862; + case 0x2f889: + return 0x22183; + case 0x2f88a: + return 0x387c; + case 0x2f88b: + return 0x5eb0; + case 0x2f88c: + return 0x5eb3; + case 0x2f88d: + return 0x5eb6; + case 0x2f88e: + return 0x5eca; + case 0x2f88f: + return 0x2a392; + case 0x2f890: + return 0x5efe; + case 0x2f891: + return 0x22331; + case 0x2f892: + return 0x22331; + case 0x2f893: + return 0x8201; + case 0x2f894: + return 0x5f22; + case 0x2f895: + return 0x5f22; + case 0x2f896: + return 0x38c7; + case 0x2f897: + return 0x232b8; + case 0x2f898: + return 0x261da; + case 0x2f899: + return 0x5f62; + case 0x2f89a: + return 
0x5f6b; + case 0x2f89b: + return 0x38e3; + case 0x2f89c: + return 0x5f9a; + case 0x2f89d: + return 0x5fcd; + case 0x2f89e: + return 0x5fd7; + case 0x2f89f: + return 0x5ff9; + case 0x2f8a0: + return 0x6081; + case 0x2f8a1: + return 0x393a; + case 0x2f8a2: + return 0x391c; + case 0x2f8a3: + return 0x6094; + case 0x2f8a4: + return 0x226d4; + case 0x2f8a5: + return 0x60c7; + case 0x2f8a6: + return 0x6148; + case 0x2f8a7: + return 0x614c; + case 0x2f8a8: + return 0x614e; + case 0x2f8a9: + return 0x614c; + case 0x2f8aa: + return 0x617a; + case 0x2f8ab: + return 0x618e; + case 0x2f8ac: + return 0x61b2; + case 0x2f8ad: + return 0x61a4; + case 0x2f8ae: + return 0x61af; + case 0x2f8af: + return 0x61de; + case 0x2f8b0: + return 0x61f2; + case 0x2f8b1: + return 0x61f6; + case 0x2f8b2: + return 0x6210; + case 0x2f8b3: + return 0x621b; + case 0x2f8b4: + return 0x625d; + case 0x2f8b5: + return 0x62b1; + case 0x2f8b6: + return 0x62d4; + case 0x2f8b7: + return 0x6350; + case 0x2f8b8: + return 0x22b0c; + case 0x2f8b9: + return 0x633d; + case 0x2f8ba: + return 0x62fc; + case 0x2f8bb: + return 0x6368; + case 0x2f8bc: + return 0x6383; + case 0x2f8bd: + return 0x63e4; + case 0x2f8be: + return 0x22bf1; + case 0x2f8bf: + return 0x6422; + case 0x2f8c0: + return 0x63c5; + case 0x2f8c1: + return 0x63a9; + case 0x2f8c2: + return 0x3a2e; + case 0x2f8c3: + return 0x6469; + case 0x2f8c4: + return 0x647e; + case 0x2f8c5: + return 0x649d; + case 0x2f8c6: + return 0x6477; + case 0x2f8c7: + return 0x3a6c; + case 0x2f8c8: + return 0x654f; + case 0x2f8c9: + return 0x656c; + case 0x2f8ca: + return 0x2300a; + case 0x2f8cb: + return 0x65e3; + case 0x2f8cc: + return 0x66f8; + case 0x2f8cd: + return 0x6649; + case 0x2f8ce: + return 0x3b19; + case 0x2f8cf: + return 0x6691; + case 0x2f8d0: + return 0x3b08; + case 0x2f8d1: + return 0x3ae4; + case 0x2f8d2: + return 0x5192; + case 0x2f8d3: + return 0x5195; + case 0x2f8d4: + return 0x6700; + case 0x2f8d5: + return 0x669c; + case 0x2f8d6: + return 0x80ad; + case 
0x2f8d7: + return 0x43d9; + case 0x2f8d8: + return 0x6717; + case 0x2f8d9: + return 0x671b; + case 0x2f8da: + return 0x6721; + case 0x2f8db: + return 0x675e; + case 0x2f8dc: + return 0x6753; + case 0x2f8dd: + return 0x233c3; + case 0x2f8de: + return 0x3b49; + case 0x2f8df: + return 0x67fa; + case 0x2f8e0: + return 0x6785; + case 0x2f8e1: + return 0x6852; + case 0x2f8e2: + return 0x6885; + case 0x2f8e3: + return 0x2346d; + case 0x2f8e4: + return 0x688e; + case 0x2f8e5: + return 0x681f; + case 0x2f8e6: + return 0x6914; + case 0x2f8e7: + return 0x3b9d; + case 0x2f8e8: + return 0x6942; + case 0x2f8e9: + return 0x69a3; + case 0x2f8ea: + return 0x69ea; + case 0x2f8eb: + return 0x6aa8; + case 0x2f8ec: + return 0x236a3; + case 0x2f8ed: + return 0x6adb; + case 0x2f8ee: + return 0x3c18; + case 0x2f8ef: + return 0x6b21; + case 0x2f8f0: + return 0x238a7; + case 0x2f8f1: + return 0x6b54; + case 0x2f8f2: + return 0x3c4e; + case 0x2f8f3: + return 0x6b72; + case 0x2f8f4: + return 0x6b9f; + case 0x2f8f5: + return 0x6bba; + case 0x2f8f6: + return 0x6bbb; + case 0x2f8f7: + return 0x23a8d; + case 0x2f8f8: + return 0x21d0b; + case 0x2f8f9: + return 0x23afa; + case 0x2f8fa: + return 0x6c4e; + case 0x2f8fb: + return 0x23cbc; + case 0x2f8fc: + return 0x6cbf; + case 0x2f8fd: + return 0x6ccd; + case 0x2f8fe: + return 0x6c67; + case 0x2f8ff: + return 0x6d16; + case 0x2f900: + return 0x6d3e; + case 0x2f901: + return 0x6d77; + case 0x2f902: + return 0x6d41; + case 0x2f903: + return 0x6d69; + case 0x2f904: + return 0x6d78; + case 0x2f905: + return 0x6d85; + case 0x2f906: + return 0x23d1e; + case 0x2f907: + return 0x6d34; + case 0x2f908: + return 0x6e2f; + case 0x2f909: + return 0x6e6e; + case 0x2f90a: + return 0x3d33; + case 0x2f90b: + return 0x6ecb; + case 0x2f90c: + return 0x6ec7; + case 0x2f90d: + return 0x23ed1; + case 0x2f90e: + return 0x6df9; + case 0x2f90f: + return 0x6f6e; + case 0x2f910: + return 0x23f5e; + case 0x2f911: + return 0x23f8e; + case 0x2f912: + return 0x6fc6; + case 
0x2f913: + return 0x7039; + case 0x2f914: + return 0x701e; + case 0x2f915: + return 0x701b; + case 0x2f916: + return 0x3d96; + case 0x2f917: + return 0x704a; + case 0x2f918: + return 0x707d; + case 0x2f919: + return 0x7077; + case 0x2f91a: + return 0x70ad; + case 0x2f91b: + return 0x20525; + case 0x2f91c: + return 0x7145; + case 0x2f91d: + return 0x24263; + case 0x2f91e: + return 0x719c; + case 0x2f91f: + return 0x243ab; + case 0x2f920: + return 0x7228; + case 0x2f921: + return 0x7235; + case 0x2f922: + return 0x7250; + case 0x2f923: + return 0x24608; + case 0x2f924: + return 0x7280; + case 0x2f925: + return 0x7295; + case 0x2f926: + return 0x24735; + case 0x2f927: + return 0x24814; + case 0x2f928: + return 0x737a; + case 0x2f929: + return 0x738b; + case 0x2f92a: + return 0x3eac; + case 0x2f92b: + return 0x73a5; + case 0x2f92c: + return 0x3eb8; + case 0x2f92d: + return 0x3eb8; + case 0x2f92e: + return 0x7447; + case 0x2f92f: + return 0x745c; + case 0x2f930: + return 0x7471; + case 0x2f931: + return 0x7485; + case 0x2f932: + return 0x74ca; + case 0x2f933: + return 0x3f1b; + case 0x2f934: + return 0x7524; + case 0x2f935: + return 0x24c36; + case 0x2f936: + return 0x753e; + case 0x2f937: + return 0x24c92; + case 0x2f938: + return 0x7570; + case 0x2f939: + return 0x2219f; + case 0x2f93a: + return 0x7610; + case 0x2f93b: + return 0x24fa1; + case 0x2f93c: + return 0x24fb8; + case 0x2f93d: + return 0x25044; + case 0x2f93e: + return 0x3ffc; + case 0x2f93f: + return 0x4008; + case 0x2f940: + return 0x76f4; + case 0x2f941: + return 0x250f3; + case 0x2f942: + return 0x250f2; + case 0x2f943: + return 0x25119; + case 0x2f944: + return 0x25133; + case 0x2f945: + return 0x771e; + case 0x2f946: + return 0x771f; + case 0x2f947: + return 0x771f; + case 0x2f948: + return 0x774a; + case 0x2f949: + return 0x4039; + case 0x2f94a: + return 0x778b; + case 0x2f94b: + return 0x4046; + case 0x2f94c: + return 0x4096; + case 0x2f94d: + return 0x2541d; + case 0x2f94e: + return 0x784e; + case 
0x2f94f: + return 0x788c; + case 0x2f950: + return 0x78cc; + case 0x2f951: + return 0x40e3; + case 0x2f952: + return 0x25626; + case 0x2f953: + return 0x7956; + case 0x2f954: + return 0x2569a; + case 0x2f955: + return 0x256c5; + case 0x2f956: + return 0x798f; + case 0x2f957: + return 0x79eb; + case 0x2f958: + return 0x412f; + case 0x2f959: + return 0x7a40; + case 0x2f95a: + return 0x7a4a; + case 0x2f95b: + return 0x7a4f; + case 0x2f95c: + return 0x2597c; + case 0x2f95d: + return 0x25aa7; + case 0x2f95e: + return 0x25aa7; + case 0x2f95f: + return 0x7aee; + case 0x2f960: + return 0x4202; + case 0x2f961: + return 0x25bab; + case 0x2f962: + return 0x7bc6; + case 0x2f963: + return 0x7bc9; + case 0x2f964: + return 0x4227; + case 0x2f965: + return 0x25c80; + case 0x2f966: + return 0x7cd2; + case 0x2f967: + return 0x42a0; + case 0x2f968: + return 0x7ce8; + case 0x2f969: + return 0x7ce3; + case 0x2f96a: + return 0x7d00; + case 0x2f96b: + return 0x25f86; + case 0x2f96c: + return 0x7d63; + case 0x2f96d: + return 0x4301; + case 0x2f96e: + return 0x7dc7; + case 0x2f96f: + return 0x7e02; + case 0x2f970: + return 0x7e45; + case 0x2f971: + return 0x4334; + case 0x2f972: + return 0x26228; + case 0x2f973: + return 0x26247; + case 0x2f974: + return 0x4359; + case 0x2f975: + return 0x262d9; + case 0x2f976: + return 0x7f7a; + case 0x2f977: + return 0x2633e; + case 0x2f978: + return 0x7f95; + case 0x2f979: + return 0x7ffa; + case 0x2f97a: + return 0x8005; + case 0x2f97b: + return 0x264da; + case 0x2f97c: + return 0x26523; + case 0x2f97d: + return 0x8060; + case 0x2f97e: + return 0x265a8; + case 0x2f97f: + return 0x8070; + case 0x2f980: + return 0x2335f; + case 0x2f981: + return 0x43d5; + case 0x2f982: + return 0x80b2; + case 0x2f983: + return 0x8103; + case 0x2f984: + return 0x440b; + case 0x2f985: + return 0x813e; + case 0x2f986: + return 0x5ab5; + case 0x2f987: + return 0x267a7; + case 0x2f988: + return 0x267b5; + case 0x2f989: + return 0x23393; + case 0x2f98a: + return 0x2339c; + 
case 0x2f98b: + return 0x8201; + case 0x2f98c: + return 0x8204; + case 0x2f98d: + return 0x8f9e; + case 0x2f98e: + return 0x446b; + case 0x2f98f: + return 0x8291; + case 0x2f990: + return 0x828b; + case 0x2f991: + return 0x829d; + case 0x2f992: + return 0x52b3; + case 0x2f993: + return 0x82b1; + case 0x2f994: + return 0x82b3; + case 0x2f995: + return 0x82bd; + case 0x2f996: + return 0x82e6; + case 0x2f997: + return 0x26b3c; + case 0x2f998: + return 0x82e5; + case 0x2f999: + return 0x831d; + case 0x2f99a: + return 0x8363; + case 0x2f99b: + return 0x83ad; + case 0x2f99c: + return 0x8323; + case 0x2f99d: + return 0x83bd; + case 0x2f99e: + return 0x83e7; + case 0x2f99f: + return 0x8457; + case 0x2f9a0: + return 0x8353; + case 0x2f9a1: + return 0x83ca; + case 0x2f9a2: + return 0x83cc; + case 0x2f9a3: + return 0x83dc; + case 0x2f9a4: + return 0x26c36; + case 0x2f9a5: + return 0x26d6b; + case 0x2f9a6: + return 0x26cd5; + case 0x2f9a7: + return 0x452b; + case 0x2f9a8: + return 0x84f1; + case 0x2f9a9: + return 0x84f3; + case 0x2f9aa: + return 0x8516; + case 0x2f9ab: + return 0x273ca; + case 0x2f9ac: + return 0x8564; + case 0x2f9ad: + return 0x26f2c; + case 0x2f9ae: + return 0x455d; + case 0x2f9af: + return 0x4561; + case 0x2f9b0: + return 0x26fb1; + case 0x2f9b1: + return 0x270d2; + case 0x2f9b2: + return 0x456b; + case 0x2f9b3: + return 0x8650; + case 0x2f9b4: + return 0x865c; + case 0x2f9b5: + return 0x8667; + case 0x2f9b6: + return 0x8669; + case 0x2f9b7: + return 0x86a9; + case 0x2f9b8: + return 0x8688; + case 0x2f9b9: + return 0x870e; + case 0x2f9ba: + return 0x86e2; + case 0x2f9bb: + return 0x8779; + case 0x2f9bc: + return 0x8728; + case 0x2f9bd: + return 0x876b; + case 0x2f9be: + return 0x8786; + case 0x2f9bf: + return 0x45d7; + case 0x2f9c0: + return 0x87e1; + case 0x2f9c1: + return 0x8801; + case 0x2f9c2: + return 0x45f9; + case 0x2f9c3: + return 0x8860; + case 0x2f9c4: + return 0x8863; + case 0x2f9c5: + return 0x27667; + case 0x2f9c6: + return 0x88d7; + case 
0x2f9c7: + return 0x88de; + case 0x2f9c8: + return 0x4635; + case 0x2f9c9: + return 0x88fa; + case 0x2f9ca: + return 0x34bb; + case 0x2f9cb: + return 0x278ae; + case 0x2f9cc: + return 0x27966; + case 0x2f9cd: + return 0x46be; + case 0x2f9ce: + return 0x46c7; + case 0x2f9cf: + return 0x8aa0; + case 0x2f9d0: + return 0x8aed; + case 0x2f9d1: + return 0x8b8a; + case 0x2f9d2: + return 0x8c55; + case 0x2f9d3: + return 0x27ca8; + case 0x2f9d4: + return 0x8cab; + case 0x2f9d5: + return 0x8cc1; + case 0x2f9d6: + return 0x8d1b; + case 0x2f9d7: + return 0x8d77; + case 0x2f9d8: + return 0x27f2f; + case 0x2f9d9: + return 0x20804; + case 0x2f9da: + return 0x8dcb; + case 0x2f9db: + return 0x8dbc; + case 0x2f9dc: + return 0x8df0; + case 0x2f9dd: + return 0x208de; + case 0x2f9de: + return 0x8ed4; + case 0x2f9df: + return 0x8f38; + case 0x2f9e0: + return 0x285d2; + case 0x2f9e1: + return 0x285ed; + case 0x2f9e2: + return 0x9094; + case 0x2f9e3: + return 0x90f1; + case 0x2f9e4: + return 0x9111; + case 0x2f9e5: + return 0x2872e; + case 0x2f9e6: + return 0x911b; + case 0x2f9e7: + return 0x9238; + case 0x2f9e8: + return 0x92d7; + case 0x2f9e9: + return 0x92d8; + case 0x2f9ea: + return 0x927c; + case 0x2f9eb: + return 0x93f9; + case 0x2f9ec: + return 0x9415; + case 0x2f9ed: + return 0x28bfa; + case 0x2f9ee: + return 0x958b; + case 0x2f9ef: + return 0x4995; + case 0x2f9f0: + return 0x95b7; + case 0x2f9f1: + return 0x28d77; + case 0x2f9f2: + return 0x49e6; + case 0x2f9f3: + return 0x96c3; + case 0x2f9f4: + return 0x5db2; + case 0x2f9f5: + return 0x9723; + case 0x2f9f6: + return 0x29145; + case 0x2f9f7: + return 0x2921a; + case 0x2f9f8: + return 0x4a6e; + case 0x2f9f9: + return 0x4a76; + case 0x2f9fa: + return 0x97e0; + case 0x2f9fb: + return 0x2940a; + case 0x2f9fc: + return 0x4ab2; + case 0x2f9fd: + return 0x29496; + case 0x2f9fe: + return 0x980b; + case 0x2f9ff: + return 0x980b; + case 0x2fa00: + return 0x9829; + case 0x2fa01: + return 0x295b6; + case 0x2fa02: + return 0x98e2; + case 
0x2fa03: + return 0x4b33; + case 0x2fa04: + return 0x9929; + case 0x2fa05: + return 0x99a7; + case 0x2fa06: + return 0x99c2; + case 0x2fa07: + return 0x99fe; + case 0x2fa08: + return 0x4bce; + case 0x2fa09: + return 0x29b30; + case 0x2fa0a: + return 0x9b12; + case 0x2fa0b: + return 0x9c40; + case 0x2fa0c: + return 0x9cfd; + case 0x2fa0d: + return 0x4cce; + case 0x2fa0e: + return 0x4ced; + case 0x2fa0f: + return 0x9d67; + case 0x2fa10: + return 0x2a0ce; + case 0x2fa11: + return 0x4cf8; + case 0x2fa12: + return 0x2a105; + case 0x2fa13: + return 0x2a20e; + case 0x2fa14: + return 0x2a291; + case 0x2fa15: + return 0x9ebb; + case 0x2fa16: + return 0x4d56; + case 0x2fa17: + return 0x9ef9; + case 0x2fa18: + return 0x9efe; + case 0x2fa19: + return 0x9f05; + case 0x2fa1a: + return 0x9f0f; + case 0x2fa1b: + return 0x9f16; + case 0x2fa1c: + return 0x9f3b; + case 0x2fa1d: + return 0x2a600; + default: + return codepoint; + } +} +} // namespace unicode +} // namespace mongo diff --git a/src/mongo/db/fts/unicode/codepoints_test.cpp b/src/mongo/db/fts/unicode/codepoints_test.cpp new file mode 100644 index 00000000000..90510666cba --- /dev/null +++ b/src/mongo/db/fts/unicode/codepoints_test.cpp @@ -0,0 +1,94 @@ +/** + * Copyright (C) 2015 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/db/fts/unicode/codepoints.h" +#include "mongo/unittest/unittest.h" + +namespace mongo { +namespace unicode { + +/** + * Above most of the arrays in this file are the UTF-32 character literals that correspond to the + * codepoints in the array. + */ + +TEST(UnicodeCodepoints, Diacritics) { + // There are no character literals for combining marks. 
+ const char32_t marks[] = {0x0301, 0x0339, 0x1AB4, 0x1DC5, 0xA69D}; + + // const char32_t not_marks[] = {U'-', U'.', U'\'', U'*', U'm'}; + const char32_t not_marks[] = {0x2D, 0x2E, 0x27, 0x2A, 0x6D}; + + for (auto i = 0; i < 5; ++i) { + ASSERT(codepointIsDiacritic(marks[i])); + ASSERT_FALSE(codepointIsDiacritic(not_marks[i])); + } +} + +TEST(UnicodeCodepoints, Delimiters) { + // const char32_t delimiters[] = {U'-', U'.', U'"', U'¿', U'«'}; + const char32_t delimiters[] = {0x2D, 0x2E, 0x22, 0xBF, 0xAB}; + // const char32_t not_delimiters[] = {U'a', U'ê', U'π', U'Ω', U'å'}; + const char32_t not_delimiters[] = {0x61, 0xEA, 0x3C0, 0x3A9, 0xE5}; + + for (auto i = 0; i < 5; ++i) { + ASSERT(codepointIsDelimiter(delimiters[i], DelimiterListLanguage::kEnglish)); + ASSERT(codepointIsDelimiter(delimiters[i], DelimiterListLanguage::kNotEnglish)); + ASSERT_FALSE(codepointIsDelimiter(not_delimiters[i], DelimiterListLanguage::kEnglish)); + ASSERT_FALSE(codepointIsDelimiter(not_delimiters[i], DelimiterListLanguage::kNotEnglish)); + } + + // Special case for English. 
+ ASSERT(codepointIsDelimiter(0x27, DelimiterListLanguage::kNotEnglish)); + ASSERT_FALSE(codepointIsDelimiter(0x27, DelimiterListLanguage::kEnglish)); +} + +TEST(UnicodeCodepoints, RemoveDiacritics) { + // const char32_t originals[] = {U'á', U'ê', U'ñ', U'å', U'ç'}; + const char32_t originals[] = {0xE1, 0xEA, 0xF1, 0xE5, 0xE7}; + // const char32_t clean[] = {U'a', U'e', U'n', U'a', U'c'}; + const char32_t clean[] = {0x61, 0x65, 0x6E, 0x61, 0x63}; + + for (auto i = 0; i < 5; ++i) { + ASSERT_EQUALS(clean[i], codepointRemoveDiacritics(originals[i])); + } +} + +TEST(UnicodeCodepoints, ToLower) { + // const char32_t upper[] = {U'Á', U'Ê', U'Ñ', U'Å', U'Ç'}; + const char32_t upper[] = {0xC1, 0xCA, 0xD1, 0xC5, 0xC7}; + // const char32_t lower[] = {U'á', U'ê', U'ñ', U'å', U'ç'}; + const char32_t lower[] = {0xE1, 0xEA, 0xF1, 0xE5, 0xE7}; + + for (auto i = 0; i < 5; ++i) { + ASSERT_EQUALS(lower[i], codepointToLower(upper[i])); + } +} + +} // namespace unicode +} // namespace mongo diff --git a/src/mongo/db/fts/unicode/gen_casefold_map.py b/src/mongo/db/fts/unicode/gen_casefold_map.py new file mode 100644 index 00000000000..fc55cdd57c7 --- /dev/null +++ b/src/mongo/db/fts/unicode/gen_casefold_map.py @@ -0,0 +1,76 @@ + #!/usr/bin/python + # -*- coding: utf-8 -*- +import os +import sys + +from gen_helper import getCopyrightNotice, openNamespaces, closeNamespaces, \ + include + +def generate(unicode_casefold_file, target): + """Generates a C++ source file that contains a Unicode case folding + function. + + The case folding function contains a switch statement with cases for every + Unicode codepoint that has a case folding mapping. 
+ """ + out = open(target, "w") + + out.write(getCopyrightNotice()) + out.write(include("mongo/db/fts/unicode/codepoints.h")) + out.write("\n") + out.write(openNamespaces()) + + case_mappings = {} + + cf_file = open(unicode_casefold_file, 'r') + + for line in cf_file: + # Filter out blank lines and lines that start with # + data = line[:line.find('#')] + if(data == ""): + continue + + # Parse the data on the line + values = data.split("; ") + assert(len(values) == 4) + + status = values[1] + if status == 'C' or status == 'S': + # We only include the "Common" and "Simple" mappings. "Full" case + # folding mappings expand certain letters to multiple codepoints, + # which we currently do not support. + original_codepoint = int(values[0], 16) + codepoint_mapping = int(values[2], 16) + case_mappings[original_codepoint] = codepoint_mapping + + out.write("""char32_t codepointToLower(char32_t codepoint, CaseFoldMode \ +mode) { + if (mode == CaseFoldMode::kTurkish) { + if (codepoint == 0x049) { // I -> ı + return 0x131; + } else if (codepoint == 0x130) { // İ -> i + return 0x069; + } + } + + switch (codepoint) {\n""") + + mappings_list = [] + + for mapping in case_mappings: + mappings_list.append((mapping, case_mappings[mapping])) + + sorted_mappings = sorted(mappings_list, key=lambda mapping: mapping[0]) + + for mapping in sorted_mappings: + out.write("\ + case " + str(hex(mapping[0])) + ": return " + \ + str(hex(mapping[1])) +";\n") + + out.write("\ + default: return codepoint;\n }\n}") + + out.write(closeNamespaces()) + +if __name__ == "__main__": + generate(sys.argv[1], sys.argv[2]) diff --git a/src/mongo/db/fts/unicode/gen_delimiter_list.py b/src/mongo/db/fts/unicode/gen_delimiter_list.py new file mode 100644 index 00000000000..52b79544c6b --- /dev/null +++ b/src/mongo/db/fts/unicode/gen_delimiter_list.py @@ -0,0 +1,80 @@ + #!/usr/bin/python + # -*- coding: utf-8 -*- +import sys + +from gen_helper import getCopyrightNotice, openNamespaces, closeNamespaces, \ + include 
+ +def generate(unicode_proplist_file, target): + """Generates a C++ source file that contains a delimiter checking function. + + The delimiter checking function contains a switch statement with cases for + every delimiter in the Unicode Character Database with the properties + specified in delim_properties. + """ + out = open(target, "w") + + out.write(getCopyrightNotice()) + out.write(include("mongo/db/fts/unicode/codepoints.h")) + out.write("\n") + out.write(openNamespaces()) + + delim_codepoints = set() + + proplist_file = open(unicode_proplist_file, 'r') + + delim_properties = ["White_Space", + "Dash", + "Hyphen", + "Quotation_Mark", + "Terminal_Punctuation", + "Pattern_Syntax", + "STerm"] + + for line in proplist_file: + # Filter out blank lines and lines that start with # + data = line[:line.find('#')] + if(data == ""): + continue + + # Parse the data on the line + values = data.split("; ") + assert(len(values) == 2) + + uproperty = values[1].strip() + if uproperty in delim_properties: + if len(values[0].split('..')) == 2: + codepoint_range = values[0].split('..') + + start = int(codepoint_range[0], 16) + end = int(codepoint_range[1], 16) + 1 + + for i in range(start, end): + if i not in delim_codepoints: + delim_codepoints.add(i) + else: + if int(values[0], 16) not in delim_codepoints: + delim_codepoints.add(int(values[0], 16)) + + # As of Unicode 8.0.0, all of the delimiters we used for text index + # version 2 are also in the list. 
+ + out.write("""bool codepointIsDelimiter(char32_t codepoint, \ +DelimiterListLanguage lang) { + if (lang == DelimiterListLanguage::kEnglish && codepoint == '\\'') { + return false; + } + + switch (codepoint) {\n""") + + for delim in sorted(delim_codepoints): + out.write("\ + case " + str(hex(delim)) + ": return true;\n") + + out.write("\ + default: return false;\n }\n}") + + out.write(closeNamespaces()) + +if __name__ == "__main__": + generate(sys.argv[1], sys.argv[2]) diff --git a/src/mongo/db/fts/unicode/gen_diacritic_list.py b/src/mongo/db/fts/unicode/gen_diacritic_list.py new file mode 100644 index 00000000000..260a85307af --- /dev/null +++ b/src/mongo/db/fts/unicode/gen_diacritic_list.py @@ -0,0 +1,63 @@ + #!/usr/bin/python + # -*- coding: utf-8 -*- +import sys + +from gen_helper import getCopyrightNotice, openNamespaces, closeNamespaces, \ + include + +def generate(unicode_proplist_file, target): + """Generates a C++ source file that contains a diacritic checking function. + + The diacritic checking function contains a switch statement with cases for + every diacritic in the Unicode Character Database. 
+ """ + out = open(target, "w") + + out.write(getCopyrightNotice()) + out.write(include("mongo/db/fts/unicode/codepoints.h")) + out.write("\n") + out.write(openNamespaces()) + + diacritics = set() + + proplist_file = open(unicode_proplist_file, 'r') + + for line in proplist_file: + # Filter out blank lines and lines that start with # + data = line[:line.find('#')] + if(data == ""): + continue + + # Parse the data on the line + values = data.split("; ") + assert(len(values) == 2) + + uproperty = values[1].strip() + if uproperty in "Diacritic": + if len(values[0].split('..')) == 2: + codepoint_range = values[0].split('..') + + start = int(codepoint_range[0], 16) + end = int(codepoint_range[1], 16) + 1 + + for i in range(start, end): + if i not in diacritics: + diacritics.add(i) + else: + if int(values[0], 16) not in diacritics: + diacritics.add(int(values[0], 16)) + + out.write("""bool codepointIsDiacritic(char32_t codepoint) { + switch (codepoint) {\n""") + + for diacritic in sorted(diacritics): + out.write("\ + case " + str(hex(diacritic)) + ": return true;\n") + + out.write("\ + default: return false;\n }\n}") + + out.write(closeNamespaces()) + +if __name__ == "__main__": + generate(sys.argv[1], sys.argv[2]) diff --git a/src/mongo/db/fts/unicode/gen_diacritic_map.py b/src/mongo/db/fts/unicode/gen_diacritic_map.py new file mode 100644 index 00000000000..d002a1acbac --- /dev/null +++ b/src/mongo/db/fts/unicode/gen_diacritic_map.py @@ -0,0 +1,105 @@ + #!/usr/bin/python + # -*- coding: utf-8 -*- +import sys +from unicodedata import normalize, category, unidata_version + +from gen_helper import getCopyrightNotice, openNamespaces, closeNamespaces, \ + include + +diacritics = set() + +def load_diacritics(unicode_proplist_file): + proplist_file = open(unicode_proplist_file, 'r') + + for line in proplist_file: + # Filter out blank lines and lines that start with # + data = line[:line.find('#')] + if(data == ""): + continue + + # Parse the data on the line + values = 
data.split("; ") + assert(len(values) == 2) + + uproperty = values[1].strip() + if uproperty == "Diacritic": + if len(values[0].split('..')) == 2: + codepoint_range = values[0].split('..') + + start = int(codepoint_range[0], 16) + end = int(codepoint_range[1], 16) + 1 + + for i in range(start, end): + if i not in diacritics: + diacritics.add(i) + else: + if int(values[0], 16) not in diacritics: + diacritics.add(int(values[0], 16)) + +diacritic_mappings = {} + +def add_diacritic_mapping(codepoint): + # a : original unicode character + # d : decomposed unicode character + # r : decomposed unicode character with diacritics removed + # c : recomposed unicode character with diacritics removed + a = chr(codepoint) + d = normalize('NFD', a) + r = u'' + + for i in range(len(d)): + if ord(d[i]) not in diacritics: + r += d[i] + + c = normalize('NFC', r) + + # Only use mappings where the final recomposed form is a single codepoint + if (a != c and len(c) == 1): + diacritic_mappings[codepoint] = ord(c[0]) + +def add_diacritic_range(start, end): + for x in range(start, end + 1): + add_diacritic_mapping(x) + +def generate(target): + """Generates a C++ source file that contains a diacritic removal mapping + function. + + The delimiter checking function contains a switch statement with cases for + every character in Unicode that has a removable combining diacritical mark. 
+ """ + out = open(target, "w") + + out.write(getCopyrightNotice()) + out.write(include("mongo/db/fts/unicode/codepoints.h")) + out.write("\n") + out.write(openNamespaces()) + + # Map diacritics from 0 to the maximum Unicode codepoint + add_diacritic_range(0x0000, 0x10FFFF) + + out.write("""char32_t codepointRemoveDiacritics(char32_t codepoint) { + switch (codepoint) {\n""") + + mappings_list = [] + + for mapping in diacritic_mappings: + mappings_list.append((mapping, diacritic_mappings[mapping])) + + sorted_mappings = sorted(mappings_list, key=lambda mapping: mapping[0]) + + for mapping in sorted_mappings: + out.write(" case " + str(hex(mapping[0])) + ": return " + \ + str(hex(mapping[1])) +";\n") + + out.write(" default: return codepoint;\n }\n}") + + out.write(closeNamespaces()) + +if __name__ == "__main__": + if(unidata_version != '8.0.0'): + print("""ERROR: This script must be run with a version of Python that \ + contains the Unicode 8.0.0 Character Database.""") + sys.exit(1) + load_diacritics(sys.argv[1]) + generate(sys.argv[2]) diff --git a/src/mongo/db/fts/unicode/gen_helper.py b/src/mongo/db/fts/unicode/gen_helper.py new file mode 100644 index 00000000000..d3698e7894e --- /dev/null +++ b/src/mongo/db/fts/unicode/gen_helper.py @@ -0,0 +1,39 @@ +def getCopyrightNotice(): + return """/** + * Copyright (C) 2015 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + * + * THIS IS A GENERATED FILE, DO NOT MODIFY. + */\n\n""" + +def openNamespaces(): + return "namespace mongo {\nnamespace unicode {\n\n" + +def closeNamespaces(): + return "\n} // namespace unicode\n} // namespace mongo\n" + +def include(header): + return '#include "' + header + '"\n' diff --git a/src/mongo/db/fts/unicode/string.cpp b/src/mongo/db/fts/unicode/string.cpp new file mode 100644 index 00000000000..24c6ff8027e --- /dev/null +++ b/src/mongo/db/fts/unicode/string.cpp @@ -0,0 +1,157 @@ +/** + * Copyright (C) 2015 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/db/fts/unicode/string.h" + +#include <algorithm> + +#include "mongo/shell/linenoise_utf8.h" +#include "mongo/util/assert_util.h" + +namespace mongo { +namespace unicode { + +using linenoise_utf8::copyString32to8; +using linenoise_utf8::copyString8to32; + +using std::u32string; + +String::String(const StringData utf8_src) { + // _data is the target, resize it so that it's guaranteed to fit all of the input characters, + // plus a null character if there isn't one. + _data.resize(utf8_src.size() + 1); + + int result = 0; + size_t resultSize = 0; + + // Although utf8_src.rawData() is not guaranteed to be null-terminated, copyString8to32 won't + // access bad memory because it is limited by the size of its output buffer, which is set to the + // size of utf8_src plus one (NOTE(review): confirm this cannot read one byte past the input). + copyString8to32(&_data[0], + reinterpret_cast<const unsigned char*>(&utf8_src.rawData()[0]), + _data.size(), + resultSize, + result); + + uassert(28755, "text contains invalid UTF-8", result == 0); + + // Resize _data so it is only as big as what it contains. 
+ _data.resize(resultSize); +} + +String::String(u32string&& src) : _data(std::move(src)) {} + +std::string String::toString() const { + // output is the target, size it (four bytes per code point) so that it's guaranteed to fit all of the input characters, + // plus a null character if there isn't one. + std::string output(_data.size() * 4 + 1, '\0'); + size_t resultSize = + copyString32to8(reinterpret_cast<unsigned char*>(&output[0]), &_data[0], output.size()); + + // Resize output so it is only as large as what it contains. + output.resize(resultSize); + return output; +} + +size_t String::size() const { + return _data.size(); +} + +const char32_t& String::operator[](int i) const { + return _data[i]; +} + +String String::substr(size_t pos, size_t len) const { + return String(_data.substr(pos, len)); +} + +String String::toLower(CaseFoldMode mode) const { + u32string newdata(_data.size(), 0); + auto index = 0; + for (auto codepoint : _data) { + newdata[index++] = codepointToLower(codepoint, mode); + } + + return String(std::move(newdata)); +} + +String String::removeDiacritics() const { + u32string newdata(_data.size(), 0); + auto index = 0; + for (auto codepoint : _data) { + if (!codepointIsDiacritic(codepoint)) { + newdata[index++] = codepointRemoveDiacritics(codepoint); + } + } + + newdata.resize(index); + return String(std::move(newdata)); +} + +bool String::substrMatch(const String& str, + const String& find, + SubstrMatchOptions options, + CaseFoldMode cfMode) { + // In Turkish, lowercasing needs to be applied first because the letter İ has a different case + // folding mapping than the letter I, but removing diacritics removes the dot from İ. 
+ if (cfMode == CaseFoldMode::kTurkish) { + String cleanStr = str.toLower(cfMode); + String cleanFind = find.toLower(cfMode); + return substrMatch(cleanStr, cleanFind, options | kCaseSensitive, CaseFoldMode::kNormal); + } + + if (options & kDiacriticSensitive) { + if (options & kCaseSensitive) { + // Case sensitive and diacritic sensitive. 
+ return std::search(str._data.cbegin(), + str._data.cend(), + find._data.cbegin(), + find._data.cend(), + [&](char32_t c1, char32_t c2) { return (c1 == c2); }) != + str._data.cend(); + } + + // Case insensitive and diacritic sensitive. + return std::search(str._data.cbegin(), + str._data.cend(), + find._data.cbegin(), + find._data.cend(), + [&](char32_t c1, char32_t c2) { + return (codepointToLower(c1, cfMode) == + codepointToLower(c2, cfMode)); + }) != str._data.cend(); + } + + String cleanStr = str.removeDiacritics(); + String cleanFind = find.removeDiacritics(); + + return substrMatch(cleanStr, cleanFind, options | kDiacriticSensitive, cfMode); +} + +} // namespace unicode +} // namespace mongo diff --git a/src/mongo/db/fts/unicode/string.h b/src/mongo/db/fts/unicode/string.h new file mode 100644 index 00000000000..1fa77af2f3f --- /dev/null +++ b/src/mongo/db/fts/unicode/string.h @@ -0,0 +1,139 @@ +/** + * Copyright (C) 2015 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. 
You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include <cstdint> +#include <string> + +#include "mongo/base/string_data.h" +#include "mongo/db/fts/unicode/codepoints.h" + +namespace mongo { +namespace unicode { + +/** + * A string class that supports basic Unicode functionality such as removing diacritic marks, and + * lowercasing. The String is constructed with UTF-8 source data, and is converted under the hood to + * a u32string (UTF-32) so operations can be easily done with individual Unicode code points. + */ +class String { +public: + String() = default; + +#if defined(_MSC_VER) && _MSC_VER < 1900 + String(String&& other) : _data(std::move(other._data)) {} + + String& operator=(String&& other) { + _data = std::move(other._data); + return *this; + } +#endif + + /** + * Construct a String with UTF-8 source data (supports standard C++ string literals, and + * std::strings). + */ + explicit String(StringData utf8_src); + + /** + * Return a lowercased version of the String instance using codepointToLower() from codepoints.h. + */ + String toLower(CaseFoldMode mode = CaseFoldMode::kNormal) const; + + /** + * Returns a version of the String instance with diacritics and combining marks removed. + */ + String removeDiacritics() const; + + /** + * Returns a substring of the String instance, using the same semantics as std::string::substr (the second argument is a length, not an end position). 
+ */ + String substr(size_t begin, size_t end) const; + + /** + * Returns a UTF-8 encoded std::string version of the String instance. 
+ */ + std::string toString() const; + + /** + * Returns the number of Unicode codepoints in the String. + */ + size_t size() const; + + /** + * Returns the Unicode codepoint at index i of the String. + */ + const char32_t& operator[](int i) const; + + /** + * Options for the substrMatch method. + */ + using SubstrMatchOptions = uint8_t; + + /** + * No options (case insensitive and diacritic insensitive). + */ + static const SubstrMatchOptions kNone = 0; + + /** + * Perform case sensitive substring match. + */ + static const SubstrMatchOptions kCaseSensitive = 1 << 0; + + /** + * Perform diacritic sensitive substring match. + */ + static const SubstrMatchOptions kDiacriticSensitive = 1 << 1; + + /** + * Search the string 'str' for the string 'find'. If 'find' exists in 'str', return true, else + * return false. Optionally searches can be made case sensitive and/or diacritic sensitive. If + * the search is case insensitive, non-Turkish case folding is used unless + * CaseFoldMode::kTurkish is passed as 'mode'. + */ + static bool substrMatch(const String& str, + const String& find, + SubstrMatchOptions options, + CaseFoldMode mode = CaseFoldMode::kNormal); + +private: + /** + * Private constructor used by substr, toLower, and removeDiacritics to build a String from + * UTF-32 data. + */ + String(std::u32string&& src); + + /** + * The underlying UTF-32 data. + */ + std::u32string _data; +}; + +} // namespace unicode +} // namespace mongo diff --git a/src/mongo/db/fts/unicode/string_test.cpp b/src/mongo/db/fts/unicode/string_test.cpp new file mode 100644 index 00000000000..9354f7bf25c --- /dev/null +++ b/src/mongo/db/fts/unicode/string_test.cpp @@ -0,0 +1,187 @@ +/** + * Copyright (C) 2015 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include "mongo/db/fts/unicode/string.h" +#include "mongo/shell/linenoise_utf8.h" +#include "mongo/unittest/unittest.h" +#include "mongo/util/text.h" + +#ifdef MSC_VER +// Microsoft VS 2013 does not handle UTF-8 strings in char literal strings, error C4566 +// The Microsoft compiler can be tricked into using UTF-8 strings as follows: +// 1. The file has a UTF-8 BOM +// 2. The string literal is a wide character string literal (ie, prefixed with L) +// at this point. +#define UTF8(x) toUtf8String(L##x) +#else +#define UTF8(x) x +#endif + +namespace mongo { +namespace unicode { + +using linenoise_utf8::copyString32to8; + +TEST(UnicodeString, RemoveDiacritics) { + // NFC Normalized Text. 
+ String test1 = String(UTF8("¿CUÁNTOS AÑOS TIENES TÚ?")); + + // NFD Normalized Text ("Café"). + const char test2[] = {'C', 'a', 'f', 'e', static_cast<char>(0xcc), static_cast<char>(0x81), 0}; + + ASSERT_EQUALS(UTF8("¿CUANTOS ANOS TIENES TU?"), test1.removeDiacritics().toString()); + ASSERT_EQUALS(UTF8("Cafe"), String(test2).removeDiacritics().toString()); +} + +TEST(UnicodeString, CaseFolding) { + String test1 = String(UTF8("СКОЛЬКО ТЕБЕ ЛЕТ?")); + String test2 = String(UTF8("¿CUÁNTOS AÑOS TIENES TÚ?")); + + ASSERT_EQUALS(UTF8("сколько тебе лет?"), test1.toLower().toString()); + ASSERT_EQUALS(UTF8("¿cuántos años tienes tú?"), test2.toLower().toString()); +} + +TEST(UnicodeString, CaseFoldingTurkish) { + String test1 = String(UTF8("KAC YASINDASINIZ")); + String test2 = String(UTF8("KAC YASİNDASİNİZ")); + + ASSERT_EQUALS(UTF8("kac yasındasınız"), test1.toLower(CaseFoldMode::kTurkish).toString()); + ASSERT_EQUALS(UTF8("kac yasindasiniz"), test2.toLower(CaseFoldMode::kTurkish).toString()); +} + +TEST(UnicodeString, CaseFoldingAndRemoveDiacritics) { + // NFC Normalized Text. + String test1 = String(UTF8("Πόσο χρονών είσαι?")); + String test2 = String(UTF8("¿CUÁNTOS AÑOS TIENES TÚ?")); + + // NFD Normalized Text ("CAFÉ"). + const char test3[] = {'C', 'A', 'F', 'E', static_cast<char>(0xcc), static_cast<char>(0x81), 0}; + + ASSERT_EQUALS(UTF8("ποσο χρονων εισαι?"), test1.toLower().removeDiacritics().toString()); + ASSERT_EQUALS(UTF8("¿cuantos anos tienes tu?"), test2.toLower().removeDiacritics().toString()); + ASSERT_EQUALS(UTF8("cafe"), String(test3).toLower().removeDiacritics().toString()); +} + +TEST(UnicodeString, SubstringMatch) { + String str = String(UTF8("Одумайся! Престол свой сохрани; И ярость укроти.")); + + // Case insensitive & diacritic insensitive. 
+ ASSERT(String::substrMatch(str, String(UTF8("ПРЁСТОЛ СВОИ")), String::kNone)); + ASSERT_FALSE(String::substrMatch(str, String(UTF8("Престол сохрани")), String::kNone)); + + // Case sensitive & diacritic insensitive. + ASSERT(String::substrMatch(str, String(UTF8("Одумаися!")), String::kCaseSensitive)); + ASSERT_FALSE(String::substrMatch(str, String(UTF8("одумайся!")), String::kCaseSensitive)); + + // Case insensitive & diacritic sensitive. + ASSERT(String::substrMatch(str, String(UTF8("одумайся!")), String::kDiacriticSensitive)); + ASSERT_FALSE(String::substrMatch(str, String(UTF8("Одумаися!")), String::kDiacriticSensitive)); + + // Case sensitive & diacritic sensitive. + ASSERT(String::substrMatch( + str, String(UTF8("Одумайся!")), String::kDiacriticSensitive | String::kCaseSensitive)); + ASSERT_FALSE(String::substrMatch( + str, String(UTF8("Одумаися!")), String::kDiacriticSensitive | String::kCaseSensitive)); +} + +TEST(UnicodeString, SubstringMatchTurkish) { + String str = String(UTF8("KAÇ YAŞINDASINIZ?")); + + // Case insensitive & diacritic insensitive. + ASSERT(String::substrMatch( + str, String(UTF8("yasındasınız")), String::kNone, CaseFoldMode::kTurkish)); + ASSERT_FALSE(String::substrMatch( + str, String(UTF8("yasindasiniz")), String::kNone, CaseFoldMode::kTurkish)); + + // Case insensitive & diacritic sensitive. + ASSERT(String::substrMatch( + str, String(UTF8("yaşındasınız")), String::kDiacriticSensitive, CaseFoldMode::kTurkish)); + ASSERT_FALSE(String::substrMatch( + str, String(UTF8("yaşindasiniz")), String::kDiacriticSensitive, CaseFoldMode::kTurkish)); +} + +TEST(UnicodeString, BadUTF8) { + // Overlong. + const char invalid1[] = {static_cast<char>(0xC0), static_cast<char>(0xAF), 0}; + + // Invalid code positions. 
+ const char invalid2[] = { + static_cast<char>(0xED), static_cast<char>(0xA0), static_cast<char>(0x80), 0}; + const char invalid3[] = { + static_cast<char>(0xC2), static_cast<char>(0x41), static_cast<char>(0x42), 0}; + const char invalid4[] = {static_cast<char>(0x61), + static_cast<char>(0xF1), + static_cast<char>(0x80), + static_cast<char>(0x80), + static_cast<char>(0xE1), + static_cast<char>(0x80), + static_cast<char>(0xC2), + static_cast<char>(0x62), + static_cast<char>(0x80), + static_cast<char>(0x63), + static_cast<char>(0x80), + static_cast<char>(0xBF), + static_cast<char>(0x64), + 0}; + + ASSERT_THROWS(String test1(invalid1), AssertionException); + ASSERT_THROWS(String test2(invalid2), AssertionException); + ASSERT_THROWS(String test3(invalid3), AssertionException); + ASSERT_THROWS(String test4(invalid4), AssertionException); +} + +TEST(UnicodeString, UTF32ToUTF8) { + std::u32string original; + original.push_back(0x004D); + original.push_back(0x0430); + original.push_back(0x4E8C); + original.push_back(0x10302); + original.push_back(0); + + std::string expected_result; + expected_result.push_back(0x4D); + expected_result.push_back(0xD0); + expected_result.push_back(0xB0); + expected_result.push_back(0xE4); + expected_result.push_back(0xBA); + expected_result.push_back(0x8C); + expected_result.push_back(0xF0); + expected_result.push_back(0x90); + expected_result.push_back(0x8C); + expected_result.push_back(0x82); + expected_result.push_back(0); + + std::string result(11, '\0'); + + copyString32to8(reinterpret_cast<unsigned char*>(&result[0]), &original[0], 11); + + ASSERT_EQUALS(expected_result, result); +} + +} // namespace unicode +} // namespace mongo diff --git a/src/mongo/shell/linenoise_utf8.h b/src/mongo/shell/linenoise_utf8.h index 4bd4c2bdc7e..aab3e6f73f2 100644 --- a/src/mongo/shell/linenoise_utf8.h +++ b/src/mongo/shell/linenoise_utf8.h @@ -31,10 +31,14 @@ #include <memory> #include <string.h> +#if defined(_MSC_VER) && _MSC_VER < 1900 +#include 
<string> +#endif + namespace linenoise_utf8 { typedef unsigned char UChar8; // UTF-8 octet -typedef unsigned int UChar32; // Unicode code point +typedef char32_t UChar32; // Unicode code point // Error bits (or-ed together) returned from utf8toUChar32string // diff --git a/src/third_party/unicode-8.0.0/CaseFolding.txt b/src/third_party/unicode-8.0.0/CaseFolding.txt new file mode 100644 index 00000000000..0197a6c40fb --- /dev/null +++ b/src/third_party/unicode-8.0.0/CaseFolding.txt @@ -0,0 +1,1414 @@ +# CaseFolding-8.0.0.txt +# Date: 2015-01-13, 18:16:36 GMT [MD] +# +# Unicode Character Database +# Copyright (c) 1991-2015 Unicode, Inc. +# For terms of use, see http://www.unicode.org/terms_of_use.html +# For documentation, see http://www.unicode.org/reports/tr44/ +# +# Case Folding Properties +# +# This file is a supplement to the UnicodeData file. +# It provides a case folding mapping generated from the Unicode Character Database. +# If all characters are mapped according to the full mapping below, then +# case differences (according to UnicodeData.txt and SpecialCasing.txt) +# are eliminated. +# +# The data supports both implementations that require simple case foldings +# (where string lengths don't change), and implementations that allow full case folding +# (where string lengths may grow). Note that where they can be supported, the +# full case foldings are superior: for example, they allow "MASSE" and "Maße" to match. +# +# All code points not listed in this file map to themselves. +# +# NOTE: case folding does not preserve normalization formats! +# +# For information on case folding, including how to have case folding +# preserve normalization formats, see Section 3.13 Default Case Algorithms in +# The Unicode Standard. 
+# +# ================================================================================ +# Format +# ================================================================================ +# The entries in this file are in the following machine-readable format: +# +# <code>; <status>; <mapping>; # <name> +# +# The status field is: +# C: common case folding, common mappings shared by both simple and full mappings. +# F: full case folding, mappings that cause strings to grow in length. Multiple characters are separated by spaces. +# S: simple case folding, mappings to single characters where different from F. +# T: special case for uppercase I and dotted uppercase I +# - For non-Turkic languages, this mapping is normally not used. +# - For Turkic languages (tr, az), this mapping can be used instead of the normal mapping for these characters. +# Note that the Turkic mappings do not maintain canonical equivalence without additional processing. +# See the discussions of case mapping in the Unicode Standard for more information. +# +# Usage: +# A. To do a simple case folding, use the mappings with status C + S. +# B. To do a full case folding, use the mappings with status C + F. +# +# The mappings with status T can be used or omitted depending on the desired case-folding +# behavior. (The default option is to exclude them.) +# +# ================================================================= + +# Property: Case_Folding + +# All code points not explicitly listed for Case_Folding +# have the value C for the status field, and the code point itself for the mapping fielddiff --git a/src/third_party/unicode-8.0.0/PropList.txt b/src/third_party/unicode-8.0.0/PropList.txt new file mode 100644 index 00000000000..2eb2926e072 --- /dev/null +++ b/src/third_party/unicode-8.0.0/PropList.txt @@ -0,0 +1,1525 @@ +# PropList-8.0.0.txt +# Date: 2015-05-16, 17:50:38 GMT [MD] +# +# Unicode Character Database +# Copyright (c) 1991-2015 Unicode, Inc. 
+# For terms of use, see http://www.unicode.org/terms_of_use.html +# For documentation, see http://www.unicode.org/reports/tr44/ + +# ================================================ + +0009..000D ; White_Space # Cc [5] <control-0009>..<control-000D> +0020 ; White_Space # Zs SPACE +0085 ; White_Space # Cc <control-0085> +00A0 ; White_Space # Zs NO-BREAK SPACE +1680 ; White_Space # Zs OGHAM SPACE MARK +2000..200A ; White_Space # Zs [11] EN QUAD..HAIR SPACE +2028 ; White_Space # Zl LINE SEPARATOR +2029 ; White_Space # Zp PARAGRAPH SEPARATOR +202F ; White_Space # Zs NARROW NO-BREAK SPACE +205F ; White_Space # Zs MEDIUM MATHEMATICAL SPACE +3000 ; White_Space # Zs IDEOGRAPHIC SPACE + +# Total code points: 25 + +# ================================================ + +061C ; Bidi_Control # Cf ARABIC LETTER MARK +200E..200F ; Bidi_Control # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK +202A..202E ; Bidi_Control # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE +2066..2069 ; Bidi_Control # Cf [4] LEFT-TO-RIGHT ISOLATE..POP DIRECTIONAL ISOLATE + +# Total code points: 12 + +# ================================================ + +200C..200D ; Join_Control # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER + +# Total code points: 2 + +# ================================================ + +002D ; Dash # Pd HYPHEN-MINUS +058A ; Dash # Pd ARMENIAN HYPHEN +05BE ; Dash # Pd HEBREW PUNCTUATION MAQAF +1400 ; Dash # Pd CANADIAN SYLLABICS HYPHEN +1806 ; Dash # Pd MONGOLIAN TODO SOFT HYPHEN +2010..2015 ; Dash # Pd [6] HYPHEN..HORIZONTAL BAR +2053 ; Dash # Po SWUNG DASH +207B ; Dash # Sm SUPERSCRIPT MINUS +208B ; Dash # Sm SUBSCRIPT MINUS +2212 ; Dash # Sm MINUS SIGN +2E17 ; Dash # Pd DOUBLE OBLIQUE HYPHEN +2E1A ; Dash # Pd HYPHEN WITH DIAERESIS +2E3A..2E3B ; Dash # Pd [2] TWO-EM DASH..THREE-EM DASH +2E40 ; Dash # Pd DOUBLE HYPHEN +301C ; Dash # Pd WAVE DASH +3030 ; Dash # Pd WAVY DASH +30A0 ; Dash # Pd KATAKANA-HIRAGANA DOUBLE HYPHEN +FE31..FE32 ; Dash # Pd [2] PRESENTATION FORM FOR 
VERTICAL EM DASH..PRESENTATION FORM FOR VERTICAL EN DASH +FE58 ; Dash # Pd SMALL EM DASH +FE63 ; Dash # Pd SMALL HYPHEN-MINUS +FF0D ; Dash # Pd FULLWIDTH HYPHEN-MINUS + +# Total code points: 28 + +# ================================================ + +002D ; Hyphen # Pd HYPHEN-MINUS +00AD ; Hyphen # Cf SOFT HYPHEN +058A ; Hyphen # Pd ARMENIAN HYPHEN +1806 ; Hyphen # Pd MONGOLIAN TODO SOFT HYPHEN +2010..2011 ; Hyphen # Pd [2] HYPHEN..NON-BREAKING HYPHEN +2E17 ; Hyphen # Pd DOUBLE OBLIQUE HYPHEN +30FB ; Hyphen # Po KATAKANA MIDDLE DOT +FE63 ; Hyphen # Pd SMALL HYPHEN-MINUS +FF0D ; Hyphen # Pd FULLWIDTH HYPHEN-MINUS +FF65 ; Hyphen # Po HALFWIDTH KATAKANA MIDDLE DOT + +# Total code points: 11 + +# ================================================ + +0022 ; Quotation_Mark # Po QUOTATION MARK +0027 ; Quotation_Mark # Po APOSTROPHE +00AB ; Quotation_Mark # Pi LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +00BB ; Quotation_Mark # Pf RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +2018 ; Quotation_Mark # Pi LEFT SINGLE QUOTATION MARK +2019 ; Quotation_Mark # Pf RIGHT SINGLE QUOTATION MARK +201A ; Quotation_Mark # Ps SINGLE LOW-9 QUOTATION MARK +201B..201C ; Quotation_Mark # Pi [2] SINGLE HIGH-REVERSED-9 QUOTATION MARK..LEFT DOUBLE QUOTATION MARK +201D ; Quotation_Mark # Pf RIGHT DOUBLE QUOTATION MARK +201E ; Quotation_Mark # Ps DOUBLE LOW-9 QUOTATION MARK +201F ; Quotation_Mark # Pi DOUBLE HIGH-REVERSED-9 QUOTATION MARK +2039 ; Quotation_Mark # Pi SINGLE LEFT-POINTING ANGLE QUOTATION MARK +203A ; Quotation_Mark # Pf SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +2E42 ; Quotation_Mark # Ps DOUBLE LOW-REVERSED-9 QUOTATION MARK +300C ; Quotation_Mark # Ps LEFT CORNER BRACKET +300D ; Quotation_Mark # Pe RIGHT CORNER BRACKET +300E ; Quotation_Mark # Ps LEFT WHITE CORNER BRACKET +300F ; Quotation_Mark # Pe RIGHT WHITE CORNER BRACKET +301D ; Quotation_Mark # Ps REVERSED DOUBLE PRIME QUOTATION MARK +301E..301F ; Quotation_Mark # Pe [2] DOUBLE PRIME QUOTATION MARK..LOW DOUBLE PRIME QUOTATION 
MARK +FE41 ; Quotation_Mark # Ps PRESENTATION FORM FOR VERTICAL LEFT CORNER BRACKET +FE42 ; Quotation_Mark # Pe PRESENTATION FORM FOR VERTICAL RIGHT CORNER BRACKET +FE43 ; Quotation_Mark # Ps PRESENTATION FORM FOR VERTICAL LEFT WHITE CORNER BRACKET +FE44 ; Quotation_Mark # Pe PRESENTATION FORM FOR VERTICAL RIGHT WHITE CORNER BRACKET +FF02 ; Quotation_Mark # Po FULLWIDTH QUOTATION MARK +FF07 ; Quotation_Mark # Po FULLWIDTH APOSTROPHE +FF62 ; Quotation_Mark # Ps HALFWIDTH LEFT CORNER BRACKET +FF63 ; Quotation_Mark # Pe HALFWIDTH RIGHT CORNER BRACKET + +# Total code points: 30 + +# ================================================ + +0021 ; Terminal_Punctuation # Po EXCLAMATION MARK +002C ; Terminal_Punctuation # Po COMMA +002E ; Terminal_Punctuation # Po FULL STOP +003A..003B ; Terminal_Punctuation # Po [2] COLON..SEMICOLON +003F ; Terminal_Punctuation # Po QUESTION MARK +037E ; Terminal_Punctuation # Po GREEK QUESTION MARK +0387 ; Terminal_Punctuation # Po GREEK ANO TELEIA +0589 ; Terminal_Punctuation # Po ARMENIAN FULL STOP +05C3 ; Terminal_Punctuation # Po HEBREW PUNCTUATION SOF PASUQ +060C ; Terminal_Punctuation # Po ARABIC COMMA +061B ; Terminal_Punctuation # Po ARABIC SEMICOLON +061F ; Terminal_Punctuation # Po ARABIC QUESTION MARK +06D4 ; Terminal_Punctuation # Po ARABIC FULL STOP +0700..070A ; Terminal_Punctuation # Po [11] SYRIAC END OF PARAGRAPH..SYRIAC CONTRACTION +070C ; Terminal_Punctuation # Po SYRIAC HARKLEAN METOBELUS +07F8..07F9 ; Terminal_Punctuation # Po [2] NKO COMMA..NKO EXCLAMATION MARK +0830..083E ; Terminal_Punctuation # Po [15] SAMARITAN PUNCTUATION NEQUDAA..SAMARITAN PUNCTUATION ANNAAU +085E ; Terminal_Punctuation # Po MANDAIC PUNCTUATION +0964..0965 ; Terminal_Punctuation # Po [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA +0E5A..0E5B ; Terminal_Punctuation # Po [2] THAI CHARACTER ANGKHANKHU..THAI CHARACTER KHOMUT +0F08 ; Terminal_Punctuation # Po TIBETAN MARK SBRUL SHAD +0F0D..0F12 ; Terminal_Punctuation # Po [6] TIBETAN MARK SHAD..TIBETAN 
MARK RGYA GRAM SHAD +104A..104B ; Terminal_Punctuation # Po [2] MYANMAR SIGN LITTLE SECTION..MYANMAR SIGN SECTION +1361..1368 ; Terminal_Punctuation # Po [8] ETHIOPIC WORDSPACE..ETHIOPIC PARAGRAPH SEPARATOR +166D..166E ; Terminal_Punctuation # Po [2] CANADIAN SYLLABICS CHI SIGN..CANADIAN SYLLABICS FULL STOP +16EB..16ED ; Terminal_Punctuation # Po [3] RUNIC SINGLE PUNCTUATION..RUNIC CROSS PUNCTUATION +1735..1736 ; Terminal_Punctuation # Po [2] PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DOUBLE PUNCTUATION +17D4..17D6 ; Terminal_Punctuation # Po [3] KHMER SIGN KHAN..KHMER SIGN CAMNUC PII KUUH +17DA ; Terminal_Punctuation # Po KHMER SIGN KOOMUUT +1802..1805 ; Terminal_Punctuation # Po [4] MONGOLIAN COMMA..MONGOLIAN FOUR DOTS +1808..1809 ; Terminal_Punctuation # Po [2] MONGOLIAN MANCHU COMMA..MONGOLIAN MANCHU FULL STOP +1944..1945 ; Terminal_Punctuation # Po [2] LIMBU EXCLAMATION MARK..LIMBU QUESTION MARK +1AA8..1AAB ; Terminal_Punctuation # Po [4] TAI THAM SIGN KAAN..TAI THAM SIGN SATKAANKUU +1B5A..1B5B ; Terminal_Punctuation # Po [2] BALINESE PANTI..BALINESE PAMADA +1B5D..1B5F ; Terminal_Punctuation # Po [3] BALINESE CARIK PAMUNGKAH..BALINESE CARIK PAREREN +1C3B..1C3F ; Terminal_Punctuation # Po [5] LEPCHA PUNCTUATION TA-ROL..LEPCHA PUNCTUATION TSHOOK +1C7E..1C7F ; Terminal_Punctuation # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD +203C..203D ; Terminal_Punctuation # Po [2] DOUBLE EXCLAMATION MARK..INTERROBANG +2047..2049 ; Terminal_Punctuation # Po [3] DOUBLE QUESTION MARK..EXCLAMATION QUESTION MARK +2E2E ; Terminal_Punctuation # Po REVERSED QUESTION MARK +2E3C ; Terminal_Punctuation # Po STENOGRAPHIC FULL STOP +2E41 ; Terminal_Punctuation # Po REVERSED COMMA +3001..3002 ; Terminal_Punctuation # Po [2] IDEOGRAPHIC COMMA..IDEOGRAPHIC FULL STOP +A4FE..A4FF ; Terminal_Punctuation # Po [2] LISU PUNCTUATION COMMA..LISU PUNCTUATION FULL STOP +A60D..A60F ; Terminal_Punctuation # Po [3] VAI COMMA..VAI QUESTION MARK +A6F3..A6F7 ; Terminal_Punctuation 
# Po [5] BAMUM FULL STOP..BAMUM QUESTION MARK +A876..A877 ; Terminal_Punctuation # Po [2] PHAGS-PA MARK SHAD..PHAGS-PA MARK DOUBLE SHAD +A8CE..A8CF ; Terminal_Punctuation # Po [2] SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA +A92F ; Terminal_Punctuation # Po KAYAH LI SIGN SHYA +A9C7..A9C9 ; Terminal_Punctuation # Po [3] JAVANESE PADA PANGKAT..JAVANESE PADA LUNGSI +AA5D..AA5F ; Terminal_Punctuation # Po [3] CHAM PUNCTUATION DANDA..CHAM PUNCTUATION TRIPLE DANDA +AADF ; Terminal_Punctuation # Po TAI VIET SYMBOL KOI KOI +AAF0..AAF1 ; Terminal_Punctuation # Po [2] MEETEI MAYEK CHEIKHAN..MEETEI MAYEK AHANG KHUDAM +ABEB ; Terminal_Punctuation # Po MEETEI MAYEK CHEIKHEI +FE50..FE52 ; Terminal_Punctuation # Po [3] SMALL COMMA..SMALL FULL STOP +FE54..FE57 ; Terminal_Punctuation # Po [4] SMALL SEMICOLON..SMALL EXCLAMATION MARK +FF01 ; Terminal_Punctuation # Po FULLWIDTH EXCLAMATION MARK +FF0C ; Terminal_Punctuation # Po FULLWIDTH COMMA +FF0E ; Terminal_Punctuation # Po FULLWIDTH FULL STOP +FF1A..FF1B ; Terminal_Punctuation # Po [2] FULLWIDTH COLON..FULLWIDTH SEMICOLON +FF1F ; Terminal_Punctuation # Po FULLWIDTH QUESTION MARK +FF61 ; Terminal_Punctuation # Po HALFWIDTH IDEOGRAPHIC FULL STOP +FF64 ; Terminal_Punctuation # Po HALFWIDTH IDEOGRAPHIC COMMA +1039F ; Terminal_Punctuation # Po UGARITIC WORD DIVIDER +103D0 ; Terminal_Punctuation # Po OLD PERSIAN WORD DIVIDER +10857 ; Terminal_Punctuation # Po IMPERIAL ARAMAIC SECTION SIGN +1091F ; Terminal_Punctuation # Po PHOENICIAN WORD SEPARATOR +10A56..10A57 ; Terminal_Punctuation # Po [2] KHAROSHTHI PUNCTUATION DANDA..KHAROSHTHI PUNCTUATION DOUBLE DANDA +10AF0..10AF5 ; Terminal_Punctuation # Po [6] MANICHAEAN PUNCTUATION STAR..MANICHAEAN PUNCTUATION TWO DOTS +10B3A..10B3F ; Terminal_Punctuation # Po [6] TINY TWO DOTS OVER ONE DOT PUNCTUATION..LARGE ONE RING OVER TWO RINGS PUNCTUATION +10B99..10B9C ; Terminal_Punctuation # Po [4] PSALTER PAHLAVI SECTION MARK..PSALTER PAHLAVI FOUR DOTS WITH DOT +11047..1104D ; Terminal_Punctuation # Po 
[7] BRAHMI DANDA..BRAHMI PUNCTUATION LOTUS +110BE..110C1 ; Terminal_Punctuation # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA +11141..11143 ; Terminal_Punctuation # Po [3] CHAKMA DANDA..CHAKMA QUESTION MARK +111C5..111C6 ; Terminal_Punctuation # Po [2] SHARADA DANDA..SHARADA DOUBLE DANDA +111CD ; Terminal_Punctuation # Po SHARADA SUTRA MARK +111DE..111DF ; Terminal_Punctuation # Po [2] SHARADA SECTION MARK-1..SHARADA SECTION MARK-2 +11238..1123C ; Terminal_Punctuation # Po [5] KHOJKI DANDA..KHOJKI DOUBLE SECTION MARK +112A9 ; Terminal_Punctuation # Po MULTANI SECTION MARK +115C2..115C5 ; Terminal_Punctuation # Po [4] SIDDHAM DANDA..SIDDHAM SEPARATOR BAR +115C9..115D7 ; Terminal_Punctuation # Po [15] SIDDHAM END OF TEXT MARK..SIDDHAM SECTION MARK WITH CIRCLES AND FOUR ENCLOSURES +11641..11642 ; Terminal_Punctuation # Po [2] MODI DANDA..MODI DOUBLE DANDA +1173C..1173E ; Terminal_Punctuation # Po [3] AHOM SIGN SMALL SECTION..AHOM SIGN RULAI +12470..12474 ; Terminal_Punctuation # Po [5] CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER..CUNEIFORM PUNCTUATION SIGN DIAGONAL QUADCOLON +16A6E..16A6F ; Terminal_Punctuation # Po [2] MRO DANDA..MRO DOUBLE DANDA +16AF5 ; Terminal_Punctuation # Po BASSA VAH FULL STOP +16B37..16B39 ; Terminal_Punctuation # Po [3] PAHAWH HMONG SIGN VOS THOM..PAHAWH HMONG SIGN CIM CHEEM +16B44 ; Terminal_Punctuation # Po PAHAWH HMONG SIGN XAUS +1BC9F ; Terminal_Punctuation # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP +1DA87..1DA8A ; Terminal_Punctuation # Po [4] SIGNWRITING COMMA..SIGNWRITING COLON + +# Total code points: 238 + +# ================================================ + +005E ; Other_Math # Sk CIRCUMFLEX ACCENT +03D0..03D2 ; Other_Math # L& [3] GREEK BETA SYMBOL..GREEK UPSILON WITH HOOK SYMBOL +03D5 ; Other_Math # L& GREEK PHI SYMBOL +03F0..03F1 ; Other_Math # L& [2] GREEK KAPPA SYMBOL..GREEK RHO SYMBOL +03F4..03F5 ; Other_Math # L& [2] GREEK CAPITAL THETA SYMBOL..GREEK LUNATE EPSILON SYMBOL +2016 ; Other_Math # Po DOUBLE VERTICAL 
LINE +2032..2034 ; Other_Math # Po [3] PRIME..TRIPLE PRIME +2040 ; Other_Math # Pc CHARACTER TIE +2061..2064 ; Other_Math # Cf [4] FUNCTION APPLICATION..INVISIBLE PLUS +207D ; Other_Math # Ps SUPERSCRIPT LEFT PARENTHESIS +207E ; Other_Math # Pe SUPERSCRIPT RIGHT PARENTHESIS +208D ; Other_Math # Ps SUBSCRIPT LEFT PARENTHESIS +208E ; Other_Math # Pe SUBSCRIPT RIGHT PARENTHESIS +20D0..20DC ; Other_Math # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE +20E1 ; Other_Math # Mn COMBINING LEFT RIGHT ARROW ABOVE +20E5..20E6 ; Other_Math # Mn [2] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING DOUBLE VERTICAL STROKE OVERLAY +20EB..20EF ; Other_Math # Mn [5] COMBINING LONG DOUBLE SOLIDUS OVERLAY..COMBINING RIGHT ARROW BELOW +2102 ; Other_Math # L& DOUBLE-STRUCK CAPITAL C +2107 ; Other_Math # L& EULER CONSTANT +210A..2113 ; Other_Math # L& [10] SCRIPT SMALL G..SCRIPT SMALL L +2115 ; Other_Math # L& DOUBLE-STRUCK CAPITAL N +2119..211D ; Other_Math # L& [5] DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R +2124 ; Other_Math # L& DOUBLE-STRUCK CAPITAL Z +2128 ; Other_Math # L& BLACK-LETTER CAPITAL Z +2129 ; Other_Math # So TURNED GREEK SMALL LETTER IOTA +212C..212D ; Other_Math # L& [2] SCRIPT CAPITAL B..BLACK-LETTER CAPITAL C +212F..2131 ; Other_Math # L& [3] SCRIPT SMALL E..SCRIPT CAPITAL F +2133..2134 ; Other_Math # L& [2] SCRIPT CAPITAL M..SCRIPT SMALL O +2135..2138 ; Other_Math # Lo [4] ALEF SYMBOL..DALET SYMBOL +213C..213F ; Other_Math # L& [4] DOUBLE-STRUCK SMALL PI..DOUBLE-STRUCK CAPITAL PI +2145..2149 ; Other_Math # L& [5] DOUBLE-STRUCK ITALIC CAPITAL D..DOUBLE-STRUCK ITALIC SMALL J +2195..2199 ; Other_Math # So [5] UP DOWN ARROW..SOUTH WEST ARROW +219C..219F ; Other_Math # So [4] LEFTWARDS WAVE ARROW..UPWARDS TWO HEADED ARROW +21A1..21A2 ; Other_Math # So [2] DOWNWARDS TWO HEADED ARROW..LEFTWARDS ARROW WITH TAIL +21A4..21A5 ; Other_Math # So [2] LEFTWARDS ARROW FROM BAR..UPWARDS ARROW FROM BAR +21A7 ; Other_Math # So DOWNWARDS ARROW FROM BAR +21A9..21AD ; 
Other_Math # So [5] LEFTWARDS ARROW WITH HOOK..LEFT RIGHT WAVE ARROW +21B0..21B1 ; Other_Math # So [2] UPWARDS ARROW WITH TIP LEFTWARDS..UPWARDS ARROW WITH TIP RIGHTWARDS +21B6..21B7 ; Other_Math # So [2] ANTICLOCKWISE TOP SEMICIRCLE ARROW..CLOCKWISE TOP SEMICIRCLE ARROW +21BC..21CD ; Other_Math # So [18] LEFTWARDS HARPOON WITH BARB UPWARDS..LEFTWARDS DOUBLE ARROW WITH STROKE +21D0..21D1 ; Other_Math # So [2] LEFTWARDS DOUBLE ARROW..UPWARDS DOUBLE ARROW +21D3 ; Other_Math # So DOWNWARDS DOUBLE ARROW +21D5..21DB ; Other_Math # So [7] UP DOWN DOUBLE ARROW..RIGHTWARDS TRIPLE ARROW +21DD ; Other_Math # So RIGHTWARDS SQUIGGLE ARROW +21E4..21E5 ; Other_Math # So [2] LEFTWARDS ARROW TO BAR..RIGHTWARDS ARROW TO BAR +2308 ; Other_Math # Ps LEFT CEILING +2309 ; Other_Math # Pe RIGHT CEILING +230A ; Other_Math # Ps LEFT FLOOR +230B ; Other_Math # Pe RIGHT FLOOR +23B4..23B5 ; Other_Math # So [2] TOP SQUARE BRACKET..BOTTOM SQUARE BRACKET +23B7 ; Other_Math # So RADICAL SYMBOL BOTTOM +23D0 ; Other_Math # So VERTICAL LINE EXTENSION +23E2 ; Other_Math # So WHITE TRAPEZIUM +25A0..25A1 ; Other_Math # So [2] BLACK SQUARE..WHITE SQUARE +25AE..25B6 ; Other_Math # So [9] BLACK VERTICAL RECTANGLE..BLACK RIGHT-POINTING TRIANGLE +25BC..25C0 ; Other_Math # So [5] BLACK DOWN-POINTING TRIANGLE..BLACK LEFT-POINTING TRIANGLE +25C6..25C7 ; Other_Math # So [2] BLACK DIAMOND..WHITE DIAMOND +25CA..25CB ; Other_Math # So [2] LOZENGE..WHITE CIRCLE +25CF..25D3 ; Other_Math # So [5] BLACK CIRCLE..CIRCLE WITH UPPER HALF BLACK +25E2 ; Other_Math # So BLACK LOWER RIGHT TRIANGLE +25E4 ; Other_Math # So BLACK UPPER LEFT TRIANGLE +25E7..25EC ; Other_Math # So [6] SQUARE WITH LEFT HALF BLACK..WHITE UP-POINTING TRIANGLE WITH DOT +2605..2606 ; Other_Math # So [2] BLACK STAR..WHITE STAR +2640 ; Other_Math # So FEMALE SIGN +2642 ; Other_Math # So MALE SIGN +2660..2663 ; Other_Math # So [4] BLACK SPADE SUIT..BLACK CLUB SUIT +266D..266E ; Other_Math # So [2] MUSIC FLAT SIGN..MUSIC NATURAL SIGN +27C5 ; Other_Math # 
Ps LEFT S-SHAPED BAG DELIMITER +27C6 ; Other_Math # Pe RIGHT S-SHAPED BAG DELIMITER +27E6 ; Other_Math # Ps MATHEMATICAL LEFT WHITE SQUARE BRACKET +27E7 ; Other_Math # Pe MATHEMATICAL RIGHT WHITE SQUARE BRACKET +27E8 ; Other_Math # Ps MATHEMATICAL LEFT ANGLE BRACKET +27E9 ; Other_Math # Pe MATHEMATICAL RIGHT ANGLE BRACKET +27EA ; Other_Math # Ps MATHEMATICAL LEFT DOUBLE ANGLE BRACKET +27EB ; Other_Math # Pe MATHEMATICAL RIGHT DOUBLE ANGLE BRACKET +27EC ; Other_Math # Ps MATHEMATICAL LEFT WHITE TORTOISE SHELL BRACKET +27ED ; Other_Math # Pe MATHEMATICAL RIGHT WHITE TORTOISE SHELL BRACKET +27EE ; Other_Math # Ps MATHEMATICAL LEFT FLATTENED PARENTHESIS +27EF ; Other_Math # Pe MATHEMATICAL RIGHT FLATTENED PARENTHESIS +2983 ; Other_Math # Ps LEFT WHITE CURLY BRACKET +2984 ; Other_Math # Pe RIGHT WHITE CURLY BRACKET +2985 ; Other_Math # Ps LEFT WHITE PARENTHESIS +2986 ; Other_Math # Pe RIGHT WHITE PARENTHESIS +2987 ; Other_Math # Ps Z NOTATION LEFT IMAGE BRACKET +2988 ; Other_Math # Pe Z NOTATION RIGHT IMAGE BRACKET +2989 ; Other_Math # Ps Z NOTATION LEFT BINDING BRACKET +298A ; Other_Math # Pe Z NOTATION RIGHT BINDING BRACKET +298B ; Other_Math # Ps LEFT SQUARE BRACKET WITH UNDERBAR +298C ; Other_Math # Pe RIGHT SQUARE BRACKET WITH UNDERBAR +298D ; Other_Math # Ps LEFT SQUARE BRACKET WITH TICK IN TOP CORNER +298E ; Other_Math # Pe RIGHT SQUARE BRACKET WITH TICK IN BOTTOM CORNER +298F ; Other_Math # Ps LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER +2990 ; Other_Math # Pe RIGHT SQUARE BRACKET WITH TICK IN TOP CORNER +2991 ; Other_Math # Ps LEFT ANGLE BRACKET WITH DOT +2992 ; Other_Math # Pe RIGHT ANGLE BRACKET WITH DOT +2993 ; Other_Math # Ps LEFT ARC LESS-THAN BRACKET +2994 ; Other_Math # Pe RIGHT ARC GREATER-THAN BRACKET +2995 ; Other_Math # Ps DOUBLE LEFT ARC GREATER-THAN BRACKET +2996 ; Other_Math # Pe DOUBLE RIGHT ARC LESS-THAN BRACKET +2997 ; Other_Math # Ps LEFT BLACK TORTOISE SHELL BRACKET +2998 ; Other_Math # Pe RIGHT BLACK TORTOISE SHELL BRACKET +29D8 ; 
Other_Math # Ps LEFT WIGGLY FENCE +29D9 ; Other_Math # Pe RIGHT WIGGLY FENCE +29DA ; Other_Math # Ps LEFT DOUBLE WIGGLY FENCE +29DB ; Other_Math # Pe RIGHT DOUBLE WIGGLY FENCE +29FC ; Other_Math # Ps LEFT-POINTING CURVED ANGLE BRACKET +29FD ; Other_Math # Pe RIGHT-POINTING CURVED ANGLE BRACKET +FE61 ; Other_Math # Po SMALL ASTERISK +FE63 ; Other_Math # Pd SMALL HYPHEN-MINUS +FE68 ; Other_Math # Po SMALL REVERSE SOLIDUS +FF3C ; Other_Math # Po FULLWIDTH REVERSE SOLIDUS +FF3E ; Other_Math # Sk FULLWIDTH CIRCUMFLEX ACCENT +1D400..1D454 ; Other_Math # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G +1D456..1D49C ; Other_Math # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A +1D49E..1D49F ; Other_Math # L& [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D +1D4A2 ; Other_Math # L& MATHEMATICAL SCRIPT CAPITAL G +1D4A5..1D4A6 ; Other_Math # L& [2] MATHEMATICAL SCRIPT CAPITAL J..MATHEMATICAL SCRIPT CAPITAL K +1D4A9..1D4AC ; Other_Math # L& [4] MATHEMATICAL SCRIPT CAPITAL N..MATHEMATICAL SCRIPT CAPITAL Q +1D4AE..1D4B9 ; Other_Math # L& [12] MATHEMATICAL SCRIPT CAPITAL S..MATHEMATICAL SCRIPT SMALL D +1D4BB ; Other_Math # L& MATHEMATICAL SCRIPT SMALL F +1D4BD..1D4C3 ; Other_Math # L& [7] MATHEMATICAL SCRIPT SMALL H..MATHEMATICAL SCRIPT SMALL N +1D4C5..1D505 ; Other_Math # L& [65] MATHEMATICAL SCRIPT SMALL P..MATHEMATICAL FRAKTUR CAPITAL B +1D507..1D50A ; Other_Math # L& [4] MATHEMATICAL FRAKTUR CAPITAL D..MATHEMATICAL FRAKTUR CAPITAL G +1D50D..1D514 ; Other_Math # L& [8] MATHEMATICAL FRAKTUR CAPITAL J..MATHEMATICAL FRAKTUR CAPITAL Q +1D516..1D51C ; Other_Math # L& [7] MATHEMATICAL FRAKTUR CAPITAL S..MATHEMATICAL FRAKTUR CAPITAL Y +1D51E..1D539 ; Other_Math # L& [28] MATHEMATICAL FRAKTUR SMALL A..MATHEMATICAL DOUBLE-STRUCK CAPITAL B +1D53B..1D53E ; Other_Math # L& [4] MATHEMATICAL DOUBLE-STRUCK CAPITAL D..MATHEMATICAL DOUBLE-STRUCK CAPITAL G +1D540..1D544 ; Other_Math # L& [5] MATHEMATICAL DOUBLE-STRUCK CAPITAL I..MATHEMATICAL 
DOUBLE-STRUCK CAPITAL M +1D546 ; Other_Math # L& MATHEMATICAL DOUBLE-STRUCK CAPITAL O +1D54A..1D550 ; Other_Math # L& [7] MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEMATICAL DOUBLE-STRUCK CAPITAL Y +1D552..1D6A5 ; Other_Math # L& [340] MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMATICAL ITALIC SMALL DOTLESS J +1D6A8..1D6C0 ; Other_Math # L& [25] MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICAL BOLD CAPITAL OMEGA +1D6C2..1D6DA ; Other_Math # L& [25] MATHEMATICAL BOLD SMALL ALPHA..MATHEMATICAL BOLD SMALL OMEGA +1D6DC..1D6FA ; Other_Math # L& [31] MATHEMATICAL BOLD EPSILON SYMBOL..MATHEMATICAL ITALIC CAPITAL OMEGA +1D6FC..1D714 ; Other_Math # L& [25] MATHEMATICAL ITALIC SMALL ALPHA..MATHEMATICAL ITALIC SMALL OMEGA +1D716..1D734 ; Other_Math # L& [31] MATHEMATICAL ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD ITALIC CAPITAL OMEGA +1D736..1D74E ; Other_Math # L& [25] MATHEMATICAL BOLD ITALIC SMALL ALPHA..MATHEMATICAL BOLD ITALIC SMALL OMEGA +1D750..1D76E ; Other_Math # L& [31] MATHEMATICAL BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA +1D770..1D788 ; Other_Math # L& [25] MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA +1D78A..1D7A8 ; Other_Math # L& [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA +1D7AA..1D7C2 ; Other_Math # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA +1D7C4..1D7CB ; Other_Math # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA +1D7CE..1D7FF ; Other_Math # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE +1EE00..1EE03 ; Other_Math # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; Other_Math # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; Other_Math # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; Other_Math # Lo ARABIC MATHEMATICAL 
INITIAL HEH +1EE27 ; Other_Math # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; Other_Math # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; Other_Math # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; Other_Math # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; Other_Math # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; Other_Math # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; Other_Math # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; Other_Math # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; Other_Math # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; Other_Math # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; Other_Math # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; Other_Math # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; Other_Math # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; Other_Math # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; Other_Math # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; Other_Math # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; Other_Math # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; Other_Math # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; Other_Math # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; Other_Math # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; Other_Math # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; Other_Math # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; Other_Math # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; Other_Math # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; Other_Math # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; Other_Math # Lo [17] ARABIC MATHEMATICAL 
LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; Other_Math # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; Other_Math # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; Other_Math # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN + +# Total code points: 1362 + +# ================================================ + +0030..0039 ; Hex_Digit # Nd [10] DIGIT ZERO..DIGIT NINE +0041..0046 ; Hex_Digit # L& [6] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER F +0061..0066 ; Hex_Digit # L& [6] LATIN SMALL LETTER A..LATIN SMALL LETTER F +FF10..FF19 ; Hex_Digit # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE +FF21..FF26 ; Hex_Digit # L& [6] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER F +FF41..FF46 ; Hex_Digit # L& [6] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER F + +# Total code points: 44 + +# ================================================ + +0030..0039 ; ASCII_Hex_Digit # Nd [10] DIGIT ZERO..DIGIT NINE +0041..0046 ; ASCII_Hex_Digit # L& [6] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER F +0061..0066 ; ASCII_Hex_Digit # L& [6] LATIN SMALL LETTER A..LATIN SMALL LETTER F + +# Total code points: 22 + +# ================================================ + +0345 ; Other_Alphabetic # Mn COMBINING GREEK YPOGEGRAMMENI +05B0..05BD ; Other_Alphabetic # Mn [14] HEBREW POINT SHEVA..HEBREW POINT METEG +05BF ; Other_Alphabetic # Mn HEBREW POINT RAFE +05C1..05C2 ; Other_Alphabetic # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT +05C4..05C5 ; Other_Alphabetic # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT +05C7 ; Other_Alphabetic # Mn HEBREW POINT QAMATS QATAN +0610..061A ; Other_Alphabetic # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA +064B..0657 ; Other_Alphabetic # Mn [13] ARABIC FATHATAN..ARABIC INVERTED DAMMA +0659..065F ; Other_Alphabetic # Mn [7] ARABIC 
ZWARAKAY..ARABIC WAVY HAMZA BELOW +0670 ; Other_Alphabetic # Mn ARABIC LETTER SUPERSCRIPT ALEF +06D6..06DC ; Other_Alphabetic # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN +06E1..06E4 ; Other_Alphabetic # Mn [4] ARABIC SMALL HIGH DOTLESS HEAD OF KHAH..ARABIC SMALL HIGH MADDA +06E7..06E8 ; Other_Alphabetic # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON +06ED ; Other_Alphabetic # Mn ARABIC SMALL LOW MEEM +0711 ; Other_Alphabetic # Mn SYRIAC LETTER SUPERSCRIPT ALAPH +0730..073F ; Other_Alphabetic # Mn [16] SYRIAC PTHAHA ABOVE..SYRIAC RWAHA +07A6..07B0 ; Other_Alphabetic # Mn [11] THAANA ABAFILI..THAANA SUKUN +0816..0817 ; Other_Alphabetic # Mn [2] SAMARITAN MARK IN..SAMARITAN MARK IN-ALAF +081B..0823 ; Other_Alphabetic # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A +0825..0827 ; Other_Alphabetic # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U +0829..082C ; Other_Alphabetic # Mn [4] SAMARITAN VOWEL SIGN LONG I..SAMARITAN VOWEL SIGN SUKUN +08E3..08E9 ; Other_Alphabetic # Mn [7] ARABIC TURNED DAMMA BELOW..ARABIC CURLY KASRATAN +08F0..0902 ; Other_Alphabetic # Mn [19] ARABIC OPEN FATHATAN..DEVANAGARI SIGN ANUSVARA +0903 ; Other_Alphabetic # Mc DEVANAGARI SIGN VISARGA +093A ; Other_Alphabetic # Mn DEVANAGARI VOWEL SIGN OE +093B ; Other_Alphabetic # Mc DEVANAGARI VOWEL SIGN OOE +093E..0940 ; Other_Alphabetic # Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II +0941..0948 ; Other_Alphabetic # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI +0949..094C ; Other_Alphabetic # Mc [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU +094E..094F ; Other_Alphabetic # Mc [2] DEVANAGARI VOWEL SIGN PRISHTHAMATRA E..DEVANAGARI VOWEL SIGN AW +0955..0957 ; Other_Alphabetic # Mn [3] DEVANAGARI VOWEL SIGN CANDRA LONG E..DEVANAGARI VOWEL SIGN UUE +0962..0963 ; Other_Alphabetic # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL +0981 ; Other_Alphabetic # Mn 
BENGALI SIGN CANDRABINDU +0982..0983 ; Other_Alphabetic # Mc [2] BENGALI SIGN ANUSVARA..BENGALI SIGN VISARGA +09BE..09C0 ; Other_Alphabetic # Mc [3] BENGALI VOWEL SIGN AA..BENGALI VOWEL SIGN II +09C1..09C4 ; Other_Alphabetic # Mn [4] BENGALI VOWEL SIGN U..BENGALI VOWEL SIGN VOCALIC RR +09C7..09C8 ; Other_Alphabetic # Mc [2] BENGALI VOWEL SIGN E..BENGALI VOWEL SIGN AI +09CB..09CC ; Other_Alphabetic # Mc [2] BENGALI VOWEL SIGN O..BENGALI VOWEL SIGN AU +09D7 ; Other_Alphabetic # Mc BENGALI AU LENGTH MARK +09E2..09E3 ; Other_Alphabetic # Mn [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL +0A01..0A02 ; Other_Alphabetic # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI +0A03 ; Other_Alphabetic # Mc GURMUKHI SIGN VISARGA +0A3E..0A40 ; Other_Alphabetic # Mc [3] GURMUKHI VOWEL SIGN AA..GURMUKHI VOWEL SIGN II +0A41..0A42 ; Other_Alphabetic # Mn [2] GURMUKHI VOWEL SIGN U..GURMUKHI VOWEL SIGN UU +0A47..0A48 ; Other_Alphabetic # Mn [2] GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI +0A4B..0A4C ; Other_Alphabetic # Mn [2] GURMUKHI VOWEL SIGN OO..GURMUKHI VOWEL SIGN AU +0A51 ; Other_Alphabetic # Mn GURMUKHI SIGN UDAAT +0A70..0A71 ; Other_Alphabetic # Mn [2] GURMUKHI TIPPI..GURMUKHI ADDAK +0A75 ; Other_Alphabetic # Mn GURMUKHI SIGN YAKASH +0A81..0A82 ; Other_Alphabetic # Mn [2] GUJARATI SIGN CANDRABINDU..GUJARATI SIGN ANUSVARA +0A83 ; Other_Alphabetic # Mc GUJARATI SIGN VISARGA +0ABE..0AC0 ; Other_Alphabetic # Mc [3] GUJARATI VOWEL SIGN AA..GUJARATI VOWEL SIGN II +0AC1..0AC5 ; Other_Alphabetic # Mn [5] GUJARATI VOWEL SIGN U..GUJARATI VOWEL SIGN CANDRA E +0AC7..0AC8 ; Other_Alphabetic # Mn [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI +0AC9 ; Other_Alphabetic # Mc GUJARATI VOWEL SIGN CANDRA O +0ACB..0ACC ; Other_Alphabetic # Mc [2] GUJARATI VOWEL SIGN O..GUJARATI VOWEL SIGN AU +0AE2..0AE3 ; Other_Alphabetic # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL +0B01 ; Other_Alphabetic # Mn ORIYA SIGN CANDRABINDU +0B02..0B03 ; 
Other_Alphabetic # Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA +0B3E ; Other_Alphabetic # Mc ORIYA VOWEL SIGN AA +0B3F ; Other_Alphabetic # Mn ORIYA VOWEL SIGN I +0B40 ; Other_Alphabetic # Mc ORIYA VOWEL SIGN II +0B41..0B44 ; Other_Alphabetic # Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR +0B47..0B48 ; Other_Alphabetic # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI +0B4B..0B4C ; Other_Alphabetic # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU +0B56 ; Other_Alphabetic # Mn ORIYA AI LENGTH MARK +0B57 ; Other_Alphabetic # Mc ORIYA AU LENGTH MARK +0B62..0B63 ; Other_Alphabetic # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL +0B82 ; Other_Alphabetic # Mn TAMIL SIGN ANUSVARA +0BBE..0BBF ; Other_Alphabetic # Mc [2] TAMIL VOWEL SIGN AA..TAMIL VOWEL SIGN I +0BC0 ; Other_Alphabetic # Mn TAMIL VOWEL SIGN II +0BC1..0BC2 ; Other_Alphabetic # Mc [2] TAMIL VOWEL SIGN U..TAMIL VOWEL SIGN UU +0BC6..0BC8 ; Other_Alphabetic # Mc [3] TAMIL VOWEL SIGN E..TAMIL VOWEL SIGN AI +0BCA..0BCC ; Other_Alphabetic # Mc [3] TAMIL VOWEL SIGN O..TAMIL VOWEL SIGN AU +0BD7 ; Other_Alphabetic # Mc TAMIL AU LENGTH MARK +0C00 ; Other_Alphabetic # Mn TELUGU SIGN COMBINING CANDRABINDU ABOVE +0C01..0C03 ; Other_Alphabetic # Mc [3] TELUGU SIGN CANDRABINDU..TELUGU SIGN VISARGA +0C3E..0C40 ; Other_Alphabetic # Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II +0C41..0C44 ; Other_Alphabetic # Mc [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR +0C46..0C48 ; Other_Alphabetic # Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI +0C4A..0C4C ; Other_Alphabetic # Mn [3] TELUGU VOWEL SIGN O..TELUGU VOWEL SIGN AU +0C55..0C56 ; Other_Alphabetic # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK +0C62..0C63 ; Other_Alphabetic # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL +0C81 ; Other_Alphabetic # Mn KANNADA SIGN CANDRABINDU +0C82..0C83 ; Other_Alphabetic # Mc [2] KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA +0CBE ; Other_Alphabetic # Mc KANNADA VOWEL SIGN AA 
+0CBF ; Other_Alphabetic # Mn KANNADA VOWEL SIGN I +0CC0..0CC4 ; Other_Alphabetic # Mc [5] KANNADA VOWEL SIGN II..KANNADA VOWEL SIGN VOCALIC RR +0CC6 ; Other_Alphabetic # Mn KANNADA VOWEL SIGN E +0CC7..0CC8 ; Other_Alphabetic # Mc [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI +0CCA..0CCB ; Other_Alphabetic # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO +0CCC ; Other_Alphabetic # Mn KANNADA VOWEL SIGN AU +0CD5..0CD6 ; Other_Alphabetic # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK +0CE2..0CE3 ; Other_Alphabetic # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL +0D01 ; Other_Alphabetic # Mn MALAYALAM SIGN CANDRABINDU +0D02..0D03 ; Other_Alphabetic # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA +0D3E..0D40 ; Other_Alphabetic # Mc [3] MALAYALAM VOWEL SIGN AA..MALAYALAM VOWEL SIGN II +0D41..0D44 ; Other_Alphabetic # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR +0D46..0D48 ; Other_Alphabetic # Mc [3] MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI +0D4A..0D4C ; Other_Alphabetic # Mc [3] MALAYALAM VOWEL SIGN O..MALAYALAM VOWEL SIGN AU +0D57 ; Other_Alphabetic # Mc MALAYALAM AU LENGTH MARK +0D62..0D63 ; Other_Alphabetic # Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL +0D82..0D83 ; Other_Alphabetic # Mc [2] SINHALA SIGN ANUSVARAYA..SINHALA SIGN VISARGAYA +0DCF..0DD1 ; Other_Alphabetic # Mc [3] SINHALA VOWEL SIGN AELA-PILLA..SINHALA VOWEL SIGN DIGA AEDA-PILLA +0DD2..0DD4 ; Other_Alphabetic # Mn [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA +0DD6 ; Other_Alphabetic # Mn SINHALA VOWEL SIGN DIGA PAA-PILLA +0DD8..0DDF ; Other_Alphabetic # Mc [8] SINHALA VOWEL SIGN GAETTA-PILLA..SINHALA VOWEL SIGN GAYANUKITTA +0DF2..0DF3 ; Other_Alphabetic # Mc [2] SINHALA VOWEL SIGN DIGA GAETTA-PILLA..SINHALA VOWEL SIGN DIGA GAYANUKITTA +0E31 ; Other_Alphabetic # Mn THAI CHARACTER MAI HAN-AKAT +0E34..0E3A ; Other_Alphabetic # Mn [7] THAI CHARACTER SARA I..THAI CHARACTER PHINTHU 
+0E4D ; Other_Alphabetic # Mn THAI CHARACTER NIKHAHIT +0EB1 ; Other_Alphabetic # Mn LAO VOWEL SIGN MAI KAN +0EB4..0EB9 ; Other_Alphabetic # Mn [6] LAO VOWEL SIGN I..LAO VOWEL SIGN UU +0EBB..0EBC ; Other_Alphabetic # Mn [2] LAO VOWEL SIGN MAI KON..LAO SEMIVOWEL SIGN LO +0ECD ; Other_Alphabetic # Mn LAO NIGGAHITA +0F71..0F7E ; Other_Alphabetic # Mn [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO +0F7F ; Other_Alphabetic # Mc TIBETAN SIGN RNAM BCAD +0F80..0F81 ; Other_Alphabetic # Mn [2] TIBETAN VOWEL SIGN REVERSED I..TIBETAN VOWEL SIGN REVERSED II +0F8D..0F97 ; Other_Alphabetic # Mn [11] TIBETAN SUBJOINED SIGN LCE TSA CAN..TIBETAN SUBJOINED LETTER JA +0F99..0FBC ; Other_Alphabetic # Mn [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA +102B..102C ; Other_Alphabetic # Mc [2] MYANMAR VOWEL SIGN TALL AA..MYANMAR VOWEL SIGN AA +102D..1030 ; Other_Alphabetic # Mn [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU +1031 ; Other_Alphabetic # Mc MYANMAR VOWEL SIGN E +1032..1036 ; Other_Alphabetic # Mn [5] MYANMAR VOWEL SIGN AI..MYANMAR SIGN ANUSVARA +1038 ; Other_Alphabetic # Mc MYANMAR SIGN VISARGA +103B..103C ; Other_Alphabetic # Mc [2] MYANMAR CONSONANT SIGN MEDIAL YA..MYANMAR CONSONANT SIGN MEDIAL RA +103D..103E ; Other_Alphabetic # Mn [2] MYANMAR CONSONANT SIGN MEDIAL WA..MYANMAR CONSONANT SIGN MEDIAL HA +1056..1057 ; Other_Alphabetic # Mc [2] MYANMAR VOWEL SIGN VOCALIC R..MYANMAR VOWEL SIGN VOCALIC RR +1058..1059 ; Other_Alphabetic # Mn [2] MYANMAR VOWEL SIGN VOCALIC L..MYANMAR VOWEL SIGN VOCALIC LL +105E..1060 ; Other_Alphabetic # Mn [3] MYANMAR CONSONANT SIGN MON MEDIAL NA..MYANMAR CONSONANT SIGN MON MEDIAL LA +1062 ; Other_Alphabetic # Mc MYANMAR VOWEL SIGN SGAW KAREN EU +1067..1068 ; Other_Alphabetic # Mc [2] MYANMAR VOWEL SIGN WESTERN PWO KAREN EU..MYANMAR VOWEL SIGN WESTERN PWO KAREN UE +1071..1074 ; Other_Alphabetic # Mn [4] MYANMAR VOWEL SIGN GEBA KAREN I..MYANMAR VOWEL SIGN KAYAH EE +1082 ; Other_Alphabetic # Mn MYANMAR CONSONANT 
SIGN SHAN MEDIAL WA +1083..1084 ; Other_Alphabetic # Mc [2] MYANMAR VOWEL SIGN SHAN AA..MYANMAR VOWEL SIGN SHAN E +1085..1086 ; Other_Alphabetic # Mn [2] MYANMAR VOWEL SIGN SHAN E ABOVE..MYANMAR VOWEL SIGN SHAN FINAL Y +109C ; Other_Alphabetic # Mc MYANMAR VOWEL SIGN AITON A +109D ; Other_Alphabetic # Mn MYANMAR VOWEL SIGN AITON AI +135F ; Other_Alphabetic # Mn ETHIOPIC COMBINING GEMINATION MARK +1712..1713 ; Other_Alphabetic # Mn [2] TAGALOG VOWEL SIGN I..TAGALOG VOWEL SIGN U +1732..1733 ; Other_Alphabetic # Mn [2] HANUNOO VOWEL SIGN I..HANUNOO VOWEL SIGN U +1752..1753 ; Other_Alphabetic # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U +1772..1773 ; Other_Alphabetic # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U +17B6 ; Other_Alphabetic # Mc KHMER VOWEL SIGN AA +17B7..17BD ; Other_Alphabetic # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA +17BE..17C5 ; Other_Alphabetic # Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU +17C6 ; Other_Alphabetic # Mn KHMER SIGN NIKAHIT +17C7..17C8 ; Other_Alphabetic # Mc [2] KHMER SIGN REAHMUK..KHMER SIGN YUUKALEAPINTU +18A9 ; Other_Alphabetic # Mn MONGOLIAN LETTER ALI GALI DAGALGA +1920..1922 ; Other_Alphabetic # Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U +1923..1926 ; Other_Alphabetic # Mc [4] LIMBU VOWEL SIGN EE..LIMBU VOWEL SIGN AU +1927..1928 ; Other_Alphabetic # Mn [2] LIMBU VOWEL SIGN E..LIMBU VOWEL SIGN O +1929..192B ; Other_Alphabetic # Mc [3] LIMBU SUBJOINED LETTER YA..LIMBU SUBJOINED LETTER WA +1930..1931 ; Other_Alphabetic # Mc [2] LIMBU SMALL LETTER KA..LIMBU SMALL LETTER NGA +1932 ; Other_Alphabetic # Mn LIMBU SMALL LETTER ANUSVARA +1933..1938 ; Other_Alphabetic # Mc [6] LIMBU SMALL LETTER TA..LIMBU SMALL LETTER LA +1A17..1A18 ; Other_Alphabetic # Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U +1A19..1A1A ; Other_Alphabetic # Mc [2] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN O +1A1B ; Other_Alphabetic # Mn BUGINESE VOWEL SIGN AE +1A55 ; Other_Alphabetic # Mc TAI THAM CONSONANT SIGN MEDIAL RA +1A56 ; 
Other_Alphabetic # Mn TAI THAM CONSONANT SIGN MEDIAL LA +1A57 ; Other_Alphabetic # Mc TAI THAM CONSONANT SIGN LA TANG LAI +1A58..1A5E ; Other_Alphabetic # Mn [7] TAI THAM SIGN MAI KANG LAI..TAI THAM CONSONANT SIGN SA +1A61 ; Other_Alphabetic # Mc TAI THAM VOWEL SIGN A +1A62 ; Other_Alphabetic # Mn TAI THAM VOWEL SIGN MAI SAT +1A63..1A64 ; Other_Alphabetic # Mc [2] TAI THAM VOWEL SIGN AA..TAI THAM VOWEL SIGN TALL AA +1A65..1A6C ; Other_Alphabetic # Mn [8] TAI THAM VOWEL SIGN I..TAI THAM VOWEL SIGN OA BELOW +1A6D..1A72 ; Other_Alphabetic # Mc [6] TAI THAM VOWEL SIGN OY..TAI THAM VOWEL SIGN THAM AI +1A73..1A74 ; Other_Alphabetic # Mn [2] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN MAI KANG +1B00..1B03 ; Other_Alphabetic # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG +1B04 ; Other_Alphabetic # Mc BALINESE SIGN BISAH +1B35 ; Other_Alphabetic # Mc BALINESE VOWEL SIGN TEDUNG +1B36..1B3A ; Other_Alphabetic # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA +1B3B ; Other_Alphabetic # Mc BALINESE VOWEL SIGN RA REPA TEDUNG +1B3C ; Other_Alphabetic # Mn BALINESE VOWEL SIGN LA LENGA +1B3D..1B41 ; Other_Alphabetic # Mc [5] BALINESE VOWEL SIGN LA LENGA TEDUNG..BALINESE VOWEL SIGN TALING REPA TEDUNG +1B42 ; Other_Alphabetic # Mn BALINESE VOWEL SIGN PEPET +1B43 ; Other_Alphabetic # Mc BALINESE VOWEL SIGN PEPET TEDUNG +1B80..1B81 ; Other_Alphabetic # Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR +1B82 ; Other_Alphabetic # Mc SUNDANESE SIGN PANGWISAD +1BA1 ; Other_Alphabetic # Mc SUNDANESE CONSONANT SIGN PAMINGKAL +1BA2..1BA5 ; Other_Alphabetic # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU +1BA6..1BA7 ; Other_Alphabetic # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG +1BA8..1BA9 ; Other_Alphabetic # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG +1BAC..1BAD ; Other_Alphabetic # Mn [2] SUNDANESE CONSONANT SIGN PASANGAN MA..SUNDANESE CONSONANT SIGN PASANGAN WA +1BE7 ; Other_Alphabetic 
# Mc BATAK VOWEL SIGN E +1BE8..1BE9 ; Other_Alphabetic # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE +1BEA..1BEC ; Other_Alphabetic # Mc [3] BATAK VOWEL SIGN I..BATAK VOWEL SIGN O +1BED ; Other_Alphabetic # Mn BATAK VOWEL SIGN KARO O +1BEE ; Other_Alphabetic # Mc BATAK VOWEL SIGN U +1BEF..1BF1 ; Other_Alphabetic # Mn [3] BATAK VOWEL SIGN U FOR SIMALUNGUN SA..BATAK CONSONANT SIGN H +1C24..1C2B ; Other_Alphabetic # Mc [8] LEPCHA SUBJOINED LETTER YA..LEPCHA VOWEL SIGN UU +1C2C..1C33 ; Other_Alphabetic # Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T +1C34..1C35 ; Other_Alphabetic # Mc [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG +1CF2..1CF3 ; Other_Alphabetic # Mc [2] VEDIC SIGN ARDHAVISARGA..VEDIC SIGN ROTATED ARDHAVISARGA +1DE7..1DF4 ; Other_Alphabetic # Mn [14] COMBINING LATIN SMALL LETTER ALPHA..COMBINING LATIN SMALL LETTER U WITH DIAERESIS +24B6..24E9 ; Other_Alphabetic # So [52] CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN SMALL LETTER Z +2DE0..2DFF ; Other_Alphabetic # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS +A674..A67B ; Other_Alphabetic # Mn [8] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC LETTER OMEGA +A69E..A69F ; Other_Alphabetic # Mn [2] COMBINING CYRILLIC LETTER EF..COMBINING CYRILLIC LETTER IOTIFIED E +A823..A824 ; Other_Alphabetic # Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I +A825..A826 ; Other_Alphabetic # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E +A827 ; Other_Alphabetic # Mc SYLOTI NAGRI VOWEL SIGN OO +A880..A881 ; Other_Alphabetic # Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA +A8B4..A8C3 ; Other_Alphabetic # Mc [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN AU +A926..A92A ; Other_Alphabetic # Mn [5] KAYAH LI VOWEL UE..KAYAH LI VOWEL O +A947..A951 ; Other_Alphabetic # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R +A952 ; Other_Alphabetic # Mc REJANG CONSONANT SIGN H +A980..A982 ; Other_Alphabetic 
# Mn [3] JAVANESE SIGN PANYANGGA..JAVANESE SIGN LAYAR +A983 ; Other_Alphabetic # Mc JAVANESE SIGN WIGNYAN +A9B4..A9B5 ; Other_Alphabetic # Mc [2] JAVANESE VOWEL SIGN TARUNG..JAVANESE VOWEL SIGN TOLONG +A9B6..A9B9 ; Other_Alphabetic # Mn [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT +A9BA..A9BB ; Other_Alphabetic # Mc [2] JAVANESE VOWEL SIGN TALING..JAVANESE VOWEL SIGN DIRGA MURE +A9BC ; Other_Alphabetic # Mn JAVANESE VOWEL SIGN PEPET +A9BD..A9BF ; Other_Alphabetic # Mc [3] JAVANESE CONSONANT SIGN KERET..JAVANESE CONSONANT SIGN CAKRA +AA29..AA2E ; Other_Alphabetic # Mn [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE +AA2F..AA30 ; Other_Alphabetic # Mc [2] CHAM VOWEL SIGN O..CHAM VOWEL SIGN AI +AA31..AA32 ; Other_Alphabetic # Mn [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE +AA33..AA34 ; Other_Alphabetic # Mc [2] CHAM CONSONANT SIGN YA..CHAM CONSONANT SIGN RA +AA35..AA36 ; Other_Alphabetic # Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONANT SIGN WA +AA43 ; Other_Alphabetic # Mn CHAM CONSONANT SIGN FINAL NG +AA4C ; Other_Alphabetic # Mn CHAM CONSONANT SIGN FINAL M +AA4D ; Other_Alphabetic # Mc CHAM CONSONANT SIGN FINAL H +AAB0 ; Other_Alphabetic # Mn TAI VIET MAI KANG +AAB2..AAB4 ; Other_Alphabetic # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U +AAB7..AAB8 ; Other_Alphabetic # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA +AABE ; Other_Alphabetic # Mn TAI VIET VOWEL AM +AAEB ; Other_Alphabetic # Mc MEETEI MAYEK VOWEL SIGN II +AAEC..AAED ; Other_Alphabetic # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI +AAEE..AAEF ; Other_Alphabetic # Mc [2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU +AAF5 ; Other_Alphabetic # Mc MEETEI MAYEK VOWEL SIGN VISARGA +ABE3..ABE4 ; Other_Alphabetic # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP +ABE5 ; Other_Alphabetic # Mn MEETEI MAYEK VOWEL SIGN ANAP +ABE6..ABE7 ; Other_Alphabetic # Mc [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP +ABE8 ; Other_Alphabetic # Mn MEETEI 
MAYEK VOWEL SIGN UNAP +ABE9..ABEA ; Other_Alphabetic # Mc [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEETEI MAYEK VOWEL SIGN NUNG +FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA +10376..1037A ; Other_Alphabetic # Mn [5] COMBINING OLD PERMIC LETTER AN..COMBINING OLD PERMIC LETTER SII +10A01..10A03 ; Other_Alphabetic # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R +10A05..10A06 ; Other_Alphabetic # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O +10A0C..10A0F ; Other_Alphabetic # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA +11000 ; Other_Alphabetic # Mc BRAHMI SIGN CANDRABINDU +11001 ; Other_Alphabetic # Mn BRAHMI SIGN ANUSVARA +11002 ; Other_Alphabetic # Mc BRAHMI SIGN VISARGA +11038..11045 ; Other_Alphabetic # Mn [14] BRAHMI VOWEL SIGN AA..BRAHMI VOWEL SIGN AU +11082 ; Other_Alphabetic # Mc KAITHI SIGN VISARGA +110B0..110B2 ; Other_Alphabetic # Mc [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II +110B3..110B6 ; Other_Alphabetic # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI +110B7..110B8 ; Other_Alphabetic # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU +11100..11102 ; Other_Alphabetic # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11127..1112B ; Other_Alphabetic # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112C ; Other_Alphabetic # Mc CHAKMA VOWEL SIGN E +1112D..11132 ; Other_Alphabetic # Mn [6] CHAKMA VOWEL SIGN AI..CHAKMA AU MARK +11180..11181 ; Other_Alphabetic # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +11182 ; Other_Alphabetic # Mc SHARADA SIGN VISARGA +111B3..111B5 ; Other_Alphabetic # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II +111B6..111BE ; Other_Alphabetic # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +111BF ; Other_Alphabetic # Mc SHARADA VOWEL SIGN AU +1122C..1122E ; Other_Alphabetic # Mc [3] KHOJKI VOWEL SIGN AA..KHOJKI VOWEL SIGN II +1122F..11231 ; Other_Alphabetic # Mn [3] KHOJKI VOWEL SIGN U..KHOJKI VOWEL SIGN AI +11232..11233 ; Other_Alphabetic # 
Mc [2] KHOJKI VOWEL SIGN O..KHOJKI VOWEL SIGN AU +11234 ; Other_Alphabetic # Mn KHOJKI SIGN ANUSVARA +11237 ; Other_Alphabetic # Mn KHOJKI SIGN SHADDA +112DF ; Other_Alphabetic # Mn KHUDAWADI SIGN ANUSVARA +112E0..112E2 ; Other_Alphabetic # Mc [3] KHUDAWADI VOWEL SIGN AA..KHUDAWADI VOWEL SIGN II +112E3..112E8 ; Other_Alphabetic # Mn [6] KHUDAWADI VOWEL SIGN U..KHUDAWADI VOWEL SIGN AU +11300..11301 ; Other_Alphabetic # Mn [2] GRANTHA SIGN COMBINING ANUSVARA ABOVE..GRANTHA SIGN CANDRABINDU +11302..11303 ; Other_Alphabetic # Mc [2] GRANTHA SIGN ANUSVARA..GRANTHA SIGN VISARGA +1133E..1133F ; Other_Alphabetic # Mc [2] GRANTHA VOWEL SIGN AA..GRANTHA VOWEL SIGN I +11340 ; Other_Alphabetic # Mn GRANTHA VOWEL SIGN II +11341..11344 ; Other_Alphabetic # Mc [4] GRANTHA VOWEL SIGN U..GRANTHA VOWEL SIGN VOCALIC RR +11347..11348 ; Other_Alphabetic # Mc [2] GRANTHA VOWEL SIGN EE..GRANTHA VOWEL SIGN AI +1134B..1134C ; Other_Alphabetic # Mc [2] GRANTHA VOWEL SIGN OO..GRANTHA VOWEL SIGN AU +11357 ; Other_Alphabetic # Mc GRANTHA AU LENGTH MARK +11362..11363 ; Other_Alphabetic # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL +114B0..114B2 ; Other_Alphabetic # Mc [3] TIRHUTA VOWEL SIGN AA..TIRHUTA VOWEL SIGN II +114B3..114B8 ; Other_Alphabetic # Mn [6] TIRHUTA VOWEL SIGN U..TIRHUTA VOWEL SIGN VOCALIC LL +114B9 ; Other_Alphabetic # Mc TIRHUTA VOWEL SIGN E +114BA ; Other_Alphabetic # Mn TIRHUTA VOWEL SIGN SHORT E +114BB..114BE ; Other_Alphabetic # Mc [4] TIRHUTA VOWEL SIGN AI..TIRHUTA VOWEL SIGN AU +114BF..114C0 ; Other_Alphabetic # Mn [2] TIRHUTA SIGN CANDRABINDU..TIRHUTA SIGN ANUSVARA +114C1 ; Other_Alphabetic # Mc TIRHUTA SIGN VISARGA +115AF..115B1 ; Other_Alphabetic # Mc [3] SIDDHAM VOWEL SIGN AA..SIDDHAM VOWEL SIGN II +115B2..115B5 ; Other_Alphabetic # Mn [4] SIDDHAM VOWEL SIGN U..SIDDHAM VOWEL SIGN VOCALIC RR +115B8..115BB ; Other_Alphabetic # Mc [4] SIDDHAM VOWEL SIGN E..SIDDHAM VOWEL SIGN AU +115BC..115BD ; Other_Alphabetic # Mn [2] SIDDHAM SIGN 
CANDRABINDU..SIDDHAM SIGN ANUSVARA +115BE ; Other_Alphabetic # Mc SIDDHAM SIGN VISARGA +115DC..115DD ; Other_Alphabetic # Mn [2] SIDDHAM VOWEL SIGN ALTERNATE U..SIDDHAM VOWEL SIGN ALTERNATE UU +11630..11632 ; Other_Alphabetic # Mc [3] MODI VOWEL SIGN AA..MODI VOWEL SIGN II +11633..1163A ; Other_Alphabetic # Mn [8] MODI VOWEL SIGN U..MODI VOWEL SIGN AI +1163B..1163C ; Other_Alphabetic # Mc [2] MODI VOWEL SIGN O..MODI VOWEL SIGN AU +1163D ; Other_Alphabetic # Mn MODI SIGN ANUSVARA +1163E ; Other_Alphabetic # Mc MODI SIGN VISARGA +11640 ; Other_Alphabetic # Mn MODI SIGN ARDHACANDRA +116AB ; Other_Alphabetic # Mn TAKRI SIGN ANUSVARA +116AC ; Other_Alphabetic # Mc TAKRI SIGN VISARGA +116AD ; Other_Alphabetic # Mn TAKRI VOWEL SIGN AA +116AE..116AF ; Other_Alphabetic # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II +116B0..116B5 ; Other_Alphabetic # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +1171D..1171F ; Other_Alphabetic # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA +11720..11721 ; Other_Alphabetic # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA +11722..11725 ; Other_Alphabetic # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU +11726 ; Other_Alphabetic # Mc AHOM VOWEL SIGN E +11727..1172A ; Other_Alphabetic # Mn [4] AHOM VOWEL SIGN AW..AHOM VOWEL SIGN AM +16B30..16B36 ; Other_Alphabetic # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM +16F51..16F7E ; Other_Alphabetic # Mc [46] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN NG +1BC9E ; Other_Alphabetic # Mn DUPLOYAN DOUBLE MARK +1F130..1F149 ; Other_Alphabetic # So [26] SQUARED LATIN CAPITAL LETTER A..SQUARED LATIN CAPITAL LETTER Z +1F150..1F169 ; Other_Alphabetic # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z +1F170..1F189 ; Other_Alphabetic # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z + +# Total code points: 1116 + +# ================================================ + +3006 ; Ideographic # Lo 
IDEOGRAPHIC CLOSING MARK +3007 ; Ideographic # Nl IDEOGRAPHIC NUMBER ZERO +3021..3029 ; Ideographic # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE +3038..303A ; Ideographic # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY +3400..4DB5 ; Ideographic # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5 +4E00..9FD5 ; Ideographic # Lo [20950] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FD5 +F900..FA6D ; Ideographic # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D +FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 +20000..2A6D6 ; Ideographic # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6 +2A700..2B734 ; Ideographic # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734 +2B740..2B81D ; Ideographic # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D +2B820..2CEA1 ; Ideographic # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 +2F800..2FA1D ; Ideographic # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D + +# Total code points: 81404 + +# ================================================ + +005E ; Diacritic # Sk CIRCUMFLEX ACCENT +0060 ; Diacritic # Sk GRAVE ACCENT +00A8 ; Diacritic # Sk DIAERESIS +00AF ; Diacritic # Sk MACRON +00B4 ; Diacritic # Sk ACUTE ACCENT +00B7 ; Diacritic # Po MIDDLE DOT +00B8 ; Diacritic # Sk CEDILLA +02B0..02C1 ; Diacritic # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP +02C2..02C5 ; Diacritic # Sk [4] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER DOWN ARROWHEAD +02C6..02D1 ; Diacritic # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON +02D2..02DF ; Diacritic # Sk [14] MODIFIER LETTER CENTRED RIGHT HALF RING..MODIFIER LETTER CROSS ACCENT +02E0..02E4 ; Diacritic # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP +02E5..02EB ; Diacritic # Sk [7] 
MODIFIER LETTER EXTRA-HIGH TONE BAR..MODIFIER LETTER YANG DEPARTING TONE MARK +02EC ; Diacritic # Lm MODIFIER LETTER VOICING +02ED ; Diacritic # Sk MODIFIER LETTER UNASPIRATED +02EE ; Diacritic # Lm MODIFIER LETTER DOUBLE APOSTROPHE +02EF..02FF ; Diacritic # Sk [17] MODIFIER LETTER LOW DOWN ARROWHEAD..MODIFIER LETTER LOW LEFT ARROW +0300..034E ; Diacritic # Mn [79] COMBINING GRAVE ACCENT..COMBINING UPWARDS ARROW BELOW +0350..0357 ; Diacritic # Mn [8] COMBINING RIGHT ARROWHEAD ABOVE..COMBINING RIGHT HALF RING ABOVE +035D..0362 ; Diacritic # Mn [6] COMBINING DOUBLE BREVE..COMBINING DOUBLE RIGHTWARDS ARROW BELOW +0374 ; Diacritic # Lm GREEK NUMERAL SIGN +0375 ; Diacritic # Sk GREEK LOWER NUMERAL SIGN +037A ; Diacritic # Lm GREEK YPOGEGRAMMENI +0384..0385 ; Diacritic # Sk [2] GREEK TONOS..GREEK DIALYTIKA TONOS +0483..0487 ; Diacritic # Mn [5] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC POKRYTIE +0559 ; Diacritic # Lm ARMENIAN MODIFIER LETTER LEFT HALF RING +0591..05A1 ; Diacritic # Mn [17] HEBREW ACCENT ETNAHTA..HEBREW ACCENT PAZER +05A3..05BD ; Diacritic # Mn [27] HEBREW ACCENT MUNAH..HEBREW POINT METEG +05BF ; Diacritic # Mn HEBREW POINT RAFE +05C1..05C2 ; Diacritic # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT +05C4 ; Diacritic # Mn HEBREW MARK UPPER DOT +064B..0652 ; Diacritic # Mn [8] ARABIC FATHATAN..ARABIC SUKUN +0657..0658 ; Diacritic # Mn [2] ARABIC INVERTED DAMMA..ARABIC MARK NOON GHUNNA +06DF..06E0 ; Diacritic # Mn [2] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH UPRIGHT RECTANGULAR ZERO +06E5..06E6 ; Diacritic # Lm [2] ARABIC SMALL WAW..ARABIC SMALL YEH +06EA..06EC ; Diacritic # Mn [3] ARABIC EMPTY CENTRE LOW STOP..ARABIC ROUNDED HIGH STOP WITH FILLED CENTRE +0730..074A ; Diacritic # Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH +07A6..07B0 ; Diacritic # Mn [11] THAANA ABAFILI..THAANA SUKUN +07EB..07F3 ; Diacritic # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE +07F4..07F5 ; Diacritic # Lm [2] NKO HIGH TONE APOSTROPHE..NKO 
LOW TONE APOSTROPHE +0818..0819 ; Diacritic # Mn [2] SAMARITAN MARK OCCLUSION..SAMARITAN MARK DAGESH +08E3..08FE ; Diacritic # Mn [28] ARABIC TURNED DAMMA BELOW..ARABIC DAMMA WITH DOT +093C ; Diacritic # Mn DEVANAGARI SIGN NUKTA +094D ; Diacritic # Mn DEVANAGARI SIGN VIRAMA +0951..0954 ; Diacritic # Mn [4] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI ACUTE ACCENT +0971 ; Diacritic # Lm DEVANAGARI SIGN HIGH SPACING DOT +09BC ; Diacritic # Mn BENGALI SIGN NUKTA +09CD ; Diacritic # Mn BENGALI SIGN VIRAMA +0A3C ; Diacritic # Mn GURMUKHI SIGN NUKTA +0A4D ; Diacritic # Mn GURMUKHI SIGN VIRAMA +0ABC ; Diacritic # Mn GUJARATI SIGN NUKTA +0ACD ; Diacritic # Mn GUJARATI SIGN VIRAMA +0B3C ; Diacritic # Mn ORIYA SIGN NUKTA +0B4D ; Diacritic # Mn ORIYA SIGN VIRAMA +0BCD ; Diacritic # Mn TAMIL SIGN VIRAMA +0C4D ; Diacritic # Mn TELUGU SIGN VIRAMA +0CBC ; Diacritic # Mn KANNADA SIGN NUKTA +0CCD ; Diacritic # Mn KANNADA SIGN VIRAMA +0D4D ; Diacritic # Mn MALAYALAM SIGN VIRAMA +0DCA ; Diacritic # Mn SINHALA SIGN AL-LAKUNA +0E47..0E4C ; Diacritic # Mn [6] THAI CHARACTER MAITAIKHU..THAI CHARACTER THANTHAKHAT +0E4E ; Diacritic # Mn THAI CHARACTER YAMAKKAN +0EC8..0ECC ; Diacritic # Mn [5] LAO TONE MAI EK..LAO CANCELLATION MARK +0F18..0F19 ; Diacritic # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS +0F35 ; Diacritic # Mn TIBETAN MARK NGAS BZUNG NYI ZLA +0F37 ; Diacritic # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS +0F39 ; Diacritic # Mn TIBETAN MARK TSA -PHRU +0F3E..0F3F ; Diacritic # Mc [2] TIBETAN SIGN YAR TSHES..TIBETAN SIGN MAR TSHES +0F82..0F84 ; Diacritic # Mn [3] TIBETAN SIGN NYI ZLA NAA DA..TIBETAN MARK HALANTA +0F86..0F87 ; Diacritic # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS +0FC6 ; Diacritic # Mn TIBETAN SYMBOL PADMA GDAN +1037 ; Diacritic # Mn MYANMAR SIGN DOT BELOW +1039..103A ; Diacritic # Mn [2] MYANMAR SIGN VIRAMA..MYANMAR SIGN ASAT +1087..108C ; Diacritic # Mc [6] MYANMAR SIGN SHAN TONE-2..MYANMAR SIGN SHAN COUNCIL TONE-3 +108D 
; Diacritic # Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE +108F ; Diacritic # Mc MYANMAR SIGN RUMAI PALAUNG TONE-5 +109A..109B ; Diacritic # Mc [2] MYANMAR SIGN KHAMTI TONE-1..MYANMAR SIGN KHAMTI TONE-3 +17C9..17D3 ; Diacritic # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT +17DD ; Diacritic # Mn KHMER SIGN ATTHACAN +1939..193B ; Diacritic # Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I +1A75..1A7C ; Diacritic # Mn [8] TAI THAM SIGN TONE-1..TAI THAM SIGN KHUEN-LUE KARAN +1A7F ; Diacritic # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT +1AB0..1ABD ; Diacritic # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW +1B34 ; Diacritic # Mn BALINESE SIGN REREKAN +1B44 ; Diacritic # Mc BALINESE ADEG ADEG +1B6B..1B73 ; Diacritic # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG +1BAA ; Diacritic # Mc SUNDANESE SIGN PAMAAEH +1BAB ; Diacritic # Mn SUNDANESE SIGN VIRAMA +1C36..1C37 ; Diacritic # Mn [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA +1C78..1C7D ; Diacritic # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD +1CD0..1CD2 ; Diacritic # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA +1CD3 ; Diacritic # Po VEDIC SIGN NIHSHVASA +1CD4..1CE0 ; Diacritic # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA +1CE1 ; Diacritic # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA +1CE2..1CE8 ; Diacritic # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL +1CED ; Diacritic # Mn VEDIC SIGN TIRYAK +1CF4 ; Diacritic # Mn VEDIC TONE CANDRA ABOVE +1CF8..1CF9 ; Diacritic # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE +1D2C..1D6A ; Diacritic # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1DC4..1DCF ; Diacritic # Mn [12] COMBINING MACRON-ACUTE..COMBINING ZIGZAG BELOW +1DF5 ; Diacritic # Mn COMBINING UP TACK ABOVE +1DFD..1DFF ; Diacritic # Mn [3] COMBINING ALMOST EQUAL TO BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW +1FBD ; 
Diacritic # Sk GREEK KORONIS +1FBF..1FC1 ; Diacritic # Sk [3] GREEK PSILI..GREEK DIALYTIKA AND PERISPOMENI +1FCD..1FCF ; Diacritic # Sk [3] GREEK PSILI AND VARIA..GREEK PSILI AND PERISPOMENI +1FDD..1FDF ; Diacritic # Sk [3] GREEK DASIA AND VARIA..GREEK DASIA AND PERISPOMENI +1FED..1FEF ; Diacritic # Sk [3] GREEK DIALYTIKA AND VARIA..GREEK VARIA +1FFD..1FFE ; Diacritic # Sk [2] GREEK OXIA..GREEK DASIA +2CEF..2CF1 ; Diacritic # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS +2E2F ; Diacritic # Lm VERTICAL TILDE +302A..302D ; Diacritic # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK +302E..302F ; Diacritic # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK +3099..309A ; Diacritic # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +309B..309C ; Diacritic # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +30FC ; Diacritic # Lm KATAKANA-HIRAGANA PROLONGED SOUND MARK +A66F ; Diacritic # Mn COMBINING CYRILLIC VZMET +A67C..A67D ; Diacritic # Mn [2] COMBINING CYRILLIC KAVYKA..COMBINING CYRILLIC PAYEROK +A67F ; Diacritic # Lm CYRILLIC PAYEROK +A69C..A69D ; Diacritic # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN +A6F0..A6F1 ; Diacritic # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS +A717..A71F ; Diacritic # Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK +A720..A721 ; Diacritic # Sk [2] MODIFIER LETTER STRESS AND HIGH TONE..MODIFIER LETTER STRESS AND LOW TONE +A788 ; Diacritic # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT +A7F8..A7F9 ; Diacritic # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE +A8C4 ; Diacritic # Mn SAURASHTRA SIGN VIRAMA +A8E0..A8F1 ; Diacritic # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA +A92B..A92D ; Diacritic # Mn [3] KAYAH LI TONE PLOPHU..KAYAH LI TONE 
CALYA PLOPHU +A92E ; Diacritic # Po KAYAH LI SIGN CWI +A953 ; Diacritic # Mc REJANG VIRAMA +A9B3 ; Diacritic # Mn JAVANESE SIGN CECAK TELU +A9C0 ; Diacritic # Mc JAVANESE PANGKON +A9E5 ; Diacritic # Mn MYANMAR SIGN SHAN SAW +AA7B ; Diacritic # Mc MYANMAR SIGN PAO KAREN TONE +AA7C ; Diacritic # Mn MYANMAR SIGN TAI LAING TONE-2 +AA7D ; Diacritic # Mc MYANMAR SIGN TAI LAING TONE-5 +AABF ; Diacritic # Mn TAI VIET TONE MAI EK +AAC0 ; Diacritic # Lo TAI VIET TONE MAI NUENG +AAC1 ; Diacritic # Mn TAI VIET TONE MAI THO +AAC2 ; Diacritic # Lo TAI VIET TONE MAI SONG +AAF6 ; Diacritic # Mn MEETEI MAYEK VIRAMA +AB5B ; Diacritic # Sk MODIFIER BREVE WITH INVERTED BREVE +AB5C..AB5F ; Diacritic # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK +ABEC ; Diacritic # Mc MEETEI MAYEK LUM IYEK +ABED ; Diacritic # Mn MEETEI MAYEK APUN IYEK +FB1E ; Diacritic # Mn HEBREW POINT JUDEO-SPANISH VARIKA +FE20..FE2F ; Diacritic # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITLO RIGHT HALF +FF3E ; Diacritic # Sk FULLWIDTH CIRCUMFLEX ACCENT +FF40 ; Diacritic # Sk FULLWIDTH GRAVE ACCENT +FF70 ; Diacritic # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK +FF9E..FF9F ; Diacritic # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK +FFE3 ; Diacritic # Sk FULLWIDTH MACRON +102E0 ; Diacritic # Mn COPTIC EPACT THOUSANDS MARK +10AE5..10AE6 ; Diacritic # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW +110B9..110BA ; Diacritic # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA +11133..11134 ; Diacritic # Mn [2] CHAKMA VIRAMA..CHAKMA MAAYYAA +11173 ; Diacritic # Mn MAHAJANI SIGN NUKTA +111C0 ; Diacritic # Mc SHARADA SIGN VIRAMA +111CA..111CC ; Diacritic # Mn [3] SHARADA SIGN NUKTA..SHARADA EXTRA SHORT VOWEL MARK +11235 ; Diacritic # Mc KHOJKI SIGN VIRAMA +11236 ; Diacritic # Mn KHOJKI SIGN NUKTA +112E9..112EA ; Diacritic # Mn [2] KHUDAWADI SIGN NUKTA..KHUDAWADI SIGN VIRAMA +1133C ; Diacritic # Mn GRANTHA 
SIGN NUKTA +1134D ; Diacritic # Mc GRANTHA SIGN VIRAMA +11366..1136C ; Diacritic # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX +11370..11374 ; Diacritic # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA +114C2..114C3 ; Diacritic # Mn [2] TIRHUTA SIGN VIRAMA..TIRHUTA SIGN NUKTA +115BF..115C0 ; Diacritic # Mn [2] SIDDHAM SIGN VIRAMA..SIDDHAM SIGN NUKTA +1163F ; Diacritic # Mn MODI SIGN VIRAMA +116B6 ; Diacritic # Mc TAKRI SIGN VIRAMA +116B7 ; Diacritic # Mn TAKRI SIGN NUKTA +1172B ; Diacritic # Mn AHOM SIGN KILLER +16AF0..16AF4 ; Diacritic # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE +16F8F..16F92 ; Diacritic # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW +16F93..16F9F ; Diacritic # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 +1D167..1D169 ; Diacritic # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 +1D16D..1D172 ; Diacritic # Mc [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5 +1D17B..1D182 ; Diacritic # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE +1D185..1D18B ; Diacritic # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE +1D1AA..1D1AD ; Diacritic # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO +1E8D0..1E8D6 ; Diacritic # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS + +# Total code points: 773 + +# ================================================ + +00B7 ; Extender # Po MIDDLE DOT +02D0..02D1 ; Extender # Lm [2] MODIFIER LETTER TRIANGULAR COLON..MODIFIER LETTER HALF TRIANGULAR COLON +0640 ; Extender # Lm ARABIC TATWEEL +07FA ; Extender # Lm NKO LAJANYALAN +0E46 ; Extender # Lm THAI CHARACTER MAIYAMOK +0EC6 ; Extender # Lm LAO KO LA +180A ; Extender # Po MONGOLIAN NIRUGU +1843 ; Extender # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN +1AA7 ; Extender # Lm TAI THAM SIGN MAI YAMOK +1C36 ; Extender # Mn LEPCHA 
SIGN RAN +1C7B ; Extender # Lm OL CHIKI RELAA +3005 ; Extender # Lm IDEOGRAPHIC ITERATION MARK +3031..3035 ; Extender # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF +309D..309E ; Extender # Lm [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK +30FC..30FE ; Extender # Lm [3] KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA VOICED ITERATION MARK +A015 ; Extender # Lm YI SYLLABLE WU +A60C ; Extender # Lm VAI SYLLABLE LENGTHENER +A9CF ; Extender # Lm JAVANESE PANGRANGKEP +A9E6 ; Extender # Lm MYANMAR MODIFIER LETTER SHAN REDUPLICATION +AA70 ; Extender # Lm MYANMAR MODIFIER LETTER KHAMTI REDUPLICATION +AADD ; Extender # Lm TAI VIET SYMBOL SAM +AAF3..AAF4 ; Extender # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK +FF70 ; Extender # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK +1135D ; Extender # Lo GRANTHA SIGN PLUTA +115C6..115C8 ; Extender # Po [3] SIDDHAM REPETITION MARK-1..SIDDHAM REPETITION MARK-3 +16B42..16B43 ; Extender # Lm [2] PAHAWH HMONG SIGN VOS NRUA..PAHAWH HMONG SIGN IB YAM + +# Total code points: 38 + +# ================================================ + +00AA ; Other_Lowercase # Lo FEMININE ORDINAL INDICATOR +00BA ; Other_Lowercase # Lo MASCULINE ORDINAL INDICATOR +02B0..02B8 ; Other_Lowercase # Lm [9] MODIFIER LETTER SMALL H..MODIFIER LETTER SMALL Y +02C0..02C1 ; Other_Lowercase # Lm [2] MODIFIER LETTER GLOTTAL STOP..MODIFIER LETTER REVERSED GLOTTAL STOP +02E0..02E4 ; Other_Lowercase # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP +0345 ; Other_Lowercase # Mn COMBINING GREEK YPOGEGRAMMENI +037A ; Other_Lowercase # Lm GREEK YPOGEGRAMMENI +1D2C..1D6A ; Other_Lowercase # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1D78 ; Other_Lowercase # Lm MODIFIER LETTER CYRILLIC EN +1D9B..1DBF ; Other_Lowercase # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA +2071 ; Other_Lowercase # Lm SUPERSCRIPT LATIN SMALL 
LETTER I +207F ; Other_Lowercase # Lm SUPERSCRIPT LATIN SMALL LETTER N +2090..209C ; Other_Lowercase # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +2170..217F ; Other_Lowercase # Nl [16] SMALL ROMAN NUMERAL ONE..SMALL ROMAN NUMERAL ONE THOUSAND +24D0..24E9 ; Other_Lowercase # So [26] CIRCLED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z +2C7C..2C7D ; Other_Lowercase # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V +A69C..A69D ; Other_Lowercase # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN +A770 ; Other_Lowercase # Lm MODIFIER LETTER US +A7F8..A7F9 ; Other_Lowercase # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE +AB5C..AB5F ; Other_Lowercase # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK + +# Total code points: 189 + +# ================================================ + +2160..216F ; Other_Uppercase # Nl [16] ROMAN NUMERAL ONE..ROMAN NUMERAL ONE THOUSAND +24B6..24CF ; Other_Uppercase # So [26] CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN CAPITAL LETTER Z +1F130..1F149 ; Other_Uppercase # So [26] SQUARED LATIN CAPITAL LETTER A..SQUARED LATIN CAPITAL LETTER Z +1F150..1F169 ; Other_Uppercase # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z +1F170..1F189 ; Other_Uppercase # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z + +# Total code points: 120 + +# ================================================ + +FDD0..FDEF ; Noncharacter_Code_Point # Cn [32] <noncharacter-FDD0>..<noncharacter-FDEF> +FFFE..FFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-FFFE>..<noncharacter-FFFF> +1FFFE..1FFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-1FFFE>..<noncharacter-1FFFF> +2FFFE..2FFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-2FFFE>..<noncharacter-2FFFF> +3FFFE..3FFFF ; Noncharacter_Code_Point # Cn [2] 
<noncharacter-3FFFE>..<noncharacter-3FFFF> +4FFFE..4FFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-4FFFE>..<noncharacter-4FFFF> +5FFFE..5FFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-5FFFE>..<noncharacter-5FFFF> +6FFFE..6FFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-6FFFE>..<noncharacter-6FFFF> +7FFFE..7FFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-7FFFE>..<noncharacter-7FFFF> +8FFFE..8FFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-8FFFE>..<noncharacter-8FFFF> +9FFFE..9FFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-9FFFE>..<noncharacter-9FFFF> +AFFFE..AFFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-AFFFE>..<noncharacter-AFFFF> +BFFFE..BFFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-BFFFE>..<noncharacter-BFFFF> +CFFFE..CFFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-CFFFE>..<noncharacter-CFFFF> +DFFFE..DFFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-DFFFE>..<noncharacter-DFFFF> +EFFFE..EFFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-EFFFE>..<noncharacter-EFFFF> +FFFFE..FFFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-FFFFE>..<noncharacter-FFFFF> +10FFFE..10FFFF; Noncharacter_Code_Point # Cn [2] <noncharacter-10FFFE>..<noncharacter-10FFFF> + +# Total code points: 66 + +# ================================================ + +09BE ; Other_Grapheme_Extend # Mc BENGALI VOWEL SIGN AA +09D7 ; Other_Grapheme_Extend # Mc BENGALI AU LENGTH MARK +0B3E ; Other_Grapheme_Extend # Mc ORIYA VOWEL SIGN AA +0B57 ; Other_Grapheme_Extend # Mc ORIYA AU LENGTH MARK +0BBE ; Other_Grapheme_Extend # Mc TAMIL VOWEL SIGN AA +0BD7 ; Other_Grapheme_Extend # Mc TAMIL AU LENGTH MARK +0CC2 ; Other_Grapheme_Extend # Mc KANNADA VOWEL SIGN UU +0CD5..0CD6 ; Other_Grapheme_Extend # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK +0D3E ; Other_Grapheme_Extend # Mc MALAYALAM VOWEL SIGN AA +0D57 ; Other_Grapheme_Extend # Mc MALAYALAM AU LENGTH MARK +0DCF ; Other_Grapheme_Extend # Mc SINHALA VOWEL SIGN AELA-PILLA +0DDF ; 
Other_Grapheme_Extend # Mc SINHALA VOWEL SIGN GAYANUKITTA +200C..200D ; Other_Grapheme_Extend # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER +302E..302F ; Other_Grapheme_Extend # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK +FF9E..FF9F ; Other_Grapheme_Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK +1133E ; Other_Grapheme_Extend # Mc GRANTHA VOWEL SIGN AA +11357 ; Other_Grapheme_Extend # Mc GRANTHA AU LENGTH MARK +114B0 ; Other_Grapheme_Extend # Mc TIRHUTA VOWEL SIGN AA +114BD ; Other_Grapheme_Extend # Mc TIRHUTA VOWEL SIGN SHORT O +115AF ; Other_Grapheme_Extend # Mc SIDDHAM VOWEL SIGN AA +1D165 ; Other_Grapheme_Extend # Mc MUSICAL SYMBOL COMBINING STEM +1D16E..1D172 ; Other_Grapheme_Extend # Mc [5] MUSICAL SYMBOL COMBINING FLAG-1..MUSICAL SYMBOL COMBINING FLAG-5 + +# Total code points: 30 + +# ================================================ + +2FF0..2FF1 ; IDS_Binary_Operator # So [2] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER ABOVE TO BELOW +2FF4..2FFB ; IDS_Binary_Operator # So [8] IDEOGRAPHIC DESCRIPTION CHARACTER FULL SURROUND..IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID + +# Total code points: 10 + +# ================================================ + +2FF2..2FF3 ; IDS_Trinary_Operator # So [2] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO MIDDLE AND RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER ABOVE TO MIDDLE AND BELOW + +# Total code points: 2 + +# ================================================ + +2E80..2E99 ; Radical # So [26] CJK RADICAL REPEAT..CJK RADICAL RAP +2E9B..2EF3 ; Radical # So [89] CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED TURTLE +2F00..2FD5 ; Radical # So [214] KANGXI RADICAL ONE..KANGXI RADICAL FLUTE + +# Total code points: 329 + +# ================================================ + +3400..4DB5 ; Unified_Ideograph # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5 +4E00..9FD5 ; Unified_Ideograph # Lo [20950] CJK UNIFIED 
IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FD5 +FA0E..FA0F ; Unified_Ideograph # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA0E..CJK COMPATIBILITY IDEOGRAPH-FA0F +FA11 ; Unified_Ideograph # Lo CJK COMPATIBILITY IDEOGRAPH-FA11 +FA13..FA14 ; Unified_Ideograph # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA13..CJK COMPATIBILITY IDEOGRAPH-FA14 +FA1F ; Unified_Ideograph # Lo CJK COMPATIBILITY IDEOGRAPH-FA1F +FA21 ; Unified_Ideograph # Lo CJK COMPATIBILITY IDEOGRAPH-FA21 +FA23..FA24 ; Unified_Ideograph # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA23..CJK COMPATIBILITY IDEOGRAPH-FA24 +FA27..FA29 ; Unified_Ideograph # Lo [3] CJK COMPATIBILITY IDEOGRAPH-FA27..CJK COMPATIBILITY IDEOGRAPH-FA29 +20000..2A6D6 ; Unified_Ideograph # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6 +2A700..2B734 ; Unified_Ideograph # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734 +2B740..2B81D ; Unified_Ideograph # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D +2B820..2CEA1 ; Unified_Ideograph # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 + +# Total code points: 80388 + +# ================================================ + +034F ; Other_Default_Ignorable_Code_Point # Mn COMBINING GRAPHEME JOINER +115F..1160 ; Other_Default_Ignorable_Code_Point # Lo [2] HANGUL CHOSEONG FILLER..HANGUL JUNGSEONG FILLER +17B4..17B5 ; Other_Default_Ignorable_Code_Point # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA +2065 ; Other_Default_Ignorable_Code_Point # Cn <reserved-2065> +3164 ; Other_Default_Ignorable_Code_Point # Lo HANGUL FILLER +FFA0 ; Other_Default_Ignorable_Code_Point # Lo HALFWIDTH HANGUL FILLER +FFF0..FFF8 ; Other_Default_Ignorable_Code_Point # Cn [9] <reserved-FFF0>..<reserved-FFF8> +E0000 ; Other_Default_Ignorable_Code_Point # Cn <reserved-E0000> +E0002..E001F ; Other_Default_Ignorable_Code_Point # Cn [30] <reserved-E0002>..<reserved-E001F> +E0080..E00FF ; Other_Default_Ignorable_Code_Point # Cn [128] <reserved-E0080>..<reserved-E00FF> 
+E01F0..E0FFF ; Other_Default_Ignorable_Code_Point # Cn [3600] <reserved-E01F0>..<reserved-E0FFF> + +# Total code points: 3776 + +# ================================================ + +0149 ; Deprecated # L& LATIN SMALL LETTER N PRECEDED BY APOSTROPHE +0673 ; Deprecated # Lo ARABIC LETTER ALEF WITH WAVY HAMZA BELOW +0F77 ; Deprecated # Mn TIBETAN VOWEL SIGN VOCALIC RR +0F79 ; Deprecated # Mn TIBETAN VOWEL SIGN VOCALIC LL +17A3..17A4 ; Deprecated # Lo [2] KHMER INDEPENDENT VOWEL QAQ..KHMER INDEPENDENT VOWEL QAA +206A..206F ; Deprecated # Cf [6] INHIBIT SYMMETRIC SWAPPING..NOMINAL DIGIT SHAPES +2329 ; Deprecated # Ps LEFT-POINTING ANGLE BRACKET +232A ; Deprecated # Pe RIGHT-POINTING ANGLE BRACKET +E0001 ; Deprecated # Cf LANGUAGE TAG +E007F ; Deprecated # Cf CANCEL TAG + +# Total code points: 16 + +# ================================================ + +0069..006A ; Soft_Dotted # L& [2] LATIN SMALL LETTER I..LATIN SMALL LETTER J +012F ; Soft_Dotted # L& LATIN SMALL LETTER I WITH OGONEK +0249 ; Soft_Dotted # L& LATIN SMALL LETTER J WITH STROKE +0268 ; Soft_Dotted # L& LATIN SMALL LETTER I WITH STROKE +029D ; Soft_Dotted # L& LATIN SMALL LETTER J WITH CROSSED-TAIL +02B2 ; Soft_Dotted # Lm MODIFIER LETTER SMALL J +03F3 ; Soft_Dotted # L& GREEK LETTER YOT +0456 ; Soft_Dotted # L& CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I +0458 ; Soft_Dotted # L& CYRILLIC SMALL LETTER JE +1D62 ; Soft_Dotted # Lm LATIN SUBSCRIPT SMALL LETTER I +1D96 ; Soft_Dotted # L& LATIN SMALL LETTER I WITH RETROFLEX HOOK +1DA4 ; Soft_Dotted # Lm MODIFIER LETTER SMALL I WITH STROKE +1DA8 ; Soft_Dotted # Lm MODIFIER LETTER SMALL J WITH CROSSED-TAIL +1E2D ; Soft_Dotted # L& LATIN SMALL LETTER I WITH TILDE BELOW +1ECB ; Soft_Dotted # L& LATIN SMALL LETTER I WITH DOT BELOW +2071 ; Soft_Dotted # Lm SUPERSCRIPT LATIN SMALL LETTER I +2148..2149 ; Soft_Dotted # L& [2] DOUBLE-STRUCK ITALIC SMALL I..DOUBLE-STRUCK ITALIC SMALL J +2C7C ; Soft_Dotted # Lm LATIN SUBSCRIPT SMALL LETTER J +1D422..1D423 ; Soft_Dotted 
# L& [2] MATHEMATICAL BOLD SMALL I..MATHEMATICAL BOLD SMALL J +1D456..1D457 ; Soft_Dotted # L& [2] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL ITALIC SMALL J +1D48A..1D48B ; Soft_Dotted # L& [2] MATHEMATICAL BOLD ITALIC SMALL I..MATHEMATICAL BOLD ITALIC SMALL J +1D4BE..1D4BF ; Soft_Dotted # L& [2] MATHEMATICAL SCRIPT SMALL I..MATHEMATICAL SCRIPT SMALL J +1D4F2..1D4F3 ; Soft_Dotted # L& [2] MATHEMATICAL BOLD SCRIPT SMALL I..MATHEMATICAL BOLD SCRIPT SMALL J +1D526..1D527 ; Soft_Dotted # L& [2] MATHEMATICAL FRAKTUR SMALL I..MATHEMATICAL FRAKTUR SMALL J +1D55A..1D55B ; Soft_Dotted # L& [2] MATHEMATICAL DOUBLE-STRUCK SMALL I..MATHEMATICAL DOUBLE-STRUCK SMALL J +1D58E..1D58F ; Soft_Dotted # L& [2] MATHEMATICAL BOLD FRAKTUR SMALL I..MATHEMATICAL BOLD FRAKTUR SMALL J +1D5C2..1D5C3 ; Soft_Dotted # L& [2] MATHEMATICAL SANS-SERIF SMALL I..MATHEMATICAL SANS-SERIF SMALL J +1D5F6..1D5F7 ; Soft_Dotted # L& [2] MATHEMATICAL SANS-SERIF BOLD SMALL I..MATHEMATICAL SANS-SERIF BOLD SMALL J +1D62A..1D62B ; Soft_Dotted # L& [2] MATHEMATICAL SANS-SERIF ITALIC SMALL I..MATHEMATICAL SANS-SERIF ITALIC SMALL J +1D65E..1D65F ; Soft_Dotted # L& [2] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL I..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL J +1D692..1D693 ; Soft_Dotted # L& [2] MATHEMATICAL MONOSPACE SMALL I..MATHEMATICAL MONOSPACE SMALL J + +# Total code points: 46 + +# ================================================ + +0E40..0E44 ; Logical_Order_Exception # Lo [5] THAI CHARACTER SARA E..THAI CHARACTER SARA AI MAIMALAI +0EC0..0EC4 ; Logical_Order_Exception # Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI +19B5..19B7 ; Logical_Order_Exception # Lo [3] NEW TAI LUE VOWEL SIGN E..NEW TAI LUE VOWEL SIGN O +19BA ; Logical_Order_Exception # Lo NEW TAI LUE VOWEL SIGN AY +AAB5..AAB6 ; Logical_Order_Exception # Lo [2] TAI VIET VOWEL E..TAI VIET VOWEL O +AAB9 ; Logical_Order_Exception # Lo TAI VIET VOWEL UEA +AABB..AABC ; Logical_Order_Exception # Lo [2] TAI VIET VOWEL AUE..TAI VIET VOWEL AY + +# Total code points: 
19 + +# ================================================ + +2118 ; Other_ID_Start # Sm SCRIPT CAPITAL P +212E ; Other_ID_Start # So ESTIMATED SYMBOL +309B..309C ; Other_ID_Start # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK + +# Total code points: 4 + +# ================================================ + +00B7 ; Other_ID_Continue # Po MIDDLE DOT +0387 ; Other_ID_Continue # Po GREEK ANO TELEIA +1369..1371 ; Other_ID_Continue # No [9] ETHIOPIC DIGIT ONE..ETHIOPIC DIGIT NINE +19DA ; Other_ID_Continue # No NEW TAI LUE THAM DIGIT ONE + +# Total code points: 12 + +# ================================================ + +0021 ; STerm # Po EXCLAMATION MARK +002E ; STerm # Po FULL STOP +003F ; STerm # Po QUESTION MARK +0589 ; STerm # Po ARMENIAN FULL STOP +061F ; STerm # Po ARABIC QUESTION MARK +06D4 ; STerm # Po ARABIC FULL STOP +0700..0702 ; STerm # Po [3] SYRIAC END OF PARAGRAPH..SYRIAC SUBLINEAR FULL STOP +07F9 ; STerm # Po NKO EXCLAMATION MARK +0964..0965 ; STerm # Po [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA +104A..104B ; STerm # Po [2] MYANMAR SIGN LITTLE SECTION..MYANMAR SIGN SECTION +1362 ; STerm # Po ETHIOPIC FULL STOP +1367..1368 ; STerm # Po [2] ETHIOPIC QUESTION MARK..ETHIOPIC PARAGRAPH SEPARATOR +166E ; STerm # Po CANADIAN SYLLABICS FULL STOP +1735..1736 ; STerm # Po [2] PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DOUBLE PUNCTUATION +1803 ; STerm # Po MONGOLIAN FULL STOP +1809 ; STerm # Po MONGOLIAN MANCHU FULL STOP +1944..1945 ; STerm # Po [2] LIMBU EXCLAMATION MARK..LIMBU QUESTION MARK +1AA8..1AAB ; STerm # Po [4] TAI THAM SIGN KAAN..TAI THAM SIGN SATKAANKUU +1B5A..1B5B ; STerm # Po [2] BALINESE PANTI..BALINESE PAMADA +1B5E..1B5F ; STerm # Po [2] BALINESE CARIK SIKI..BALINESE CARIK PAREREN +1C3B..1C3C ; STerm # Po [2] LEPCHA PUNCTUATION TA-ROL..LEPCHA PUNCTUATION NYET THYOOM TA-ROL +1C7E..1C7F ; STerm # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD +203C..203D ; STerm # Po [2] DOUBLE 
EXCLAMATION MARK..INTERROBANG +2047..2049 ; STerm # Po [3] DOUBLE QUESTION MARK..EXCLAMATION QUESTION MARK +2E2E ; STerm # Po REVERSED QUESTION MARK +2E3C ; STerm # Po STENOGRAPHIC FULL STOP +3002 ; STerm # Po IDEOGRAPHIC FULL STOP +A4FF ; STerm # Po LISU PUNCTUATION FULL STOP +A60E..A60F ; STerm # Po [2] VAI FULL STOP..VAI QUESTION MARK +A6F3 ; STerm # Po BAMUM FULL STOP +A6F7 ; STerm # Po BAMUM QUESTION MARK +A876..A877 ; STerm # Po [2] PHAGS-PA MARK SHAD..PHAGS-PA MARK DOUBLE SHAD +A8CE..A8CF ; STerm # Po [2] SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA +A92F ; STerm # Po KAYAH LI SIGN SHYA +A9C8..A9C9 ; STerm # Po [2] JAVANESE PADA LINGSA..JAVANESE PADA LUNGSI +AA5D..AA5F ; STerm # Po [3] CHAM PUNCTUATION DANDA..CHAM PUNCTUATION TRIPLE DANDA +AAF0..AAF1 ; STerm # Po [2] MEETEI MAYEK CHEIKHAN..MEETEI MAYEK AHANG KHUDAM +ABEB ; STerm # Po MEETEI MAYEK CHEIKHEI +FE52 ; STerm # Po SMALL FULL STOP +FE56..FE57 ; STerm # Po [2] SMALL QUESTION MARK..SMALL EXCLAMATION MARK +FF01 ; STerm # Po FULLWIDTH EXCLAMATION MARK +FF0E ; STerm # Po FULLWIDTH FULL STOP +FF1F ; STerm # Po FULLWIDTH QUESTION MARK +FF61 ; STerm # Po HALFWIDTH IDEOGRAPHIC FULL STOP +10A56..10A57 ; STerm # Po [2] KHAROSHTHI PUNCTUATION DANDA..KHAROSHTHI PUNCTUATION DOUBLE DANDA +11047..11048 ; STerm # Po [2] BRAHMI DANDA..BRAHMI DOUBLE DANDA +110BE..110C1 ; STerm # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA +11141..11143 ; STerm # Po [3] CHAKMA DANDA..CHAKMA QUESTION MARK +111C5..111C6 ; STerm # Po [2] SHARADA DANDA..SHARADA DOUBLE DANDA +111CD ; STerm # Po SHARADA SUTRA MARK +111DE..111DF ; STerm # Po [2] SHARADA SECTION MARK-1..SHARADA SECTION MARK-2 +11238..11239 ; STerm # Po [2] KHOJKI DANDA..KHOJKI DOUBLE DANDA +1123B..1123C ; STerm # Po [2] KHOJKI SECTION MARK..KHOJKI DOUBLE SECTION MARK +112A9 ; STerm # Po MULTANI SECTION MARK +115C2..115C3 ; STerm # Po [2] SIDDHAM DANDA..SIDDHAM DOUBLE DANDA +115C9..115D7 ; STerm # Po [15] SIDDHAM END OF TEXT MARK..SIDDHAM SECTION MARK WITH CIRCLES AND FOUR 
ENCLOSURES +11641..11642 ; STerm # Po [2] MODI DANDA..MODI DOUBLE DANDA +1173C..1173E ; STerm # Po [3] AHOM SIGN SMALL SECTION..AHOM SIGN RULAI +16A6E..16A6F ; STerm # Po [2] MRO DANDA..MRO DOUBLE DANDA +16AF5 ; STerm # Po BASSA VAH FULL STOP +16B37..16B38 ; STerm # Po [2] PAHAWH HMONG SIGN VOS THOM..PAHAWH HMONG SIGN VOS TSHAB CEEB +16B44 ; STerm # Po PAHAWH HMONG SIGN XAUS +1BC9F ; STerm # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP +1DA88 ; STerm # Po SIGNWRITING FULL STOP + +# Total code points: 120 + +# ================================================ + +180B..180D ; Variation_Selector # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE +FE00..FE0F ; Variation_Selector # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16 +E0100..E01EF ; Variation_Selector # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 + +# Total code points: 259 + +# ================================================ + +0009..000D ; Pattern_White_Space # Cc [5] <control-0009>..<control-000D> +0020 ; Pattern_White_Space # Zs SPACE +0085 ; Pattern_White_Space # Cc <control-0085> +200E..200F ; Pattern_White_Space # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK +2028 ; Pattern_White_Space # Zl LINE SEPARATOR +2029 ; Pattern_White_Space # Zp PARAGRAPH SEPARATOR + +# Total code points: 11 + +# ================================================ + +0021..0023 ; Pattern_Syntax # Po [3] EXCLAMATION MARK..NUMBER SIGN +0024 ; Pattern_Syntax # Sc DOLLAR SIGN +0025..0027 ; Pattern_Syntax # Po [3] PERCENT SIGN..APOSTROPHE +0028 ; Pattern_Syntax # Ps LEFT PARENTHESIS +0029 ; Pattern_Syntax # Pe RIGHT PARENTHESIS +002A ; Pattern_Syntax # Po ASTERISK +002B ; Pattern_Syntax # Sm PLUS SIGN +002C ; Pattern_Syntax # Po COMMA +002D ; Pattern_Syntax # Pd HYPHEN-MINUS +002E..002F ; Pattern_Syntax # Po [2] FULL STOP..SOLIDUS +003A..003B ; Pattern_Syntax # Po [2] COLON..SEMICOLON +003C..003E ; Pattern_Syntax # Sm [3] LESS-THAN SIGN..GREATER-THAN SIGN +003F..0040 ; Pattern_Syntax 
# Po [2] QUESTION MARK..COMMERCIAL AT +005B ; Pattern_Syntax # Ps LEFT SQUARE BRACKET +005C ; Pattern_Syntax # Po REVERSE SOLIDUS +005D ; Pattern_Syntax # Pe RIGHT SQUARE BRACKET +005E ; Pattern_Syntax # Sk CIRCUMFLEX ACCENT +0060 ; Pattern_Syntax # Sk GRAVE ACCENT +007B ; Pattern_Syntax # Ps LEFT CURLY BRACKET +007C ; Pattern_Syntax # Sm VERTICAL LINE +007D ; Pattern_Syntax # Pe RIGHT CURLY BRACKET +007E ; Pattern_Syntax # Sm TILDE +00A1 ; Pattern_Syntax # Po INVERTED EXCLAMATION MARK +00A2..00A5 ; Pattern_Syntax # Sc [4] CENT SIGN..YEN SIGN +00A6 ; Pattern_Syntax # So BROKEN BAR +00A7 ; Pattern_Syntax # Po SECTION SIGN +00A9 ; Pattern_Syntax # So COPYRIGHT SIGN +00AB ; Pattern_Syntax # Pi LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +00AC ; Pattern_Syntax # Sm NOT SIGN +00AE ; Pattern_Syntax # So REGISTERED SIGN +00B0 ; Pattern_Syntax # So DEGREE SIGN +00B1 ; Pattern_Syntax # Sm PLUS-MINUS SIGN +00B6 ; Pattern_Syntax # Po PILCROW SIGN +00BB ; Pattern_Syntax # Pf RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +00BF ; Pattern_Syntax # Po INVERTED QUESTION MARK +00D7 ; Pattern_Syntax # Sm MULTIPLICATION SIGN +00F7 ; Pattern_Syntax # Sm DIVISION SIGN +2010..2015 ; Pattern_Syntax # Pd [6] HYPHEN..HORIZONTAL BAR +2016..2017 ; Pattern_Syntax # Po [2] DOUBLE VERTICAL LINE..DOUBLE LOW LINE +2018 ; Pattern_Syntax # Pi LEFT SINGLE QUOTATION MARK +2019 ; Pattern_Syntax # Pf RIGHT SINGLE QUOTATION MARK +201A ; Pattern_Syntax # Ps SINGLE LOW-9 QUOTATION MARK +201B..201C ; Pattern_Syntax # Pi [2] SINGLE HIGH-REVERSED-9 QUOTATION MARK..LEFT DOUBLE QUOTATION MARK +201D ; Pattern_Syntax # Pf RIGHT DOUBLE QUOTATION MARK +201E ; Pattern_Syntax # Ps DOUBLE LOW-9 QUOTATION MARK +201F ; Pattern_Syntax # Pi DOUBLE HIGH-REVERSED-9 QUOTATION MARK +2020..2027 ; Pattern_Syntax # Po [8] DAGGER..HYPHENATION POINT +2030..2038 ; Pattern_Syntax # Po [9] PER MILLE SIGN..CARET +2039 ; Pattern_Syntax # Pi SINGLE LEFT-POINTING ANGLE QUOTATION MARK +203A ; Pattern_Syntax # Pf SINGLE RIGHT-POINTING ANGLE 
QUOTATION MARK +203B..203E ; Pattern_Syntax # Po [4] REFERENCE MARK..OVERLINE +2041..2043 ; Pattern_Syntax # Po [3] CARET INSERTION POINT..HYPHEN BULLET +2044 ; Pattern_Syntax # Sm FRACTION SLASH +2045 ; Pattern_Syntax # Ps LEFT SQUARE BRACKET WITH QUILL +2046 ; Pattern_Syntax # Pe RIGHT SQUARE BRACKET WITH QUILL +2047..2051 ; Pattern_Syntax # Po [11] DOUBLE QUESTION MARK..TWO ASTERISKS ALIGNED VERTICALLY +2052 ; Pattern_Syntax # Sm COMMERCIAL MINUS SIGN +2053 ; Pattern_Syntax # Po SWUNG DASH +2055..205E ; Pattern_Syntax # Po [10] FLOWER PUNCTUATION MARK..VERTICAL FOUR DOTS +2190..2194 ; Pattern_Syntax # Sm [5] LEFTWARDS ARROW..LEFT RIGHT ARROW +2195..2199 ; Pattern_Syntax # So [5] UP DOWN ARROW..SOUTH WEST ARROW +219A..219B ; Pattern_Syntax # Sm [2] LEFTWARDS ARROW WITH STROKE..RIGHTWARDS ARROW WITH STROKE +219C..219F ; Pattern_Syntax # So [4] LEFTWARDS WAVE ARROW..UPWARDS TWO HEADED ARROW +21A0 ; Pattern_Syntax # Sm RIGHTWARDS TWO HEADED ARROW +21A1..21A2 ; Pattern_Syntax # So [2] DOWNWARDS TWO HEADED ARROW..LEFTWARDS ARROW WITH TAIL +21A3 ; Pattern_Syntax # Sm RIGHTWARDS ARROW WITH TAIL +21A4..21A5 ; Pattern_Syntax # So [2] LEFTWARDS ARROW FROM BAR..UPWARDS ARROW FROM BAR +21A6 ; Pattern_Syntax # Sm RIGHTWARDS ARROW FROM BAR +21A7..21AD ; Pattern_Syntax # So [7] DOWNWARDS ARROW FROM BAR..LEFT RIGHT WAVE ARROW +21AE ; Pattern_Syntax # Sm LEFT RIGHT ARROW WITH STROKE +21AF..21CD ; Pattern_Syntax # So [31] DOWNWARDS ZIGZAG ARROW..LEFTWARDS DOUBLE ARROW WITH STROKE +21CE..21CF ; Pattern_Syntax # Sm [2] LEFT RIGHT DOUBLE ARROW WITH STROKE..RIGHTWARDS DOUBLE ARROW WITH STROKE +21D0..21D1 ; Pattern_Syntax # So [2] LEFTWARDS DOUBLE ARROW..UPWARDS DOUBLE ARROW +21D2 ; Pattern_Syntax # Sm RIGHTWARDS DOUBLE ARROW +21D3 ; Pattern_Syntax # So DOWNWARDS DOUBLE ARROW +21D4 ; Pattern_Syntax # Sm LEFT RIGHT DOUBLE ARROW +21D5..21F3 ; Pattern_Syntax # So [31] UP DOWN DOUBLE ARROW..UP DOWN WHITE ARROW +21F4..22FF ; Pattern_Syntax # Sm [268] RIGHT ARROW WITH SMALL CIRCLE..Z 
NOTATION BAG MEMBERSHIP +2300..2307 ; Pattern_Syntax # So [8] DIAMETER SIGN..WAVY LINE +2308 ; Pattern_Syntax # Ps LEFT CEILING +2309 ; Pattern_Syntax # Pe RIGHT CEILING +230A ; Pattern_Syntax # Ps LEFT FLOOR +230B ; Pattern_Syntax # Pe RIGHT FLOOR +230C..231F ; Pattern_Syntax # So [20] BOTTOM RIGHT CROP..BOTTOM RIGHT CORNER +2320..2321 ; Pattern_Syntax # Sm [2] TOP HALF INTEGRAL..BOTTOM HALF INTEGRAL +2322..2328 ; Pattern_Syntax # So [7] FROWN..KEYBOARD +2329 ; Pattern_Syntax # Ps LEFT-POINTING ANGLE BRACKET +232A ; Pattern_Syntax # Pe RIGHT-POINTING ANGLE BRACKET +232B..237B ; Pattern_Syntax # So [81] ERASE TO THE LEFT..NOT CHECK MARK +237C ; Pattern_Syntax # Sm RIGHT ANGLE WITH DOWNWARDS ZIGZAG ARROW +237D..239A ; Pattern_Syntax # So [30] SHOULDERED OPEN BOX..CLEAR SCREEN SYMBOL +239B..23B3 ; Pattern_Syntax # Sm [25] LEFT PARENTHESIS UPPER HOOK..SUMMATION BOTTOM +23B4..23DB ; Pattern_Syntax # So [40] TOP SQUARE BRACKET..FUSE +23DC..23E1 ; Pattern_Syntax # Sm [6] TOP PARENTHESIS..BOTTOM TORTOISE SHELL BRACKET +23E2..23FA ; Pattern_Syntax # So [25] WHITE TRAPEZIUM..BLACK CIRCLE FOR RECORD +23FB..23FF ; Pattern_Syntax # Cn [5] <reserved-23FB>..<reserved-23FF> +2400..2426 ; Pattern_Syntax # So [39] SYMBOL FOR NULL..SYMBOL FOR SUBSTITUTE FORM TWO +2427..243F ; Pattern_Syntax # Cn [25] <reserved-2427>..<reserved-243F> +2440..244A ; Pattern_Syntax # So [11] OCR HOOK..OCR DOUBLE BACKSLASH +244B..245F ; Pattern_Syntax # Cn [21] <reserved-244B>..<reserved-245F> +2500..25B6 ; Pattern_Syntax # So [183] BOX DRAWINGS LIGHT HORIZONTAL..BLACK RIGHT-POINTING TRIANGLE +25B7 ; Pattern_Syntax # Sm WHITE RIGHT-POINTING TRIANGLE +25B8..25C0 ; Pattern_Syntax # So [9] BLACK RIGHT-POINTING SMALL TRIANGLE..BLACK LEFT-POINTING TRIANGLE +25C1 ; Pattern_Syntax # Sm WHITE LEFT-POINTING TRIANGLE +25C2..25F7 ; Pattern_Syntax # So [54] BLACK LEFT-POINTING SMALL TRIANGLE..WHITE CIRCLE WITH UPPER RIGHT QUADRANT +25F8..25FF ; Pattern_Syntax # Sm [8] UPPER LEFT TRIANGLE..LOWER RIGHT TRIANGLE 
+2600..266E ; Pattern_Syntax # So [111] BLACK SUN WITH RAYS..MUSIC NATURAL SIGN +266F ; Pattern_Syntax # Sm MUSIC SHARP SIGN +2670..2767 ; Pattern_Syntax # So [248] WEST SYRIAC CROSS..ROTATED FLORAL HEART BULLET +2768 ; Pattern_Syntax # Ps MEDIUM LEFT PARENTHESIS ORNAMENT +2769 ; Pattern_Syntax # Pe MEDIUM RIGHT PARENTHESIS ORNAMENT +276A ; Pattern_Syntax # Ps MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT +276B ; Pattern_Syntax # Pe MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT +276C ; Pattern_Syntax # Ps MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT +276D ; Pattern_Syntax # Pe MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT +276E ; Pattern_Syntax # Ps HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT +276F ; Pattern_Syntax # Pe HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT +2770 ; Pattern_Syntax # Ps HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT +2771 ; Pattern_Syntax # Pe HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT +2772 ; Pattern_Syntax # Ps LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT +2773 ; Pattern_Syntax # Pe LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT +2774 ; Pattern_Syntax # Ps MEDIUM LEFT CURLY BRACKET ORNAMENT +2775 ; Pattern_Syntax # Pe MEDIUM RIGHT CURLY BRACKET ORNAMENT +2794..27BF ; Pattern_Syntax # So [44] HEAVY WIDE-HEADED RIGHTWARDS ARROW..DOUBLE CURLY LOOP +27C0..27C4 ; Pattern_Syntax # Sm [5] THREE DIMENSIONAL ANGLE..OPEN SUPERSET +27C5 ; Pattern_Syntax # Ps LEFT S-SHAPED BAG DELIMITER +27C6 ; Pattern_Syntax # Pe RIGHT S-SHAPED BAG DELIMITER +27C7..27E5 ; Pattern_Syntax # Sm [31] OR WITH DOT INSIDE..WHITE SQUARE WITH RIGHTWARDS TICK +27E6 ; Pattern_Syntax # Ps MATHEMATICAL LEFT WHITE SQUARE BRACKET +27E7 ; Pattern_Syntax # Pe MATHEMATICAL RIGHT WHITE SQUARE BRACKET +27E8 ; Pattern_Syntax # Ps MATHEMATICAL LEFT ANGLE BRACKET +27E9 ; Pattern_Syntax # Pe MATHEMATICAL RIGHT ANGLE BRACKET +27EA ; Pattern_Syntax # Ps MATHEMATICAL LEFT DOUBLE ANGLE BRACKET +27EB ; Pattern_Syntax # Pe MATHEMATICAL RIGHT DOUBLE ANGLE BRACKET +27EC ; Pattern_Syntax # Ps 
MATHEMATICAL LEFT WHITE TORTOISE SHELL BRACKET +27ED ; Pattern_Syntax # Pe MATHEMATICAL RIGHT WHITE TORTOISE SHELL BRACKET +27EE ; Pattern_Syntax # Ps MATHEMATICAL LEFT FLATTENED PARENTHESIS +27EF ; Pattern_Syntax # Pe MATHEMATICAL RIGHT FLATTENED PARENTHESIS +27F0..27FF ; Pattern_Syntax # Sm [16] UPWARDS QUADRUPLE ARROW..LONG RIGHTWARDS SQUIGGLE ARROW +2800..28FF ; Pattern_Syntax # So [256] BRAILLE PATTERN BLANK..BRAILLE PATTERN DOTS-12345678 +2900..2982 ; Pattern_Syntax # Sm [131] RIGHTWARDS TWO-HEADED ARROW WITH VERTICAL STROKE..Z NOTATION TYPE COLON +2983 ; Pattern_Syntax # Ps LEFT WHITE CURLY BRACKET +2984 ; Pattern_Syntax # Pe RIGHT WHITE CURLY BRACKET +2985 ; Pattern_Syntax # Ps LEFT WHITE PARENTHESIS +2986 ; Pattern_Syntax # Pe RIGHT WHITE PARENTHESIS +2987 ; Pattern_Syntax # Ps Z NOTATION LEFT IMAGE BRACKET +2988 ; Pattern_Syntax # Pe Z NOTATION RIGHT IMAGE BRACKET +2989 ; Pattern_Syntax # Ps Z NOTATION LEFT BINDING BRACKET +298A ; Pattern_Syntax # Pe Z NOTATION RIGHT BINDING BRACKET +298B ; Pattern_Syntax # Ps LEFT SQUARE BRACKET WITH UNDERBAR +298C ; Pattern_Syntax # Pe RIGHT SQUARE BRACKET WITH UNDERBAR +298D ; Pattern_Syntax # Ps LEFT SQUARE BRACKET WITH TICK IN TOP CORNER +298E ; Pattern_Syntax # Pe RIGHT SQUARE BRACKET WITH TICK IN BOTTOM CORNER +298F ; Pattern_Syntax # Ps LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER +2990 ; Pattern_Syntax # Pe RIGHT SQUARE BRACKET WITH TICK IN TOP CORNER +2991 ; Pattern_Syntax # Ps LEFT ANGLE BRACKET WITH DOT +2992 ; Pattern_Syntax # Pe RIGHT ANGLE BRACKET WITH DOT +2993 ; Pattern_Syntax # Ps LEFT ARC LESS-THAN BRACKET +2994 ; Pattern_Syntax # Pe RIGHT ARC GREATER-THAN BRACKET +2995 ; Pattern_Syntax # Ps DOUBLE LEFT ARC GREATER-THAN BRACKET +2996 ; Pattern_Syntax # Pe DOUBLE RIGHT ARC LESS-THAN BRACKET +2997 ; Pattern_Syntax # Ps LEFT BLACK TORTOISE SHELL BRACKET +2998 ; Pattern_Syntax # Pe RIGHT BLACK TORTOISE SHELL BRACKET +2999..29D7 ; Pattern_Syntax # Sm [63] DOTTED FENCE..BLACK HOURGLASS +29D8 ; 
Pattern_Syntax # Ps LEFT WIGGLY FENCE +29D9 ; Pattern_Syntax # Pe RIGHT WIGGLY FENCE +29DA ; Pattern_Syntax # Ps LEFT DOUBLE WIGGLY FENCE +29DB ; Pattern_Syntax # Pe RIGHT DOUBLE WIGGLY FENCE +29DC..29FB ; Pattern_Syntax # Sm [32] INCOMPLETE INFINITY..TRIPLE PLUS +29FC ; Pattern_Syntax # Ps LEFT-POINTING CURVED ANGLE BRACKET +29FD ; Pattern_Syntax # Pe RIGHT-POINTING CURVED ANGLE BRACKET +29FE..2AFF ; Pattern_Syntax # Sm [258] TINY..N-ARY WHITE VERTICAL BAR +2B00..2B2F ; Pattern_Syntax # So [48] NORTH EAST WHITE ARROW..WHITE VERTICAL ELLIPSE +2B30..2B44 ; Pattern_Syntax # Sm [21] LEFT ARROW WITH SMALL CIRCLE..RIGHTWARDS ARROW THROUGH SUPERSET +2B45..2B46 ; Pattern_Syntax # So [2] LEFTWARDS QUADRUPLE ARROW..RIGHTWARDS QUADRUPLE ARROW +2B47..2B4C ; Pattern_Syntax # Sm [6] REVERSE TILDE OPERATOR ABOVE RIGHTWARDS ARROW..RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR +2B4D..2B73 ; Pattern_Syntax # So [39] DOWNWARDS TRIANGLE-HEADED ZIGZAG ARROW..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR +2B74..2B75 ; Pattern_Syntax # Cn [2] <reserved-2B74>..<reserved-2B75> +2B76..2B95 ; Pattern_Syntax # So [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW +2B96..2B97 ; Pattern_Syntax # Cn [2] <reserved-2B96>..<reserved-2B97> +2B98..2BB9 ; Pattern_Syntax # So [34] THREE-D TOP-LIGHTED LEFTWARDS EQUILATERAL ARROWHEAD..UP ARROWHEAD IN A RECTANGLE BOX +2BBA..2BBC ; Pattern_Syntax # Cn [3] <reserved-2BBA>..<reserved-2BBC> +2BBD..2BC8 ; Pattern_Syntax # So [12] BALLOT BOX WITH LIGHT X..BLACK MEDIUM RIGHT-POINTING TRIANGLE CENTRED +2BC9 ; Pattern_Syntax # Cn <reserved-2BC9> +2BCA..2BD1 ; Pattern_Syntax # So [8] TOP HALF BLACK CIRCLE..UNCERTAINTY SIGN +2BD2..2BEB ; Pattern_Syntax # Cn [26] <reserved-2BD2>..<reserved-2BEB> +2BEC..2BEF ; Pattern_Syntax # So [4] LEFTWARDS TWO-HEADED ARROW WITH TRIANGLE ARROWHEADS..DOWNWARDS TWO-HEADED ARROW WITH TRIANGLE ARROWHEADS +2BF0..2BFF ; Pattern_Syntax # Cn [16] <reserved-2BF0>..<reserved-2BFF> +2E00..2E01 ; Pattern_Syntax # Po [2] RIGHT ANGLE 
SUBSTITUTION MARKER..RIGHT ANGLE DOTTED SUBSTITUTION MARKER +2E02 ; Pattern_Syntax # Pi LEFT SUBSTITUTION BRACKET +2E03 ; Pattern_Syntax # Pf RIGHT SUBSTITUTION BRACKET +2E04 ; Pattern_Syntax # Pi LEFT DOTTED SUBSTITUTION BRACKET +2E05 ; Pattern_Syntax # Pf RIGHT DOTTED SUBSTITUTION BRACKET +2E06..2E08 ; Pattern_Syntax # Po [3] RAISED INTERPOLATION MARKER..DOTTED TRANSPOSITION MARKER +2E09 ; Pattern_Syntax # Pi LEFT TRANSPOSITION BRACKET +2E0A ; Pattern_Syntax # Pf RIGHT TRANSPOSITION BRACKET +2E0B ; Pattern_Syntax # Po RAISED SQUARE +2E0C ; Pattern_Syntax # Pi LEFT RAISED OMISSION BRACKET +2E0D ; Pattern_Syntax # Pf RIGHT RAISED OMISSION BRACKET +2E0E..2E16 ; Pattern_Syntax # Po [9] EDITORIAL CORONIS..DOTTED RIGHT-POINTING ANGLE +2E17 ; Pattern_Syntax # Pd DOUBLE OBLIQUE HYPHEN +2E18..2E19 ; Pattern_Syntax # Po [2] INVERTED INTERROBANG..PALM BRANCH +2E1A ; Pattern_Syntax # Pd HYPHEN WITH DIAERESIS +2E1B ; Pattern_Syntax # Po TILDE WITH RING ABOVE +2E1C ; Pattern_Syntax # Pi LEFT LOW PARAPHRASE BRACKET +2E1D ; Pattern_Syntax # Pf RIGHT LOW PARAPHRASE BRACKET +2E1E..2E1F ; Pattern_Syntax # Po [2] TILDE WITH DOT ABOVE..TILDE WITH DOT BELOW +2E20 ; Pattern_Syntax # Pi LEFT VERTICAL BAR WITH QUILL +2E21 ; Pattern_Syntax # Pf RIGHT VERTICAL BAR WITH QUILL +2E22 ; Pattern_Syntax # Ps TOP LEFT HALF BRACKET +2E23 ; Pattern_Syntax # Pe TOP RIGHT HALF BRACKET +2E24 ; Pattern_Syntax # Ps BOTTOM LEFT HALF BRACKET +2E25 ; Pattern_Syntax # Pe BOTTOM RIGHT HALF BRACKET +2E26 ; Pattern_Syntax # Ps LEFT SIDEWAYS U BRACKET +2E27 ; Pattern_Syntax # Pe RIGHT SIDEWAYS U BRACKET +2E28 ; Pattern_Syntax # Ps LEFT DOUBLE PARENTHESIS +2E29 ; Pattern_Syntax # Pe RIGHT DOUBLE PARENTHESIS +2E2A..2E2E ; Pattern_Syntax # Po [5] TWO DOTS OVER ONE DOT PUNCTUATION..REVERSED QUESTION MARK +2E2F ; Pattern_Syntax # Lm VERTICAL TILDE +2E30..2E39 ; Pattern_Syntax # Po [10] RING POINT..TOP HALF SECTION SIGN +2E3A..2E3B ; Pattern_Syntax # Pd [2] TWO-EM DASH..THREE-EM DASH +2E3C..2E3F ; Pattern_Syntax # Po 
[4] STENOGRAPHIC FULL STOP..CAPITULUM +2E40 ; Pattern_Syntax # Pd DOUBLE HYPHEN +2E41 ; Pattern_Syntax # Po REVERSED COMMA +2E42 ; Pattern_Syntax # Ps DOUBLE LOW-REVERSED-9 QUOTATION MARK +2E43..2E7F ; Pattern_Syntax # Cn [61] <reserved-2E43>..<reserved-2E7F> +3001..3003 ; Pattern_Syntax # Po [3] IDEOGRAPHIC COMMA..DITTO MARK +3008 ; Pattern_Syntax # Ps LEFT ANGLE BRACKET +3009 ; Pattern_Syntax # Pe RIGHT ANGLE BRACKET +300A ; Pattern_Syntax # Ps LEFT DOUBLE ANGLE BRACKET +300B ; Pattern_Syntax # Pe RIGHT DOUBLE ANGLE BRACKET +300C ; Pattern_Syntax # Ps LEFT CORNER BRACKET +300D ; Pattern_Syntax # Pe RIGHT CORNER BRACKET +300E ; Pattern_Syntax # Ps LEFT WHITE CORNER BRACKET +300F ; Pattern_Syntax # Pe RIGHT WHITE CORNER BRACKET +3010 ; Pattern_Syntax # Ps LEFT BLACK LENTICULAR BRACKET +3011 ; Pattern_Syntax # Pe RIGHT BLACK LENTICULAR BRACKET +3012..3013 ; Pattern_Syntax # So [2] POSTAL MARK..GETA MARK +3014 ; Pattern_Syntax # Ps LEFT TORTOISE SHELL BRACKET +3015 ; Pattern_Syntax # Pe RIGHT TORTOISE SHELL BRACKET +3016 ; Pattern_Syntax # Ps LEFT WHITE LENTICULAR BRACKET +3017 ; Pattern_Syntax # Pe RIGHT WHITE LENTICULAR BRACKET +3018 ; Pattern_Syntax # Ps LEFT WHITE TORTOISE SHELL BRACKET +3019 ; Pattern_Syntax # Pe RIGHT WHITE TORTOISE SHELL BRACKET +301A ; Pattern_Syntax # Ps LEFT WHITE SQUARE BRACKET +301B ; Pattern_Syntax # Pe RIGHT WHITE SQUARE BRACKET +301C ; Pattern_Syntax # Pd WAVE DASH +301D ; Pattern_Syntax # Ps REVERSED DOUBLE PRIME QUOTATION MARK +301E..301F ; Pattern_Syntax # Pe [2] DOUBLE PRIME QUOTATION MARK..LOW DOUBLE PRIME QUOTATION MARK +3020 ; Pattern_Syntax # So POSTAL MARK FACE +3030 ; Pattern_Syntax # Pd WAVY DASH +FD3E ; Pattern_Syntax # Pe ORNATE LEFT PARENTHESIS +FD3F ; Pattern_Syntax # Ps ORNATE RIGHT PARENTHESIS +FE45..FE46 ; Pattern_Syntax # Po [2] SESAME DOT..WHITE SESAME DOT + +# Total code points: 2760 + +# EOF diff --git a/src/third_party/unicode-8.0.0/ReadMe.txt b/src/third_party/unicode-8.0.0/ReadMe.txt new file mode 100644 index 
00000000000..fc4a9044b7e --- /dev/null +++ b/src/third_party/unicode-8.0.0/ReadMe.txt @@ -0,0 +1,17 @@ +# Date: 2015-06-16, 20:24:00 GMT [KW] +# +# Unicode Character Database +# Copyright (c) 1991-2015 Unicode, Inc. +# For terms of use, see http://www.unicode.org/terms_of_use.html +# +# For documentation, see the following: +# NamesList.html +# UAX #38, "Unicode Han Database (Unihan)" +# UAX #44, "Unicode Character Database." +# +# The UAXes can be accessed at http://www.unicode.org/versions/Unicode8.0.0/ + +This directory contains the final data files +for the Unicode Character Database, for Version 8.0.0 of the Unicode +Standard. + |