diff options
author | Karl Williamson <public@khwilliamson.com> | 2012-02-02 14:12:29 -0700 |
---|---|---|
committer | Karl Williamson <public@khwilliamson.com> | 2012-02-04 15:14:57 -0700 |
commit | 7620cb1076a0ca7cf9c77b73d4e7c6ff861d3a91 (patch) | |
tree | 2e061ab40e6bde92ebf4663a8ca0a2dcf4a818c1 | |
parent | 1f3b48882b3173e918e15fa542773c54410f6684 (diff) | |
download | perl-7620cb1076a0ca7cf9c77b73d4e7c6ff861d3a91.tar.gz |
Unicode 6.1
This commit delivers the official Unicode character database files for
release 6.1, plus the final bits needed to cope with the changes in them
from release 6.0, including documentation.
56 files changed, 9089 insertions, 2861 deletions
diff --git a/l1_char_class_tab.h b/l1_char_class_tab.h index ce8d53384d..4d2612bb9d 100644 --- a/l1_char_class_tab.h +++ b/l1_char_class_tab.h @@ -172,7 +172,7 @@ /* U+A4 CURRENCY SIGN */ _CC_GRAPH_L1|_CC_PRINT_L1, /* U+A5 YEN SIGN */ _CC_GRAPH_L1|_CC_PRINT_L1, /* U+A6 BROKEN BAR */ _CC_GRAPH_L1|_CC_PRINT_L1, -/* U+A7 SECTION SIGN */ _CC_GRAPH_L1|_CC_PRINT_L1, +/* U+A7 SECTION SIGN */ _CC_GRAPH_L1|_CC_PRINT_L1|_CC_PUNCT_L1, /* U+A8 DIAERESIS */ _CC_GRAPH_L1|_CC_PRINT_L1, /* U+A9 COPYRIGHT SIGN */ _CC_GRAPH_L1|_CC_PRINT_L1, /* U+AA FEMININE ORDINAL INDICATOR */ _CC_ALNUMC_L1|_CC_ALPHA_L1|_CC_CHARNAME_CONT|_CC_GRAPH_L1|_CC_IDFIRST_L1|_CC_LOWER_L1|_CC_PRINT_L1|_CC_WORDCHAR_L1, @@ -187,7 +187,7 @@ /* U+B3 SUPERSCRIPT THREE */ _CC_GRAPH_L1|_CC_PRINT_L1, /* U+B4 ACUTE ACCENT */ _CC_GRAPH_L1|_CC_PRINT_L1, /* U+B5 MICRO SIGN */ _CC_NONLATIN1_FOLD|_CC_ALNUMC_L1|_CC_ALPHA_L1|_CC_CHARNAME_CONT|_CC_GRAPH_L1|_CC_IDFIRST_L1|_CC_LOWER_L1|_CC_PRINT_L1|_CC_WORDCHAR_L1, -/* U+B6 PILCROW SIGN */ _CC_GRAPH_L1|_CC_PRINT_L1, +/* U+B6 PILCROW SIGN */ _CC_GRAPH_L1|_CC_PRINT_L1|_CC_PUNCT_L1, /* U+B7 MIDDLE DOT */ _CC_GRAPH_L1|_CC_PRINT_L1|_CC_PUNCT_L1, /* U+B8 CEDILLA */ _CC_GRAPH_L1|_CC_PRINT_L1, /* U+B9 SUPERSCRIPT ONE */ _CC_GRAPH_L1|_CC_PRINT_L1, diff --git a/lib/Unicode/UCD.pm b/lib/Unicode/UCD.pm index 074284f5fb..a1f16a99ff 100644 --- a/lib/Unicode/UCD.pm +++ b/lib/Unicode/UCD.pm @@ -2252,20 +2252,56 @@ Devanagari, Gurmukhi, and Oriya scripts. The Name_Alias property is of this form. But each scalar consists of two components: 1) the name, and 2) the type of alias this is. They are -separated by a colon and a space. In Unicode 6.0, there are two alias types: -C<"correction">, which indicates that the name is a corrected form for the -original name (which remains valid) for the same code point; and C<"control">, -which adds a new name for a control character. +separated by a colon and a space. 
In Unicode 6.1, there are several alias types: + +=over + +=item C<correction> + +indicates that the name is a corrected form for the +original name (which remains valid) for the same code point. + +=item C<control> + +adds a new name for a control character. + +=item C<alternate> + +is an alternate name for a character + +=item C<figment> + +is a name for a character that has been documented but was never in any +actual standard. + +=item C<abbreviation> + +is a common abbreviation for a character + +=back + +The lists are ordered (roughly) so the most preferred names come before less +preferred ones. For example, - @aliases_ranges @alias_maps + @aliases_ranges @alias_maps + ... + 0x009E [ 'PRIVACY MESSAGE: control', 'PM: abbreviation' ] + 0x009F [ 'APPLICATION PROGRAM COMMAND: control', + 'APC: abbreviation' + ] + 0x00A0 'NBSP: abbreviation' + 0x00A1 "" + 0x00AD 'SHY: abbreviation' + 0x00AE "" + 0x01A2 'LATIN CAPITAL LETTER GHA: correction' + 0x01A3 'LATIN SMALL LETTER GHA: correction' + 0x01A4 "" ... - 0x01A2 LATIN CAPITAL LETTER GHA: correction - 0x01A3 LATIN SMALL LETTER GHA: correction -Unicode 6.1 will introduce other types, and some map entries will be lists of -multiple name-alias pairs for a single code point. +A map to the empty string means that there is no alias defined for the code +point. =item C<r> @@ -2409,7 +2445,9 @@ the function L<charnames/charnames::viacode(code)>. Note that for control characters (C<Gc=cc>), Unicode's data files have the string "C<E<lt>controlE<gt>>", but the real name of each of these characters is the empty -string. This function returns that real name, the empty string. +string. This function returns that real name, the empty string. (There are +names for these characters, but they are aliases, not the real name, and are +contained in the C<Name_Alias> property.) 
=item C<d> @@ -3179,6 +3217,9 @@ To convert from new-style to old-style, follow this recipe: gets the lower end of the range (0th element) and then looks up the old name for its block using C<charblock>). +Note that starting in Unicode 6.1, many of the block names have shorter +synonyms. These are always given in the new style. + =head1 BUGS Does not yet support EBCDIC platforms. diff --git a/lib/Unicode/UCD.t b/lib/Unicode/UCD.t index b92dd51e4b..0178eba3af 100644 --- a/lib/Unicode/UCD.t +++ b/lib/Unicode/UCD.t @@ -342,7 +342,7 @@ is($bt->{AL}, 'Right-to-Left Arabic', 'AL is Right-to-Left Arabic'); # If this fails, then maybe one should look at the Unicode changes to see # what else might need to be updated. -is(Unicode::UCD::UnicodeVersion, '6.0.0', 'UnicodeVersion'); +is(Unicode::UCD::UnicodeVersion, '6.1.0', 'UnicodeVersion'); use Unicode::UCD qw(compexcl); @@ -470,7 +470,7 @@ is(Unicode::UCD::_getcode('U+123x'), undef, "_getcode(x123)"); { my $r1 = charscript('Latin'); my $n1 = @$r1; - is($n1, 30, "number of ranges in Latin script (Unicode 6.0.0)"); + is($n1, 30, "number of ranges in Latin script (Unicode 6.1.0)"); shift @$r1 while @$r1; my $r2 = charscript('Latin'); is(@$r2, $n1, "modifying results should not mess up internal caches"); diff --git a/lib/_charnames.pm b/lib/_charnames.pm index 5f64ebf8d3..02dbef056c 100644 --- a/lib/_charnames.pm +++ b/lib/_charnames.pm @@ -65,432 +65,26 @@ $Carp::Internal{ (__PACKAGE__) } = 1; # it alone, but since that is harder for a human to parse, I left it as-is. my %system_aliases = ( - # Synonyms for the icky 3.2 names that have parentheses. 
- 'LINE FEED' => pack("U", 0x0A), # LINE FEED (LF) - 'FORM FEED' => pack("U", 0x0C), # FORM FEED (FF) - 'CARRIAGE RETURN' => pack("U", 0x0D), # CARRIAGE RETURN (CR) - 'NEXT LINE' => pack("U", 0x85), # NEXT LINE (NEL) - # Some variant names from Wikipedia 'SINGLE-SHIFT 2' => pack("U", 0x8E), 'SINGLE-SHIFT 3' => pack("U", 0x8F), 'PRIVATE USE 1' => pack("U", 0x91), 'PRIVATE USE 2' => pack("U", 0x92), - 'START OF PROTECTED AREA' => pack("U", 0x96), - 'END OF PROTECTED AREA' => pack("U", 0x97), - - # Convenience. Standard abbreviations for the controls - 'NUL' => pack("U", 0x00), # NULL - 'SOH' => pack("U", 0x01), # START OF HEADING - 'STX' => pack("U", 0x02), # START OF TEXT - 'ETX' => pack("U", 0x03), # END OF TEXT - 'EOT' => pack("U", 0x04), # END OF TRANSMISSION - 'ENQ' => pack("U", 0x05), # ENQUIRY - 'ACK' => pack("U", 0x06), # ACKNOWLEDGE - 'BEL' => pack("U", 0x07), # ALERT; formerly BELL - 'BS' => pack("U", 0x08), # BACKSPACE - 'HT' => pack("U", 0x09), # HORIZONTAL TABULATION - 'LF' => pack("U", 0x0A), # LINE FEED (LF) - 'VT' => pack("U", 0x0B), # VERTICAL TABULATION - 'FF' => pack("U", 0x0C), # FORM FEED (FF) - 'CR' => pack("U", 0x0D), # CARRIAGE RETURN (CR) - 'SO' => pack("U", 0x0E), # SHIFT OUT - 'SI' => pack("U", 0x0F), # SHIFT IN - 'DLE' => pack("U", 0x10), # DATA LINK ESCAPE - 'DC1' => pack("U", 0x11), # DEVICE CONTROL ONE - 'DC2' => pack("U", 0x12), # DEVICE CONTROL TWO - 'DC3' => pack("U", 0x13), # DEVICE CONTROL THREE - 'DC4' => pack("U", 0x14), # DEVICE CONTROL FOUR - 'NAK' => pack("U", 0x15), # NEGATIVE ACKNOWLEDGE - 'SYN' => pack("U", 0x16), # SYNCHRONOUS IDLE - 'ETB' => pack("U", 0x17), # END OF TRANSMISSION BLOCK - 'CAN' => pack("U", 0x18), # CANCEL - 'EOM' => pack("U", 0x19), # END OF MEDIUM - 'SUB' => pack("U", 0x1A), # SUBSTITUTE - 'ESC' => pack("U", 0x1B), # ESCAPE - 'FS' => pack("U", 0x1C), # FILE SEPARATOR - 'GS' => pack("U", 0x1D), # GROUP SEPARATOR - 'RS' => pack("U", 0x1E), # RECORD SEPARATOR - 'US' => pack("U", 0x1F), # UNIT SEPARATOR - 
'DEL' => pack("U", 0x7F), # DELETE - 'BPH' => pack("U", 0x82), # BREAK PERMITTED HERE - 'NBH' => pack("U", 0x83), # NO BREAK HERE - 'NEL' => pack("U", 0x85), # NEXT LINE (NEL) - 'SSA' => pack("U", 0x86), # START OF SELECTED AREA - 'ESA' => pack("U", 0x87), # END OF SELECTED AREA - 'HTS' => pack("U", 0x88), # CHARACTER TABULATION SET - 'HTJ' => pack("U", 0x89), # CHARACTER TABULATION WITH JUSTIFICATION - 'VTS' => pack("U", 0x8A), # LINE TABULATION SET - 'PLD' => pack("U", 0x8B), # PARTIAL LINE FORWARD - 'PLU' => pack("U", 0x8C), # PARTIAL LINE BACKWARD - 'RI' => pack("U", 0x8D), # REVERSE LINE FEED - 'SS2' => pack("U", 0x8E), # SINGLE SHIFT TWO - 'SS3' => pack("U", 0x8F), # SINGLE SHIFT THREE - 'DCS' => pack("U", 0x90), # DEVICE CONTROL STRING - 'PU1' => pack("U", 0x91), # PRIVATE USE ONE - 'PU2' => pack("U", 0x92), # PRIVATE USE TWO - 'STS' => pack("U", 0x93), # SET TRANSMIT STATE - 'CCH' => pack("U", 0x94), # CANCEL CHARACTER - 'MW' => pack("U", 0x95), # MESSAGE WAITING - 'SPA' => pack("U", 0x96), # START OF GUARDED AREA - 'EPA' => pack("U", 0x97), # END OF GUARDED AREA - 'SOS' => pack("U", 0x98), # START OF STRING - 'SCI' => pack("U", 0x9A), # SINGLE CHARACTER INTRODUCER - 'CSI' => pack("U", 0x9B), # CONTROL SEQUENCE INTRODUCER - 'ST' => pack("U", 0x9C), # STRING TERMINATOR - 'OSC' => pack("U", 0x9D), # OPERATING SYSTEM COMMAND - 'PM' => pack("U", 0x9E), # PRIVACY MESSAGE - 'APC' => pack("U", 0x9F), # APPLICATION PROGRAM COMMAND - - # There are no names for these in the Unicode standard; perhaps should be - # deprecated, but then again there are no alternative names, so am not - # deprecating. And if did, the code would have to change to not recommend - # an alternative for these. 
- 'PADDING CHARACTER' => pack("U", 0x80), - 'PAD' => pack("U", 0x80), - 'HIGH OCTET PRESET' => pack("U", 0x81), - 'HOP' => pack("U", 0x81), - 'INDEX' => pack("U", 0x84), - 'IND' => pack("U", 0x84), - 'SINGLE GRAPHIC CHARACTER INTRODUCER' => pack("U", 0x99), - 'SGC' => pack("U", 0x99), - - # More convenience. For further convenience, it is suggested some way of - # using the NamesList aliases be implemented, but there are ambiguities in - # NamesList.txt - 'BOM' => pack("U", 0xFEFF), # BYTE ORDER MARK - 'BYTE ORDER MARK'=> pack("U", 0xFEFF), - 'CGJ' => pack("U", 0x034F), # COMBINING GRAPHEME JOINER - 'FVS1' => pack("U", 0x180B), # MONGOLIAN FREE VARIATION SELECTOR ONE - 'FVS2' => pack("U", 0x180C), # MONGOLIAN FREE VARIATION SELECTOR TWO - 'FVS3' => pack("U", 0x180D), # MONGOLIAN FREE VARIATION SELECTOR THREE - 'LRE' => pack("U", 0x202A), # LEFT-TO-RIGHT EMBEDDING - 'LRM' => pack("U", 0x200E), # LEFT-TO-RIGHT MARK - 'LRO' => pack("U", 0x202D), # LEFT-TO-RIGHT OVERRIDE - 'MMSP' => pack("U", 0x205F), # MEDIUM MATHEMATICAL SPACE - 'MVS' => pack("U", 0x180E), # MONGOLIAN VOWEL SEPARATOR - 'NBSP' => pack("U", 0x00A0), # NO-BREAK SPACE - 'NNBSP' => pack("U", 0x202F), # NARROW NO-BREAK SPACE - 'PDF' => pack("U", 0x202C), # POP DIRECTIONAL FORMATTING - 'RLE' => pack("U", 0x202B), # RIGHT-TO-LEFT EMBEDDING - 'RLM' => pack("U", 0x200F), # RIGHT-TO-LEFT MARK - 'RLO' => pack("U", 0x202E), # RIGHT-TO-LEFT OVERRIDE - 'SHY' => pack("U", 0x00AD), # SOFT HYPHEN - 'VS1' => pack("U", 0xFE00), # VARIATION SELECTOR-1 - 'VS2' => pack("U", 0xFE01), # VARIATION SELECTOR-2 - 'VS3' => pack("U", 0xFE02), # VARIATION SELECTOR-3 - 'VS4' => pack("U", 0xFE03), # VARIATION SELECTOR-4 - 'VS5' => pack("U", 0xFE04), # VARIATION SELECTOR-5 - 'VS6' => pack("U", 0xFE05), # VARIATION SELECTOR-6 - 'VS7' => pack("U", 0xFE06), # VARIATION SELECTOR-7 - 'VS8' => pack("U", 0xFE07), # VARIATION SELECTOR-8 - 'VS9' => pack("U", 0xFE08), # VARIATION SELECTOR-9 - 'VS10' => pack("U", 0xFE09), # VARIATION SELECTOR-10 
- 'VS11' => pack("U", 0xFE0A), # VARIATION SELECTOR-11 - 'VS12' => pack("U", 0xFE0B), # VARIATION SELECTOR-12 - 'VS13' => pack("U", 0xFE0C), # VARIATION SELECTOR-13 - 'VS14' => pack("U", 0xFE0D), # VARIATION SELECTOR-14 - 'VS15' => pack("U", 0xFE0E), # VARIATION SELECTOR-15 - 'VS16' => pack("U", 0xFE0F), # VARIATION SELECTOR-16 - 'VS17' => pack("U", 0xE0100), # VARIATION SELECTOR-17 - 'VS18' => pack("U", 0xE0101), # VARIATION SELECTOR-18 - 'VS19' => pack("U", 0xE0102), # VARIATION SELECTOR-19 - 'VS20' => pack("U", 0xE0103), # VARIATION SELECTOR-20 - 'VS21' => pack("U", 0xE0104), # VARIATION SELECTOR-21 - 'VS22' => pack("U", 0xE0105), # VARIATION SELECTOR-22 - 'VS23' => pack("U", 0xE0106), # VARIATION SELECTOR-23 - 'VS24' => pack("U", 0xE0107), # VARIATION SELECTOR-24 - 'VS25' => pack("U", 0xE0108), # VARIATION SELECTOR-25 - 'VS26' => pack("U", 0xE0109), # VARIATION SELECTOR-26 - 'VS27' => pack("U", 0xE010A), # VARIATION SELECTOR-27 - 'VS28' => pack("U", 0xE010B), # VARIATION SELECTOR-28 - 'VS29' => pack("U", 0xE010C), # VARIATION SELECTOR-29 - 'VS30' => pack("U", 0xE010D), # VARIATION SELECTOR-30 - 'VS31' => pack("U", 0xE010E), # VARIATION SELECTOR-31 - 'VS32' => pack("U", 0xE010F), # VARIATION SELECTOR-32 - 'VS33' => pack("U", 0xE0110), # VARIATION SELECTOR-33 - 'VS34' => pack("U", 0xE0111), # VARIATION SELECTOR-34 - 'VS35' => pack("U", 0xE0112), # VARIATION SELECTOR-35 - 'VS36' => pack("U", 0xE0113), # VARIATION SELECTOR-36 - 'VS37' => pack("U", 0xE0114), # VARIATION SELECTOR-37 - 'VS38' => pack("U", 0xE0115), # VARIATION SELECTOR-38 - 'VS39' => pack("U", 0xE0116), # VARIATION SELECTOR-39 - 'VS40' => pack("U", 0xE0117), # VARIATION SELECTOR-40 - 'VS41' => pack("U", 0xE0118), # VARIATION SELECTOR-41 - 'VS42' => pack("U", 0xE0119), # VARIATION SELECTOR-42 - 'VS43' => pack("U", 0xE011A), # VARIATION SELECTOR-43 - 'VS44' => pack("U", 0xE011B), # VARIATION SELECTOR-44 - 'VS45' => pack("U", 0xE011C), # VARIATION SELECTOR-45 - 'VS46' => pack("U", 0xE011D), # VARIATION 
SELECTOR-46 - 'VS47' => pack("U", 0xE011E), # VARIATION SELECTOR-47 - 'VS48' => pack("U", 0xE011F), # VARIATION SELECTOR-48 - 'VS49' => pack("U", 0xE0120), # VARIATION SELECTOR-49 - 'VS50' => pack("U", 0xE0121), # VARIATION SELECTOR-50 - 'VS51' => pack("U", 0xE0122), # VARIATION SELECTOR-51 - 'VS52' => pack("U", 0xE0123), # VARIATION SELECTOR-52 - 'VS53' => pack("U", 0xE0124), # VARIATION SELECTOR-53 - 'VS54' => pack("U", 0xE0125), # VARIATION SELECTOR-54 - 'VS55' => pack("U", 0xE0126), # VARIATION SELECTOR-55 - 'VS56' => pack("U", 0xE0127), # VARIATION SELECTOR-56 - 'VS57' => pack("U", 0xE0128), # VARIATION SELECTOR-57 - 'VS58' => pack("U", 0xE0129), # VARIATION SELECTOR-58 - 'VS59' => pack("U", 0xE012A), # VARIATION SELECTOR-59 - 'VS60' => pack("U", 0xE012B), # VARIATION SELECTOR-60 - 'VS61' => pack("U", 0xE012C), # VARIATION SELECTOR-61 - 'VS62' => pack("U", 0xE012D), # VARIATION SELECTOR-62 - 'VS63' => pack("U", 0xE012E), # VARIATION SELECTOR-63 - 'VS64' => pack("U", 0xE012F), # VARIATION SELECTOR-64 - 'VS65' => pack("U", 0xE0130), # VARIATION SELECTOR-65 - 'VS66' => pack("U", 0xE0131), # VARIATION SELECTOR-66 - 'VS67' => pack("U", 0xE0132), # VARIATION SELECTOR-67 - 'VS68' => pack("U", 0xE0133), # VARIATION SELECTOR-68 - 'VS69' => pack("U", 0xE0134), # VARIATION SELECTOR-69 - 'VS70' => pack("U", 0xE0135), # VARIATION SELECTOR-70 - 'VS71' => pack("U", 0xE0136), # VARIATION SELECTOR-71 - 'VS72' => pack("U", 0xE0137), # VARIATION SELECTOR-72 - 'VS73' => pack("U", 0xE0138), # VARIATION SELECTOR-73 - 'VS74' => pack("U", 0xE0139), # VARIATION SELECTOR-74 - 'VS75' => pack("U", 0xE013A), # VARIATION SELECTOR-75 - 'VS76' => pack("U", 0xE013B), # VARIATION SELECTOR-76 - 'VS77' => pack("U", 0xE013C), # VARIATION SELECTOR-77 - 'VS78' => pack("U", 0xE013D), # VARIATION SELECTOR-78 - 'VS79' => pack("U", 0xE013E), # VARIATION SELECTOR-79 - 'VS80' => pack("U", 0xE013F), # VARIATION SELECTOR-80 - 'VS81' => pack("U", 0xE0140), # VARIATION SELECTOR-81 - 'VS82' => pack("U", 
0xE0141), # VARIATION SELECTOR-82 - 'VS83' => pack("U", 0xE0142), # VARIATION SELECTOR-83 - 'VS84' => pack("U", 0xE0143), # VARIATION SELECTOR-84 - 'VS85' => pack("U", 0xE0144), # VARIATION SELECTOR-85 - 'VS86' => pack("U", 0xE0145), # VARIATION SELECTOR-86 - 'VS87' => pack("U", 0xE0146), # VARIATION SELECTOR-87 - 'VS88' => pack("U", 0xE0147), # VARIATION SELECTOR-88 - 'VS89' => pack("U", 0xE0148), # VARIATION SELECTOR-89 - 'VS90' => pack("U", 0xE0149), # VARIATION SELECTOR-90 - 'VS91' => pack("U", 0xE014A), # VARIATION SELECTOR-91 - 'VS92' => pack("U", 0xE014B), # VARIATION SELECTOR-92 - 'VS93' => pack("U", 0xE014C), # VARIATION SELECTOR-93 - 'VS94' => pack("U", 0xE014D), # VARIATION SELECTOR-94 - 'VS95' => pack("U", 0xE014E), # VARIATION SELECTOR-95 - 'VS96' => pack("U", 0xE014F), # VARIATION SELECTOR-96 - 'VS97' => pack("U", 0xE0150), # VARIATION SELECTOR-97 - 'VS98' => pack("U", 0xE0151), # VARIATION SELECTOR-98 - 'VS99' => pack("U", 0xE0152), # VARIATION SELECTOR-99 - 'VS100' => pack("U", 0xE0153), # VARIATION SELECTOR-100 - 'VS101' => pack("U", 0xE0154), # VARIATION SELECTOR-101 - 'VS102' => pack("U", 0xE0155), # VARIATION SELECTOR-102 - 'VS103' => pack("U", 0xE0156), # VARIATION SELECTOR-103 - 'VS104' => pack("U", 0xE0157), # VARIATION SELECTOR-104 - 'VS105' => pack("U", 0xE0158), # VARIATION SELECTOR-105 - 'VS106' => pack("U", 0xE0159), # VARIATION SELECTOR-106 - 'VS107' => pack("U", 0xE015A), # VARIATION SELECTOR-107 - 'VS108' => pack("U", 0xE015B), # VARIATION SELECTOR-108 - 'VS109' => pack("U", 0xE015C), # VARIATION SELECTOR-109 - 'VS110' => pack("U", 0xE015D), # VARIATION SELECTOR-110 - 'VS111' => pack("U", 0xE015E), # VARIATION SELECTOR-111 - 'VS112' => pack("U", 0xE015F), # VARIATION SELECTOR-112 - 'VS113' => pack("U", 0xE0160), # VARIATION SELECTOR-113 - 'VS114' => pack("U", 0xE0161), # VARIATION SELECTOR-114 - 'VS115' => pack("U", 0xE0162), # VARIATION SELECTOR-115 - 'VS116' => pack("U", 0xE0163), # VARIATION SELECTOR-116 - 'VS117' => pack("U", 
0xE0164), # VARIATION SELECTOR-117 - 'VS118' => pack("U", 0xE0165), # VARIATION SELECTOR-118 - 'VS119' => pack("U", 0xE0166), # VARIATION SELECTOR-119 - 'VS120' => pack("U", 0xE0167), # VARIATION SELECTOR-120 - 'VS121' => pack("U", 0xE0168), # VARIATION SELECTOR-121 - 'VS122' => pack("U", 0xE0169), # VARIATION SELECTOR-122 - 'VS123' => pack("U", 0xE016A), # VARIATION SELECTOR-123 - 'VS124' => pack("U", 0xE016B), # VARIATION SELECTOR-124 - 'VS125' => pack("U", 0xE016C), # VARIATION SELECTOR-125 - 'VS126' => pack("U", 0xE016D), # VARIATION SELECTOR-126 - 'VS127' => pack("U", 0xE016E), # VARIATION SELECTOR-127 - 'VS128' => pack("U", 0xE016F), # VARIATION SELECTOR-128 - 'VS129' => pack("U", 0xE0170), # VARIATION SELECTOR-129 - 'VS130' => pack("U", 0xE0171), # VARIATION SELECTOR-130 - 'VS131' => pack("U", 0xE0172), # VARIATION SELECTOR-131 - 'VS132' => pack("U", 0xE0173), # VARIATION SELECTOR-132 - 'VS133' => pack("U", 0xE0174), # VARIATION SELECTOR-133 - 'VS134' => pack("U", 0xE0175), # VARIATION SELECTOR-134 - 'VS135' => pack("U", 0xE0176), # VARIATION SELECTOR-135 - 'VS136' => pack("U", 0xE0177), # VARIATION SELECTOR-136 - 'VS137' => pack("U", 0xE0178), # VARIATION SELECTOR-137 - 'VS138' => pack("U", 0xE0179), # VARIATION SELECTOR-138 - 'VS139' => pack("U", 0xE017A), # VARIATION SELECTOR-139 - 'VS140' => pack("U", 0xE017B), # VARIATION SELECTOR-140 - 'VS141' => pack("U", 0xE017C), # VARIATION SELECTOR-141 - 'VS142' => pack("U", 0xE017D), # VARIATION SELECTOR-142 - 'VS143' => pack("U", 0xE017E), # VARIATION SELECTOR-143 - 'VS144' => pack("U", 0xE017F), # VARIATION SELECTOR-144 - 'VS145' => pack("U", 0xE0180), # VARIATION SELECTOR-145 - 'VS146' => pack("U", 0xE0181), # VARIATION SELECTOR-146 - 'VS147' => pack("U", 0xE0182), # VARIATION SELECTOR-147 - 'VS148' => pack("U", 0xE0183), # VARIATION SELECTOR-148 - 'VS149' => pack("U", 0xE0184), # VARIATION SELECTOR-149 - 'VS150' => pack("U", 0xE0185), # VARIATION SELECTOR-150 - 'VS151' => pack("U", 0xE0186), # VARIATION 
SELECTOR-151 - 'VS152' => pack("U", 0xE0187), # VARIATION SELECTOR-152 - 'VS153' => pack("U", 0xE0188), # VARIATION SELECTOR-153 - 'VS154' => pack("U", 0xE0189), # VARIATION SELECTOR-154 - 'VS155' => pack("U", 0xE018A), # VARIATION SELECTOR-155 - 'VS156' => pack("U", 0xE018B), # VARIATION SELECTOR-156 - 'VS157' => pack("U", 0xE018C), # VARIATION SELECTOR-157 - 'VS158' => pack("U", 0xE018D), # VARIATION SELECTOR-158 - 'VS159' => pack("U", 0xE018E), # VARIATION SELECTOR-159 - 'VS160' => pack("U", 0xE018F), # VARIATION SELECTOR-160 - 'VS161' => pack("U", 0xE0190), # VARIATION SELECTOR-161 - 'VS162' => pack("U", 0xE0191), # VARIATION SELECTOR-162 - 'VS163' => pack("U", 0xE0192), # VARIATION SELECTOR-163 - 'VS164' => pack("U", 0xE0193), # VARIATION SELECTOR-164 - 'VS165' => pack("U", 0xE0194), # VARIATION SELECTOR-165 - 'VS166' => pack("U", 0xE0195), # VARIATION SELECTOR-166 - 'VS167' => pack("U", 0xE0196), # VARIATION SELECTOR-167 - 'VS168' => pack("U", 0xE0197), # VARIATION SELECTOR-168 - 'VS169' => pack("U", 0xE0198), # VARIATION SELECTOR-169 - 'VS170' => pack("U", 0xE0199), # VARIATION SELECTOR-170 - 'VS171' => pack("U", 0xE019A), # VARIATION SELECTOR-171 - 'VS172' => pack("U", 0xE019B), # VARIATION SELECTOR-172 - 'VS173' => pack("U", 0xE019C), # VARIATION SELECTOR-173 - 'VS174' => pack("U", 0xE019D), # VARIATION SELECTOR-174 - 'VS175' => pack("U", 0xE019E), # VARIATION SELECTOR-175 - 'VS176' => pack("U", 0xE019F), # VARIATION SELECTOR-176 - 'VS177' => pack("U", 0xE01A0), # VARIATION SELECTOR-177 - 'VS178' => pack("U", 0xE01A1), # VARIATION SELECTOR-178 - 'VS179' => pack("U", 0xE01A2), # VARIATION SELECTOR-179 - 'VS180' => pack("U", 0xE01A3), # VARIATION SELECTOR-180 - 'VS181' => pack("U", 0xE01A4), # VARIATION SELECTOR-181 - 'VS182' => pack("U", 0xE01A5), # VARIATION SELECTOR-182 - 'VS183' => pack("U", 0xE01A6), # VARIATION SELECTOR-183 - 'VS184' => pack("U", 0xE01A7), # VARIATION SELECTOR-184 - 'VS185' => pack("U", 0xE01A8), # VARIATION SELECTOR-185 - 'VS186' => 
pack("U", 0xE01A9), # VARIATION SELECTOR-186 - 'VS187' => pack("U", 0xE01AA), # VARIATION SELECTOR-187 - 'VS188' => pack("U", 0xE01AB), # VARIATION SELECTOR-188 - 'VS189' => pack("U", 0xE01AC), # VARIATION SELECTOR-189 - 'VS190' => pack("U", 0xE01AD), # VARIATION SELECTOR-190 - 'VS191' => pack("U", 0xE01AE), # VARIATION SELECTOR-191 - 'VS192' => pack("U", 0xE01AF), # VARIATION SELECTOR-192 - 'VS193' => pack("U", 0xE01B0), # VARIATION SELECTOR-193 - 'VS194' => pack("U", 0xE01B1), # VARIATION SELECTOR-194 - 'VS195' => pack("U", 0xE01B2), # VARIATION SELECTOR-195 - 'VS196' => pack("U", 0xE01B3), # VARIATION SELECTOR-196 - 'VS197' => pack("U", 0xE01B4), # VARIATION SELECTOR-197 - 'VS198' => pack("U", 0xE01B5), # VARIATION SELECTOR-198 - 'VS199' => pack("U", 0xE01B6), # VARIATION SELECTOR-199 - 'VS200' => pack("U", 0xE01B7), # VARIATION SELECTOR-200 - 'VS201' => pack("U", 0xE01B8), # VARIATION SELECTOR-201 - 'VS202' => pack("U", 0xE01B9), # VARIATION SELECTOR-202 - 'VS203' => pack("U", 0xE01BA), # VARIATION SELECTOR-203 - 'VS204' => pack("U", 0xE01BB), # VARIATION SELECTOR-204 - 'VS205' => pack("U", 0xE01BC), # VARIATION SELECTOR-205 - 'VS206' => pack("U", 0xE01BD), # VARIATION SELECTOR-206 - 'VS207' => pack("U", 0xE01BE), # VARIATION SELECTOR-207 - 'VS208' => pack("U", 0xE01BF), # VARIATION SELECTOR-208 - 'VS209' => pack("U", 0xE01C0), # VARIATION SELECTOR-209 - 'VS210' => pack("U", 0xE01C1), # VARIATION SELECTOR-210 - 'VS211' => pack("U", 0xE01C2), # VARIATION SELECTOR-211 - 'VS212' => pack("U", 0xE01C3), # VARIATION SELECTOR-212 - 'VS213' => pack("U", 0xE01C4), # VARIATION SELECTOR-213 - 'VS214' => pack("U", 0xE01C5), # VARIATION SELECTOR-214 - 'VS215' => pack("U", 0xE01C6), # VARIATION SELECTOR-215 - 'VS216' => pack("U", 0xE01C7), # VARIATION SELECTOR-216 - 'VS217' => pack("U", 0xE01C8), # VARIATION SELECTOR-217 - 'VS218' => pack("U", 0xE01C9), # VARIATION SELECTOR-218 - 'VS219' => pack("U", 0xE01CA), # VARIATION SELECTOR-219 - 'VS220' => pack("U", 0xE01CB), # 
VARIATION SELECTOR-220 - 'VS221' => pack("U", 0xE01CC), # VARIATION SELECTOR-221 - 'VS222' => pack("U", 0xE01CD), # VARIATION SELECTOR-222 - 'VS223' => pack("U", 0xE01CE), # VARIATION SELECTOR-223 - 'VS224' => pack("U", 0xE01CF), # VARIATION SELECTOR-224 - 'VS225' => pack("U", 0xE01D0), # VARIATION SELECTOR-225 - 'VS226' => pack("U", 0xE01D1), # VARIATION SELECTOR-226 - 'VS227' => pack("U", 0xE01D2), # VARIATION SELECTOR-227 - 'VS228' => pack("U", 0xE01D3), # VARIATION SELECTOR-228 - 'VS229' => pack("U", 0xE01D4), # VARIATION SELECTOR-229 - 'VS230' => pack("U", 0xE01D5), # VARIATION SELECTOR-230 - 'VS231' => pack("U", 0xE01D6), # VARIATION SELECTOR-231 - 'VS232' => pack("U", 0xE01D7), # VARIATION SELECTOR-232 - 'VS233' => pack("U", 0xE01D8), # VARIATION SELECTOR-233 - 'VS234' => pack("U", 0xE01D9), # VARIATION SELECTOR-234 - 'VS235' => pack("U", 0xE01DA), # VARIATION SELECTOR-235 - 'VS236' => pack("U", 0xE01DB), # VARIATION SELECTOR-236 - 'VS237' => pack("U", 0xE01DC), # VARIATION SELECTOR-237 - 'VS238' => pack("U", 0xE01DD), # VARIATION SELECTOR-238 - 'VS239' => pack("U", 0xE01DE), # VARIATION SELECTOR-239 - 'VS240' => pack("U", 0xE01DF), # VARIATION SELECTOR-240 - 'VS241' => pack("U", 0xE01E0), # VARIATION SELECTOR-241 - 'VS242' => pack("U", 0xE01E1), # VARIATION SELECTOR-242 - 'VS243' => pack("U", 0xE01E2), # VARIATION SELECTOR-243 - 'VS244' => pack("U", 0xE01E3), # VARIATION SELECTOR-244 - 'VS245' => pack("U", 0xE01E4), # VARIATION SELECTOR-245 - 'VS246' => pack("U", 0xE01E5), # VARIATION SELECTOR-246 - 'VS247' => pack("U", 0xE01E6), # VARIATION SELECTOR-247 - 'VS248' => pack("U", 0xE01E7), # VARIATION SELECTOR-248 - 'VS249' => pack("U", 0xE01E8), # VARIATION SELECTOR-249 - 'VS250' => pack("U", 0xE01E9), # VARIATION SELECTOR-250 - 'VS251' => pack("U", 0xE01EA), # VARIATION SELECTOR-251 - 'VS252' => pack("U", 0xE01EB), # VARIATION SELECTOR-252 - 'VS253' => pack("U", 0xE01EC), # VARIATION SELECTOR-253 - 'VS254' => pack("U", 0xE01ED), # VARIATION SELECTOR-254 - 
'VS255' => pack("U", 0xE01EE), # VARIATION SELECTOR-255 - 'VS256' => pack("U", 0xE01EF), # VARIATION SELECTOR-256 - 'WJ' => pack("U", 0x2060), # WORD JOINER - 'ZWJ' => pack("U", 0x200D), # ZERO WIDTH JOINER - 'ZWNJ' => pack("U", 0x200C), # ZERO WIDTH NON-JOINER - 'ZWSP' => pack("U", 0x200B), # ZERO WIDTH SPACE ); # These are the aliases above that differ under :loose and :full matching # because the :full versions have blanks or hyphens in them. -my %loose_system_aliases = ( - 'LINEFEED' => pack("U", 0x0A), - 'FORMFEED' => pack("U", 0x0C), - 'CARRIAGERETURN' => pack("U", 0x0D), - 'NEXTLINE' => pack("U", 0x85), - 'SINGLESHIFT2' => pack("U", 0x8E), - 'SINGLESHIFT3' => pack("U", 0x8F), - 'PRIVATEUSE1' => pack("U", 0x91), - 'PRIVATEUSE2' => pack("U", 0x92), - 'STARTOFPROTECTEDAREA' => pack("U", 0x96), - 'ENDOFPROTECTEDAREA' => pack("U", 0x97), - 'PADDINGCHARACTER' => pack("U", 0x80), - 'HIGHOCTETPRESET' => pack("U", 0x81), - 'SINGLEGRAPHICCHARACTERINTRODUCER' => pack("U", 0x99), - 'BYTEORDERMARK' => pack("U", 0xFEFF), -); +#my %loose_system_aliases = ( +#); my %deprecated_aliases = ( - # Pre-3.2 compatibility (only for the first 256 characters). # Use of these gives deprecated message. 
- 'HORIZONTAL TABULATION' => pack("U", 0x09), # CHARACTER TABULATION - 'VERTICAL TABULATION' => pack("U", 0x0B), # LINE TABULATION - 'FILE SEPARATOR' => pack("U", 0x1C), # INFORMATION SEPARATOR FOUR - 'GROUP SEPARATOR' => pack("U", 0x1D), # INFORMATION SEPARATOR THREE - 'RECORD SEPARATOR' => pack("U", 0x1E), # INFORMATION SEPARATOR TWO - 'UNIT SEPARATOR' => pack("U", 0x1F), # INFORMATION SEPARATOR ONE - 'HORIZONTAL TABULATION SET' => pack("U", 0x88), # CHARACTER TABULATION SET - 'HORIZONTAL TABULATION WITH JUSTIFICATION' => pack("U", 0x89), # CHARACTER TABULATION WITH JUSTIFICATION - 'PARTIAL LINE DOWN' => pack("U", 0x8B), # PARTIAL LINE FORWARD - 'PARTIAL LINE UP' => pack("U", 0x8C), # PARTIAL LINE BACKWARD - 'VERTICAL TABULATION SET' => pack("U", 0x8A), # LINE TABULATION SET - 'REVERSE INDEX' => pack("U", 0x8D), # REVERSE LINE FEED - # Unicode 6.0 co-opted this for U+1F514, so deprecate it for now. 'BELL' => pack("U", 0x07), ); -my %loose_deprecated_aliases = ( - 'HORIZONTALTABULATION' => pack("U", 0x09), - 'VERTICALTABULATION' => pack("U", 0x0B), - 'FILESEPARATOR' => pack("U", 0x1C), - 'GROUPSEPARATOR' => pack("U", 0x1D), - 'RECORDSEPARATOR' => pack("U", 0x1E), - 'UNITSEPARATOR' => pack("U", 0x1F), - 'HORIZONTALTABULATIONSET' => pack("U", 0x88), - 'HORIZONTALTABULATIONWITHJUSTIFICATION' => pack("U", 0x89), - 'PARTIALLINEDOWN' => pack("U", 0x8B), - 'PARTIALLINEUP' => pack("U", 0x8C), - 'VERTICALTABULATIONSET' => pack("U", 0x8A), - 'REVERSEINDEX' => pack("U", 0x8D), -); +#my %loose_deprecated_aliases = ( +#); # These are special cased in :loose matching, differing only in a medial # hyphen @@ -720,10 +314,13 @@ sub lookup_name ($$$) { if (exists $system_aliases{$lookup_name}) { $utf8 = $system_aliases{$lookup_name}; } - elsif ($loose && exists $loose_system_aliases{$lookup_name}) { - $utf8 = $loose_system_aliases{$lookup_name}; - } - elsif (exists $deprecated_aliases{$lookup_name}) { + # There are currently no entries in this hash, so don't waste time looking + # 
for them. But the code is retained for the unlikely possibility that + # some will be added in the future. +# elsif ($loose && exists $loose_system_aliases{$lookup_name}) { +# $utf8 = $loose_system_aliases{$lookup_name}; +# } + if (exists $deprecated_aliases{$lookup_name}) { require warnings; warnings::warnif('deprecated', "Unicode character name \"$name\" is deprecated, use \"" @@ -731,14 +328,17 @@ sub lookup_name ($$$) { . "\" instead"); $utf8 = $deprecated_aliases{$lookup_name}; } - elsif ($loose && exists $loose_deprecated_aliases{$lookup_name}) { - require warnings; - warnings::warnif('deprecated', - "Unicode character name \"$name\" is deprecated, use \"" - . viacode(ord $loose_deprecated_aliases{$lookup_name}) - . "\" instead"); - $utf8 = $loose_deprecated_aliases{$lookup_name}; - } + # There are currently no entries in this hash, so don't waste time looking + # for them. But the code is retained for the unlikely possibility that + # some will be added in the future. +# elsif ($loose && exists $loose_deprecated_aliases{$lookup_name}) { +# require warnings; +# warnings::warnif('deprecated', +# "Unicode character name \"$name\" is deprecated, use \"" +# . viacode(ord $loose_deprecated_aliases{$lookup_name}) +# . "\" instead"); +# $utf8 = $loose_deprecated_aliases{$lookup_name}; +# } } my @off; # Offsets into table of pattern match begin and end @@ -1099,6 +699,8 @@ sub viacode { return $viacode{$hex} if exists $viacode{$hex}; + my $return; + # If the code point is above the max in the table, there's no point # looking through it. Checking the length first is slightly faster if (length($hex) <= 5 || CORE::hex($hex) <= 0x10FFFF) { @@ -1119,20 +721,34 @@ sub viacode { # The name starts with the next character and goes up to the # next new-line. 
Using capturing parentheses above instead of # @+ more than doubles the execution time in Perl 5.13 - $viacode{$hex} = substr($txt, $+[0], index($txt, "\n", $+[0]) - $+[0]); - return $viacode{$hex}; + $return = substr($txt, $+[0], index($txt, "\n", $+[0]) - $+[0]); + + # If not one of these 4 code points, return what we've found. + if ($hex !~ / ^ 000 (?: 8[014] | 99 ) $ /x) { + $viacode{$hex} = $return; + return $return; + } + + # For backwards compatibility, we don't return the official name of + # the 4 code points if there are user-defined aliases for them -- so + # continue looking. } } # See if there is a user name for it, before giving up completely. # First get the scoped aliases, give up if have none. my $H_ref = (caller(1))[10]; - return if ! defined $H_ref - || ! exists $H_ref->{charnames_stringified_inverse_ords}; + return if ! defined $return + && (! defined $H_ref + || ! exists $H_ref->{charnames_stringified_inverse_ords}); my %code_point_aliases = split ',', $H_ref->{charnames_stringified_inverse_ords}; if (! exists $code_point_aliases{$hex}) { + + # If there is an official alias, and no user-defined one, return that + return $return if defined $return; + if (CORE::hex($hex) > 0x10FFFF) { carp "Unicode characters only allocated up to U+10FFFF (you asked for U+$hex)"; } diff --git a/lib/charnames.pm b/lib/charnames.pm index 534ed5cd0a..07c1b70cdf 100644 --- a/lib/charnames.pm +++ b/lib/charnames.pm @@ -211,13 +211,8 @@ use variables inside the C<\N{...}>. If you want similar run-time functionality, use L<charnames::string_vianame()|/charnames::string_vianame(I<name>)>. -For the C0 and C1 control characters (U+0000..U+001F, U+0080..U+009F) -there are no official Unicode names but you can use instead the ISO 6429 -names (LINE FEED, ESCAPE, and so forth, and their abbreviations, LF, -ESC, ...). In Unicode 3.2 (as of Perl 5.8) some naming changes took -place, and ISO 6429 was updated, see L</ALIASES>. Since Unicode 6.0, it -is deprecated to use C<BELL>. 
Instead use C<ALERT> (but C<BEL> will continue -to work). +Since Unicode 6.0, it is deprecated to use C<BELL>. Instead use C<ALERT> (but +C<BEL> will continue to work). If the input name is unknown, C<\N{NAME}> raises a warning and substitutes the Unicode REPLACEMENT CHARACTER (U+FFFD). @@ -258,104 +253,15 @@ string_vianame(), since C<\N{...}> look-ups are done at compile time. =head1 ALIASES -A few aliases have been defined for convenience; instead of having -to use the official names, - - LINE FEED (LF) - FORM FEED (FF) - CARRIAGE RETURN (CR) - NEXT LINE (NEL) - -(yes, with parentheses), one can use - - LINE FEED - FORM FEED - CARRIAGE RETURN - NEXT LINE - LF - FF - CR - NEL - -All the other standard abbreviations for the controls, such as C<ACK> for -C<ACKNOWLEDGE> also can be used. - -One can also use - - BYTE ORDER MARK - BOM - -and these abbreviations - - Abbreviation Full Name - - CGJ COMBINING GRAPHEME JOINER - FVS1 MONGOLIAN FREE VARIATION SELECTOR ONE - FVS2 MONGOLIAN FREE VARIATION SELECTOR TWO - FVS3 MONGOLIAN FREE VARIATION SELECTOR THREE - LRE LEFT-TO-RIGHT EMBEDDING - LRM LEFT-TO-RIGHT MARK - LRO LEFT-TO-RIGHT OVERRIDE - MMSP MEDIUM MATHEMATICAL SPACE - MVS MONGOLIAN VOWEL SEPARATOR - NBSP NO-BREAK SPACE - NNBSP NARROW NO-BREAK SPACE - PDF POP DIRECTIONAL FORMATTING - RLE RIGHT-TO-LEFT EMBEDDING - RLM RIGHT-TO-LEFT MARK - RLO RIGHT-TO-LEFT OVERRIDE - SHY SOFT HYPHEN - VS1 VARIATION SELECTOR-1 - . - . - . 
- VS256 VARIATION SELECTOR-256 - WJ WORD JOINER - ZWJ ZERO WIDTH JOINER - ZWNJ ZERO WIDTH NON-JOINER - ZWSP ZERO WIDTH SPACE - -For backward compatibility one can use the old names for -certain C0 and C1 controls - - old new - - FILE SEPARATOR INFORMATION SEPARATOR FOUR - GROUP SEPARATOR INFORMATION SEPARATOR THREE - HORIZONTAL TABULATION CHARACTER TABULATION - HORIZONTAL TABULATION SET CHARACTER TABULATION SET - HORIZONTAL TABULATION WITH JUSTIFICATION CHARACTER TABULATION - WITH JUSTIFICATION - PARTIAL LINE DOWN PARTIAL LINE FORWARD - PARTIAL LINE UP PARTIAL LINE BACKWARD - RECORD SEPARATOR INFORMATION SEPARATOR TWO - REVERSE INDEX REVERSE LINE FEED - UNIT SEPARATOR INFORMATION SEPARATOR ONE - VERTICAL TABULATION LINE TABULATION - VERTICAL TABULATION SET LINE TABULATION SET - -but the old names in addition to giving the character -will also give a warning about being deprecated. - -And finally, certain published variants are usable, including some for -controls that have no Unicode names: - - name character - - END OF PROTECTED AREA END OF GUARDED AREA, U+0097 - HIGH OCTET PRESET U+0081 - HOP U+0081 - IND U+0084 - INDEX U+0084 - PAD U+0080 - PADDING CHARACTER U+0080 - PRIVATE USE 1 PRIVATE USE ONE, U+0091 - PRIVATE USE 2 PRIVATE USE TWO, U+0092 - SGC U+0099 - SINGLE GRAPHIC CHARACTER INTRODUCER U+0099 - SINGLE-SHIFT 2 SINGLE SHIFT TWO, U+008E - SINGLE-SHIFT 3 SINGLE SHIFT THREE, U+008F - START OF PROTECTED AREA START OF GUARDED AREA, U+0096 +Starting in Unicode 6.1 and Perl v5.16, Unicode defines many abbreviations and +names that were formerly Perl extensions, and some additional ones that Perl +did not previously accept. The list is getting too long to reproduce here, +but you can get the complete list from the Unicode web site: +L<http://www.unicode.org/Public/UNIDATA/NameAliases.txt>. + +Earlier versions of Perl accepted almost all the 6.1 names. 
These were most +extensively documented in the v5.14 version of this pod: +L<http://perldoc.perl.org/5.14.0/charnames.html#ALIASES>. =head1 CUSTOM ALIASES @@ -434,8 +340,13 @@ prints "FOUR TEARDROP-SPOKED ASTERISK". The name returned is the official name for the code point, if available; otherwise your custom alias for it. This means that your alias will only be returned for code points that don't have an official -Unicode name (nor a Unicode version 1 name), such as private use code -points, and the 4 control characters U+0080, U+0081, U+0084, and U+0099. +Unicode name (nor alias) such as private use code points. +Until Unicode 6.1, the 4 control characters U+0080, U+0081, U+0084, and U+0099 +did not have names (actually, to be precise they still don't, but they do have +aliases, which for most purposes are indistinguishable from true names). +To preserve backwards compatibility, any alias you define for these code +points will be returned by this function, in preference to the official alias. + If you define more than one name for the code point, it is indeterminate which one will be returned. diff --git a/lib/charnames.t b/lib/charnames.t index 4686b8127c..9d37daa58c 100644 --- a/lib/charnames.t +++ b/lib/charnames.t @@ -292,8 +292,8 @@ is("\N{BOM}", chr(0xFEFF), 'Verify "\N{BOM}" is correct'); is("\N{HORIZONTAL TABULATION}", "\t", 'Verify "\N{HORIZONTAL TABULATION}" eq "\t"'); - my $ok = grep { /"HORIZONTAL TABULATION" is deprecated.*"CHARACTER TABULATION"/ } @WARN; - ok($ok, '... and that gives deprecated warning'); + my $ok = ! grep { /"HORIZONTAL TABULATION" is deprecated.*"CHARACTER TABULATION"/ } @WARN; + ok($ok, '... and doesnt give deprecated warning'); # XXX These tests should be changed for 5.16, when we convert BELL to the # Unicode version. 
@@ -444,9 +444,13 @@ is(charnames::viacode("U+00000000000FEED"), "ARABIC LETTER WAW ISOLATED FORM", ' is("\N{VERTICAL TABULATION SET}", "\N{LINE TABULATION SET}", 'Verify "\N{VERTICAL TABULATION SET}" eq "\N{LINE TABULATION SET}"'); is("\N{REVERSE INDEX}", "\N{REVERSE LINE FEED}", 'Verify "\N{REVERSE INDEX}" eq "\N{REVERSE LINE FEED}"'); is("\N{SINGLE-SHIFT 2}", "\N{SINGLE SHIFT TWO}", 'Verify "\N{SINGLE-SHIFT 2}" eq "\N{SINGLE SHIFT TWO}"'); + is("\N{SINGLE-SHIFT-2}", "\N{SINGLE-SHIFT 2}", 'Verify "\N{SINGLE-SHIFT-2}" eq "\N{SINGLE SHIFT 2}"'); is("\N{SINGLE-SHIFT 3}", "\N{SINGLE SHIFT THREE}", 'Verify "\N{SINGLE-SHIFT 3}" eq "\N{SINGLE SHIFT THREE}"'); + is("\N{SINGLE-SHIFT-3}", "\N{SINGLE-SHIFT 3}", 'Verify "\N{SINGLE-SHIFT-3}" eq "\N{SINGLE SHIFT 3}"'); is("\N{PRIVATE USE 1}", "\N{PRIVATE USE ONE}", 'Verify "\N{PRIVATE USE 1}" eq "\N{PRIVATE USE ONE}"'); + is("\N{PRIVATE USE-1}", "\N{PRIVATE USE 1}", 'Verify "\N{PRIVATE USE-1}" eq "\N{PRIVATE USE 1}"'); is("\N{PRIVATE USE 2}", "\N{PRIVATE USE TWO}", 'Verify "\N{PRIVATE USE 2}" eq "\N{PRIVATE USE TWO}"'); + is("\N{PRIVATE USE-2}", "\N{PRIVATE USE 2}", 'Verify "\N{PRIVATE USE-2}" eq "\N{PRIVATE USE 2}"'); is("\N{START OF PROTECTED AREA}", "\N{START OF GUARDED AREA}", 'Verify "\N{START OF PROTECTED AREA}" eq "\N{START OF GUARDED AREA}"'); is("\N{END OF PROTECTED AREA}", "\N{END OF GUARDED AREA}", 'Verify "\N{END OF PROTECTED AREA}" eq "\N{END OF GUARDED AREA}"'); is("\N{VS1}", "\N{VARIATION SELECTOR-1}", 'Verify "\N{VS1}" eq "\N{VARIATION SELECTOR-1}"'); @@ -1056,7 +1060,7 @@ is("\N{U+1D0C5}", "\N{BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS}", 'V chomp; s/^\s*#.*//; next unless $_; - my ($hex, $name) = split ";"; + my ($hex, $name, $type) = split ";"; my $i = CORE::hex $hex; # Make sure that both aliases (the one in UnicodeData, and the one we @@ -1070,7 +1074,7 @@ is("\N{U+1D0C5}", "\N{BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS}", 'V # aliases for the same code point, and viacode should return 
only the # final one. So don't do it here; instead rely on the loop below to # pick up the test. - $names[$i] = $name; + $names[$i] = $name if $type eq 'correction'; } close $fh; @@ -1131,6 +1135,11 @@ is("\N{U+1D0C5}", "\N{BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS}", 'V my $hex = sprintf("%04X", $i); if (! $names[$i]) { + # These four code points now have names, from NameAlias, but + # aren't listed as having names in UnicodeData.txt, so viacode + # returns their alias names, not undef + next if $i == 0x80 || $i == 0x81 || $i == 0x84 || $i == 0x99; + # If there is no name for this code point, all we can # test is that. $all_pass &= ok(! defined charnames::viacode($i), "Verify viacode(0x$hex) is undefined"); diff --git a/lib/unicore/ArabicShaping.txt b/lib/unicore/ArabicShaping.txt index bab6a11f9a..35e79f6e1f 100644 --- a/lib/unicore/ArabicShaping.txt +++ b/lib/unicore/ArabicShaping.txt @@ -1,27 +1,30 @@ -# ArabicShaping-6.0.0.txt -# Date: 2010-04-30, 13:47:00 PDT [KW] +# ArabicShaping-6.1.0.txt +# Date: 2011-04-15, 23:16:00 GMT [KW] # # This file is a normative contributory data file in the # Unicode Character Database. # -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # -# This file defines the shaping classes for Arabic, Syriac, and N'Ko +# This file defines the Joining_Type and Joining_Group +# property values for Arabic, Syriac, N'Ko, and Mandaic # positional shaping, repeating in machine readable form the -# information exemplified in Tables 8-3, 8-7, 8-8, 8-11, 8-12, -# 8-13, and 13-5 of The Unicode Standard, Version 6.0. +# information exemplified in Tables 8-3, 8-8, 8-9, 8-10, 8-13, 8-14, +# 8-15, 13-5, 14-5, and 14-6 of The Unicode Standard, Version 6.1. # -# See sections 8.2, 8.3, and 13.5 of The Unicode Standard, Version 6.0 -# for more information. 
+# See sections 8.2, 8.3, 13.5, and 14.12 of The Unicode Standard, +# Version 6.1 for more information. # # Each line contains four fields, separated by a semicolon. # # Field 0: the code point, in 4-digit hexadecimal -# form, of an Arabic, Syriac, or N'Ko character. +# form, of an Arabic, Syriac, N'Ko, or Mandaic character. # -# Field 1: gives a short schematic name for that character, -# abbreviated from the normative Unicode character name. +# Field 1: gives a short schematic name for that character. +# The schematic name is descriptive of the shape, based as +# consistently as possible on a name for the skeleton and +# then the diacritic marks applied to the skeleton, if any. # Note that this schematic name is considered a comment, # and does not constitute a formal property value. # @@ -65,7 +68,7 @@ # to jg=No_Joining_Group in this data file. Other, more specific # joining group values will be defined only if an explicit proposal # to define those values exactly has been approved by the UTC. This -# is the convention exemplified by the N'Ko script. Only the Arabic +# is the convention exemplified by the N'Ko and Mandaic scripts. Only the Arabic # and Syriac scripts currently have explicit joining group values defined. 
# # Note: Code points that are not explicitly listed in this file are @@ -84,44 +87,45 @@ # Unicode; Schematic Name; Joining Type; Joining Group -# Arabic characters +# Arabic Characters 0600; ARABIC NUMBER SIGN; U; No_Joining_Group 0601; ARABIC SIGN SANAH; U; No_Joining_Group 0602; ARABIC FOOTNOTE MARKER; U; No_Joining_Group 0603; ARABIC SIGN SAFHA; U; No_Joining_Group +0604; ARABIC SIGN SAMVAT; U; No_Joining_Group 0608; ARABIC RAY; U; No_Joining_Group 060B; AFGHANI SIGN; U; No_Joining_Group -0620; YEH WITH RING; D; YEH +0620; DOTLESS YEH WITH SEPARATE RING BELOW; D; YEH 0621; HAMZA; U; No_Joining_Group -0622; MADDA ON ALEF; R; ALEF -0623; HAMZA ON ALEF; R; ALEF -0624; HAMZA ON WAW; R; WAW -0625; HAMZA UNDER ALEF; R; ALEF -0626; HAMZA ON YEH; D; YEH +0622; ALEF WITH MADDA ABOVE; R; ALEF +0623; ALEF WITH HAMZA ABOVE; R; ALEF +0624; WAW WITH HAMZA ABOVE; R; WAW +0625; ALEF WITH HAMZA BELOW; R; ALEF +0626; DOTLESS YEH WITH HAMZA ABOVE; D; YEH 0627; ALEF; R; ALEF 0628; BEH; D; BEH 0629; TEH MARBUTA; R; TEH MARBUTA -062A; TEH; D; BEH -062B; THEH; D; BEH -062C; JEEM; D; HAH +062A; DOTLESS BEH WITH 2 DOTS ABOVE; D; BEH +062B; DOTLESS BEH WITH 3 DOTS ABOVE; D; BEH +062C; HAH WITH DOT BELOW; D; HAH 062D; HAH; D; HAH -062E; KHAH; D; HAH +062E; HAH WITH DOT ABOVE; D; HAH 062F; DAL; R; DAL -0630; THAL; R; DAL +0630; DAL WITH DOT ABOVE; R; DAL 0631; REH; R; REH -0632; ZAIN; R; REH +0632; REH WITH DOT ABOVE; R; REH 0633; SEEN; D; SEEN -0634; SHEEN; D; SEEN +0634; SEEN WITH 3 DOTS ABOVE; D; SEEN 0635; SAD; D; SAD -0636; DAD; D; SAD +0636; SAD WITH DOT ABOVE; D; SAD 0637; TAH; D; TAH -0638; ZAH; D; TAH +0638; TAH WITH DOT ABOVE; D; TAH 0639; AIN; D; AIN -063A; GHAIN; D; AIN +063A; AIN WITH DOT ABOVE; D; AIN 063B; KEHEH WITH 2 DOTS ABOVE; D; GAF -063C; KEHEH WITH 3 DOTS BELOW; D; GAF -063D; FARSI YEH WITH INVERTED V; D; FARSI YEH +063C; KEHEH WITH 3 DOTS BELOW; D; GAF +063D; FARSI YEH WITH INVERTED V ABOVE; D; FARSI YEH 063E; FARSI YEH WITH 2 DOTS ABOVE; D; FARSI YEH 063F; FARSI 
YEH WITH 3 DOTS ABOVE; D; FARSI YEH 0640; TATWEEL; C; No_Joining_Group @@ -133,48 +137,48 @@ 0646; NOON; D; NOON 0647; HEH; D; HEH 0648; WAW; R; WAW -0649; ALEF MAKSURA; D; YEH +0649; DOTLESS YEH; D; YEH 064A; YEH; D; YEH 066E; DOTLESS BEH; D; BEH 066F; DOTLESS QAF; D; QAF -0671; HAMZAT WASL ON ALEF; R; ALEF -0672; WAVY HAMZA ON ALEF; R; ALEF -0673; WAVY HAMZA UNDER ALEF; R; ALEF +0671; ALEF WITH WASLA ABOVE; R; ALEF +0672; ALEF WITH WAVY HAMZA ABOVE; R; ALEF +0673; ALEF WITH WAVY HAMZA BELOW; R; ALEF 0674; HIGH HAMZA; U; No_Joining_Group 0675; HIGH HAMZA ALEF; R; ALEF 0676; HIGH HAMZA WAW; R; WAW -0677; HIGH HAMZA WAW WITH DAMMA; R; WAW -0678; HIGH HAMZA YEH; D; YEH -0679; TEH WITH SMALL TAH; D; BEH -067A; TEH WITH 2 DOTS VERTICAL ABOVE; D; BEH -067B; BEH WITH 2 DOTS VERTICAL BELOW; D; BEH -067C; TEH WITH RING; D; BEH -067D; TEH WITH 3 DOTS ABOVE DOWNWARD; D; BEH -067E; TEH WITH 3 DOTS BELOW; D; BEH -067F; TEH WITH 4 DOTS ABOVE; D; BEH -0680; BEH WITH 4 DOTS BELOW; D; BEH -0681; HAMZA ON HAH; D; HAH -0682; HAH WITH 2 DOTS VERTICAL ABOVE; D; HAH -0683; HAH WITH MIDDLE 2 DOTS; D; HAH -0684; HAH WITH MIDDLE 2 DOTS VERTICAL; D; HAH +0677; HIGH HAMZA WAW WITH DAMMA ABOVE; R; WAW +0678; HIGH HAMZA DOTLESS YEH; D; YEH +0679; DOTLESS BEH WITH TAH ABOVE; D; BEH +067A; DOTLESS BEH WITH VERTICAL 2 DOTS ABOVE; D; BEH +067B; DOTLESS BEH WITH VERTICAL 2 DOTS BELOW; D; BEH +067C; DOTLESS BEH WITH ATTACHED RING BELOW AND 2 DOTS ABOVE; D; BEH +067D; DOTLESS BEH WITH INVERTED 3 DOTS ABOVE; D; BEH +067E; DOTLESS BEH WITH 3 DOTS BELOW; D; BEH +067F; DOTLESS BEH WITH 4 DOTS ABOVE; D; BEH +0680; DOTLESS BEH WITH 4 DOTS BELOW; D; BEH +0681; HAH WITH HAMZA ABOVE; D; HAH +0682; HAH WITH VERTICAL 2 DOTS ABOVE; D; HAH +0683; HAH WITH 2 DOTS BELOW; D; HAH +0684; HAH WITH VERTICAL 2 DOTS BELOW; D; HAH 0685; HAH WITH 3 DOTS ABOVE; D; HAH -0686; HAH WITH MIDDLE 3 DOTS DOWNWARD; D; HAH -0687; HAH WITH MIDDLE 4 DOTS; D; HAH -0688; DAL WITH SMALL TAH; R; DAL -0689; DAL WITH RING; R; DAL +0686; HAH 
WITH 3 DOTS BELOW; D; HAH +0687; HAH WITH 4 DOTS BELOW; D; HAH +0688; DAL WITH TAH ABOVE; R; DAL +0689; DAL WITH ATTACHED RING BELOW; R; DAL 068A; DAL WITH DOT BELOW; R; DAL -068B; DAL WITH DOT BELOW AND SMALL TAH; R; DAL +068B; DAL WITH DOT BELOW AND TAH ABOVE; R; DAL 068C; DAL WITH 2 DOTS ABOVE; R; DAL 068D; DAL WITH 2 DOTS BELOW; R; DAL 068E; DAL WITH 3 DOTS ABOVE; R; DAL -068F; DAL WITH 3 DOTS ABOVE DOWNWARD; R; DAL +068F; DAL WITH INVERTED 3 DOTS ABOVE; R; DAL 0690; DAL WITH 4 DOTS ABOVE; R; DAL -0691; REH WITH SMALL TAH; R; REH -0692; REH WITH SMALL V; R; REH -0693; REH WITH RING; R; REH +0691; REH WITH TAH ABOVE; R; REH +0692; REH WITH V ABOVE; R; REH +0693; REH WITH ATTACHED RING BELOW; R; REH 0694; REH WITH DOT BELOW; R; REH -0695; REH WITH SMALL V BELOW; R; REH -0696; REH WITH DOT BELOW AND DOT ABOVE; R; REH +0695; REH WITH V BELOW; R; REH +0696; REH WITH DOT BELOW AND DOT WITHIN; R; REH 0697; REH WITH 2 DOTS ABOVE; R; REH 0698; REH WITH 3 DOTS ABOVE; R; REH 0699; REH WITH 4 DOTS ABOVE; R; REH @@ -186,66 +190,66 @@ 069F; TAH WITH 3 DOTS ABOVE; D; TAH 06A0; AIN WITH 3 DOTS ABOVE; D; AIN 06A1; DOTLESS FEH; D; FEH -06A2; FEH WITH DOT MOVED BELOW; D; FEH +06A2; DOTLESS FEH WITH DOT BELOW; D; FEH 06A3; FEH WITH DOT BELOW; D; FEH -06A4; FEH WITH 3 DOTS ABOVE; D; FEH -06A5; FEH WITH 3 DOTS BELOW; D; FEH -06A6; FEH WITH 4 DOTS ABOVE; D; FEH -06A7; QAF WITH DOT ABOVE; D; QAF -06A8; QAF WITH 3 DOTS ABOVE; D; QAF +06A4; DOTLESS FEH WITH 3 DOTS ABOVE; D; FEH +06A5; DOTLESS FEH WITH 3 DOTS BELOW; D; FEH +06A6; DOTLESS FEH WITH 4 DOTS ABOVE; D; FEH +06A7; DOTLESS QAF WITH DOT ABOVE; D; QAF +06A8; DOTLESS QAF WITH 3 DOTS ABOVE; D; QAF 06A9; KEHEH; D; GAF 06AA; SWASH KAF; D; SWASH KAF -06AB; KAF WITH RING; D; GAF +06AB; KEHEH WITH ATTACHED RING BELOW; D; GAF 06AC; KAF WITH DOT ABOVE; D; KAF 06AD; KAF WITH 3 DOTS ABOVE; D; KAF 06AE; KAF WITH 3 DOTS BELOW; D; KAF 06AF; GAF; D; GAF -06B0; GAF WITH RING; D; GAF +06B0; GAF WITH ATTACHED RING BELOW; D; GAF 06B1; GAF WITH 2 
DOTS ABOVE; D; GAF 06B2; GAF WITH 2 DOTS BELOW; D; GAF -06B3; GAF WITH 2 DOTS VERTICAL BELOW; D; GAF +06B3; GAF WITH VERTICAL 2 DOTS BELOW; D; GAF 06B4; GAF WITH 3 DOTS ABOVE; D; GAF -06B5; LAM WITH SMALL V; D; LAM +06B5; LAM WITH V ABOVE; D; LAM 06B6; LAM WITH DOT ABOVE; D; LAM 06B7; LAM WITH 3 DOTS ABOVE; D; LAM 06B8; LAM WITH 3 DOTS BELOW; D; LAM 06B9; NOON WITH DOT BELOW; D; NOON 06BA; DOTLESS NOON; D; NOON -06BB; DOTLESS NOON WITH SMALL TAH; D; NOON -06BC; NOON WITH RING; D; NOON +06BB; DOTLESS NOON WITH TAH ABOVE; D; NOON +06BC; NOON WITH ATTACHED RING BELOW; D; NOON 06BD; NYA; D; NYA 06BE; KNOTTED HEH; D; KNOTTED HEH -06BF; HAH WITH MIDDLE 3 DOTS DOWNWARD AND DOT ABOVE; D; HAH -06C0; HAMZA ON HEH; R; TEH MARBUTA +06BF; HAH WITH 3 DOTS BELOW AND DOT ABOVE; D; HAH +06C0; DOTLESS TEH MARBUTA WITH HAMZA ABOVE; R; TEH MARBUTA 06C1; HEH GOAL; D; HEH GOAL -06C2; HAMZA ON HEH GOAL; D; HEH GOAL +06C2; HEH GOAL WITH HAMZA ABOVE; D; HEH GOAL 06C3; TEH MARBUTA GOAL; R; TEH MARBUTA GOAL -06C4; WAW WITH RING; R; WAW +06C4; WAW WITH ATTACHED RING WITHIN; R; WAW 06C5; WAW WITH BAR; R; WAW -06C6; WAW WITH SMALL V; R; WAW -06C7; WAW WITH DAMMA; R; WAW +06C6; WAW WITH V ABOVE; R; WAW +06C7; WAW WITH DAMMA ABOVE; R; WAW 06C8; WAW WITH ALEF ABOVE; R; WAW -06C9; WAW WITH INVERTED SMALL V; R; WAW +06C9; WAW WITH INVERTED V ABOVE; R; WAW 06CA; WAW WITH 2 DOTS ABOVE; R; WAW 06CB; WAW WITH 3 DOTS ABOVE; R; WAW 06CC; FARSI YEH; D; FARSI YEH 06CD; YEH WITH TAIL; R; YEH WITH TAIL -06CE; FARSI YEH WITH SMALL V; D; FARSI YEH +06CE; FARSI YEH WITH V ABOVE; D; FARSI YEH 06CF; WAW WITH DOT ABOVE; R; WAW -06D0; YEH WITH 2 DOTS VERTICAL BELOW; D; YEH -06D1; YEH WITH 3 DOTS BELOW; D; YEH +06D0; DOTLESS YEH WITH VERTICAL 2 DOTS BELOW; D; YEH +06D1; DOTLESS YEH WITH 3 DOTS BELOW; D; YEH 06D2; YEH BARREE; R; YEH BARREE -06D3; HAMZA ON YEH BARREE; R; YEH BARREE -06D5; AE; R; TEH MARBUTA +06D3; YEH BARREE WITH HAMZA ABOVE; R; YEH BARREE +06D5; DOTLESS TEH MARBUTA; R; TEH MARBUTA 06DD; ARABIC END OF 
AYAH; U; No_Joining_Group -06EE; DAL WITH INVERTED V; R; DAL -06EF; REH WITH INVERTED V; R; REH +06EE; DAL WITH INVERTED V ABOVE; R; DAL +06EF; REH WITH INVERTED V ABOVE; R; REH 06FA; SEEN WITH DOT BELOW AND 3 DOTS ABOVE; D; SEEN -06FB; DAD WITH DOT BELOW; D; SAD -06FC; GHAIN WITH DOT BELOW; D; AIN -06FF; HEH WITH INVERTED V; D; KNOTTED HEH +06FB; SAD WITH DOT BELOW AND DOT ABOVE; D; SAD +06FC; AIN WITH DOT BELOW AND DOT ABOVE; D; AIN +06FF; KNOTTED HEH WITH INVERTED V ABOVE; D; KNOTTED HEH -# Syriac characters +# Syriac Characters 0710; ALAPH; R; ALAPH 0712; BETH; D; BETH @@ -282,55 +286,55 @@ 074E; SOGDIAN KHAPH; D; KHAPH 074F; SOGDIAN FE; D; FE -# Arabic supplement characters +# Arabic Supplement Characters -0750; BEH WITH 3 DOTS HORIZONTALLY BELOW; D; BEH -0751; BEH WITH DOT BELOW AND 3 DOTS ABOVE; D; BEH -0752; BEH WITH 3 DOTS POINTING UPWARDS BELOW; D; BEH -0753; BEH WITH 3 DOTS POINTING UPWARDS BELOW AND 2 DOTS ABOVE; D; BEH -0754; BEH WITH 2 DOTS BELOW AND DOT ABOVE; D; BEH -0755; BEH WITH INVERTED SMALL V BELOW; D; BEH -0756; BEH WITH SMALL V; D; BEH +0750; DOTLESS BEH WITH HORIZONTAL 3 DOTS BELOW; D; BEH +0751; BEH WITH 3 DOTS ABOVE; D; BEH +0752; DOTLESS BEH WITH INVERTED 3 DOTS BELOW; D; BEH +0753; DOTLESS BEH WITH INVERTED 3 DOTS BELOW AND 2 DOTS ABOVE; D; BEH +0754; DOTLESS BEH WITH 2 DOTS BELOW AND DOT ABOVE; D; BEH +0755; DOTLESS BEH WITH INVERTED V BELOW; D; BEH +0756; DOTLESS BEH WITH V ABOVE; D; BEH 0757; HAH WITH 2 DOTS ABOVE; D; HAH -0758; HAH WITH 3 DOTS POINTING UPWARDS BELOW; D; HAH -0759; DAL WITH 2 DOTS VERTICALLY BELOW AND SMALL TAH; R; DAL -075A; DAL WITH INVERTED SMALL V BELOW; R; DAL -075B; REH WITH STROKE; R; REH +0758; HAH WITH INVERTED 3 DOTS BELOW; D; HAH +0759; DAL WITH VERTICAL 2 DOTS BELOW AND TAH ABOVE; R; DAL +075A; DAL WITH INVERTED V BELOW; R; DAL +075B; REH WITH BAR; R; REH 075C; SEEN WITH 4 DOTS ABOVE; D; SEEN 075D; AIN WITH 2 DOTS ABOVE; D; AIN -075E; AIN WITH 3 DOTS POINTING DOWNWARDS ABOVE; D; AIN -075F; AIN WITH 2 DOTS 
VERTICALLY ABOVE; D; AIN -0760; FEH WITH 2 DOTS BELOW; D; FEH -0761; FEH WITH 3 DOTS POINTING UPWARDS BELOW; D; FEH +075E; AIN WITH INVERTED 3 DOTS ABOVE; D; AIN +075F; AIN WITH VERTICAL 2 DOTS ABOVE; D; AIN +0760; DOTLESS FEH WITH 2 DOTS BELOW; D; FEH +0761; DOTLESS FEH WITH INVERTED 3 DOTS BELOW; D; FEH 0762; KEHEH WITH DOT ABOVE; D; GAF 0763; KEHEH WITH 3 DOTS ABOVE; D; GAF -0764; KEHEH WITH 3 DOTS POINTING UPWARDS BELOW; D; GAF +0764; KEHEH WITH INVERTED 3 DOTS BELOW; D; GAF 0765; MEEM WITH DOT ABOVE; D; MEEM 0766; MEEM WITH DOT BELOW; D; MEEM 0767; NOON WITH 2 DOTS BELOW; D; NOON -0768; NOON WITH SMALL TAH; D; NOON -0769; NOON WITH SMALL V; D; NOON +0768; NOON WITH TAH ABOVE; D; NOON +0769; NOON WITH V ABOVE; D; NOON 076A; LAM WITH BAR; D; LAM -076B; REH WITH 2 DOTS VERTICALLY ABOVE; R; REH +076B; REH WITH VERTICAL 2 DOTS ABOVE; R; REH 076C; REH WITH HAMZA ABOVE; R; REH -076D; SEEN WITH 2 DOTS VERTICALLY ABOVE; D; SEEN -076E; HAH WITH SMALL TAH BELOW; D; HAH -076F; HAH WITH SMALL TAH AND 2 DOTS; D; HAH -0770; SEEN WITH SMALL TAH AND 2 DOTS; D; SEEN -0771; REH WITH SMALL TAH AND 2 DOTS; R; REH -0772; HAH WITH SMALL TAH ABOVE; D; HAH +076D; SEEN WITH VERTICAL 2 DOTS ABOVE; D; SEEN +076E; HAH WITH TAH BELOW; D; HAH +076F; HAH WITH TAH AND 2 DOTS BELOW; D; HAH +0770; SEEN WITH 2 DOTS AND TAH ABOVE; D; SEEN +0771; REH WITH 2 DOTS AND TAH ABOVE; R; REH +0772; HAH WITH TAH ABOVE; D; HAH 0773; ALEF WITH DIGIT TWO ABOVE; R; ALEF 0774; ALEF WITH DIGIT THREE ABOVE; R; ALEF 0775; FARSI YEH WITH DIGIT TWO ABOVE; D; FARSI YEH 0776; FARSI YEH WITH DIGIT THREE ABOVE; D; FARSI YEH -0777; YEH WITH DIGIT FOUR BELOW; D; YEH +0777; DOTLESS YEH WITH DIGIT FOUR BELOW; D; YEH 0778; WAW WITH DIGIT TWO ABOVE; R; WAW 0779; WAW WITH DIGIT THREE ABOVE; R; WAW -077A; YEH BARREE WITH DIGIT TWO ABOVE; D; BURUSHASKI YEH BARREE -077B; YEH BARREE WITH DIGIT THREE ABOVE; D; BURUSHASKI YEH BARREE +077A; BURUSHASKI YEH BARREE WITH DIGIT TWO ABOVE; D; BURUSHASKI YEH BARREE +077B; BURUSHASKI YEH 
BARREE WITH DIGIT THREE ABOVE; D; BURUSHASKI YEH BARREE 077C; HAH WITH DIGIT FOUR BELOW; D; HAH 077D; SEEN WITH DIGIT FOUR ABOVE; D; SEEN -077E; SEEN WITH INVERTED V; D; SEEN +077E; SEEN WITH INVERTED V ABOVE; D; SEEN 077F; KAF WITH 2 DOTS ABOVE; D; KAF # N'Ko Characters @@ -370,6 +374,49 @@ 07EA; NKO JONA RA; D; No_Joining_Group 07FA; NKO LAJANYALAN; C; No_Joining_Group +# Mandaic Characters + +0840; MANDAIC HALQA; R; No_Joining_Group +0841; MANDAIC AB; D; No_Joining_Group +0842; MANDAIC AG; D; No_Joining_Group +0843; MANDAIC AD; D; No_Joining_Group +0844; MANDAIC AH; D; No_Joining_Group +0845; MANDAIC USHENNA; D; No_Joining_Group +0846; MANDAIC AZ; R; No_Joining_Group +0847; MANDAIC IT; D; No_Joining_Group +0848; MANDAIC ATT; D; No_Joining_Group +0849; MANDAIC AKSA; R; No_Joining_Group +084A; MANDAIC AK; D; No_Joining_Group +084B; MANDAIC AL; D; No_Joining_Group +084C; MANDAIC AM; D; No_Joining_Group +084D; MANDAIC AN; D; No_Joining_Group +084E; MANDAIC AS; D; No_Joining_Group +084F; MANDAIC IN; R; No_Joining_Group +0850; MANDAIC AP; D; No_Joining_Group +0851; MANDAIC ASZ; D; No_Joining_Group +0852; MANDAIC AQ; D; No_Joining_Group +0853; MANDAIC AR; D; No_Joining_Group +0854; MANDAIC ASH; R; No_Joining_Group +0855; MANDAIC AT; D; No_Joining_Group +0856; MANDAIC DUSHENNA; U; No_Joining_Group +0857; MANDAIC KAD; U; No_Joining_Group +0858; MANDAIC AIN; U; No_Joining_Group + +# Arabic Extended-A Characters + +08A0; DOTLESS BEH WITH V BELOW; D; BEH +08A2; HAH WITH DOT BELOW AND 2 DOTS ABOVE; D; HAH +08A3; TAH WITH 2 DOTS ABOVE; D; TAH +08A4; DOTLESS FEH WITH DOT BELOW AND 3 DOTS ABOVE; D; FEH +08A5; QAF WITH DOT BELOW; D; QAF +08A6; LAM WITH DOUBLE BAR; D; LAM +08A7; MEEM WITH 3 DOTS ABOVE; D; MEEM +08A8; YEH WITH HAMZA ABOVE; D; YEH +08A9; YEH WITH DOT ABOVE; D; YEH +08AA; REH WITH LOOP; R; REH +08AB; WAW WITH DOT WITHIN; R; WAW +08AC; ROHINGYA YEH; R; ROHINGYA YEH + # Other 200C; ZERO WIDTH NON-JOINER; U; No_Joining_Group diff --git a/lib/unicore/BidiMirroring.txt 
b/lib/unicore/BidiMirroring.txt index 902f9a6b88..2e719bc1e0 100644 --- a/lib/unicore/BidiMirroring.txt +++ b/lib/unicore/BidiMirroring.txt @@ -1,19 +1,19 @@ -# BidiMirroring-6.0.0.txt -# Date: 2010-06-21, 12:09:00 PDT [KW] +# BidiMirroring-6.1.0.txt +# Date: 2011-12-20, 19:31:00 GMT [KW, LI] # # Bidi_Mirroring_Glyph Property # # This file is an informative contributory data file in the # Unicode Character Database. # -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # -# This data file lists characters that have the Bidi_Mirrored=True property +# This data file lists characters that have the Bidi_Mirrored=Yes property # value, for which there is another Unicode character that typically has a glyph # that is the mirror image of the original character's glyph. # -# The repertoire covered by the file is Unicode 6.0.0. +# The repertoire covered by the file is Unicode 6.1.0. # # The file contains a list of lines with mappings from one code point # to another one for character-based mirroring. @@ -26,10 +26,18 @@ # variable-length hexadecimal value with 4 to 6 digits. # A comment indicates where the characters are "BEST FIT" mirroring. # -# Code points for which Bidi_Mirrored=True, but for which no appropriate +# Code points for which Bidi_Mirrored=Yes, but for which no appropriate # characters exist with mirrored glyphs, are # listed as comments at the end of the file. # +# Note: (2011-12-19) There is an inconsistency between the +# following statement about the default value +# of the Bidi_Mirroring_Glyph property and the +# value of the @missing line for Bidi_Mirroring_Glyph in +# PropertyValueAliases.txt. This inconsistency was discovered too +# late in the release process to be resolved by +# the UTC. The inconsistency will be resolved in a future revision. 
+# # Formally, the default value of the Bidi_Mirroring_Glyph property # for each code point is the code point itself, unless a mapping to # some other character is specified in this data file. When a code @@ -41,9 +49,13 @@ # at http://www.unicode.org/unicode/reports/tr9/ # # This file was originally created by Markus Scherer. -# Extended for Unicode 3.2, 4.0, 4.1, 5.0, 5.1, 5.2, and 6.0 by Ken Whistler. +# Extended for Unicode 3.2, 4.0, 4.1, 5.0, 5.1, 5.2, and 6.0 by Ken Whistler, +# and for Unicode 6.1 by Ken Whistler and Laurentiu Iancu. # # ############################################################ +# +# Property: Bidi_Mirroring_Glyph +# 0028; 0029 # LEFT PARENTHESIS 0029; 0028 # RIGHT PARENTHESIS @@ -209,6 +221,8 @@ 27C6; 27C5 # RIGHT S-SHAPED BAG DELIMITER 27C8; 27C9 # REVERSE SOLIDUS PRECEDING SUBSET 27C9; 27C8 # SUPERSET PRECEDING SOLIDUS +27CB; 27CD # MATHEMATICAL RISING DIAGONAL +27CD; 27CB # MATHEMATICAL FALLING DIAGONAL 27D5; 27D6 # LEFT OUTER JOIN 27D6; 27D5 # RIGHT OUTER JOIN 27DD; 27DE # LONG RIGHT TACK diff --git a/lib/unicore/Blocks.txt b/lib/unicore/Blocks.txt index 50df2e1d31..f9a384e3ff 100644 --- a/lib/unicore/Blocks.txt +++ b/lib/unicore/Blocks.txt @@ -1,8 +1,8 @@ -# Blocks-6.0.0.txt -# Date: 2010-06-04, 11:12:00 PDT [KW] +# Blocks-6.1.0.txt +# Date: 2011-06-14, 18:26:00 GMT [KW, LI] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. 
# For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ # @@ -46,6 +46,7 @@ 07C0..07FF; NKo 0800..083F; Samaritan 0840..085F; Mandaic +08A0..08FF; Arabic Extended-A 0900..097F; Devanagari 0980..09FF; Bengali 0A00..0A7F; Gurmukhi @@ -86,6 +87,7 @@ 1BC0..1BFF; Batak 1C00..1C4F; Lepcha 1C50..1C7F; Ol Chiki +1CC0..1CCF; Sundanese Supplement 1CD0..1CFF; Vedic Extensions 1D00..1D7F; Phonetic Extensions 1D80..1DBF; Phonetic Extensions Supplement @@ -161,6 +163,7 @@ A980..A9DF; Javanese AA00..AA5F; Cham AA60..AA7F; Myanmar Extended-A AA80..AADF; Tai Viet +AAE0..AAFF; Meetei Mayek Extensions AB00..AB2F; Ethiopic Extended-A ABC0..ABFF; Meetei Mayek AC00..D7AF; Hangul Syllables @@ -199,6 +202,8 @@ FFF0..FFFF; Specials 10840..1085F; Imperial Aramaic 10900..1091F; Phoenician 10920..1093F; Lydian +10980..1099F; Meroitic Hieroglyphs +109A0..109FF; Meroitic Cursive 10A00..10A5F; Kharoshthi 10A60..10A7F; Old South Arabian 10B00..10B3F; Avestan @@ -208,10 +213,15 @@ FFF0..FFFF; Specials 10E60..10E7F; Rumi Numeral Symbols 11000..1107F; Brahmi 11080..110CF; Kaithi +110D0..110FF; Sora Sompeng +11100..1114F; Chakma +11180..111DF; Sharada +11680..116CF; Takri 12000..123FF; Cuneiform 12400..1247F; Cuneiform Numbers and Punctuation 13000..1342F; Egyptian Hieroglyphs 16800..16A3F; Bamum Supplement +16F00..16F9F; Miao 1B000..1B0FF; Kana Supplement 1D000..1D0FF; Byzantine Musical Symbols 1D100..1D1FF; Musical Symbols @@ -219,6 +229,7 @@ FFF0..FFFF; Specials 1D300..1D35F; Tai Xuan Jing Symbols 1D360..1D37F; Counting Rod Numerals 1D400..1D7FF; Mathematical Alphanumeric Symbols +1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols 1F000..1F02F; Mahjong Tiles 1F030..1F09F; Domino Tiles 1F0A0..1F0FF; Playing Cards diff --git a/lib/unicore/CJKRadicals.txt b/lib/unicore/CJKRadicals.txt index 32a765330d..a7debb6e1e 100644 --- a/lib/unicore/CJKRadicals.txt +++ b/lib/unicore/CJKRadicals.txt @@ -1,8 +1,8 @@ -# CJKRadicals-6.0.0.txt 
-# Date: 2010-01-22, 10:53:25 PDT [RC] +# CJKRadicals-6.1.0.txt +# Date: 2011-08-30, 23:14:00 GMT [RC, KW] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr38/ # @@ -24,6 +24,7 @@ # # This file was created for Unicode 5.2 by Richard Cook. # Updated for Unicode 6.0 by Richard Cook. +# Updated for Unicode 6.1 by Ken Whistler. # # #################################################### diff --git a/lib/unicore/CaseFolding.txt b/lib/unicore/CaseFolding.txt index ffe6173d75..0d9a4090cd 100644 --- a/lib/unicore/CaseFolding.txt +++ b/lib/unicore/CaseFolding.txt @@ -1,8 +1,8 @@ -# CaseFolding-6.0.0.txt -# Date: 2010-05-18, 00:48:57 GMT [MD] +# CaseFolding-6.1.0.txt +# Date: 2011-07-25, 21:21:56 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ # @@ -52,7 +52,15 @@ # behavior. (The default option is to exclude them.) # # ================================================================= -# @missing 0000..10FFFF; <codepoint> + +# Property: Case_Folding + +# All code points not explicitly listed for Case_Folding +# have the value C for the status field, and the code point itself for the mapping field. 
+ +# @missing: 0000..10FFFF; C; <code point> + +# ================================================================= 0041; C; 0061; # LATIN CAPITAL LETTER A 0042; C; 0062; # LATIN CAPITAL LETTER B 0043; C; 0063; # LATIN CAPITAL LETTER C @@ -574,6 +582,8 @@ 10C3; C; 2D23; # GEORGIAN CAPITAL LETTER WE 10C4; C; 2D24; # GEORGIAN CAPITAL LETTER HAR 10C5; C; 2D25; # GEORGIAN CAPITAL LETTER HOE +10C7; C; 2D27; # GEORGIAN CAPITAL LETTER YN +10CD; C; 2D2D; # GEORGIAN CAPITAL LETTER AEN 1E00; C; 1E01; # LATIN CAPITAL LETTER A WITH RING BELOW 1E02; C; 1E03; # LATIN CAPITAL LETTER B WITH DOT ABOVE 1E04; C; 1E05; # LATIN CAPITAL LETTER B WITH DOT BELOW @@ -1042,6 +1052,7 @@ 2CE2; C; 2CE3; # COPTIC CAPITAL LETTER OLD NUBIAN WAU 2CEB; C; 2CEC; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI 2CED; C; 2CEE; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA +2CF2; C; 2CF3; # COPTIC CAPITAL LETTER BOHAIRIC KHEI A640; C; A641; # CYRILLIC CAPITAL LETTER ZEMLYA A642; C; A643; # CYRILLIC CAPITAL LETTER DZELO A644; C; A645; # CYRILLIC CAPITAL LETTER REVERSED DZE @@ -1126,11 +1137,13 @@ A786; C; A787; # LATIN CAPITAL LETTER INSULAR T A78B; C; A78C; # LATIN CAPITAL LETTER SALTILLO A78D; C; 0265; # LATIN CAPITAL LETTER TURNED H A790; C; A791; # LATIN CAPITAL LETTER N WITH DESCENDER +A792; C; A793; # LATIN CAPITAL LETTER C WITH BAR A7A0; C; A7A1; # LATIN CAPITAL LETTER G WITH OBLIQUE STROKE A7A2; C; A7A3; # LATIN CAPITAL LETTER K WITH OBLIQUE STROKE A7A4; C; A7A5; # LATIN CAPITAL LETTER N WITH OBLIQUE STROKE A7A6; C; A7A7; # LATIN CAPITAL LETTER R WITH OBLIQUE STROKE A7A8; C; A7A9; # LATIN CAPITAL LETTER S WITH OBLIQUE STROKE +A7AA; C; 0266; # LATIN CAPITAL LETTER H WITH HOOK FB00; F; 0066 0066; # LATIN SMALL LIGATURE FF FB01; F; 0066 0069; # LATIN SMALL LIGATURE FI FB02; F; 0066 006C; # LATIN SMALL LIGATURE FL diff --git a/lib/unicore/CompositionExclusions.txt b/lib/unicore/CompositionExclusions.txt index e39c651811..f12f7d61bf 100644 --- a/lib/unicore/CompositionExclusions.txt +++ 
b/lib/unicore/CompositionExclusions.txt @@ -1,5 +1,5 @@ -# CompositionExclusions-6.0.0.txt -# Date: 2010-06-25, 14:34:00 PDT [KW] +# CompositionExclusions-6.1.0.txt +# Date: 2011-07-12, 00:13:00 GMT [KW, LI] # # This file lists the characters for the Composition Exclusion Table # defined in UAX #15, Unicode Normalization Forms. @@ -7,7 +7,7 @@ # This file is a normative contributory data file in the # Unicode Character Database. # -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # # For more information, see @@ -169,17 +169,16 @@ FB4E # HEBREW LETTER PE WITH RAFE # FA20 CJK COMPATIBILITY IDEOGRAPH-FA20 # FA22 CJK COMPATIBILITY IDEOGRAPH-FA22 # FA25..FA26 [2] CJK COMPATIBILITY IDEOGRAPH-FA25..CJK COMPATIBILITY IDEOGRAPH-FA26 -# FA2A..FA2D [4] CJK COMPATIBILITY IDEOGRAPH-FA2A..CJK COMPATIBILITY IDEOGRAPH-FA2D -# FA30..FA6D [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D +# FA2A..FA6D [68] CJK COMPATIBILITY IDEOGRAPH-FA2A..CJK COMPATIBILITY IDEOGRAPH-FA6D # FA70..FAD9 [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 # 2F800..2FA1D [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D -# Total code points: 1033 +# Total code points: 1035 # ================================================ # (4) Non-Starter Decompositions # -# These characters can be derived from the UnicodeData file +# These characters can be derived from the UnicodeData.txt file # by including each expanding canonical decomposition # (i.e., those which canonically decompose to a sequence # of characters instead of a single character), such that: diff --git a/lib/unicore/DAge.txt b/lib/unicore/DAge.txt index 4293229cef..6ff0206b4c 100644 --- a/lib/unicore/DAge.txt +++ b/lib/unicore/DAge.txt @@ -1,8 +1,8 @@ -# DerivedAge-6.0.0.txt -# Date: 2010-10-05, 00:47:58 GMT [MD, KW] +# DerivedAge-6.1.0.txt +# Date: 2012-01-20, 21:47:00 GMT [MD, KW] # 
# Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2012 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ # @@ -42,12 +42,14 @@ # For more information, see [http://www.unicode.org/reports/tr18/]. # All code points not explicitly listed for Age -# have the value unassigned. +# have the value Unassigned (NA). -# @missing: 0000..10FFFF; unassigned +# @missing: 0000..10FFFF; Unassigned # ================================================ +# Age=V1_1 + # Assigned as of Unicode 1.1.0 (June, 1993) # [excluding removed Hangul Syllables] @@ -357,6 +359,8 @@ FFFE..FFFF ; 1.1 # [2] <noncharacter-FFFE>..<noncharacter-FFFF> # ================================================ +# Age=V2_0 + # Newly assigned in Unicode 2.0.0 (July, 1996) 0591..05A1 ; 2.0 # [17] HEBREW ACCENT ETNAHTA..HEBREW ACCENT PAZER @@ -397,6 +401,8 @@ FFFFE..FFFFF ; 2.0 # [2] <noncharacter-FFFFE>..<noncharacter-FFFFF> # ================================================ +# Age=V2_1 + # Newly assigned in Unicode 2.1.2 (May, 1998) 20AC ; 2.1 # EURO SIGN @@ -406,6 +412,8 @@ FFFC ; 2.1 # OBJECT REPLACEMENT CHARACTER # ================================================ +# Age=V3_0 + # Newly assigned in Unicode 3.0.0 (September, 1999) 01F6..01F9 ; 3.0 # [4] LATIN CAPITAL LETTER HWAIR..LATIN SMALL LETTER N WITH GRAVE @@ -493,9 +501,7 @@ FFFC ; 2.1 # OBJECT REPLACEMENT CHARACTER 1401..1676 ; 3.0 # [630] CANADIAN SYLLABICS E..CANADIAN SYLLABICS NNGAA 1680..169C ; 3.0 # [29] OGHAM SPACE MARK..OGHAM REVERSED FEATHER MARK 16A0..16F0 ; 3.0 # [81] RUNIC LETTER FEHU FEOH FE F..RUNIC BELGTHOR SYMBOL -1780..17B3 ; 3.0 # [52] KHMER LETTER KA..KHMER INDEPENDENT VOWEL QAU -17B4..17B5 ; 3.0 # [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA -17B6..17DC ; 3.0 # [39] KHMER VOWEL SIGN AA..KHMER SIGN AVAKRAHASANYA +1780..17DC ; 3.0 # [93] KHMER LETTER KA..KHMER SIGN AVAKRAHASANYA 17E0..17E9 ; 3.0 # [10] 
KHMER DIGIT ZERO..KHMER DIGIT NINE 1800..180E ; 3.0 # [15] MONGOLIAN BIRGA..MONGOLIAN VOWEL SEPARATOR 1810..1819 ; 3.0 # [10] MONGOLIAN DIGIT ZERO..MONGOLIAN DIGIT NINE @@ -537,6 +543,8 @@ FFF9..FFFB ; 3.0 # [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATIO # ================================================ +# Age=V3_1 + # Newly assigned in Unicode 3.1.0 (March, 2001) 03F4..03F5 ; 3.1 # [2] GREEK CAPITAL THETA SYMBOL..GREEK LUNATE EPSILON SYMBOL @@ -582,6 +590,8 @@ E0020..E007F ; 3.1 # [96] TAG SPACE..CANCEL TAG # ================================================ +# Age=V3_2 + # Newly assigned in Unicode 3.2.0 (March, 2002) 0220 ; 3.2 # LATIN CAPITAL LETTER N WITH LONG RIGHT LEG @@ -649,6 +659,8 @@ FF5F..FF60 ; 3.2 # [2] FULLWIDTH LEFT WHITE PARENTHESIS..FULLWIDTH RIGHT WH # ================================================ +# Age=V4_0 + # Newly assigned in Unicode 4.0.0 (April, 2003) 0221 ; 4.0 # LATIN SMALL LETTER D WITH CURL @@ -733,6 +745,8 @@ E0100..E01EF ; 4.0 # [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 # ================================================ +# Age=V4_1 + # Newly assigned in Unicode 4.1.0 (March, 2005) 0237..0241 ; 4.1 # [11] LATIN SMALL LETTER DOTLESS J..LATIN CAPITAL LETTER GLOTTAL STOP @@ -826,6 +840,8 @@ FE10..FE19 ; 4.1 # [10] PRESENTATION FORM FOR VERTICAL COMMA..PRESENTATION F # ================================================ +# Age=V5_0 + # Newly assigned in Unicode 5.0.0 (July, 2006) 0242..024F ; 5.0 # [14] LATIN SMALL LETTER GLOTTAL STOP..LATIN SMALL LETTER Y WITH STROKE @@ -868,6 +884,8 @@ A840..A877 ; 5.0 # [56] PHAGS-PA LETTER KA..PHAGS-PA MARK DOUBLE SHAD # ================================================ +# Age=V5_1 + # Newly assigned in Unicode 5.1.0 (March, 2008) 0370..0373 ; 5.1 # [4] GREEK CAPITAL LETTER HETA..GREEK SMALL LETTER ARCHAIC SAMPI @@ -964,6 +982,8 @@ FE24..FE26 ; 5.1 # [3] COMBINING MACRON LEFT HALF..COMBINING CONJOINING MAC # ================================================ +# Age=V5_2 + # Newly 
assigned in Unicode 5.2.0 (October, 2009) 0524..0525 ; 5.2 # [2] CYRILLIC CAPITAL LETTER PE WITH DESCENDER..CYRILLIC SMALL LETTER PE WITH DESCENDER @@ -1063,6 +1083,8 @@ FA6B..FA6D ; 5.2 # [3] CJK COMPATIBILITY IDEOGRAPH-FA6B..CJK COMPATIBILITY # ================================================ +# Age=V6_0 + # Newly assigned in Unicode 6.0.0 (October, 2010) 0526..0527 ; 6.0 # [2] CYRILLIC CAPITAL LETTER SHHA WITH DESCENDER..CYRILLIC SMALL LETTER SHHA WITH DESCENDER @@ -1174,4 +1196,102 @@ FBB2..FBC1 ; 6.0 # [16] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL SMALL TAH BEL # Total code points: 2088 +# ================================================ + +# Age=V6_1 + +# Newly assigned in Unicode 6.1.0 (January, 2012) + +058F ; 6.1 # ARMENIAN DRAM SIGN +0604 ; 6.1 # ARABIC SIGN SAMVAT +08A0 ; 6.1 # ARABIC LETTER BEH WITH SMALL V BELOW +08A2..08AC ; 6.1 # [11] ARABIC LETTER JEEM WITH TWO DOTS ABOVE..ARABIC LETTER ROHINGYA YEH +08E4..08FE ; 6.1 # [27] ARABIC CURLY FATHA..ARABIC DAMMA WITH DOT +0AF0 ; 6.1 # GUJARATI ABBREVIATION SIGN +0EDE..0EDF ; 6.1 # [2] LAO LETTER KHMU GO..LAO LETTER KHMU NYO +10C7 ; 6.1 # GEORGIAN CAPITAL LETTER YN +10CD ; 6.1 # GEORGIAN CAPITAL LETTER AEN +10FD..10FF ; 6.1 # [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN +1BAB..1BAD ; 6.1 # [3] SUNDANESE SIGN VIRAMA..SUNDANESE CONSONANT SIGN PASANGAN WA +1BBA..1BBF ; 6.1 # [6] SUNDANESE AVAGRAHA..SUNDANESE LETTER FINAL M +1CC0..1CC7 ; 6.1 # [8] SUNDANESE PUNCTUATION BINDU SURYA..SUNDANESE PUNCTUATION BINDU BA SATANGA +1CF3..1CF6 ; 6.1 # [4] VEDIC SIGN ROTATED ARDHAVISARGA..VEDIC SIGN UPADHMANIYA +27CB ; 6.1 # MATHEMATICAL RISING DIAGONAL +27CD ; 6.1 # MATHEMATICAL FALLING DIAGONAL +2CF2..2CF3 ; 6.1 # [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI +2D27 ; 6.1 # GEORGIAN SMALL LETTER YN +2D2D ; 6.1 # GEORGIAN SMALL LETTER AEN +2D66..2D67 ; 6.1 # [2] TIFINAGH LETTER YE..TIFINAGH LETTER YO +2E32..2E3B ; 6.1 # [10] TURNED COMMA..THREE-EM DASH +9FCC ; 6.1 # CJK UNIFIED 
IDEOGRAPH-9FCC +A674..A67B ; 6.1 # [8] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC LETTER OMEGA +A69F ; 6.1 # COMBINING CYRILLIC LETTER IOTIFIED E +A792..A793 ; 6.1 # [2] LATIN CAPITAL LETTER C WITH BAR..LATIN SMALL LETTER C WITH BAR +A7AA ; 6.1 # LATIN CAPITAL LETTER H WITH HOOK +A7F8..A7F9 ; 6.1 # [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE +AAE0..AAF6 ; 6.1 # [23] MEETEI MAYEK LETTER E..MEETEI MAYEK VIRAMA +FA2E..FA2F ; 6.1 # [2] CJK COMPATIBILITY IDEOGRAPH-FA2E..CJK COMPATIBILITY IDEOGRAPH-FA2F +10980..109B7 ; 6.1 # [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA +109BE..109BF ; 6.1 # [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN +110D0..110E8 ; 6.1 # [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE +110F0..110F9 ; 6.1 # [10] SORA SOMPENG DIGIT ZERO..SORA SOMPENG DIGIT NINE +11100..11134 ; 6.1 # [53] CHAKMA SIGN CANDRABINDU..CHAKMA MAAYYAA +11136..11143 ; 6.1 # [14] CHAKMA DIGIT ZERO..CHAKMA QUESTION MARK +11180..111C8 ; 6.1 # [73] SHARADA SIGN CANDRABINDU..SHARADA SEPARATOR +111D0..111D9 ; 6.1 # [10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE +11680..116B7 ; 6.1 # [56] TAKRI LETTER A..TAKRI SIGN NUKTA +116C0..116C9 ; 6.1 # [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE +16F00..16F44 ; 6.1 # [69] MIAO LETTER PA..MIAO LETTER HHA +16F50..16F7E ; 6.1 # [47] MIAO LETTER NASALIZATION..MIAO VOWEL SIGN NG +16F8F..16F9F ; 6.1 # [17] MIAO TONE RIGHT..MIAO LETTER REFORMED TONE-8 +1EE00..1EE03 ; 6.1 # [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; 6.1 # [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; 6.1 # [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; 6.1 # ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; 6.1 # ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; 6.1 # [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; 6.1 # [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL 
KHAH +1EE39 ; 6.1 # ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; 6.1 # ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; 6.1 # ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; 6.1 # ARABIC MATHEMATICAL TAILED HAH +1EE49 ; 6.1 # ARABIC MATHEMATICAL TAILED YEH +1EE4B ; 6.1 # ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; 6.1 # [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; 6.1 # [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; 6.1 # ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; 6.1 # ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; 6.1 # ARABIC MATHEMATICAL TAILED DAD +1EE5B ; 6.1 # ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; 6.1 # ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; 6.1 # ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; 6.1 # [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; 6.1 # ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; 6.1 # [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; 6.1 # [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; 6.1 # [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; 6.1 # [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; 6.1 # ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; 6.1 # [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; 6.1 # [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; 6.1 # [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; 6.1 # [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; 6.1 # [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN +1EEF0..1EEF1 ; 6.1 # [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL +1F16A..1F16B ; 6.1 # 
[2] RAISED MC SIGN..RAISED MD SIGN +1F540..1F543 ; 6.1 # [4] CIRCLED CROSS POMMEE..NOTCHED LEFT SEMICIRCLE WITH THREE DOTS +1F600 ; 6.1 # GRINNING FACE +1F611 ; 6.1 # EXPRESSIONLESS FACE +1F615 ; 6.1 # CONFUSED FACE +1F617 ; 6.1 # KISSING FACE +1F619 ; 6.1 # KISSING FACE WITH SMILING EYES +1F61B ; 6.1 # FACE WITH STUCK-OUT TONGUE +1F61F ; 6.1 # WORRIED FACE +1F626..1F627 ; 6.1 # [2] FROWNING FACE WITH OPEN MOUTH..ANGUISHED FACE +1F62C ; 6.1 # GRIMACING FACE +1F62E..1F62F ; 6.1 # [2] FACE WITH OPEN MOUTH..HUSHED FACE +1F634 ; 6.1 # SLEEPING FACE + +# Total code points: 732 + # EOF diff --git a/lib/unicore/DCoreProperties.txt b/lib/unicore/DCoreProperties.txt index 7c7a784942..abdcd2201e 100644 --- a/lib/unicore/DCoreProperties.txt +++ b/lib/unicore/DCoreProperties.txt @@ -1,8 +1,8 @@ -# DerivedCoreProperties-6.0.0.txt -# Date: 2010-08-19, 00:48:05 GMT [MD] +# DerivedCoreProperties-6.1.0.txt +# Date: 2011-12-11, 18:26:55 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. 
# For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ @@ -113,9 +113,7 @@ 27C0..27C4 ; Math # Sm [5] THREE DIMENSIONAL ANGLE..OPEN SUPERSET 27C5 ; Math # Ps LEFT S-SHAPED BAG DELIMITER 27C6 ; Math # Pe RIGHT S-SHAPED BAG DELIMITER -27C7..27CA ; Math # Sm [4] OR WITH DOT INSIDE..VERTICAL BAR WITH HORIZONTAL STROKE -27CC ; Math # Sm LONG DIVISION -27CE..27E5 ; Math # Sm [24] SQUARED LOGICAL AND..WHITE SQUARE WITH RIGHTWARDS TICK +27C7..27E5 ; Math # Sm [31] OR WITH DOT INSIDE..WHITE SQUARE WITH RIGHTWARDS TICK 27E6 ; Math # Ps MATHEMATICAL LEFT WHITE SQUARE BRACKET 27E7 ; Math # Pe MATHEMATICAL RIGHT WHITE SQUARE BRACKET 27E8 ; Math # Ps MATHEMATICAL LEFT ANGLE BRACKET @@ -216,8 +214,42 @@ FFE9..FFEC ; Math # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS A 1D7C3 ; Math # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL 1D7C4..1D7CB ; Math # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA 1D7CE..1D7FF ; Math # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE +1EE00..1EE03 ; Math # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; Math # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; Math # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; Math # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; Math # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; Math # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; Math # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; Math # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; Math # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; Math # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; Math # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; Math # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; Math # Lo ARABIC MATHEMATICAL TAILED LAM 
+1EE4D..1EE4F ; Math # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; Math # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; Math # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; Math # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; Math # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; Math # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; Math # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; Math # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; Math # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; Math # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; Math # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; Math # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; Math # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; Math # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; Math # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; Math # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; Math # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; Math # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; Math # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; Math # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN +1EEF0..1EEF1 ; Math # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL -# Total code points: 2165 +# Total code points: 2310 # ================================================ @@ -226,9 +258,9 @@ FFE9..FFEC ; Math # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS A 0041..005A ; Alphabetic 
# L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z 0061..007A ; Alphabetic # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z -00AA ; Alphabetic # L& FEMININE ORDINAL INDICATOR +00AA ; Alphabetic # Lo FEMININE ORDINAL INDICATOR 00B5 ; Alphabetic # L& MICRO SIGN -00BA ; Alphabetic # L& MASCULINE ORDINAL INDICATOR +00BA ; Alphabetic # Lo MASCULINE ORDINAL INDICATOR 00C0..00D6 ; Alphabetic # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS 00D8..00F6 ; Alphabetic # L& [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS 00F8..01BA ; Alphabetic # L& [195] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL @@ -303,6 +335,10 @@ FFE9..FFEC ; Math # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS A 0828 ; Alphabetic # Lm SAMARITAN MODIFIER LETTER I 0829..082C ; Alphabetic # Mn [4] SAMARITAN VOWEL SIGN LONG I..SAMARITAN VOWEL SIGN SUKUN 0840..0858 ; Alphabetic # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN +08A0 ; Alphabetic # Lo ARABIC LETTER BEH WITH SMALL V BELOW +08A2..08AC ; Alphabetic # Lo [11] ARABIC LETTER JEEM WITH TWO DOTS ABOVE..ARABIC LETTER ROHINGYA YEH +08E4..08E9 ; Alphabetic # Mn [6] ARABIC CURLY FATHA..ARABIC CURLY KASRATAN +08F0..08FE ; Alphabetic # Mn [15] ARABIC OPEN FATHATAN..ARABIC DAMMA WITH DOT 0900..0902 ; Alphabetic # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA 0903 ; Alphabetic # Mc DEVANAGARI SIGN VISARGA 0904..0939 ; Alphabetic # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA @@ -500,7 +536,7 @@ FFE9..FFEC ; Math # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS A 0EC0..0EC4 ; Alphabetic # Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI 0EC6 ; Alphabetic # Lm LAO KO LA 0ECD ; Alphabetic # Mn LAO NIGGAHITA -0EDC..0EDD ; Alphabetic # Lo [2] LAO HO NO..LAO HO MO +0EDC..0EDF ; Alphabetic # Lo [4] LAO HO NO..LAO LETTER KHMU NYO 0F00 ; Alphabetic # Lo TIBETAN SYLLABLE OM 0F40..0F47 ; Alphabetic # Lo [8] TIBETAN LETTER 
KA..TIBETAN LETTER JA 0F49..0F6C ; Alphabetic # Lo [36] TIBETAN LETTER NYA..TIBETAN LETTER RRA @@ -538,9 +574,11 @@ FFE9..FFEC ; Math # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS A 109C ; Alphabetic # Mc MYANMAR VOWEL SIGN AITON A 109D ; Alphabetic # Mn MYANMAR VOWEL SIGN AITON AI 10A0..10C5 ; Alphabetic # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; Alphabetic # L& GEORGIAN CAPITAL LETTER YN +10CD ; Alphabetic # L& GEORGIAN CAPITAL LETTER AEN 10D0..10FA ; Alphabetic # Lo [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN 10FC ; Alphabetic # Lm MODIFIER LETTER GEORGIAN NAR -1100..1248 ; Alphabetic # Lo [329] HANGUL CHOSEONG KIYEOK..ETHIOPIC SYLLABLE QWA +10FD..1248 ; Alphabetic # Lo [332] GEORGIAN LETTER AEN..ETHIOPIC SYLLABLE QWA 124A..124D ; Alphabetic # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE 1250..1256 ; Alphabetic # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO 1258 ; Alphabetic # Lo ETHIOPIC SYLLABLE QHWA @@ -636,8 +674,9 @@ FFE9..FFEC ; Math # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS A 1BA2..1BA5 ; Alphabetic # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU 1BA6..1BA7 ; Alphabetic # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG 1BA8..1BA9 ; Alphabetic # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG +1BAC..1BAD ; Alphabetic # Mc [2] SUNDANESE CONSONANT SIGN PASANGAN MA..SUNDANESE CONSONANT SIGN PASANGAN WA 1BAE..1BAF ; Alphabetic # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA -1BC0..1BE5 ; Alphabetic # Lo [38] BATAK LETTER A..BATAK LETTER U +1BBA..1BE5 ; Alphabetic # Lo [44] SUNDANESE AVAGRAHA..BATAK LETTER U 1BE7 ; Alphabetic # Mc BATAK VOWEL SIGN E 1BE8..1BE9 ; Alphabetic # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE 1BEA..1BEC ; Alphabetic # Mc [3] BATAK VOWEL SIGN I..BATAK VOWEL SIGN O @@ -653,10 +692,11 @@ FFE9..FFEC ; Math # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS A 1C78..1C7D ; Alphabetic # 
Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD 1CE9..1CEC ; Alphabetic # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL 1CEE..1CF1 ; Alphabetic # Lo [4] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ANUSVARA UBHAYATO MUKHA -1CF2 ; Alphabetic # Mc VEDIC SIGN ARDHAVISARGA +1CF2..1CF3 ; Alphabetic # Mc [2] VEDIC SIGN ARDHAVISARGA..VEDIC SIGN ROTATED ARDHAVISARGA +1CF5..1CF6 ; Alphabetic # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA 1D00..1D2B ; Alphabetic # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL -1D2C..1D61 ; Alphabetic # Lm [54] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI -1D62..1D77 ; Alphabetic # L& [22] LATIN SUBSCRIPT SMALL LETTER I..LATIN SMALL LETTER TURNED G +1D2C..1D6A ; Alphabetic # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1D6B..1D77 ; Alphabetic # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G 1D78 ; Alphabetic # Lm MODIFIER LETTER CYRILLIC EN 1D79..1D9A ; Alphabetic # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK 1D9B..1DBF ; Alphabetic # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA @@ -703,12 +743,15 @@ FFE9..FFEC ; Math # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS A 24B6..24E9 ; Alphabetic # So [52] CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN SMALL LETTER Z 2C00..2C2E ; Alphabetic # L& [47] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE 2C30..2C5E ; Alphabetic # L& [47] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER LATINATE MYSLITE -2C60..2C7C ; Alphabetic # L& [29] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN SUBSCRIPT SMALL LETTER J -2C7D ; Alphabetic # Lm MODIFIER LETTER CAPITAL V +2C60..2C7B ; Alphabetic # L& [28] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN LETTER SMALL CAPITAL TURNED E +2C7C..2C7D ; Alphabetic # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V 2C7E..2CE4 ; Alphabetic # L& [103] LATIN 
CAPITAL LETTER S WITH SWASH TAIL..COPTIC SYMBOL KAI 2CEB..2CEE ; Alphabetic # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CF2..2CF3 ; Alphabetic # L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI 2D00..2D25 ; Alphabetic # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE -2D30..2D65 ; Alphabetic # Lo [54] TIFINAGH LETTER YA..TIFINAGH LETTER YAZZ +2D27 ; Alphabetic # L& GEORGIAN SMALL LETTER YN +2D2D ; Alphabetic # L& GEORGIAN SMALL LETTER AEN +2D30..2D67 ; Alphabetic # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO 2D6F ; Alphabetic # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK 2D80..2D96 ; Alphabetic # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE 2DA0..2DA6 ; Alphabetic # Lo [7] ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO @@ -740,7 +783,7 @@ FFE9..FFEC ; Math # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS A 31A0..31BA ; Alphabetic # Lo [27] BOPOMOFO LETTER BU..BOPOMOFO LETTER ZY 31F0..31FF ; Alphabetic # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO 3400..4DB5 ; Alphabetic # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5 -4E00..9FCB ; Alphabetic # Lo [20940] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCB +4E00..9FCC ; Alphabetic # Lo [20941] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCC A000..A014 ; Alphabetic # Lo [21] YI SYLLABLE IT..YI SYLLABLE E A015 ; Alphabetic # Lm YI SYLLABLE WU A016..A48C ; Alphabetic # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR @@ -752,8 +795,10 @@ A610..A61F ; Alphabetic # Lo [16] VAI SYLLABLE NDOLE FA..VAI SYMBOL JONG A62A..A62B ; Alphabetic # Lo [2] VAI SYLLABLE NDOLE MA..VAI SYLLABLE NDOLE DO A640..A66D ; Alphabetic # L& [46] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O A66E ; Alphabetic # Lo CYRILLIC LETTER MULTIOCULAR O +A674..A67B ; Alphabetic # Mn [8] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC LETTER OMEGA A67F ; Alphabetic # Lm 
CYRILLIC PAYEROK A680..A697 ; Alphabetic # L& [24] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER SHWE +A69F ; Alphabetic # Mn COMBINING CYRILLIC LETTER IOTIFIED E A6A0..A6E5 ; Alphabetic # Lo [70] BAMUM LETTER A..BAMUM LETTER KI A6E6..A6EF ; Alphabetic # Nl [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM A717..A71F ; Alphabetic # Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK @@ -762,8 +807,9 @@ A770 ; Alphabetic # Lm MODIFIER LETTER US A771..A787 ; Alphabetic # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T A788 ; Alphabetic # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A78B..A78E ; Alphabetic # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT -A790..A791 ; Alphabetic # L& [2] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER N WITH DESCENDER -A7A0..A7A9 ; Alphabetic # L& [10] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN SMALL LETTER S WITH OBLIQUE STROKE +A790..A793 ; Alphabetic # L& [4] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER C WITH BAR +A7A0..A7AA ; Alphabetic # L& [11] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN CAPITAL LETTER H WITH HOOK +A7F8..A7F9 ; Alphabetic # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE A7FA ; Alphabetic # L& LATIN LETTER SMALL CAPITAL TURNED M A7FB..A801 ; Alphabetic # Lo [7] LATIN EPIGRAPHIC LETTER REVERSED F..SYLOTI NAGRI LETTER I A803..A805 ; Alphabetic # Lo [3] SYLOTI NAGRI LETTER U..SYLOTI NAGRI LETTER O @@ -820,6 +866,13 @@ AAC0 ; Alphabetic # Lo TAI VIET TONE MAI NUENG AAC2 ; Alphabetic # Lo TAI VIET TONE MAI SONG AADB..AADC ; Alphabetic # Lo [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG AADD ; Alphabetic # Lm TAI VIET SYMBOL SAM +AAE0..AAEA ; Alphabetic # Lo [11] MEETEI MAYEK LETTER E..MEETEI MAYEK LETTER SSA +AAEB ; Alphabetic # Mc MEETEI MAYEK VOWEL SIGN II +AAEC..AAED ; Alphabetic # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI +AAEE..AAEF ; 
Alphabetic # Mc [2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU +AAF2 ; Alphabetic # Lo MEETEI MAYEK ANJI +AAF3..AAF4 ; Alphabetic # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK +AAF5 ; Alphabetic # Mc MEETEI MAYEK VOWEL SIGN VISARGA AB01..AB06 ; Alphabetic # Lo [6] ETHIOPIC SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO AB09..AB0E ; Alphabetic # Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO AB11..AB16 ; Alphabetic # Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO @@ -834,8 +887,7 @@ ABE9..ABEA ; Alphabetic # Mc [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEETEI MA AC00..D7A3 ; Alphabetic # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH D7B0..D7C6 ; Alphabetic # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E D7CB..D7FB ; Alphabetic # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH -F900..FA2D ; Alphabetic # Lo [302] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA2D -FA30..FA6D ; Alphabetic # Lo [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D +F900..FA6D ; Alphabetic # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D FA70..FAD9 ; Alphabetic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 FB00..FB06 ; Alphabetic # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST FB13..FB17 ; Alphabetic # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH @@ -894,6 +946,8 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 1083F..10855 ; Alphabetic # Lo [23] CYPRIOT SYLLABLE ZO..IMPERIAL ARAMAIC LETTER TAW 10900..10915 ; Alphabetic # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU 10920..10939 ; Alphabetic # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C +10980..109B7 ; Alphabetic # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA +109BE..109BF ; Alphabetic # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN 10A00 ; 
Alphabetic # Lo KHAROSHTHI LETTER A 10A01..10A03 ; Alphabetic # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R 10A05..10A06 ; Alphabetic # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O @@ -916,10 +970,33 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 110B0..110B2 ; Alphabetic # Mc [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II 110B3..110B6 ; Alphabetic # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI 110B7..110B8 ; Alphabetic # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU +110D0..110E8 ; Alphabetic # Lo [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE +11100..11102 ; Alphabetic # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11103..11126 ; Alphabetic # Lo [36] CHAKMA LETTER AA..CHAKMA LETTER HAA +11127..1112B ; Alphabetic # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112C ; Alphabetic # Mc CHAKMA VOWEL SIGN E +1112D..11132 ; Alphabetic # Mn [6] CHAKMA VOWEL SIGN AI..CHAKMA AU MARK +11180..11181 ; Alphabetic # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +11182 ; Alphabetic # Mc SHARADA SIGN VISARGA +11183..111B2 ; Alphabetic # Lo [48] SHARADA LETTER A..SHARADA LETTER HA +111B3..111B5 ; Alphabetic # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II +111B6..111BE ; Alphabetic # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +111BF ; Alphabetic # Mc SHARADA VOWEL SIGN AU +111C1..111C4 ; Alphabetic # Lo [4] SHARADA SIGN AVAGRAHA..SHARADA OM +11680..116AA ; Alphabetic # Lo [43] TAKRI LETTER A..TAKRI LETTER RRA +116AB ; Alphabetic # Mn TAKRI SIGN ANUSVARA +116AC ; Alphabetic # Mc TAKRI SIGN VISARGA +116AD ; Alphabetic # Mn TAKRI VOWEL SIGN AA +116AE..116AF ; Alphabetic # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II +116B0..116B5 ; Alphabetic # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU 12000..1236E ; Alphabetic # Lo [879] CUNEIFORM SIGN A..CUNEIFORM SIGN ZUM 12400..12462 ; Alphabetic # Nl [99] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN OLD ASSYRIAN ONE 
QUARTER 13000..1342E ; Alphabetic # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032 16800..16A38 ; Alphabetic # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ +16F00..16F44 ; Alphabetic # Lo [69] MIAO LETTER PA..MIAO LETTER HHA +16F50 ; Alphabetic # Lo MIAO LETTER NASALIZATION +16F51..16F7E ; Alphabetic # Mc [46] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN NG +16F93..16F9F ; Alphabetic # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 1B000..1B001 ; Alphabetic # Lo [2] KATAKANA LETTER ARCHAIC E..HIRAGANA LETTER ARCHAIC YE 1D400..1D454 ; Alphabetic # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G 1D456..1D49C ; Alphabetic # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A @@ -951,12 +1028,45 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 1D78A..1D7A8 ; Alphabetic # L& [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA 1D7AA..1D7C2 ; Alphabetic # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA 1D7C4..1D7CB ; Alphabetic # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA +1EE00..1EE03 ; Alphabetic # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; Alphabetic # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; Alphabetic # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; Alphabetic # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; Alphabetic # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; Alphabetic # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; Alphabetic # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; Alphabetic # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; Alphabetic # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; Alphabetic # Lo ARABIC MATHEMATICAL 
TAILED JEEM +1EE47 ; Alphabetic # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; Alphabetic # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; Alphabetic # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; Alphabetic # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; Alphabetic # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; Alphabetic # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; Alphabetic # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; Alphabetic # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; Alphabetic # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; Alphabetic # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; Alphabetic # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; Alphabetic # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; Alphabetic # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; Alphabetic # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; Alphabetic # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; Alphabetic # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; Alphabetic # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; Alphabetic # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; Alphabetic # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; Alphabetic # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; Alphabetic # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; Alphabetic # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; Alphabetic # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN 20000..2A6D6 ; Alphabetic # Lo [42711] CJK UNIFIED 
IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6 2A700..2B734 ; Alphabetic # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734 2B740..2B81D ; Alphabetic # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D 2F800..2FA1D ; Alphabetic # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D -# Total code points: 101539 +# Total code points: 102159 # ================================================ @@ -964,9 +1074,9 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG # Generated from: Ll + Other_Lowercase 0061..007A ; Lowercase # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z -00AA ; Lowercase # L& FEMININE ORDINAL INDICATOR +00AA ; Lowercase # Lo FEMININE ORDINAL INDICATOR 00B5 ; Lowercase # L& MICRO SIGN -00BA ; Lowercase # L& MASCULINE ORDINAL INDICATOR +00BA ; Lowercase # Lo MASCULINE ORDINAL INDICATOR 00DF..00F6 ; Lowercase # L& [24] LATIN SMALL LETTER SHARP S..LATIN SMALL LETTER O WITH DIAERESIS 00F8..00FF ; Lowercase # L& [8] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER Y WITH DIAERESIS 0101 ; Lowercase # L& LATIN SMALL LETTER A WITH MACRON @@ -1237,8 +1347,8 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 0527 ; Lowercase # L& CYRILLIC SMALL LETTER SHHA WITH DESCENDER 0561..0587 ; Lowercase # L& [39] ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LIGATURE ECH YIWN 1D00..1D2B ; Lowercase # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL -1D2C..1D61 ; Lowercase # Lm [54] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI -1D62..1D77 ; Lowercase # L& [22] LATIN SUBSCRIPT SMALL LETTER I..LATIN SMALL LETTER TURNED G +1D2C..1D6A ; Lowercase # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1D6B..1D77 ; Lowercase # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G 1D78 ; Lowercase # Lm MODIFIER LETTER CYRILLIC EN 1D79..1D9A ; Lowercase # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH 
WITH RETROFLEX HOOK 1D9B..1DBF ; Lowercase # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA @@ -1386,7 +1496,9 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 1FE0..1FE7 ; Lowercase # L& [8] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI 1FF2..1FF4 ; Lowercase # L& [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI 1FF6..1FF7 ; Lowercase # L& [2] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI -2090..2094 ; Lowercase # Lm [5] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER SCHWA +2071 ; Lowercase # Lm SUPERSCRIPT LATIN SMALL LETTER I +207F ; Lowercase # Lm SUPERSCRIPT LATIN SMALL LETTER N +2090..209C ; Lowercase # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T 210A ; Lowercase # L& SCRIPT SMALL G 210E..210F ; Lowercase # L& [2] PLANCK CONSTANT..PLANCK CONSTANT OVER TWO PI 2113 ; Lowercase # L& SCRIPT SMALL L @@ -1407,8 +1519,8 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 2C6C ; Lowercase # L& LATIN SMALL LETTER Z WITH DESCENDER 2C71 ; Lowercase # L& LATIN SMALL LETTER V WITH RIGHT HOOK 2C73..2C74 ; Lowercase # L& [2] LATIN SMALL LETTER W WITH HOOK..LATIN SMALL LETTER V WITH CURL -2C76..2C7C ; Lowercase # L& [7] LATIN SMALL LETTER HALF H..LATIN SUBSCRIPT SMALL LETTER J -2C7D ; Lowercase # Lm MODIFIER LETTER CAPITAL V +2C76..2C7B ; Lowercase # L& [6] LATIN SMALL LETTER HALF H..LATIN LETTER SMALL CAPITAL TURNED E +2C7C..2C7D ; Lowercase # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V 2C81 ; Lowercase # L& COPTIC SMALL LETTER ALFA 2C83 ; Lowercase # L& COPTIC SMALL LETTER VIDA 2C85 ; Lowercase # L& COPTIC SMALL LETTER GAMMA @@ -1461,7 +1573,10 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 2CE3..2CE4 ; Lowercase # L& [2] COPTIC 
SMALL LETTER OLD NUBIAN WAU..COPTIC SYMBOL KAI 2CEC ; Lowercase # L& COPTIC SMALL LETTER CRYPTOGRAMMIC SHEI 2CEE ; Lowercase # L& COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CF3 ; Lowercase # L& COPTIC SMALL LETTER BOHAIRIC KHEI 2D00..2D25 ; Lowercase # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE +2D27 ; Lowercase # L& GEORGIAN SMALL LETTER YN +2D2D ; Lowercase # L& GEORGIAN SMALL LETTER AEN A641 ; Lowercase # L& CYRILLIC SMALL LETTER ZEMLYA A643 ; Lowercase # L& CYRILLIC SMALL LETTER DZELO A645 ; Lowercase # L& CYRILLIC SMALL LETTER REVERSED DZE @@ -1547,11 +1662,13 @@ A787 ; Lowercase # L& LATIN SMALL LETTER INSULAR T A78C ; Lowercase # L& LATIN SMALL LETTER SALTILLO A78E ; Lowercase # L& LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A791 ; Lowercase # L& LATIN SMALL LETTER N WITH DESCENDER +A793 ; Lowercase # L& LATIN SMALL LETTER C WITH BAR A7A1 ; Lowercase # L& LATIN SMALL LETTER G WITH OBLIQUE STROKE A7A3 ; Lowercase # L& LATIN SMALL LETTER K WITH OBLIQUE STROKE A7A5 ; Lowercase # L& LATIN SMALL LETTER N WITH OBLIQUE STROKE A7A7 ; Lowercase # L& LATIN SMALL LETTER R WITH OBLIQUE STROKE A7A9 ; Lowercase # L& LATIN SMALL LETTER S WITH OBLIQUE STROKE +A7F8..A7F9 ; Lowercase # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE A7FA ; Lowercase # L& LATIN LETTER SMALL CAPITAL TURNED M FB00..FB06 ; Lowercase # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST FB13..FB17 ; Lowercase # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH @@ -1586,7 +1703,7 @@ FF41..FF5A ; Lowercase # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L 1D7C4..1D7C9 ; Lowercase # L& [6] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC PI SYMBOL 1D7CB ; Lowercase # L& MATHEMATICAL BOLD SMALL DIGAMMA -# Total code points: 1918 +# Total code points: 1934 # ================================================ @@ -1861,6 +1978,8 @@ FF41..FF5A ; Lowercase # L& [26] FULLWIDTH LATIN 
SMALL LETTER A..FULLWIDTH L 0526 ; Uppercase # L& CYRILLIC CAPITAL LETTER SHHA WITH DESCENDER 0531..0556 ; Uppercase # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH 10A0..10C5 ; Uppercase # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; Uppercase # L& GEORGIAN CAPITAL LETTER YN +10CD ; Uppercase # L& GEORGIAN CAPITAL LETTER AEN 1E00 ; Uppercase # L& LATIN CAPITAL LETTER A WITH RING BELOW 1E02 ; Uppercase # L& LATIN CAPITAL LETTER B WITH DOT ABOVE 1E04 ; Uppercase # L& LATIN CAPITAL LETTER B WITH DOT BELOW @@ -2077,6 +2196,7 @@ FF41..FF5A ; Lowercase # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L 2CE2 ; Uppercase # L& COPTIC CAPITAL LETTER OLD NUBIAN WAU 2CEB ; Uppercase # L& COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI 2CED ; Uppercase # L& COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA +2CF2 ; Uppercase # L& COPTIC CAPITAL LETTER BOHAIRIC KHEI A640 ; Uppercase # L& CYRILLIC CAPITAL LETTER ZEMLYA A642 ; Uppercase # L& CYRILLIC CAPITAL LETTER DZELO A644 ; Uppercase # L& CYRILLIC CAPITAL LETTER REVERSED DZE @@ -2160,11 +2280,13 @@ A786 ; Uppercase # L& LATIN CAPITAL LETTER INSULAR T A78B ; Uppercase # L& LATIN CAPITAL LETTER SALTILLO A78D ; Uppercase # L& LATIN CAPITAL LETTER TURNED H A790 ; Uppercase # L& LATIN CAPITAL LETTER N WITH DESCENDER +A792 ; Uppercase # L& LATIN CAPITAL LETTER C WITH BAR A7A0 ; Uppercase # L& LATIN CAPITAL LETTER G WITH OBLIQUE STROKE A7A2 ; Uppercase # L& LATIN CAPITAL LETTER K WITH OBLIQUE STROKE A7A4 ; Uppercase # L& LATIN CAPITAL LETTER N WITH OBLIQUE STROKE A7A6 ; Uppercase # L& LATIN CAPITAL LETTER R WITH OBLIQUE STROKE A7A8 ; Uppercase # L& LATIN CAPITAL LETTER S WITH OBLIQUE STROKE +A7AA ; Uppercase # L& LATIN CAPITAL LETTER H WITH HOOK FF21..FF3A ; Uppercase # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z 10400..10427 ; Uppercase # L& [40] DESERET CAPITAL LETTER LONG I..DESERET CAPITAL LETTER EW 1D400..1D419 ; Uppercase # L& [26] MATHEMATICAL BOLD CAPITAL 
A..MATHEMATICAL BOLD CAPITAL Z @@ -2199,7 +2321,7 @@ FF21..FF3A ; Uppercase # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH 1D790..1D7A8 ; Uppercase # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA 1D7CA ; Uppercase # L& MATHEMATICAL BOLD CAPITAL DIGAMMA -# Total code points: 1478 +# Total code points: 1483 # ================================================ @@ -2209,9 +2331,9 @@ FF21..FF3A ; Uppercase # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH 0041..005A ; Cased # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z 0061..007A ; Cased # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z -00AA ; Cased # L& FEMININE ORDINAL INDICATOR +00AA ; Cased # Lo FEMININE ORDINAL INDICATOR 00B5 ; Cased # L& MICRO SIGN -00BA ; Cased # L& MASCULINE ORDINAL INDICATOR +00BA ; Cased # Lo MASCULINE ORDINAL INDICATOR 00C0..00D6 ; Cased # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS 00D8..00F6 ; Cased # L& [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS 00F8..01BA ; Cased # L& [195] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL @@ -2236,9 +2358,11 @@ FF21..FF3A ; Uppercase # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH 0531..0556 ; Cased # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH 0561..0587 ; Cased # L& [39] ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LIGATURE ECH YIWN 10A0..10C5 ; Cased # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; Cased # L& GEORGIAN CAPITAL LETTER YN +10CD ; Cased # L& GEORGIAN CAPITAL LETTER AEN 1D00..1D2B ; Cased # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL -1D2C..1D61 ; Cased # Lm [54] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI -1D62..1D77 ; Cased # L& [22] LATIN SUBSCRIPT SMALL LETTER I..LATIN SMALL LETTER TURNED G +1D2C..1D6A ; Cased # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI 
+1D6B..1D77 ; Cased # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G 1D78 ; Cased # Lm MODIFIER LETTER CYRILLIC EN 1D79..1D9A ; Cased # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK 1D9B..1DBF ; Cased # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA @@ -2261,7 +2385,9 @@ FF21..FF3A ; Uppercase # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH 1FE0..1FEC ; Cased # L& [13] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA 1FF2..1FF4 ; Cased # L& [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI 1FF6..1FFC ; Cased # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI -2090..2094 ; Cased # Lm [5] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER SCHWA +2071 ; Cased # Lm SUPERSCRIPT LATIN SMALL LETTER I +207F ; Cased # Lm SUPERSCRIPT LATIN SMALL LETTER N +2090..209C ; Cased # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T 2102 ; Cased # L& DOUBLE-STRUCK CAPITAL C 2107 ; Cased # L& EULER CONSTANT 210A..2113 ; Cased # L& [10] SCRIPT SMALL G..SCRIPT SMALL L @@ -2281,19 +2407,23 @@ FF21..FF3A ; Uppercase # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH 24B6..24E9 ; Cased # So [52] CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN SMALL LETTER Z 2C00..2C2E ; Cased # L& [47] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE 2C30..2C5E ; Cased # L& [47] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER LATINATE MYSLITE -2C60..2C7C ; Cased # L& [29] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN SUBSCRIPT SMALL LETTER J -2C7D ; Cased # Lm MODIFIER LETTER CAPITAL V +2C60..2C7B ; Cased # L& [28] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN LETTER SMALL CAPITAL TURNED E +2C7C..2C7D ; Cased # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V 2C7E..2CE4 ; Cased # L& [103] LATIN CAPITAL LETTER S WITH 
SWASH TAIL..COPTIC SYMBOL KAI 2CEB..2CEE ; Cased # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CF2..2CF3 ; Cased # L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI 2D00..2D25 ; Cased # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE +2D27 ; Cased # L& GEORGIAN SMALL LETTER YN +2D2D ; Cased # L& GEORGIAN SMALL LETTER AEN A640..A66D ; Cased # L& [46] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O A680..A697 ; Cased # L& [24] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER SHWE A722..A76F ; Cased # L& [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN SMALL LETTER CON A770 ; Cased # Lm MODIFIER LETTER US A771..A787 ; Cased # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T A78B..A78E ; Cased # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT -A790..A791 ; Cased # L& [2] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER N WITH DESCENDER -A7A0..A7A9 ; Cased # L& [10] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN SMALL LETTER S WITH OBLIQUE STROKE +A790..A793 ; Cased # L& [4] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER C WITH BAR +A7A0..A7AA ; Cased # L& [11] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN CAPITAL LETTER H WITH HOOK +A7F8..A7F9 ; Cased # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE A7FA ; Cased # L& LATIN LETTER SMALL CAPITAL TURNED M FB00..FB06 ; Cased # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST FB13..FB17 ; Cased # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH @@ -2331,7 +2461,7 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1D7AA..1D7C2 ; Cased # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA 1D7C4..1D7CB ; Cased # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD 
SMALL DIGAMMA -# Total code points: 3427 +# Total code points: 3448 # ================================================ @@ -2377,7 +2507,7 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 05C4..05C5 ; Case_Ignorable # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT 05C7 ; Case_Ignorable # Mn HEBREW POINT QAMATS QATAN 05F4 ; Case_Ignorable # Po HEBREW PUNCTUATION GERSHAYIM -0600..0603 ; Case_Ignorable # Cf [4] ARABIC NUMBER SIGN..ARABIC SIGN SAFHA +0600..0604 ; Case_Ignorable # Cf [5] ARABIC NUMBER SIGN..ARABIC SIGN SAMVAT 0610..061A ; Case_Ignorable # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA 0640 ; Case_Ignorable # Lm ARABIC TATWEEL 064B..065F ; Case_Ignorable # Mn [21] ARABIC FATHATAN..ARABIC WAVY HAMZA BELOW @@ -2403,6 +2533,7 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 0828 ; Case_Ignorable # Lm SAMARITAN MODIFIER LETTER I 0829..082D ; Case_Ignorable # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA 0859..085B ; Case_Ignorable # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK +08E4..08FE ; Case_Ignorable # Mn [27] ARABIC CURLY FATHA..ARABIC DAMMA WITH DOT 0900..0902 ; Case_Ignorable # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA 093A ; Case_Ignorable # Mn DEVANAGARI VOWEL SIGN OE 093C ; Case_Ignorable # Mn DEVANAGARI SIGN NUKTA @@ -2492,7 +2623,7 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1732..1734 ; Case_Ignorable # Mn [3] HANUNOO VOWEL SIGN I..HANUNOO SIGN PAMUDPOD 1752..1753 ; Case_Ignorable # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U 1772..1773 ; Case_Ignorable # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U -17B4..17B5 ; Case_Ignorable # Cf [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA +17B4..17B5 ; Case_Ignorable # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA 17B7..17BD ; Case_Ignorable # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA 17C6 ; 
Case_Ignorable # Mn KHMER SIGN NIKAHIT 17C9..17D3 ; Case_Ignorable # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT @@ -2523,6 +2654,7 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1B80..1B81 ; Case_Ignorable # Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR 1BA2..1BA5 ; Case_Ignorable # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU 1BA8..1BA9 ; Case_Ignorable # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG +1BAB ; Case_Ignorable # Mn SUNDANESE SIGN VIRAMA 1BE6 ; Case_Ignorable # Mn BATAK SIGN TOMPI 1BE8..1BE9 ; Case_Ignorable # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE 1BED ; Case_Ignorable # Mn BATAK VOWEL SIGN KARO O @@ -2534,7 +2666,8 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1CD4..1CE0 ; Case_Ignorable # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA 1CE2..1CE8 ; Case_Ignorable # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL 1CED ; Case_Ignorable # Mn VEDIC SIGN TIRYAK -1D2C..1D61 ; Case_Ignorable # Lm [54] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI +1CF4 ; Case_Ignorable # Mn VEDIC TONE CANDRA ABOVE +1D2C..1D6A ; Case_Ignorable # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI 1D78 ; Case_Ignorable # Lm MODIFIER LETTER CYRILLIC EN 1D9B..1DBF ; Case_Ignorable # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA 1DC0..1DE6 ; Case_Ignorable # Mn [39] COMBINING DOTTED GRAVE ACCENT..COMBINING LATIN SMALL LETTER Z @@ -2561,14 +2694,14 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 20E1 ; Case_Ignorable # Mn COMBINING LEFT RIGHT ARROW ABOVE 20E2..20E4 ; Case_Ignorable # Me [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE 20E5..20F0 ; Case_Ignorable # Mn [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE -2C7D ; 
Case_Ignorable # Lm MODIFIER LETTER CAPITAL V +2C7C..2C7D ; Case_Ignorable # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V 2CEF..2CF1 ; Case_Ignorable # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS 2D6F ; Case_Ignorable # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK 2D7F ; Case_Ignorable # Mn TIFINAGH CONSONANT JOINER 2DE0..2DFF ; Case_Ignorable # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS 2E2F ; Case_Ignorable # Lm VERTICAL TILDE 3005 ; Case_Ignorable # Lm IDEOGRAPHIC ITERATION MARK -302A..302F ; Case_Ignorable # Mn [6] IDEOGRAPHIC LEVEL TONE MARK..HANGUL DOUBLE DOT TONE MARK +302A..302D ; Case_Ignorable # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK 3031..3035 ; Case_Ignorable # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF 303B ; Case_Ignorable # Lm VERTICAL IDEOGRAPHIC ITERATION MARK 3099..309A ; Case_Ignorable # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK @@ -2580,8 +2713,9 @@ A4F8..A4FD ; Case_Ignorable # Lm [6] LISU LETTER TONE MYA TI..LISU LETTER T A60C ; Case_Ignorable # Lm VAI SYLLABLE LENGTHENER A66F ; Case_Ignorable # Mn COMBINING CYRILLIC VZMET A670..A672 ; Case_Ignorable # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN -A67C..A67D ; Case_Ignorable # Mn [2] COMBINING CYRILLIC KAVYKA..COMBINING CYRILLIC PAYEROK +A674..A67D ; Case_Ignorable # Mn [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK A67F ; Case_Ignorable # Lm CYRILLIC PAYEROK +A69F ; Case_Ignorable # Mn COMBINING CYRILLIC LETTER IOTIFIED E A6F0..A6F1 ; Case_Ignorable # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS A700..A716 ; Case_Ignorable # Sk [23] MODIFIER LETTER CHINESE TONE YIN PING..MODIFIER LETTER EXTRA-LOW LEFT-STEM TONE BAR A717..A71F ; Case_Ignorable # Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW 
INVERTED EXCLAMATION MARK @@ -2589,6 +2723,7 @@ A720..A721 ; Case_Ignorable # Sk [2] MODIFIER LETTER STRESS AND HIGH TONE.. A770 ; Case_Ignorable # Lm MODIFIER LETTER US A788 ; Case_Ignorable # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A789..A78A ; Case_Ignorable # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN +A7F8..A7F9 ; Case_Ignorable # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE A802 ; Case_Ignorable # Mn SYLOTI NAGRI SIGN DVISVARA A806 ; Case_Ignorable # Mn SYLOTI NAGRI SIGN HASANTA A80B ; Case_Ignorable # Mn SYLOTI NAGRI SIGN ANUSVARA @@ -2614,6 +2749,9 @@ AAB7..AAB8 ; Case_Ignorable # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA AABE..AABF ; Case_Ignorable # Mn [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK AAC1 ; Case_Ignorable # Mn TAI VIET TONE MAI THO AADD ; Case_Ignorable # Lm TAI VIET SYMBOL SAM +AAEC..AAED ; Case_Ignorable # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI +AAF3..AAF4 ; Case_Ignorable # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK +AAF6 ; Case_Ignorable # Mn MEETEI MAYEK VIRAMA ABE5 ; Case_Ignorable # Mn MEETEI MAYEK VOWEL SIGN ANAP ABE8 ; Case_Ignorable # Mn MEETEI MAYEK VOWEL SIGN UNAP ABED ; Case_Ignorable # Mn MEETEI MAYEK APUN IYEK @@ -2646,6 +2784,17 @@ FFF9..FFFB ; Case_Ignorable # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLI 110B3..110B6 ; Case_Ignorable # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI 110B9..110BA ; Case_Ignorable # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA 110BD ; Case_Ignorable # Cf KAITHI NUMBER SIGN +11100..11102 ; Case_Ignorable # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11127..1112B ; Case_Ignorable # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112D..11134 ; Case_Ignorable # Mn [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA +11180..11181 ; Case_Ignorable # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +111B6..111BE ; Case_Ignorable # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL 
SIGN O +116AB ; Case_Ignorable # Mn TAKRI SIGN ANUSVARA +116AD ; Case_Ignorable # Mn TAKRI VOWEL SIGN AA +116B0..116B5 ; Case_Ignorable # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +116B7 ; Case_Ignorable # Mn TAKRI SIGN NUKTA +16F8F..16F92 ; Case_Ignorable # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW +16F93..16F9F ; Case_Ignorable # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 1D167..1D169 ; Case_Ignorable # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 1D173..1D17A ; Case_Ignorable # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE 1D17B..1D182 ; Case_Ignorable # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE @@ -2656,7 +2805,7 @@ E0001 ; Case_Ignorable # Cf LANGUAGE TAG E0020..E007F ; Case_Ignorable # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Case_Ignorable # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 1692 +# Total code points: 1799 # ================================================ @@ -2932,6 +3081,8 @@ E0100..E01EF ; Case_Ignorable # Mn [240] VARIATION SELECTOR-17..VARIATION SELEC 0526 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER SHHA WITH DESCENDER 0531..0556 ; Changes_When_Lowercased # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH 10A0..10C5 ; Changes_When_Lowercased # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; Changes_When_Lowercased # L& GEORGIAN CAPITAL LETTER YN +10CD ; Changes_When_Lowercased # L& GEORGIAN CAPITAL LETTER AEN 1E00 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER A WITH RING BELOW 1E02 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER B WITH DOT ABOVE 1E04 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER B WITH DOT BELOW @@ -3141,6 +3292,7 @@ E0100..E01EF ; Case_Ignorable # Mn [240] VARIATION SELECTOR-17..VARIATION SELEC 2CE2 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER OLD NUBIAN WAU 2CEB ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER 
CRYPTOGRAMMIC SHEI 2CED ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA +2CF2 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER BOHAIRIC KHEI A640 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER ZEMLYA A642 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER DZELO A644 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER REVERSED DZE @@ -3224,15 +3376,17 @@ A786 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER INSULAR A78B ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER SALTILLO A78D ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER TURNED H A790 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER N WITH DESCENDER +A792 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER C WITH BAR A7A0 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER G WITH OBLIQUE STROKE A7A2 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER K WITH OBLIQUE STROKE A7A4 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER N WITH OBLIQUE STROKE A7A6 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER R WITH OBLIQUE STROKE A7A8 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER S WITH OBLIQUE STROKE +A7AA ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER H WITH HOOK FF21..FF3A ; Changes_When_Lowercased # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z 10400..10427 ; Changes_When_Lowercased # L& [40] DESERET CAPITAL LETTER LONG I..DESERET CAPITAL LETTER EW -# Total code points: 1038 +# Total code points: 1043 # ================================================ @@ -3390,7 +3544,7 @@ FF21..FF3A ; Changes_When_Lowercased # L& [26] FULLWIDTH LATIN CAPITAL LETTE 025B ; Changes_When_Uppercased # L& LATIN SMALL LETTER OPEN E 0260 ; Changes_When_Uppercased # L& LATIN SMALL LETTER G WITH HOOK 0263 ; Changes_When_Uppercased # L& LATIN SMALL LETTER GAMMA -0265 ; Changes_When_Uppercased # L& LATIN SMALL LETTER TURNED H +0265..0266 ; Changes_When_Uppercased # L& [2] LATIN SMALL LETTER TURNED H..LATIN SMALL LETTER H WITH HOOK 
0268..0269 ; Changes_When_Uppercased # L& [2] LATIN SMALL LETTER I WITH STROKE..LATIN SMALL LETTER IOTA 026B ; Changes_When_Uppercased # L& LATIN SMALL LETTER L WITH MIDDLE TILDE 026F ; Changes_When_Uppercased # L& LATIN SMALL LETTER TURNED M @@ -3731,7 +3885,10 @@ FF21..FF3A ; Changes_When_Lowercased # L& [26] FULLWIDTH LATIN CAPITAL LETTE 2CE3 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER OLD NUBIAN WAU 2CEC ; Changes_When_Uppercased # L& COPTIC SMALL LETTER CRYPTOGRAMMIC SHEI 2CEE ; Changes_When_Uppercased # L& COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CF3 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER BOHAIRIC KHEI 2D00..2D25 ; Changes_When_Uppercased # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE +2D27 ; Changes_When_Uppercased # L& GEORGIAN SMALL LETTER YN +2D2D ; Changes_When_Uppercased # L& GEORGIAN SMALL LETTER AEN A641 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER ZEMLYA A643 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER DZELO A645 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER REVERSED DZE @@ -3814,6 +3971,7 @@ A785 ; Changes_When_Uppercased # L& LATIN SMALL LETTER INSULAR S A787 ; Changes_When_Uppercased # L& LATIN SMALL LETTER INSULAR T A78C ; Changes_When_Uppercased # L& LATIN SMALL LETTER SALTILLO A791 ; Changes_When_Uppercased # L& LATIN SMALL LETTER N WITH DESCENDER +A793 ; Changes_When_Uppercased # L& LATIN SMALL LETTER C WITH BAR A7A1 ; Changes_When_Uppercased # L& LATIN SMALL LETTER G WITH OBLIQUE STROKE A7A3 ; Changes_When_Uppercased # L& LATIN SMALL LETTER K WITH OBLIQUE STROKE A7A5 ; Changes_When_Uppercased # L& LATIN SMALL LETTER N WITH OBLIQUE STROKE @@ -3824,7 +3982,7 @@ FB13..FB17 ; Changes_When_Uppercased # L& [5] ARMENIAN SMALL LIGATURE MEN N FF41..FF5A ; Changes_When_Uppercased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z 10428..1044F ; Changes_When_Uppercased # L& [40] DESERET SMALL LETTER LONG I..DESERET SMALL LETTER EW -# Total code points: 1121 +# Total code 
points: 1126 # ================================================ @@ -3983,7 +4141,7 @@ FF41..FF5A ; Changes_When_Uppercased # L& [26] FULLWIDTH LATIN SMALL LETTER 025B ; Changes_When_Titlecased # L& LATIN SMALL LETTER OPEN E 0260 ; Changes_When_Titlecased # L& LATIN SMALL LETTER G WITH HOOK 0263 ; Changes_When_Titlecased # L& LATIN SMALL LETTER GAMMA -0265 ; Changes_When_Titlecased # L& LATIN SMALL LETTER TURNED H +0265..0266 ; Changes_When_Titlecased # L& [2] LATIN SMALL LETTER TURNED H..LATIN SMALL LETTER H WITH HOOK 0268..0269 ; Changes_When_Titlecased # L& [2] LATIN SMALL LETTER I WITH STROKE..LATIN SMALL LETTER IOTA 026B ; Changes_When_Titlecased # L& LATIN SMALL LETTER L WITH MIDDLE TILDE 026F ; Changes_When_Titlecased # L& LATIN SMALL LETTER TURNED M @@ -4324,7 +4482,10 @@ FF41..FF5A ; Changes_When_Uppercased # L& [26] FULLWIDTH LATIN SMALL LETTER 2CE3 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER OLD NUBIAN WAU 2CEC ; Changes_When_Titlecased # L& COPTIC SMALL LETTER CRYPTOGRAMMIC SHEI 2CEE ; Changes_When_Titlecased # L& COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CF3 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER BOHAIRIC KHEI 2D00..2D25 ; Changes_When_Titlecased # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE +2D27 ; Changes_When_Titlecased # L& GEORGIAN SMALL LETTER YN +2D2D ; Changes_When_Titlecased # L& GEORGIAN SMALL LETTER AEN A641 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER ZEMLYA A643 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER DZELO A645 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER REVERSED DZE @@ -4407,6 +4568,7 @@ A785 ; Changes_When_Titlecased # L& LATIN SMALL LETTER INSULAR S A787 ; Changes_When_Titlecased # L& LATIN SMALL LETTER INSULAR T A78C ; Changes_When_Titlecased # L& LATIN SMALL LETTER SALTILLO A791 ; Changes_When_Titlecased # L& LATIN SMALL LETTER N WITH DESCENDER +A793 ; Changes_When_Titlecased # L& LATIN SMALL LETTER C WITH BAR A7A1 ; Changes_When_Titlecased # L& LATIN SMALL LETTER G 
WITH OBLIQUE STROKE A7A3 ; Changes_When_Titlecased # L& LATIN SMALL LETTER K WITH OBLIQUE STROKE A7A5 ; Changes_When_Titlecased # L& LATIN SMALL LETTER N WITH OBLIQUE STROKE @@ -4417,7 +4579,7 @@ FB13..FB17 ; Changes_When_Titlecased # L& [5] ARMENIAN SMALL LIGATURE MEN N FF41..FF5A ; Changes_When_Titlecased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z 10428..1044F ; Changes_When_Titlecased # L& [40] DESERET SMALL LETTER LONG I..DESERET SMALL LETTER EW -# Total code points: 1094 +# Total code points: 1099 # ================================================ @@ -4700,6 +4862,8 @@ FF41..FF5A ; Changes_When_Titlecased # L& [26] FULLWIDTH LATIN SMALL LETTER 0531..0556 ; Changes_When_Casefolded # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH 0587 ; Changes_When_Casefolded # L& ARMENIAN SMALL LIGATURE ECH YIWN 10A0..10C5 ; Changes_When_Casefolded # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; Changes_When_Casefolded # L& GEORGIAN CAPITAL LETTER YN +10CD ; Changes_When_Casefolded # L& GEORGIAN CAPITAL LETTER AEN 1E00 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER A WITH RING BELOW 1E02 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER B WITH DOT ABOVE 1E04 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER B WITH DOT BELOW @@ -4911,6 +5075,7 @@ FF41..FF5A ; Changes_When_Titlecased # L& [26] FULLWIDTH LATIN SMALL LETTER 2CE2 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER OLD NUBIAN WAU 2CEB ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI 2CED ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA +2CF2 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER BOHAIRIC KHEI A640 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER ZEMLYA A642 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER DZELO A644 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER REVERSED DZE @@ -4994,17 +5159,19 @@ A786 ; Changes_When_Casefolded # L& LATIN CAPITAL 
LETTER INSULAR A78B ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER SALTILLO A78D ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER TURNED H A790 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER N WITH DESCENDER +A792 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER C WITH BAR A7A0 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER G WITH OBLIQUE STROKE A7A2 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER K WITH OBLIQUE STROKE A7A4 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER N WITH OBLIQUE STROKE A7A6 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER R WITH OBLIQUE STROKE A7A8 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER S WITH OBLIQUE STROKE +A7AA ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER H WITH HOOK FB00..FB06 ; Changes_When_Casefolded # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST FB13..FB17 ; Changes_When_Casefolded # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH FF21..FF3A ; Changes_When_Casefolded # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z 10400..10427 ; Changes_When_Casefolded # L& [40] DESERET CAPITAL LETTER LONG I..DESERET CAPITAL LETTER EW -# Total code points: 1102 +# Total code points: 1107 # ================================================ @@ -5033,7 +5200,7 @@ FF21..FF3A ; Changes_When_Casefolded # L& [26] FULLWIDTH LATIN CAPITAL LETTE 025B ; Changes_When_Casemapped # L& LATIN SMALL LETTER OPEN E 0260 ; Changes_When_Casemapped # L& LATIN SMALL LETTER G WITH HOOK 0263 ; Changes_When_Casemapped # L& LATIN SMALL LETTER GAMMA -0265 ; Changes_When_Casemapped # L& LATIN SMALL LETTER TURNED H +0265..0266 ; Changes_When_Casemapped # L& [2] LATIN SMALL LETTER TURNED H..LATIN SMALL LETTER H WITH HOOK 0268..0269 ; Changes_When_Casemapped # L& [2] LATIN SMALL LETTER I WITH STROKE..LATIN SMALL LETTER IOTA 026B ; Changes_When_Casemapped # L& LATIN SMALL LETTER L WITH MIDDLE TILDE 026F ; Changes_When_Casemapped # L& LATIN SMALL LETTER TURNED M @@ 
-5061,6 +5228,8 @@ FF21..FF3A ; Changes_When_Casefolded # L& [26] FULLWIDTH LATIN CAPITAL LETTE 0531..0556 ; Changes_When_Casemapped # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH 0561..0587 ; Changes_When_Casemapped # L& [39] ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LIGATURE ECH YIWN 10A0..10C5 ; Changes_When_Casemapped # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; Changes_When_Casemapped # L& GEORGIAN CAPITAL LETTER YN +10CD ; Changes_When_Casemapped # L& GEORGIAN CAPITAL LETTER AEN 1D79 ; Changes_When_Casemapped # L& LATIN SMALL LETTER INSULAR G 1D7D ; Changes_When_Casemapped # L& LATIN SMALL LETTER P WITH STROKE 1E00..1E9B ; Changes_When_Casemapped # L& [156] LATIN CAPITAL LETTER A WITH RING BELOW..LATIN SMALL LETTER LONG S WITH DOT ABOVE @@ -5098,22 +5267,25 @@ FF21..FF3A ; Changes_When_Casefolded # L& [26] FULLWIDTH LATIN CAPITAL LETTE 2C75..2C76 ; Changes_When_Casemapped # L& [2] LATIN CAPITAL LETTER HALF H..LATIN SMALL LETTER HALF H 2C7E..2CE3 ; Changes_When_Casemapped # L& [102] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SMALL LETTER OLD NUBIAN WAU 2CEB..2CEE ; Changes_When_Casemapped # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CF2..2CF3 ; Changes_When_Casemapped # L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI 2D00..2D25 ; Changes_When_Casemapped # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE +2D27 ; Changes_When_Casemapped # L& GEORGIAN SMALL LETTER YN +2D2D ; Changes_When_Casemapped # L& GEORGIAN SMALL LETTER AEN A640..A66D ; Changes_When_Casemapped # L& [46] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O A680..A697 ; Changes_When_Casemapped # L& [24] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER SHWE A722..A72F ; Changes_When_Casemapped # L& [14] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN SMALL LETTER CUATRILLO WITH COMMA A732..A76F ; Changes_When_Casemapped # L& [62] 
LATIN CAPITAL LETTER AA..LATIN SMALL LETTER CON A779..A787 ; Changes_When_Casemapped # L& [15] LATIN CAPITAL LETTER INSULAR D..LATIN SMALL LETTER INSULAR T A78B..A78D ; Changes_When_Casemapped # L& [3] LATIN CAPITAL LETTER SALTILLO..LATIN CAPITAL LETTER TURNED H -A790..A791 ; Changes_When_Casemapped # L& [2] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER N WITH DESCENDER -A7A0..A7A9 ; Changes_When_Casemapped # L& [10] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN SMALL LETTER S WITH OBLIQUE STROKE +A790..A793 ; Changes_When_Casemapped # L& [4] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER C WITH BAR +A7A0..A7AA ; Changes_When_Casemapped # L& [11] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN CAPITAL LETTER H WITH HOOK FB00..FB06 ; Changes_When_Casemapped # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST FB13..FB17 ; Changes_When_Casemapped # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH FF21..FF3A ; Changes_When_Casemapped # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z FF41..FF5A ; Changes_When_Casemapped # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z 10400..1044F ; Changes_When_Casemapped # L& [80] DESERET CAPITAL LETTER LONG I..DESERET SMALL LETTER EW -# Total code points: 2128 +# Total code points: 2138 # ================================================ @@ -5128,9 +5300,9 @@ FF41..FF5A ; Changes_When_Casemapped # L& [26] FULLWIDTH LATIN SMALL LETTER 0041..005A ; ID_Start # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z 0061..007A ; ID_Start # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z -00AA ; ID_Start # L& FEMININE ORDINAL INDICATOR +00AA ; ID_Start # Lo FEMININE ORDINAL INDICATOR 00B5 ; ID_Start # L& MICRO SIGN -00BA ; ID_Start # L& MASCULINE ORDINAL INDICATOR +00BA ; ID_Start # Lo MASCULINE ORDINAL INDICATOR 00C0..00D6 ; ID_Start # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS 00D8..00F6 ; 
ID_Start # L& [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS 00F8..01BA ; ID_Start # L& [195] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL @@ -5184,6 +5356,8 @@ FF41..FF5A ; Changes_When_Casemapped # L& [26] FULLWIDTH LATIN SMALL LETTER 0824 ; ID_Start # Lm SAMARITAN MODIFIER LETTER SHORT A 0828 ; ID_Start # Lm SAMARITAN MODIFIER LETTER I 0840..0858 ; ID_Start # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN +08A0 ; ID_Start # Lo ARABIC LETTER BEH WITH SMALL V BELOW +08A2..08AC ; ID_Start # Lo [11] ARABIC LETTER JEEM WITH TWO DOTS ABOVE..ARABIC LETTER ROHINGYA YEH 0904..0939 ; ID_Start # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA 093D ; ID_Start # Lo DEVANAGARI SIGN AVAGRAHA 0950 ; ID_Start # Lo DEVANAGARI OM @@ -5291,7 +5465,7 @@ FF41..FF5A ; Changes_When_Casemapped # L& [26] FULLWIDTH LATIN SMALL LETTER 0EBD ; ID_Start # Lo LAO SEMIVOWEL SIGN NYO 0EC0..0EC4 ; ID_Start # Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI 0EC6 ; ID_Start # Lm LAO KO LA -0EDC..0EDD ; ID_Start # Lo [2] LAO HO NO..LAO HO MO +0EDC..0EDF ; ID_Start # Lo [4] LAO HO NO..LAO LETTER KHMU NYO 0F00 ; ID_Start # Lo TIBETAN SYLLABLE OM 0F40..0F47 ; ID_Start # Lo [8] TIBETAN LETTER KA..TIBETAN LETTER JA 0F49..0F6C ; ID_Start # Lo [36] TIBETAN LETTER NYA..TIBETAN LETTER RRA @@ -5306,9 +5480,11 @@ FF41..FF5A ; Changes_When_Casemapped # L& [26] FULLWIDTH LATIN SMALL LETTER 1075..1081 ; ID_Start # Lo [13] MYANMAR LETTER SHAN KA..MYANMAR LETTER SHAN HA 108E ; ID_Start # Lo MYANMAR LETTER RUMAI PALAUNG FA 10A0..10C5 ; ID_Start # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; ID_Start # L& GEORGIAN CAPITAL LETTER YN +10CD ; ID_Start # L& GEORGIAN CAPITAL LETTER AEN 10D0..10FA ; ID_Start # Lo [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN 10FC ; ID_Start # Lm MODIFIER LETTER GEORGIAN NAR -1100..1248 ; ID_Start # Lo [329] HANGUL CHOSEONG KIYEOK..ETHIOPIC SYLLABLE QWA +10FD..1248 ; ID_Start # Lo [332] GEORGIAN LETTER 
AEN..ETHIOPIC SYLLABLE QWA 124A..124D ; ID_Start # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE 1250..1256 ; ID_Start # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO 1258 ; ID_Start # Lo ETHIOPIC SYLLABLE QHWA @@ -5358,16 +5534,17 @@ FF41..FF5A ; Changes_When_Casemapped # L& [26] FULLWIDTH LATIN SMALL LETTER 1B45..1B4B ; ID_Start # Lo [7] BALINESE LETTER KAF SASAK..BALINESE LETTER ASYURA SASAK 1B83..1BA0 ; ID_Start # Lo [30] SUNDANESE LETTER A..SUNDANESE LETTER HA 1BAE..1BAF ; ID_Start # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA -1BC0..1BE5 ; ID_Start # Lo [38] BATAK LETTER A..BATAK LETTER U +1BBA..1BE5 ; ID_Start # Lo [44] SUNDANESE AVAGRAHA..BATAK LETTER U 1C00..1C23 ; ID_Start # Lo [36] LEPCHA LETTER KA..LEPCHA LETTER A 1C4D..1C4F ; ID_Start # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA 1C5A..1C77 ; ID_Start # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH 1C78..1C7D ; ID_Start # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD 1CE9..1CEC ; ID_Start # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL 1CEE..1CF1 ; ID_Start # Lo [4] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ANUSVARA UBHAYATO MUKHA +1CF5..1CF6 ; ID_Start # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA 1D00..1D2B ; ID_Start # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL -1D2C..1D61 ; ID_Start # Lm [54] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI -1D62..1D77 ; ID_Start # L& [22] LATIN SUBSCRIPT SMALL LETTER I..LATIN SMALL LETTER TURNED G +1D2C..1D6A ; ID_Start # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1D6B..1D77 ; ID_Start # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G 1D78 ; ID_Start # Lm MODIFIER LETTER CYRILLIC EN 1D79..1D9A ; ID_Start # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK 1D9B..1DBF ; ID_Start # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA @@ -5415,12 +5592,15 @@ FF41..FF5A ; 
Changes_When_Casemapped # L& [26] FULLWIDTH LATIN SMALL LETTER 2185..2188 ; ID_Start # Nl [4] ROMAN NUMERAL SIX LATE FORM..ROMAN NUMERAL ONE HUNDRED THOUSAND 2C00..2C2E ; ID_Start # L& [47] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE 2C30..2C5E ; ID_Start # L& [47] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER LATINATE MYSLITE -2C60..2C7C ; ID_Start # L& [29] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN SUBSCRIPT SMALL LETTER J -2C7D ; ID_Start # Lm MODIFIER LETTER CAPITAL V +2C60..2C7B ; ID_Start # L& [28] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN LETTER SMALL CAPITAL TURNED E +2C7C..2C7D ; ID_Start # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V 2C7E..2CE4 ; ID_Start # L& [103] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SYMBOL KAI 2CEB..2CEE ; ID_Start # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CF2..2CF3 ; ID_Start # L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI 2D00..2D25 ; ID_Start # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE -2D30..2D65 ; ID_Start # Lo [54] TIFINAGH LETTER YA..TIFINAGH LETTER YAZZ +2D27 ; ID_Start # L& GEORGIAN SMALL LETTER YN +2D2D ; ID_Start # L& GEORGIAN SMALL LETTER AEN +2D30..2D67 ; ID_Start # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO 2D6F ; ID_Start # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK 2D80..2D96 ; ID_Start # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE 2DA0..2DA6 ; ID_Start # Lo [7] ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO @@ -5451,7 +5631,7 @@ FF41..FF5A ; Changes_When_Casemapped # L& [26] FULLWIDTH LATIN SMALL LETTER 31A0..31BA ; ID_Start # Lo [27] BOPOMOFO LETTER BU..BOPOMOFO LETTER ZY 31F0..31FF ; ID_Start # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO 3400..4DB5 ; ID_Start # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5 -4E00..9FCB ; ID_Start # Lo [20940] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED 
IDEOGRAPH-9FCB +4E00..9FCC ; ID_Start # Lo [20941] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCC A000..A014 ; ID_Start # Lo [21] YI SYLLABLE IT..YI SYLLABLE E A015 ; ID_Start # Lm YI SYLLABLE WU A016..A48C ; ID_Start # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR @@ -5473,8 +5653,9 @@ A770 ; ID_Start # Lm MODIFIER LETTER US A771..A787 ; ID_Start # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T A788 ; ID_Start # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A78B..A78E ; ID_Start # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT -A790..A791 ; ID_Start # L& [2] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER N WITH DESCENDER -A7A0..A7A9 ; ID_Start # L& [10] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN SMALL LETTER S WITH OBLIQUE STROKE +A790..A793 ; ID_Start # L& [4] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER C WITH BAR +A7A0..A7AA ; ID_Start # L& [11] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN CAPITAL LETTER H WITH HOOK +A7F8..A7F9 ; ID_Start # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE A7FA ; ID_Start # L& LATIN LETTER SMALL CAPITAL TURNED M A7FB..A801 ; ID_Start # Lo [7] LATIN EPIGRAPHIC LETTER REVERSED F..SYLOTI NAGRI LETTER I A803..A805 ; ID_Start # Lo [3] SYLOTI NAGRI LETTER U..SYLOTI NAGRI LETTER O @@ -5504,6 +5685,9 @@ AAC0 ; ID_Start # Lo TAI VIET TONE MAI NUENG AAC2 ; ID_Start # Lo TAI VIET TONE MAI SONG AADB..AADC ; ID_Start # Lo [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG AADD ; ID_Start # Lm TAI VIET SYMBOL SAM +AAE0..AAEA ; ID_Start # Lo [11] MEETEI MAYEK LETTER E..MEETEI MAYEK LETTER SSA +AAF2 ; ID_Start # Lo MEETEI MAYEK ANJI +AAF3..AAF4 ; ID_Start # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK AB01..AB06 ; ID_Start # Lo [6] ETHIOPIC SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO AB09..AB0E ; ID_Start # Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO AB11..AB16 ; ID_Start # Lo [6] ETHIOPIC 
SYLLABLE DZU..ETHIOPIC SYLLABLE DZO @@ -5513,8 +5697,7 @@ ABC0..ABE2 ; ID_Start # Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER AC00..D7A3 ; ID_Start # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH D7B0..D7C6 ; ID_Start # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E D7CB..D7FB ; ID_Start # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH -F900..FA2D ; ID_Start # Lo [302] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA2D -FA30..FA6D ; ID_Start # Lo [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D +F900..FA6D ; ID_Start # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D FA70..FAD9 ; ID_Start # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 FB00..FB06 ; ID_Start # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST FB13..FB17 ; ID_Start # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH @@ -5572,6 +5755,8 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1083F..10855 ; ID_Start # Lo [23] CYPRIOT SYLLABLE ZO..IMPERIAL ARAMAIC LETTER TAW 10900..10915 ; ID_Start # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU 10920..10939 ; ID_Start # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C +10980..109B7 ; ID_Start # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA +109BE..109BF ; ID_Start # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN 10A00 ; ID_Start # Lo KHAROSHTHI LETTER A 10A10..10A13 ; ID_Start # Lo [4] KHAROSHTHI LETTER KA..KHAROSHTHI LETTER GHA 10A15..10A17 ; ID_Start # Lo [3] KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA @@ -5583,10 +5768,18 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 10C00..10C48 ; ID_Start # Lo [73] OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH 11003..11037 ; ID_Start # Lo [53] BRAHMI SIGN JIHVAMULIYA..BRAHMI LETTER OLD TAMIL NNNA 11083..110AF ; ID_Start # Lo [45] KAITHI 
LETTER A..KAITHI LETTER HA +110D0..110E8 ; ID_Start # Lo [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE +11103..11126 ; ID_Start # Lo [36] CHAKMA LETTER AA..CHAKMA LETTER HAA +11183..111B2 ; ID_Start # Lo [48] SHARADA LETTER A..SHARADA LETTER HA +111C1..111C4 ; ID_Start # Lo [4] SHARADA SIGN AVAGRAHA..SHARADA OM +11680..116AA ; ID_Start # Lo [43] TAKRI LETTER A..TAKRI LETTER RRA 12000..1236E ; ID_Start # Lo [879] CUNEIFORM SIGN A..CUNEIFORM SIGN ZUM 12400..12462 ; ID_Start # Nl [99] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN OLD ASSYRIAN ONE QUARTER 13000..1342E ; ID_Start # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032 16800..16A38 ; ID_Start # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ +16F00..16F44 ; ID_Start # Lo [69] MIAO LETTER PA..MIAO LETTER HHA +16F50 ; ID_Start # Lo MIAO LETTER NASALIZATION +16F93..16F9F ; ID_Start # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 1B000..1B001 ; ID_Start # Lo [2] KATAKANA LETTER ARCHAIC E..HIRAGANA LETTER ARCHAIC YE 1D400..1D454 ; ID_Start # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G 1D456..1D49C ; ID_Start # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A @@ -5618,12 +5811,45 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1D78A..1D7A8 ; ID_Start # L& [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA 1D7AA..1D7C2 ; ID_Start # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA 1D7C4..1D7CB ; ID_Start # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA +1EE00..1EE03 ; ID_Start # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; ID_Start # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; ID_Start # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; ID_Start # 
Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; ID_Start # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; ID_Start # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; ID_Start # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; ID_Start # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; ID_Start # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; ID_Start # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; ID_Start # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; ID_Start # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; ID_Start # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; ID_Start # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; ID_Start # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; ID_Start # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; ID_Start # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; ID_Start # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; ID_Start # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; ID_Start # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; ID_Start # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; ID_Start # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; ID_Start # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; ID_Start # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; ID_Start # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; ID_Start # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; ID_Start # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; ID_Start # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; ID_Start # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; ID_Start # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL 
LOOPED GHAIN +1EEA1..1EEA3 ; ID_Start # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; ID_Start # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; ID_Start # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN 20000..2A6D6 ; ID_Start # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6 2A700..2B734 ; ID_Start # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734 2B740..2B81D ; ID_Start # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D 2F800..2FA1D ; ID_Start # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D -# Total code points: 100747 +# Total code points: 101240 # ================================================ @@ -5641,10 +5867,10 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 0041..005A ; ID_Continue # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z 005F ; ID_Continue # Pc LOW LINE 0061..007A ; ID_Continue # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z -00AA ; ID_Continue # L& FEMININE ORDINAL INDICATOR +00AA ; ID_Continue # Lo FEMININE ORDINAL INDICATOR 00B5 ; ID_Continue # L& MICRO SIGN 00B7 ; ID_Continue # Po MIDDLE DOT -00BA ; ID_Continue # L& MASCULINE ORDINAL INDICATOR +00BA ; ID_Continue # Lo MASCULINE ORDINAL INDICATOR 00C0..00D6 ; ID_Continue # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS 00D8..00F6 ; ID_Continue # L& [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS 00F8..01BA ; ID_Continue # L& [195] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL @@ -5725,6 +5951,9 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 0829..082D ; ID_Continue # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA 0840..0858 ; ID_Continue # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN 
0859..085B ; ID_Continue # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK +08A0 ; ID_Continue # Lo ARABIC LETTER BEH WITH SMALL V BELOW +08A2..08AC ; ID_Continue # Lo [11] ARABIC LETTER JEEM WITH TWO DOTS ABOVE..ARABIC LETTER ROHINGYA YEH +08E4..08FE ; ID_Continue # Mn [27] ARABIC CURLY FATHA..ARABIC DAMMA WITH DOT 0900..0902 ; ID_Continue # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA 0903 ; ID_Continue # Mc DEVANAGARI SIGN VISARGA 0904..0939 ; ID_Continue # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA @@ -5946,7 +6175,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 0EC6 ; ID_Continue # Lm LAO KO LA 0EC8..0ECD ; ID_Continue # Mn [6] LAO TONE MAI EK..LAO NIGGAHITA 0ED0..0ED9 ; ID_Continue # Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE -0EDC..0EDD ; ID_Continue # Lo [2] LAO HO NO..LAO HO MO +0EDC..0EDF ; ID_Continue # Lo [4] LAO HO NO..LAO LETTER KHMU NYO 0F00 ; ID_Continue # Lo TIBETAN SYLLABLE OM 0F18..0F19 ; ID_Continue # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS 0F20..0F29 ; ID_Continue # Nd [10] TIBETAN DIGIT ZERO..TIBETAN DIGIT NINE @@ -5998,9 +6227,11 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 109A..109C ; ID_Continue # Mc [3] MYANMAR SIGN KHAMTI TONE-1..MYANMAR VOWEL SIGN AITON A 109D ; ID_Continue # Mn MYANMAR VOWEL SIGN AITON AI 10A0..10C5 ; ID_Continue # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; ID_Continue # L& GEORGIAN CAPITAL LETTER YN +10CD ; ID_Continue # L& GEORGIAN CAPITAL LETTER AEN 10D0..10FA ; ID_Continue # Lo [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN 10FC ; ID_Continue # Lm MODIFIER LETTER GEORGIAN NAR -1100..1248 ; ID_Continue # Lo [329] HANGUL CHOSEONG KIYEOK..ETHIOPIC SYLLABLE QWA +10FD..1248 ; ID_Continue # Lo [332] GEORGIAN LETTER AEN..ETHIOPIC SYLLABLE QWA 124A..124D ; ID_Continue # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE 1250..1256 ; ID_Continue 
# Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO 1258 ; ID_Continue # Lo ETHIOPIC SYLLABLE QHWA @@ -6036,6 +6267,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 176E..1770 ; ID_Continue # Lo [3] TAGBANWA LETTER LA..TAGBANWA LETTER SA 1772..1773 ; ID_Continue # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U 1780..17B3 ; ID_Continue # Lo [52] KHMER LETTER KA..KHMER INDEPENDENT VOWEL QAU +17B4..17B5 ; ID_Continue # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA 17B6 ; ID_Continue # Mc KHMER VOWEL SIGN AA 17B7..17BD ; ID_Continue # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA 17BE..17C5 ; ID_Continue # Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU @@ -6114,9 +6346,11 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1BA6..1BA7 ; ID_Continue # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG 1BA8..1BA9 ; ID_Continue # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG 1BAA ; ID_Continue # Mc SUNDANESE SIGN PAMAAEH +1BAB ; ID_Continue # Mn SUNDANESE SIGN VIRAMA +1BAC..1BAD ; ID_Continue # Mc [2] SUNDANESE CONSONANT SIGN PASANGAN MA..SUNDANESE CONSONANT SIGN PASANGAN WA 1BAE..1BAF ; ID_Continue # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA 1BB0..1BB9 ; ID_Continue # Nd [10] SUNDANESE DIGIT ZERO..SUNDANESE DIGIT NINE -1BC0..1BE5 ; ID_Continue # Lo [38] BATAK LETTER A..BATAK LETTER U +1BBA..1BE5 ; ID_Continue # Lo [44] SUNDANESE AVAGRAHA..BATAK LETTER U 1BE6 ; ID_Continue # Mn BATAK SIGN TOMPI 1BE7 ; ID_Continue # Mc BATAK VOWEL SIGN E 1BE8..1BE9 ; ID_Continue # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE @@ -6142,10 +6376,12 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1CE9..1CEC ; ID_Continue # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL 1CED ; ID_Continue # Mn VEDIC SIGN TIRYAK 1CEE..1CF1 ; ID_Continue # Lo [4] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN 
ANUSVARA UBHAYATO MUKHA -1CF2 ; ID_Continue # Mc VEDIC SIGN ARDHAVISARGA +1CF2..1CF3 ; ID_Continue # Mc [2] VEDIC SIGN ARDHAVISARGA..VEDIC SIGN ROTATED ARDHAVISARGA +1CF4 ; ID_Continue # Mn VEDIC TONE CANDRA ABOVE +1CF5..1CF6 ; ID_Continue # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA 1D00..1D2B ; ID_Continue # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL -1D2C..1D61 ; ID_Continue # Lm [54] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI -1D62..1D77 ; ID_Continue # L& [22] LATIN SUBSCRIPT SMALL LETTER I..LATIN SMALL LETTER TURNED G +1D2C..1D6A ; ID_Continue # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1D6B..1D77 ; ID_Continue # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G 1D78 ; ID_Continue # Lm MODIFIER LETTER CYRILLIC EN 1D79..1D9A ; ID_Continue # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK 1D9B..1DBF ; ID_Continue # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA @@ -6200,13 +6436,16 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 2185..2188 ; ID_Continue # Nl [4] ROMAN NUMERAL SIX LATE FORM..ROMAN NUMERAL ONE HUNDRED THOUSAND 2C00..2C2E ; ID_Continue # L& [47] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE 2C30..2C5E ; ID_Continue # L& [47] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER LATINATE MYSLITE -2C60..2C7C ; ID_Continue # L& [29] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN SUBSCRIPT SMALL LETTER J -2C7D ; ID_Continue # Lm MODIFIER LETTER CAPITAL V +2C60..2C7B ; ID_Continue # L& [28] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN LETTER SMALL CAPITAL TURNED E +2C7C..2C7D ; ID_Continue # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V 2C7E..2CE4 ; ID_Continue # L& [103] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SYMBOL KAI 2CEB..2CEE ; ID_Continue # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC 
GANGIA 2CEF..2CF1 ; ID_Continue # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS +2CF2..2CF3 ; ID_Continue # L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI 2D00..2D25 ; ID_Continue # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE -2D30..2D65 ; ID_Continue # Lo [54] TIFINAGH LETTER YA..TIFINAGH LETTER YAZZ +2D27 ; ID_Continue # L& GEORGIAN SMALL LETTER YN +2D2D ; ID_Continue # L& GEORGIAN SMALL LETTER AEN +2D30..2D67 ; ID_Continue # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO 2D6F ; ID_Continue # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK 2D7F ; ID_Continue # Mn TIFINAGH CONSONANT JOINER 2D80..2D96 ; ID_Continue # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE @@ -6223,7 +6462,8 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 3006 ; ID_Continue # Lo IDEOGRAPHIC CLOSING MARK 3007 ; ID_Continue # Nl IDEOGRAPHIC NUMBER ZERO 3021..3029 ; ID_Continue # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE -302A..302F ; ID_Continue # Mn [6] IDEOGRAPHIC LEVEL TONE MARK..HANGUL DOUBLE DOT TONE MARK +302A..302D ; ID_Continue # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK +302E..302F ; ID_Continue # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK 3031..3035 ; ID_Continue # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF 3038..303A ; ID_Continue # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY 303B ; ID_Continue # Lm VERTICAL IDEOGRAPHIC ITERATION MARK @@ -6241,7 +6481,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 31A0..31BA ; ID_Continue # Lo [27] BOPOMOFO LETTER BU..BOPOMOFO LETTER ZY 31F0..31FF ; ID_Continue # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO 3400..4DB5 ; ID_Continue # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5 -4E00..9FCB ; ID_Continue # Lo [20940] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCB +4E00..9FCC ; 
ID_Continue # Lo [20941] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCC A000..A014 ; ID_Continue # Lo [21] YI SYLLABLE IT..YI SYLLABLE E A015 ; ID_Continue # Lm YI SYLLABLE WU A016..A48C ; ID_Continue # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR @@ -6255,9 +6495,10 @@ A62A..A62B ; ID_Continue # Lo [2] VAI SYLLABLE NDOLE MA..VAI SYLLABLE NDOLE A640..A66D ; ID_Continue # L& [46] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O A66E ; ID_Continue # Lo CYRILLIC LETTER MULTIOCULAR O A66F ; ID_Continue # Mn COMBINING CYRILLIC VZMET -A67C..A67D ; ID_Continue # Mn [2] COMBINING CYRILLIC KAVYKA..COMBINING CYRILLIC PAYEROK +A674..A67D ; ID_Continue # Mn [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK A67F ; ID_Continue # Lm CYRILLIC PAYEROK A680..A697 ; ID_Continue # L& [24] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER SHWE +A69F ; ID_Continue # Mn COMBINING CYRILLIC LETTER IOTIFIED E A6A0..A6E5 ; ID_Continue # Lo [70] BAMUM LETTER A..BAMUM LETTER KI A6E6..A6EF ; ID_Continue # Nl [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM A6F0..A6F1 ; ID_Continue # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS @@ -6267,8 +6508,9 @@ A770 ; ID_Continue # Lm MODIFIER LETTER US A771..A787 ; ID_Continue # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T A788 ; ID_Continue # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A78B..A78E ; ID_Continue # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT -A790..A791 ; ID_Continue # L& [2] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER N WITH DESCENDER -A7A0..A7A9 ; ID_Continue # L& [10] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN SMALL LETTER S WITH OBLIQUE STROKE +A790..A793 ; ID_Continue # L& [4] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER C WITH BAR +A7A0..A7AA ; ID_Continue # L& [11] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN CAPITAL LETTER H WITH HOOK +A7F8..A7F9 ; ID_Continue # Lm [2] 
MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE A7FA ; ID_Continue # L& LATIN LETTER SMALL CAPITAL TURNED M A7FB..A801 ; ID_Continue # Lo [7] LATIN EPIGRAPHIC LETTER REVERSED F..SYLOTI NAGRI LETTER I A802 ; ID_Continue # Mn SYLOTI NAGRI SIGN DVISVARA @@ -6337,6 +6579,14 @@ AAC1 ; ID_Continue # Mn TAI VIET TONE MAI THO AAC2 ; ID_Continue # Lo TAI VIET TONE MAI SONG AADB..AADC ; ID_Continue # Lo [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG AADD ; ID_Continue # Lm TAI VIET SYMBOL SAM +AAE0..AAEA ; ID_Continue # Lo [11] MEETEI MAYEK LETTER E..MEETEI MAYEK LETTER SSA +AAEB ; ID_Continue # Mc MEETEI MAYEK VOWEL SIGN II +AAEC..AAED ; ID_Continue # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI +AAEE..AAEF ; ID_Continue # Mc [2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU +AAF2 ; ID_Continue # Lo MEETEI MAYEK ANJI +AAF3..AAF4 ; ID_Continue # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK +AAF5 ; ID_Continue # Mc MEETEI MAYEK VOWEL SIGN VISARGA +AAF6 ; ID_Continue # Mn MEETEI MAYEK VIRAMA AB01..AB06 ; ID_Continue # Lo [6] ETHIOPIC SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO AB09..AB0E ; ID_Continue # Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO AB11..AB16 ; ID_Continue # Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO @@ -6354,8 +6604,7 @@ ABF0..ABF9 ; ID_Continue # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIG AC00..D7A3 ; ID_Continue # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH D7B0..D7C6 ; ID_Continue # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E D7CB..D7FB ; ID_Continue # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH -F900..FA2D ; ID_Continue # Lo [302] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA2D -FA30..FA6D ; ID_Continue # Lo [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D +F900..FA6D ; ID_Continue # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D 
FA70..FAD9 ; ID_Continue # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 FB00..FB06 ; ID_Continue # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST FB13..FB17 ; ID_Continue # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH @@ -6422,6 +6671,8 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 1083F..10855 ; ID_Continue # Lo [23] CYPRIOT SYLLABLE ZO..IMPERIAL ARAMAIC LETTER TAW 10900..10915 ; ID_Continue # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU 10920..10939 ; ID_Continue # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C +10980..109B7 ; ID_Continue # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA +109BE..109BF ; ID_Continue # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN 10A00 ; ID_Continue # Lo KHAROSHTHI LETTER A 10A01..10A03 ; ID_Continue # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R 10A05..10A06 ; ID_Continue # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O @@ -6449,10 +6700,40 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 110B3..110B6 ; ID_Continue # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI 110B7..110B8 ; ID_Continue # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU 110B9..110BA ; ID_Continue # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA +110D0..110E8 ; ID_Continue # Lo [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE +110F0..110F9 ; ID_Continue # Nd [10] SORA SOMPENG DIGIT ZERO..SORA SOMPENG DIGIT NINE +11100..11102 ; ID_Continue # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11103..11126 ; ID_Continue # Lo [36] CHAKMA LETTER AA..CHAKMA LETTER HAA +11127..1112B ; ID_Continue # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112C ; ID_Continue # Mc CHAKMA VOWEL SIGN E +1112D..11134 ; ID_Continue # Mn [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA +11136..1113F ; ID_Continue # Nd [10] CHAKMA DIGIT ZERO..CHAKMA DIGIT NINE +11180..11181 ; 
ID_Continue # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +11182 ; ID_Continue # Mc SHARADA SIGN VISARGA +11183..111B2 ; ID_Continue # Lo [48] SHARADA LETTER A..SHARADA LETTER HA +111B3..111B5 ; ID_Continue # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II +111B6..111BE ; ID_Continue # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +111BF..111C0 ; ID_Continue # Mc [2] SHARADA VOWEL SIGN AU..SHARADA SIGN VIRAMA +111C1..111C4 ; ID_Continue # Lo [4] SHARADA SIGN AVAGRAHA..SHARADA OM +111D0..111D9 ; ID_Continue # Nd [10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE +11680..116AA ; ID_Continue # Lo [43] TAKRI LETTER A..TAKRI LETTER RRA +116AB ; ID_Continue # Mn TAKRI SIGN ANUSVARA +116AC ; ID_Continue # Mc TAKRI SIGN VISARGA +116AD ; ID_Continue # Mn TAKRI VOWEL SIGN AA +116AE..116AF ; ID_Continue # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II +116B0..116B5 ; ID_Continue # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +116B6 ; ID_Continue # Mc TAKRI SIGN VIRAMA +116B7 ; ID_Continue # Mn TAKRI SIGN NUKTA +116C0..116C9 ; ID_Continue # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE 12000..1236E ; ID_Continue # Lo [879] CUNEIFORM SIGN A..CUNEIFORM SIGN ZUM 12400..12462 ; ID_Continue # Nl [99] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN OLD ASSYRIAN ONE QUARTER 13000..1342E ; ID_Continue # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032 16800..16A38 ; ID_Continue # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ +16F00..16F44 ; ID_Continue # Lo [69] MIAO LETTER PA..MIAO LETTER HHA +16F50 ; ID_Continue # Lo MIAO LETTER NASALIZATION +16F51..16F7E ; ID_Continue # Mc [46] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN NG +16F8F..16F92 ; ID_Continue # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW +16F93..16F9F ; ID_Continue # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 1B000..1B001 ; ID_Continue # Lo [2] KATAKANA LETTER ARCHAIC E..HIRAGANA LETTER ARCHAIC YE 1D165..1D166 ; ID_Continue # Mc [2] MUSICAL SYMBOL COMBINING 
STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM 1D167..1D169 ; ID_Continue # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 @@ -6492,13 +6773,46 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 1D7AA..1D7C2 ; ID_Continue # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA 1D7C4..1D7CB ; ID_Continue # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA 1D7CE..1D7FF ; ID_Continue # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE +1EE00..1EE03 ; ID_Continue # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; ID_Continue # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; ID_Continue # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; ID_Continue # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; ID_Continue # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; ID_Continue # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; ID_Continue # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; ID_Continue # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; ID_Continue # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; ID_Continue # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; ID_Continue # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; ID_Continue # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; ID_Continue # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; ID_Continue # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; ID_Continue # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; ID_Continue # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; ID_Continue # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; ID_Continue # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; ID_Continue # Lo ARABIC MATHEMATICAL TAILED 
GHAIN +1EE5D ; ID_Continue # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; ID_Continue # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; ID_Continue # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; ID_Continue # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; ID_Continue # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; ID_Continue # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; ID_Continue # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; ID_Continue # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; ID_Continue # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; ID_Continue # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; ID_Continue # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; ID_Continue # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; ID_Continue # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; ID_Continue # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN 20000..2A6D6 ; ID_Continue # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6 2A700..2B734 ; ID_Continue # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734 2B740..2B81D ; ID_Continue # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D 2F800..2FA1D ; ID_Continue # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 102675 +# Total code points: 103355 # ================================================ @@ -6511,9 +6825,9 @@ E0100..E01EF ; ID_Continue # Mn 
[240] VARIATION SELECTOR-17..VARIATION SELECTOR 0041..005A ; XID_Start # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z 0061..007A ; XID_Start # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z -00AA ; XID_Start # L& FEMININE ORDINAL INDICATOR +00AA ; XID_Start # Lo FEMININE ORDINAL INDICATOR 00B5 ; XID_Start # L& MICRO SIGN -00BA ; XID_Start # L& MASCULINE ORDINAL INDICATOR +00BA ; XID_Start # Lo MASCULINE ORDINAL INDICATOR 00C0..00D6 ; XID_Start # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS 00D8..00F6 ; XID_Start # L& [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS 00F8..01BA ; XID_Start # L& [195] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL @@ -6566,6 +6880,8 @@ E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR 0824 ; XID_Start # Lm SAMARITAN MODIFIER LETTER SHORT A 0828 ; XID_Start # Lm SAMARITAN MODIFIER LETTER I 0840..0858 ; XID_Start # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN +08A0 ; XID_Start # Lo ARABIC LETTER BEH WITH SMALL V BELOW +08A2..08AC ; XID_Start # Lo [11] ARABIC LETTER JEEM WITH TWO DOTS ABOVE..ARABIC LETTER ROHINGYA YEH 0904..0939 ; XID_Start # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA 093D ; XID_Start # Lo DEVANAGARI SIGN AVAGRAHA 0950 ; XID_Start # Lo DEVANAGARI OM @@ -6673,7 +6989,7 @@ E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR 0EBD ; XID_Start # Lo LAO SEMIVOWEL SIGN NYO 0EC0..0EC4 ; XID_Start # Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI 0EC6 ; XID_Start # Lm LAO KO LA -0EDC..0EDD ; XID_Start # Lo [2] LAO HO NO..LAO HO MO +0EDC..0EDF ; XID_Start # Lo [4] LAO HO NO..LAO LETTER KHMU NYO 0F00 ; XID_Start # Lo TIBETAN SYLLABLE OM 0F40..0F47 ; XID_Start # Lo [8] TIBETAN LETTER KA..TIBETAN LETTER JA 0F49..0F6C ; XID_Start # Lo [36] TIBETAN LETTER NYA..TIBETAN LETTER RRA @@ -6688,9 +7004,11 @@ E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION 
SELECTOR 1075..1081 ; XID_Start # Lo [13] MYANMAR LETTER SHAN KA..MYANMAR LETTER SHAN HA 108E ; XID_Start # Lo MYANMAR LETTER RUMAI PALAUNG FA 10A0..10C5 ; XID_Start # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; XID_Start # L& GEORGIAN CAPITAL LETTER YN +10CD ; XID_Start # L& GEORGIAN CAPITAL LETTER AEN 10D0..10FA ; XID_Start # Lo [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN 10FC ; XID_Start # Lm MODIFIER LETTER GEORGIAN NAR -1100..1248 ; XID_Start # Lo [329] HANGUL CHOSEONG KIYEOK..ETHIOPIC SYLLABLE QWA +10FD..1248 ; XID_Start # Lo [332] GEORGIAN LETTER AEN..ETHIOPIC SYLLABLE QWA 124A..124D ; XID_Start # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE 1250..1256 ; XID_Start # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO 1258 ; XID_Start # Lo ETHIOPIC SYLLABLE QHWA @@ -6740,16 +7058,17 @@ E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR 1B45..1B4B ; XID_Start # Lo [7] BALINESE LETTER KAF SASAK..BALINESE LETTER ASYURA SASAK 1B83..1BA0 ; XID_Start # Lo [30] SUNDANESE LETTER A..SUNDANESE LETTER HA 1BAE..1BAF ; XID_Start # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA -1BC0..1BE5 ; XID_Start # Lo [38] BATAK LETTER A..BATAK LETTER U +1BBA..1BE5 ; XID_Start # Lo [44] SUNDANESE AVAGRAHA..BATAK LETTER U 1C00..1C23 ; XID_Start # Lo [36] LEPCHA LETTER KA..LEPCHA LETTER A 1C4D..1C4F ; XID_Start # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA 1C5A..1C77 ; XID_Start # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH 1C78..1C7D ; XID_Start # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD 1CE9..1CEC ; XID_Start # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL 1CEE..1CF1 ; XID_Start # Lo [4] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ANUSVARA UBHAYATO MUKHA +1CF5..1CF6 ; XID_Start # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA 1D00..1D2B ; XID_Start # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL -1D2C..1D61 ; XID_Start # Lm [54] MODIFIER LETTER 
CAPITAL A..MODIFIER LETTER SMALL CHI -1D62..1D77 ; XID_Start # L& [22] LATIN SUBSCRIPT SMALL LETTER I..LATIN SMALL LETTER TURNED G +1D2C..1D6A ; XID_Start # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1D6B..1D77 ; XID_Start # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G 1D78 ; XID_Start # Lm MODIFIER LETTER CYRILLIC EN 1D79..1D9A ; XID_Start # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK 1D9B..1DBF ; XID_Start # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA @@ -6797,12 +7116,15 @@ E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR 2185..2188 ; XID_Start # Nl [4] ROMAN NUMERAL SIX LATE FORM..ROMAN NUMERAL ONE HUNDRED THOUSAND 2C00..2C2E ; XID_Start # L& [47] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE 2C30..2C5E ; XID_Start # L& [47] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER LATINATE MYSLITE -2C60..2C7C ; XID_Start # L& [29] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN SUBSCRIPT SMALL LETTER J -2C7D ; XID_Start # Lm MODIFIER LETTER CAPITAL V +2C60..2C7B ; XID_Start # L& [28] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN LETTER SMALL CAPITAL TURNED E +2C7C..2C7D ; XID_Start # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V 2C7E..2CE4 ; XID_Start # L& [103] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SYMBOL KAI 2CEB..2CEE ; XID_Start # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CF2..2CF3 ; XID_Start # L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI 2D00..2D25 ; XID_Start # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE -2D30..2D65 ; XID_Start # Lo [54] TIFINAGH LETTER YA..TIFINAGH LETTER YAZZ +2D27 ; XID_Start # L& GEORGIAN SMALL LETTER YN +2D2D ; XID_Start # L& GEORGIAN SMALL LETTER AEN +2D30..2D67 ; XID_Start # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO 2D6F ; XID_Start # Lm TIFINAGH 
MODIFIER LETTER LABIALIZATION MARK 2D80..2D96 ; XID_Start # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE 2DA0..2DA6 ; XID_Start # Lo [7] ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO @@ -6832,7 +7154,7 @@ E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR 31A0..31BA ; XID_Start # Lo [27] BOPOMOFO LETTER BU..BOPOMOFO LETTER ZY 31F0..31FF ; XID_Start # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO 3400..4DB5 ; XID_Start # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5 -4E00..9FCB ; XID_Start # Lo [20940] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCB +4E00..9FCC ; XID_Start # Lo [20941] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCC A000..A014 ; XID_Start # Lo [21] YI SYLLABLE IT..YI SYLLABLE E A015 ; XID_Start # Lm YI SYLLABLE WU A016..A48C ; XID_Start # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR @@ -6854,8 +7176,9 @@ A770 ; XID_Start # Lm MODIFIER LETTER US A771..A787 ; XID_Start # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T A788 ; XID_Start # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A78B..A78E ; XID_Start # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT -A790..A791 ; XID_Start # L& [2] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER N WITH DESCENDER -A7A0..A7A9 ; XID_Start # L& [10] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN SMALL LETTER S WITH OBLIQUE STROKE +A790..A793 ; XID_Start # L& [4] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER C WITH BAR +A7A0..A7AA ; XID_Start # L& [11] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN CAPITAL LETTER H WITH HOOK +A7F8..A7F9 ; XID_Start # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE A7FA ; XID_Start # L& LATIN LETTER SMALL CAPITAL TURNED M A7FB..A801 ; XID_Start # Lo [7] LATIN EPIGRAPHIC LETTER REVERSED F..SYLOTI NAGRI LETTER I A803..A805 ; XID_Start # Lo [3] SYLOTI NAGRI LETTER U..SYLOTI NAGRI LETTER O @@ -6885,6 
+7208,9 @@ AAC0 ; XID_Start # Lo TAI VIET TONE MAI NUENG AAC2 ; XID_Start # Lo TAI VIET TONE MAI SONG AADB..AADC ; XID_Start # Lo [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG AADD ; XID_Start # Lm TAI VIET SYMBOL SAM +AAE0..AAEA ; XID_Start # Lo [11] MEETEI MAYEK LETTER E..MEETEI MAYEK LETTER SSA +AAF2 ; XID_Start # Lo MEETEI MAYEK ANJI +AAF3..AAF4 ; XID_Start # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK AB01..AB06 ; XID_Start # Lo [6] ETHIOPIC SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO AB09..AB0E ; XID_Start # Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO AB11..AB16 ; XID_Start # Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO @@ -6894,8 +7220,7 @@ ABC0..ABE2 ; XID_Start # Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTE AC00..D7A3 ; XID_Start # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH D7B0..D7C6 ; XID_Start # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E D7CB..D7FB ; XID_Start # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH -F900..FA2D ; XID_Start # Lo [302] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA2D -FA30..FA6D ; XID_Start # Lo [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D +F900..FA6D ; XID_Start # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D FA70..FAD9 ; XID_Start # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 FB00..FB06 ; XID_Start # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST FB13..FB17 ; XID_Start # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH @@ -6958,6 +7283,8 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 1083F..10855 ; XID_Start # Lo [23] CYPRIOT SYLLABLE ZO..IMPERIAL ARAMAIC LETTER TAW 10900..10915 ; XID_Start # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU 10920..10939 ; XID_Start # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C +10980..109B7 ; XID_Start # Lo [56] MEROITIC 
HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA +109BE..109BF ; XID_Start # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN 10A00 ; XID_Start # Lo KHAROSHTHI LETTER A 10A10..10A13 ; XID_Start # Lo [4] KHAROSHTHI LETTER KA..KHAROSHTHI LETTER GHA 10A15..10A17 ; XID_Start # Lo [3] KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA @@ -6969,10 +7296,18 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 10C00..10C48 ; XID_Start # Lo [73] OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH 11003..11037 ; XID_Start # Lo [53] BRAHMI SIGN JIHVAMULIYA..BRAHMI LETTER OLD TAMIL NNNA 11083..110AF ; XID_Start # Lo [45] KAITHI LETTER A..KAITHI LETTER HA +110D0..110E8 ; XID_Start # Lo [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE +11103..11126 ; XID_Start # Lo [36] CHAKMA LETTER AA..CHAKMA LETTER HAA +11183..111B2 ; XID_Start # Lo [48] SHARADA LETTER A..SHARADA LETTER HA +111C1..111C4 ; XID_Start # Lo [4] SHARADA SIGN AVAGRAHA..SHARADA OM +11680..116AA ; XID_Start # Lo [43] TAKRI LETTER A..TAKRI LETTER RRA 12000..1236E ; XID_Start # Lo [879] CUNEIFORM SIGN A..CUNEIFORM SIGN ZUM 12400..12462 ; XID_Start # Nl [99] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN OLD ASSYRIAN ONE QUARTER 13000..1342E ; XID_Start # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032 16800..16A38 ; XID_Start # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ +16F00..16F44 ; XID_Start # Lo [69] MIAO LETTER PA..MIAO LETTER HHA +16F50 ; XID_Start # Lo MIAO LETTER NASALIZATION +16F93..16F9F ; XID_Start # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 1B000..1B001 ; XID_Start # Lo [2] KATAKANA LETTER ARCHAIC E..HIRAGANA LETTER ARCHAIC YE 1D400..1D454 ; XID_Start # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G 1D456..1D49C ; XID_Start # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A @@ -7004,19 +7339,51 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER 
EU..HALFWIDTH HANGU 1D78A..1D7A8 ; XID_Start # L& [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA 1D7AA..1D7C2 ; XID_Start # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA 1D7C4..1D7CB ; XID_Start # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA +1EE00..1EE03 ; XID_Start # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; XID_Start # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; XID_Start # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; XID_Start # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; XID_Start # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; XID_Start # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; XID_Start # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; XID_Start # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; XID_Start # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; XID_Start # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; XID_Start # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; XID_Start # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; XID_Start # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; XID_Start # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; XID_Start # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; XID_Start # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; XID_Start # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; XID_Start # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; XID_Start # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; XID_Start # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; XID_Start # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; XID_Start # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; 
XID_Start # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; XID_Start # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; XID_Start # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; XID_Start # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; XID_Start # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; XID_Start # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; XID_Start # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; XID_Start # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; XID_Start # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; XID_Start # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; XID_Start # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN 20000..2A6D6 ; XID_Start # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6 2A700..2B734 ; XID_Start # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734 2B740..2B81D ; XID_Start # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D 2F800..2FA1D ; XID_Start # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D -# Total code points: 100724 +# Total code points: 101217 # ================================================ # Derived Property: XID_Continue # Mod_ID_Continue modified for closure under NFKx # Modified as described in UAX #15 -# NOTE: Cf characters should be filtered out. # NOTE: Does NOT remove the non-NFKx characters. 
# Merely ensures that if isIdentifer(string) then isIdentifier(NFKx(string)) # NOTE: See UAX #31 for more information @@ -7025,10 +7392,10 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 0041..005A ; XID_Continue # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z 005F ; XID_Continue # Pc LOW LINE 0061..007A ; XID_Continue # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z -00AA ; XID_Continue # L& FEMININE ORDINAL INDICATOR +00AA ; XID_Continue # Lo FEMININE ORDINAL INDICATOR 00B5 ; XID_Continue # L& MICRO SIGN 00B7 ; XID_Continue # Po MIDDLE DOT -00BA ; XID_Continue # L& MASCULINE ORDINAL INDICATOR +00BA ; XID_Continue # Lo MASCULINE ORDINAL INDICATOR 00C0..00D6 ; XID_Continue # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS 00D8..00F6 ; XID_Continue # L& [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS 00F8..01BA ; XID_Continue # L& [195] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL @@ -7108,6 +7475,9 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 0829..082D ; XID_Continue # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA 0840..0858 ; XID_Continue # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN 0859..085B ; XID_Continue # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK +08A0 ; XID_Continue # Lo ARABIC LETTER BEH WITH SMALL V BELOW +08A2..08AC ; XID_Continue # Lo [11] ARABIC LETTER JEEM WITH TWO DOTS ABOVE..ARABIC LETTER ROHINGYA YEH +08E4..08FE ; XID_Continue # Mn [27] ARABIC CURLY FATHA..ARABIC DAMMA WITH DOT 0900..0902 ; XID_Continue # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA 0903 ; XID_Continue # Mc DEVANAGARI SIGN VISARGA 0904..0939 ; XID_Continue # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA @@ -7329,7 +7699,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 0EC6 ; XID_Continue # Lm LAO KO LA 0EC8..0ECD ; 
XID_Continue # Mn [6] LAO TONE MAI EK..LAO NIGGAHITA 0ED0..0ED9 ; XID_Continue # Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE -0EDC..0EDD ; XID_Continue # Lo [2] LAO HO NO..LAO HO MO +0EDC..0EDF ; XID_Continue # Lo [4] LAO HO NO..LAO LETTER KHMU NYO 0F00 ; XID_Continue # Lo TIBETAN SYLLABLE OM 0F18..0F19 ; XID_Continue # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS 0F20..0F29 ; XID_Continue # Nd [10] TIBETAN DIGIT ZERO..TIBETAN DIGIT NINE @@ -7381,9 +7751,11 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 109A..109C ; XID_Continue # Mc [3] MYANMAR SIGN KHAMTI TONE-1..MYANMAR VOWEL SIGN AITON A 109D ; XID_Continue # Mn MYANMAR VOWEL SIGN AITON AI 10A0..10C5 ; XID_Continue # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; XID_Continue # L& GEORGIAN CAPITAL LETTER YN +10CD ; XID_Continue # L& GEORGIAN CAPITAL LETTER AEN 10D0..10FA ; XID_Continue # Lo [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN 10FC ; XID_Continue # Lm MODIFIER LETTER GEORGIAN NAR -1100..1248 ; XID_Continue # Lo [329] HANGUL CHOSEONG KIYEOK..ETHIOPIC SYLLABLE QWA +10FD..1248 ; XID_Continue # Lo [332] GEORGIAN LETTER AEN..ETHIOPIC SYLLABLE QWA 124A..124D ; XID_Continue # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE 1250..1256 ; XID_Continue # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO 1258 ; XID_Continue # Lo ETHIOPIC SYLLABLE QHWA @@ -7419,6 +7791,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 176E..1770 ; XID_Continue # Lo [3] TAGBANWA LETTER LA..TAGBANWA LETTER SA 1772..1773 ; XID_Continue # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U 1780..17B3 ; XID_Continue # Lo [52] KHMER LETTER KA..KHMER INDEPENDENT VOWEL QAU +17B4..17B5 ; XID_Continue # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA 17B6 ; XID_Continue # Mc KHMER VOWEL SIGN AA 17B7..17BD ; XID_Continue # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA 17BE..17C5 ; XID_Continue # Mc [8] KHMER 
VOWEL SIGN OE..KHMER VOWEL SIGN AU @@ -7497,9 +7870,11 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 1BA6..1BA7 ; XID_Continue # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG 1BA8..1BA9 ; XID_Continue # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG 1BAA ; XID_Continue # Mc SUNDANESE SIGN PAMAAEH +1BAB ; XID_Continue # Mn SUNDANESE SIGN VIRAMA +1BAC..1BAD ; XID_Continue # Mc [2] SUNDANESE CONSONANT SIGN PASANGAN MA..SUNDANESE CONSONANT SIGN PASANGAN WA 1BAE..1BAF ; XID_Continue # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA 1BB0..1BB9 ; XID_Continue # Nd [10] SUNDANESE DIGIT ZERO..SUNDANESE DIGIT NINE -1BC0..1BE5 ; XID_Continue # Lo [38] BATAK LETTER A..BATAK LETTER U +1BBA..1BE5 ; XID_Continue # Lo [44] SUNDANESE AVAGRAHA..BATAK LETTER U 1BE6 ; XID_Continue # Mn BATAK SIGN TOMPI 1BE7 ; XID_Continue # Mc BATAK VOWEL SIGN E 1BE8..1BE9 ; XID_Continue # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE @@ -7525,10 +7900,12 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 1CE9..1CEC ; XID_Continue # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL 1CED ; XID_Continue # Mn VEDIC SIGN TIRYAK 1CEE..1CF1 ; XID_Continue # Lo [4] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ANUSVARA UBHAYATO MUKHA -1CF2 ; XID_Continue # Mc VEDIC SIGN ARDHAVISARGA +1CF2..1CF3 ; XID_Continue # Mc [2] VEDIC SIGN ARDHAVISARGA..VEDIC SIGN ROTATED ARDHAVISARGA +1CF4 ; XID_Continue # Mn VEDIC TONE CANDRA ABOVE +1CF5..1CF6 ; XID_Continue # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA 1D00..1D2B ; XID_Continue # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL -1D2C..1D61 ; XID_Continue # Lm [54] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI -1D62..1D77 ; XID_Continue # L& [22] LATIN SUBSCRIPT SMALL LETTER I..LATIN SMALL LETTER TURNED G +1D2C..1D6A ; XID_Continue # Lm [63] MODIFIER LETTER CAPITAL A..GREEK 
SUBSCRIPT SMALL LETTER CHI +1D6B..1D77 ; XID_Continue # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G 1D78 ; XID_Continue # Lm MODIFIER LETTER CYRILLIC EN 1D79..1D9A ; XID_Continue # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK 1D9B..1DBF ; XID_Continue # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA @@ -7583,13 +7960,16 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 2185..2188 ; XID_Continue # Nl [4] ROMAN NUMERAL SIX LATE FORM..ROMAN NUMERAL ONE HUNDRED THOUSAND 2C00..2C2E ; XID_Continue # L& [47] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE 2C30..2C5E ; XID_Continue # L& [47] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER LATINATE MYSLITE -2C60..2C7C ; XID_Continue # L& [29] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN SUBSCRIPT SMALL LETTER J -2C7D ; XID_Continue # Lm MODIFIER LETTER CAPITAL V +2C60..2C7B ; XID_Continue # L& [28] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN LETTER SMALL CAPITAL TURNED E +2C7C..2C7D ; XID_Continue # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V 2C7E..2CE4 ; XID_Continue # L& [103] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SYMBOL KAI 2CEB..2CEE ; XID_Continue # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA 2CEF..2CF1 ; XID_Continue # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS +2CF2..2CF3 ; XID_Continue # L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI 2D00..2D25 ; XID_Continue # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE -2D30..2D65 ; XID_Continue # Lo [54] TIFINAGH LETTER YA..TIFINAGH LETTER YAZZ +2D27 ; XID_Continue # L& GEORGIAN SMALL LETTER YN +2D2D ; XID_Continue # L& GEORGIAN SMALL LETTER AEN +2D30..2D67 ; XID_Continue # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO 2D6F ; XID_Continue # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK 2D7F ; 
XID_Continue # Mn TIFINAGH CONSONANT JOINER 2D80..2D96 ; XID_Continue # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE @@ -7606,7 +7986,8 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 3006 ; XID_Continue # Lo IDEOGRAPHIC CLOSING MARK 3007 ; XID_Continue # Nl IDEOGRAPHIC NUMBER ZERO 3021..3029 ; XID_Continue # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE -302A..302F ; XID_Continue # Mn [6] IDEOGRAPHIC LEVEL TONE MARK..HANGUL DOUBLE DOT TONE MARK +302A..302D ; XID_Continue # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK +302E..302F ; XID_Continue # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK 3031..3035 ; XID_Continue # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF 3038..303A ; XID_Continue # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY 303B ; XID_Continue # Lm VERTICAL IDEOGRAPHIC ITERATION MARK @@ -7623,7 +8004,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 31A0..31BA ; XID_Continue # Lo [27] BOPOMOFO LETTER BU..BOPOMOFO LETTER ZY 31F0..31FF ; XID_Continue # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO 3400..4DB5 ; XID_Continue # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5 -4E00..9FCB ; XID_Continue # Lo [20940] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCB +4E00..9FCC ; XID_Continue # Lo [20941] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCC A000..A014 ; XID_Continue # Lo [21] YI SYLLABLE IT..YI SYLLABLE E A015 ; XID_Continue # Lm YI SYLLABLE WU A016..A48C ; XID_Continue # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR @@ -7637,9 +8018,10 @@ A62A..A62B ; XID_Continue # Lo [2] VAI SYLLABLE NDOLE MA..VAI SYLLABLE NDOL A640..A66D ; XID_Continue # L& [46] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O A66E ; XID_Continue # Lo CYRILLIC LETTER MULTIOCULAR O A66F ; XID_Continue # Mn COMBINING CYRILLIC VZMET -A67C..A67D ; XID_Continue # Mn [2] 
COMBINING CYRILLIC KAVYKA..COMBINING CYRILLIC PAYEROK +A674..A67D ; XID_Continue # Mn [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK A67F ; XID_Continue # Lm CYRILLIC PAYEROK A680..A697 ; XID_Continue # L& [24] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER SHWE +A69F ; XID_Continue # Mn COMBINING CYRILLIC LETTER IOTIFIED E A6A0..A6E5 ; XID_Continue # Lo [70] BAMUM LETTER A..BAMUM LETTER KI A6E6..A6EF ; XID_Continue # Nl [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM A6F0..A6F1 ; XID_Continue # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS @@ -7649,8 +8031,9 @@ A770 ; XID_Continue # Lm MODIFIER LETTER US A771..A787 ; XID_Continue # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T A788 ; XID_Continue # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A78B..A78E ; XID_Continue # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT -A790..A791 ; XID_Continue # L& [2] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER N WITH DESCENDER -A7A0..A7A9 ; XID_Continue # L& [10] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN SMALL LETTER S WITH OBLIQUE STROKE +A790..A793 ; XID_Continue # L& [4] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER C WITH BAR +A7A0..A7AA ; XID_Continue # L& [11] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN CAPITAL LETTER H WITH HOOK +A7F8..A7F9 ; XID_Continue # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE A7FA ; XID_Continue # L& LATIN LETTER SMALL CAPITAL TURNED M A7FB..A801 ; XID_Continue # Lo [7] LATIN EPIGRAPHIC LETTER REVERSED F..SYLOTI NAGRI LETTER I A802 ; XID_Continue # Mn SYLOTI NAGRI SIGN DVISVARA @@ -7719,6 +8102,14 @@ AAC1 ; XID_Continue # Mn TAI VIET TONE MAI THO AAC2 ; XID_Continue # Lo TAI VIET TONE MAI SONG AADB..AADC ; XID_Continue # Lo [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG AADD ; XID_Continue # Lm TAI VIET SYMBOL SAM +AAE0..AAEA ; XID_Continue # Lo [11] MEETEI MAYEK LETTER 
E..MEETEI MAYEK LETTER SSA +AAEB ; XID_Continue # Mc MEETEI MAYEK VOWEL SIGN II +AAEC..AAED ; XID_Continue # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI +AAEE..AAEF ; XID_Continue # Mc [2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU +AAF2 ; XID_Continue # Lo MEETEI MAYEK ANJI +AAF3..AAF4 ; XID_Continue # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK +AAF5 ; XID_Continue # Mc MEETEI MAYEK VOWEL SIGN VISARGA +AAF6 ; XID_Continue # Mn MEETEI MAYEK VIRAMA AB01..AB06 ; XID_Continue # Lo [6] ETHIOPIC SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO AB09..AB0E ; XID_Continue # Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO AB11..AB16 ; XID_Continue # Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO @@ -7736,8 +8127,7 @@ ABF0..ABF9 ; XID_Continue # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI AC00..D7A3 ; XID_Continue # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH D7B0..D7C6 ; XID_Continue # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E D7CB..D7FB ; XID_Continue # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH -F900..FA2D ; XID_Continue # Lo [302] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA2D -FA30..FA6D ; XID_Continue # Lo [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D +F900..FA6D ; XID_Continue # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D FA70..FAD9 ; XID_Continue # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 FB00..FB06 ; XID_Continue # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST FB13..FB17 ; XID_Continue # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH @@ -7810,6 +8200,8 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 1083F..10855 ; XID_Continue # Lo [23] CYPRIOT SYLLABLE ZO..IMPERIAL ARAMAIC LETTER TAW 10900..10915 ; XID_Continue # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU 
10920..10939 ; XID_Continue # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C +10980..109B7 ; XID_Continue # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA +109BE..109BF ; XID_Continue # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN 10A00 ; XID_Continue # Lo KHAROSHTHI LETTER A 10A01..10A03 ; XID_Continue # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R 10A05..10A06 ; XID_Continue # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O @@ -7837,10 +8229,40 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 110B3..110B6 ; XID_Continue # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI 110B7..110B8 ; XID_Continue # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU 110B9..110BA ; XID_Continue # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA +110D0..110E8 ; XID_Continue # Lo [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE +110F0..110F9 ; XID_Continue # Nd [10] SORA SOMPENG DIGIT ZERO..SORA SOMPENG DIGIT NINE +11100..11102 ; XID_Continue # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11103..11126 ; XID_Continue # Lo [36] CHAKMA LETTER AA..CHAKMA LETTER HAA +11127..1112B ; XID_Continue # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112C ; XID_Continue # Mc CHAKMA VOWEL SIGN E +1112D..11134 ; XID_Continue # Mn [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA +11136..1113F ; XID_Continue # Nd [10] CHAKMA DIGIT ZERO..CHAKMA DIGIT NINE +11180..11181 ; XID_Continue # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +11182 ; XID_Continue # Mc SHARADA SIGN VISARGA +11183..111B2 ; XID_Continue # Lo [48] SHARADA LETTER A..SHARADA LETTER HA +111B3..111B5 ; XID_Continue # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II +111B6..111BE ; XID_Continue # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +111BF..111C0 ; XID_Continue # Mc [2] SHARADA VOWEL SIGN AU..SHARADA SIGN VIRAMA +111C1..111C4 ; XID_Continue # Lo [4] SHARADA SIGN AVAGRAHA..SHARADA OM +111D0..111D9 ; XID_Continue # Nd 
[10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE +11680..116AA ; XID_Continue # Lo [43] TAKRI LETTER A..TAKRI LETTER RRA +116AB ; XID_Continue # Mn TAKRI SIGN ANUSVARA +116AC ; XID_Continue # Mc TAKRI SIGN VISARGA +116AD ; XID_Continue # Mn TAKRI VOWEL SIGN AA +116AE..116AF ; XID_Continue # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II +116B0..116B5 ; XID_Continue # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +116B6 ; XID_Continue # Mc TAKRI SIGN VIRAMA +116B7 ; XID_Continue # Mn TAKRI SIGN NUKTA +116C0..116C9 ; XID_Continue # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE 12000..1236E ; XID_Continue # Lo [879] CUNEIFORM SIGN A..CUNEIFORM SIGN ZUM 12400..12462 ; XID_Continue # Nl [99] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN OLD ASSYRIAN ONE QUARTER 13000..1342E ; XID_Continue # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032 16800..16A38 ; XID_Continue # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ +16F00..16F44 ; XID_Continue # Lo [69] MIAO LETTER PA..MIAO LETTER HHA +16F50 ; XID_Continue # Lo MIAO LETTER NASALIZATION +16F51..16F7E ; XID_Continue # Mc [46] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN NG +16F8F..16F92 ; XID_Continue # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW +16F93..16F9F ; XID_Continue # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 1B000..1B001 ; XID_Continue # Lo [2] KATAKANA LETTER ARCHAIC E..HIRAGANA LETTER ARCHAIC YE 1D165..1D166 ; XID_Continue # Mc [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM 1D167..1D169 ; XID_Continue # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 @@ -7880,13 +8302,46 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 1D7AA..1D7C2 ; XID_Continue # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA 1D7C4..1D7CB ; XID_Continue # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA 1D7CE..1D7FF 
; XID_Continue # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE +1EE00..1EE03 ; XID_Continue # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; XID_Continue # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; XID_Continue # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; XID_Continue # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; XID_Continue # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; XID_Continue # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; XID_Continue # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; XID_Continue # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; XID_Continue # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; XID_Continue # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; XID_Continue # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; XID_Continue # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; XID_Continue # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; XID_Continue # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; XID_Continue # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; XID_Continue # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; XID_Continue # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; XID_Continue # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; XID_Continue # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; XID_Continue # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; XID_Continue # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; XID_Continue # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; XID_Continue # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; XID_Continue # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; XID_Continue # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC 
MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; XID_Continue # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; XID_Continue # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; XID_Continue # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; XID_Continue # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; XID_Continue # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; XID_Continue # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; XID_Continue # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; XID_Continue # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN 20000..2A6D6 ; XID_Continue # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6 2A700..2B734 ; XID_Continue # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734 2B740..2B81D ; XID_Continue # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D 2F800..2FA1D ; XID_Continue # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 102656 +# Total code points: 103336 # ================================================ @@ -7897,12 +8352,12 @@ E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTO # + Variation_Selector # - White_Space # - FFF9..FFFB (Annotation Characters) -# - 0600..0603, 06DD, 070F, 110BD (exceptional Cf characters that should be visible) +# - 0600..0604, 06DD, 070F, 110BD (exceptional Cf characters that should be visible) 00AD ; Default_Ignorable_Code_Point # Cf SOFT HYPHEN 034F ; Default_Ignorable_Code_Point # Mn COMBINING GRAPHEME JOINER 115F..1160 ; Default_Ignorable_Code_Point # Lo [2] 
HANGUL CHOSEONG FILLER..HANGUL JUNGSEONG FILLER -17B4..17B5 ; Default_Ignorable_Code_Point # Cf [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA +17B4..17B5 ; Default_Ignorable_Code_Point # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA 180B..180D ; Default_Ignorable_Code_Point # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE 200B..200F ; Default_Ignorable_Code_Point # Cf [5] ZERO WIDTH SPACE..RIGHT-TO-LEFT MARK 202A..202E ; Default_Ignorable_Code_Point # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE @@ -7956,6 +8411,7 @@ E01F0..E0FFF ; Default_Ignorable_Code_Point # Cn [3600] <reserved-E01F0>..<rese 0825..0827 ; Grapheme_Extend # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U 0829..082D ; Grapheme_Extend # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA 0859..085B ; Grapheme_Extend # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK +08E4..08FE ; Grapheme_Extend # Mn [27] ARABIC CURLY FATHA..ARABIC DAMMA WITH DOT 0900..0902 ; Grapheme_Extend # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA 093A ; Grapheme_Extend # Mn DEVANAGARI VOWEL SIGN OE 093C ; Grapheme_Extend # Mn DEVANAGARI SIGN NUKTA @@ -8053,6 +8509,7 @@ E01F0..E0FFF ; Default_Ignorable_Code_Point # Cn [3600] <reserved-E01F0>..<rese 1732..1734 ; Grapheme_Extend # Mn [3] HANUNOO VOWEL SIGN I..HANUNOO SIGN PAMUDPOD 1752..1753 ; Grapheme_Extend # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U 1772..1773 ; Grapheme_Extend # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U +17B4..17B5 ; Grapheme_Extend # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA 17B7..17BD ; Grapheme_Extend # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA 17C6 ; Grapheme_Extend # Mn KHMER SIGN NIKAHIT 17C9..17D3 ; Grapheme_Extend # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT @@ -8080,6 +8537,7 @@ E01F0..E0FFF ; Default_Ignorable_Code_Point # Cn [3600] <reserved-E01F0>..<rese 1B80..1B81 ; Grapheme_Extend # 
Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR 1BA2..1BA5 ; Grapheme_Extend # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU 1BA8..1BA9 ; Grapheme_Extend # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG +1BAB ; Grapheme_Extend # Mn SUNDANESE SIGN VIRAMA 1BE6 ; Grapheme_Extend # Mn BATAK SIGN TOMPI 1BE8..1BE9 ; Grapheme_Extend # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE 1BED ; Grapheme_Extend # Mn BATAK VOWEL SIGN KARO O @@ -8090,6 +8548,7 @@ E01F0..E0FFF ; Default_Ignorable_Code_Point # Cn [3600] <reserved-E01F0>..<rese 1CD4..1CE0 ; Grapheme_Extend # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA 1CE2..1CE8 ; Grapheme_Extend # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL 1CED ; Grapheme_Extend # Mn VEDIC SIGN TIRYAK +1CF4 ; Grapheme_Extend # Mn VEDIC TONE CANDRA ABOVE 1DC0..1DE6 ; Grapheme_Extend # Mn [39] COMBINING DOTTED GRAVE ACCENT..COMBINING LATIN SMALL LETTER Z 1DFC..1DFF ; Grapheme_Extend # Mn [4] COMBINING DOUBLE INVERTED BREVE BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW 200C..200D ; Grapheme_Extend # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER @@ -8101,11 +8560,13 @@ E01F0..E0FFF ; Default_Ignorable_Code_Point # Cn [3600] <reserved-E01F0>..<rese 2CEF..2CF1 ; Grapheme_Extend # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS 2D7F ; Grapheme_Extend # Mn TIFINAGH CONSONANT JOINER 2DE0..2DFF ; Grapheme_Extend # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS -302A..302F ; Grapheme_Extend # Mn [6] IDEOGRAPHIC LEVEL TONE MARK..HANGUL DOUBLE DOT TONE MARK +302A..302D ; Grapheme_Extend # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK +302E..302F ; Grapheme_Extend # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK 3099..309A ; Grapheme_Extend # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING 
KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK A66F ; Grapheme_Extend # Mn COMBINING CYRILLIC VZMET A670..A672 ; Grapheme_Extend # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN -A67C..A67D ; Grapheme_Extend # Mn [2] COMBINING CYRILLIC KAVYKA..COMBINING CYRILLIC PAYEROK +A674..A67D ; Grapheme_Extend # Mn [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK +A69F ; Grapheme_Extend # Mn COMBINING CYRILLIC LETTER IOTIFIED E A6F0..A6F1 ; Grapheme_Extend # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS A802 ; Grapheme_Extend # Mn SYLOTI NAGRI SIGN DVISVARA A806 ; Grapheme_Extend # Mn SYLOTI NAGRI SIGN HASANTA @@ -8129,6 +8590,8 @@ AAB2..AAB4 ; Grapheme_Extend # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U AAB7..AAB8 ; Grapheme_Extend # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA AABE..AABF ; Grapheme_Extend # Mn [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK AAC1 ; Grapheme_Extend # Mn TAI VIET TONE MAI THO +AAEC..AAED ; Grapheme_Extend # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI +AAF6 ; Grapheme_Extend # Mn MEETEI MAYEK VIRAMA ABE5 ; Grapheme_Extend # Mn MEETEI MAYEK VOWEL SIGN ANAP ABE8 ; Grapheme_Extend # Mn MEETEI MAYEK VOWEL SIGN UNAP ABED ; Grapheme_Extend # Mn MEETEI MAYEK APUN IYEK @@ -8147,6 +8610,16 @@ FF9E..FF9F ; Grapheme_Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK. 
11080..11081 ; Grapheme_Extend # Mn [2] KAITHI SIGN CANDRABINDU..KAITHI SIGN ANUSVARA 110B3..110B6 ; Grapheme_Extend # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI 110B9..110BA ; Grapheme_Extend # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA +11100..11102 ; Grapheme_Extend # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11127..1112B ; Grapheme_Extend # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112D..11134 ; Grapheme_Extend # Mn [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA +11180..11181 ; Grapheme_Extend # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +111B6..111BE ; Grapheme_Extend # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +116AB ; Grapheme_Extend # Mn TAKRI SIGN ANUSVARA +116AD ; Grapheme_Extend # Mn TAKRI VOWEL SIGN AA +116B0..116B5 ; Grapheme_Extend # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +116B7 ; Grapheme_Extend # Mn TAKRI SIGN NUKTA +16F8F..16F92 ; Grapheme_Extend # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW 1D165 ; Grapheme_Extend # Mc MUSICAL SYMBOL COMBINING STEM 1D167..1D169 ; Grapheme_Extend # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 1D16E..1D172 ; Grapheme_Extend # Mc [5] MUSICAL SYMBOL COMBINING FLAG-1..MUSICAL SYMBOL COMBINING FLAG-5 @@ -8156,7 +8629,7 @@ FF9E..FF9F ; Grapheme_Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK. 
1D242..1D244 ; Grapheme_Extend # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 1234 +# Total code points: 1317 # ================================================ @@ -8195,10 +8668,11 @@ E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELE 00A0 ; Grapheme_Base # Zs NO-BREAK SPACE 00A1 ; Grapheme_Base # Po INVERTED EXCLAMATION MARK 00A2..00A5 ; Grapheme_Base # Sc [4] CENT SIGN..YEN SIGN -00A6..00A7 ; Grapheme_Base # So [2] BROKEN BAR..SECTION SIGN +00A6 ; Grapheme_Base # So BROKEN BAR +00A7 ; Grapheme_Base # Po SECTION SIGN 00A8 ; Grapheme_Base # Sk DIAERESIS 00A9 ; Grapheme_Base # So COPYRIGHT SIGN -00AA ; Grapheme_Base # L& FEMININE ORDINAL INDICATOR +00AA ; Grapheme_Base # Lo FEMININE ORDINAL INDICATOR 00AB ; Grapheme_Base # Pi LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 00AC ; Grapheme_Base # Sm NOT SIGN 00AE ; Grapheme_Base # So REGISTERED SIGN @@ -8208,11 +8682,10 @@ E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELE 00B2..00B3 ; Grapheme_Base # No [2] SUPERSCRIPT TWO..SUPERSCRIPT THREE 00B4 ; Grapheme_Base # Sk ACUTE ACCENT 00B5 ; Grapheme_Base # L& MICRO SIGN -00B6 ; Grapheme_Base # So PILCROW SIGN -00B7 ; Grapheme_Base # Po MIDDLE DOT +00B6..00B7 ; Grapheme_Base # Po [2] PILCROW SIGN..MIDDLE DOT 00B8 ; Grapheme_Base # Sk CEDILLA 00B9 ; Grapheme_Base # No SUPERSCRIPT ONE -00BA ; Grapheme_Base # L& MASCULINE ORDINAL INDICATOR +00BA ; Grapheme_Base # Lo MASCULINE ORDINAL INDICATOR 00BB ; Grapheme_Base # Pf RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK 00BC..00BE ; Grapheme_Base # No [3] VULGAR FRACTION ONE QUARTER..VULGAR FRACTION THREE QUARTERS 00BF ; Grapheme_Base # Po INVERTED QUESTION MARK @@ -8261,6 +8734,7 @@ E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELE 0561..0587 ; Grapheme_Base # L& [39] ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL 
LIGATURE ECH YIWN 0589 ; Grapheme_Base # Po ARMENIAN FULL STOP 058A ; Grapheme_Base # Pd ARMENIAN HYPHEN +058F ; Grapheme_Base # Sc ARMENIAN DRAM SIGN 05BE ; Grapheme_Base # Pd HEBREW PUNCTUATION MAQAF 05C0 ; Grapheme_Base # Po HEBREW PUNCTUATION PASEQ 05C3 ; Grapheme_Base # Po HEBREW PUNCTUATION SOF PASUQ @@ -8310,6 +8784,8 @@ E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELE 0830..083E ; Grapheme_Base # Po [15] SAMARITAN PUNCTUATION NEQUDAA..SAMARITAN PUNCTUATION ANNAAU 0840..0858 ; Grapheme_Base # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN 085E ; Grapheme_Base # Po MANDAIC PUNCTUATION +08A0 ; Grapheme_Base # Lo ARABIC LETTER BEH WITH SMALL V BELOW +08A2..08AC ; Grapheme_Base # Lo [11] ARABIC LETTER JEEM WITH TWO DOTS ABOVE..ARABIC LETTER ROHINGYA YEH 0903 ; Grapheme_Base # Mc DEVANAGARI SIGN VISARGA 0904..0939 ; Grapheme_Base # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA 093B ; Grapheme_Base # Mc DEVANAGARI VOWEL SIGN OOE @@ -8372,6 +8848,7 @@ E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELE 0AD0 ; Grapheme_Base # Lo GUJARATI OM 0AE0..0AE1 ; Grapheme_Base # Lo [2] GUJARATI LETTER VOCALIC RR..GUJARATI LETTER VOCALIC LL 0AE6..0AEF ; Grapheme_Base # Nd [10] GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE +0AF0 ; Grapheme_Base # Po GUJARATI ABBREVIATION SIGN 0AF1 ; Grapheme_Base # Sc GUJARATI RUPEE SIGN 0B02..0B03 ; Grapheme_Base # Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA 0B05..0B0C ; Grapheme_Base # Lo [8] ORIYA LETTER A..ORIYA LETTER VOCALIC L @@ -8488,11 +8965,13 @@ E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELE 0EC0..0EC4 ; Grapheme_Base # Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI 0EC6 ; Grapheme_Base # Lm LAO KO LA 0ED0..0ED9 ; Grapheme_Base # Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE -0EDC..0EDD ; Grapheme_Base # Lo [2] LAO HO NO..LAO HO MO +0EDC..0EDF ; Grapheme_Base # Lo [4] LAO HO NO..LAO LETTER KHMU NYO 0F00 ; Grapheme_Base # Lo TIBETAN SYLLABLE OM 
0F01..0F03 ; Grapheme_Base # So [3] TIBETAN MARK GTER YIG MGO TRUNCATED A..TIBETAN MARK GTER YIG MGO -UM GTER TSHEG MA 0F04..0F12 ; Grapheme_Base # Po [15] TIBETAN MARK INITIAL YIG MGO MDUN MA..TIBETAN MARK RGYA GRAM SHAD -0F13..0F17 ; Grapheme_Base # So [5] TIBETAN MARK CARET -DZUD RTAGS ME LONG CAN..TIBETAN ASTROLOGICAL SIGN SGRA GCAN -CHAR RTAGS +0F13 ; Grapheme_Base # So TIBETAN MARK CARET -DZUD RTAGS ME LONG CAN +0F14 ; Grapheme_Base # Po TIBETAN MARK GTER TSHEG +0F15..0F17 ; Grapheme_Base # So [3] TIBETAN LOGOTYPE SIGN CHAD RTAGS..TIBETAN ASTROLOGICAL SIGN SGRA GCAN -CHAR RTAGS 0F1A..0F1F ; Grapheme_Base # So [6] TIBETAN SIGN RDEL DKAR GCIG..TIBETAN SIGN RDEL DKAR RDEL NAG 0F20..0F29 ; Grapheme_Base # Nd [10] TIBETAN DIGIT ZERO..TIBETAN DIGIT NINE 0F2A..0F33 ; Grapheme_Base # No [10] TIBETAN DIGIT HALF ONE..TIBETAN DIGIT HALF ZERO @@ -8540,10 +9019,12 @@ E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELE 109A..109C ; Grapheme_Base # Mc [3] MYANMAR SIGN KHAMTI TONE-1..MYANMAR VOWEL SIGN AITON A 109E..109F ; Grapheme_Base # So [2] MYANMAR SYMBOL SHAN ONE..MYANMAR SYMBOL SHAN EXCLAMATION 10A0..10C5 ; Grapheme_Base # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; Grapheme_Base # L& GEORGIAN CAPITAL LETTER YN +10CD ; Grapheme_Base # L& GEORGIAN CAPITAL LETTER AEN 10D0..10FA ; Grapheme_Base # Lo [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN 10FB ; Grapheme_Base # Po GEORGIAN PARAGRAPH SEPARATOR 10FC ; Grapheme_Base # Lm MODIFIER LETTER GEORGIAN NAR -1100..1248 ; Grapheme_Base # Lo [329] HANGUL CHOSEONG KIYEOK..ETHIOPIC SYLLABLE QWA +10FD..1248 ; Grapheme_Base # Lo [332] GEORGIAN LETTER AEN..ETHIOPIC SYLLABLE QWA 124A..124D ; Grapheme_Base # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE 1250..1256 ; Grapheme_Base # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO 1258 ; Grapheme_Base # Lo ETHIOPIC SYLLABLE QHWA @@ -8559,8 +9040,7 @@ E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION 
SELECTOR-17..VARIATION SELE 12D8..1310 ; Grapheme_Base # Lo [57] ETHIOPIC SYLLABLE ZA..ETHIOPIC SYLLABLE GWA 1312..1315 ; Grapheme_Base # Lo [4] ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE 1318..135A ; Grapheme_Base # Lo [67] ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA -1360 ; Grapheme_Base # So ETHIOPIC SECTION MARK -1361..1368 ; Grapheme_Base # Po [8] ETHIOPIC WORDSPACE..ETHIOPIC PARAGRAPH SEPARATOR +1360..1368 ; Grapheme_Base # Po [9] ETHIOPIC SECTION MARK..ETHIOPIC PARAGRAPH SEPARATOR 1369..137C ; Grapheme_Base # No [20] ETHIOPIC DIGIT ONE..ETHIOPIC NUMBER TEN THOUSAND 1380..138F ; Grapheme_Base # Lo [16] ETHIOPIC SYLLABLE SEBATBEIT MWA..ETHIOPIC SYLLABLE PWE 1390..1399 ; Grapheme_Base # So [10] ETHIOPIC TONAL MARK YIZET..ETHIOPIC TONAL MARK KURT @@ -8652,9 +9132,10 @@ E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELE 1BA1 ; Grapheme_Base # Mc SUNDANESE CONSONANT SIGN PAMINGKAL 1BA6..1BA7 ; Grapheme_Base # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG 1BAA ; Grapheme_Base # Mc SUNDANESE SIGN PAMAAEH +1BAC..1BAD ; Grapheme_Base # Mc [2] SUNDANESE CONSONANT SIGN PASANGAN MA..SUNDANESE CONSONANT SIGN PASANGAN WA 1BAE..1BAF ; Grapheme_Base # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA 1BB0..1BB9 ; Grapheme_Base # Nd [10] SUNDANESE DIGIT ZERO..SUNDANESE DIGIT NINE -1BC0..1BE5 ; Grapheme_Base # Lo [38] BATAK LETTER A..BATAK LETTER U +1BBA..1BE5 ; Grapheme_Base # Lo [44] SUNDANESE AVAGRAHA..BATAK LETTER U 1BE7 ; Grapheme_Base # Mc BATAK VOWEL SIGN E 1BEA..1BEC ; Grapheme_Base # Mc [3] BATAK VOWEL SIGN I..BATAK VOWEL SIGN O 1BEE ; Grapheme_Base # Mc BATAK VOWEL SIGN U @@ -8670,14 +9151,16 @@ E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELE 1C5A..1C77 ; Grapheme_Base # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH 1C78..1C7D ; Grapheme_Base # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD 1C7E..1C7F ; Grapheme_Base # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE 
MUCAAD +1CC0..1CC7 ; Grapheme_Base # Po [8] SUNDANESE PUNCTUATION BINDU SURYA..SUNDANESE PUNCTUATION BINDU BA SATANGA 1CD3 ; Grapheme_Base # Po VEDIC SIGN NIHSHVASA 1CE1 ; Grapheme_Base # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA 1CE9..1CEC ; Grapheme_Base # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL 1CEE..1CF1 ; Grapheme_Base # Lo [4] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ANUSVARA UBHAYATO MUKHA -1CF2 ; Grapheme_Base # Mc VEDIC SIGN ARDHAVISARGA +1CF2..1CF3 ; Grapheme_Base # Mc [2] VEDIC SIGN ARDHAVISARGA..VEDIC SIGN ROTATED ARDHAVISARGA +1CF5..1CF6 ; Grapheme_Base # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA 1D00..1D2B ; Grapheme_Base # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL -1D2C..1D61 ; Grapheme_Base # Lm [54] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI -1D62..1D77 ; Grapheme_Base # L& [22] LATIN SUBSCRIPT SMALL LETTER I..LATIN SMALL LETTER TURNED G +1D2C..1D6A ; Grapheme_Base # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1D6B..1D77 ; Grapheme_Base # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G 1D78 ; Grapheme_Base # Lm MODIFIER LETTER CYRILLIC EN 1D79..1D9A ; Grapheme_Base # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK 1D9B..1DBF ; Grapheme_Base # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA @@ -8850,9 +9333,7 @@ E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELE 27C0..27C4 ; Grapheme_Base # Sm [5] THREE DIMENSIONAL ANGLE..OPEN SUPERSET 27C5 ; Grapheme_Base # Ps LEFT S-SHAPED BAG DELIMITER 27C6 ; Grapheme_Base # Pe RIGHT S-SHAPED BAG DELIMITER -27C7..27CA ; Grapheme_Base # Sm [4] OR WITH DOT INSIDE..VERTICAL BAR WITH HORIZONTAL STROKE -27CC ; Grapheme_Base # Sm LONG DIVISION -27CE..27E5 ; Grapheme_Base # Sm [24] SQUARED LOGICAL AND..WHITE SQUARE WITH RIGHTWARDS TICK +27C7..27E5 ; Grapheme_Base # Sm [31] OR WITH DOT INSIDE..WHITE 
SQUARE WITH RIGHTWARDS TICK 27E6 ; Grapheme_Base # Ps MATHEMATICAL LEFT WHITE SQUARE BRACKET 27E7 ; Grapheme_Base # Pe MATHEMATICAL RIGHT WHITE SQUARE BRACKET 27E8 ; Grapheme_Base # Ps MATHEMATICAL LEFT ANGLE BRACKET @@ -8904,16 +9385,19 @@ E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELE 2B50..2B59 ; Grapheme_Base # So [10] WHITE MEDIUM STAR..HEAVY CIRCLED SALTIRE 2C00..2C2E ; Grapheme_Base # L& [47] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE 2C30..2C5E ; Grapheme_Base # L& [47] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER LATINATE MYSLITE -2C60..2C7C ; Grapheme_Base # L& [29] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN SUBSCRIPT SMALL LETTER J -2C7D ; Grapheme_Base # Lm MODIFIER LETTER CAPITAL V +2C60..2C7B ; Grapheme_Base # L& [28] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN LETTER SMALL CAPITAL TURNED E +2C7C..2C7D ; Grapheme_Base # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V 2C7E..2CE4 ; Grapheme_Base # L& [103] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SYMBOL KAI 2CE5..2CEA ; Grapheme_Base # So [6] COPTIC SYMBOL MI RO..COPTIC SYMBOL SHIMA SIMA 2CEB..2CEE ; Grapheme_Base # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CF2..2CF3 ; Grapheme_Base # L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI 2CF9..2CFC ; Grapheme_Base # Po [4] COPTIC OLD NUBIAN FULL STOP..COPTIC OLD NUBIAN VERSE DIVIDER 2CFD ; Grapheme_Base # No COPTIC FRACTION ONE HALF 2CFE..2CFF ; Grapheme_Base # Po [2] COPTIC FULL STOP..COPTIC MORPHOLOGICAL DIVIDER 2D00..2D25 ; Grapheme_Base # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE -2D30..2D65 ; Grapheme_Base # Lo [54] TIFINAGH LETTER YA..TIFINAGH LETTER YAZZ +2D27 ; Grapheme_Base # L& GEORGIAN SMALL LETTER YN +2D2D ; Grapheme_Base # L& GEORGIAN SMALL LETTER AEN +2D30..2D67 ; Grapheme_Base # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO 2D6F ; Grapheme_Base # Lm 
TIFINAGH MODIFIER LETTER LABIALIZATION MARK 2D70 ; Grapheme_Base # Po TIFINAGH SEPARATOR MARK 2D80..2D96 ; Grapheme_Base # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE @@ -8956,7 +9440,8 @@ E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELE 2E29 ; Grapheme_Base # Pe RIGHT DOUBLE PARENTHESIS 2E2A..2E2E ; Grapheme_Base # Po [5] TWO DOTS OVER ONE DOT PUNCTUATION..REVERSED QUESTION MARK 2E2F ; Grapheme_Base # Lm VERTICAL TILDE -2E30..2E31 ; Grapheme_Base # Po [2] RING POINT..WORD SEPARATOR MIDDLE DOT +2E30..2E39 ; Grapheme_Base # Po [10] RING POINT..TOP HALF SECTION SIGN +2E3A..2E3B ; Grapheme_Base # Pd [2] TWO-EM DASH..THREE-EM DASH 2E80..2E99 ; Grapheme_Base # So [26] CJK RADICAL REPEAT..CJK RADICAL RAP 2E9B..2EF3 ; Grapheme_Base # So [89] CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED TURTLE 2F00..2FD5 ; Grapheme_Base # So [214] KANGXI RADICAL ONE..KANGXI RADICAL FLUTE @@ -9018,7 +9503,9 @@ E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELE 31F0..31FF ; Grapheme_Base # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO 3200..321E ; Grapheme_Base # So [31] PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED KOREAN CHARACTER O HU 3220..3229 ; Grapheme_Base # No [10] PARENTHESIZED IDEOGRAPH ONE..PARENTHESIZED IDEOGRAPH TEN -322A..3250 ; Grapheme_Base # So [39] PARENTHESIZED IDEOGRAPH MOON..PARTNERSHIP SIGN +322A..3247 ; Grapheme_Base # So [30] PARENTHESIZED IDEOGRAPH MOON..CIRCLED IDEOGRAPH KOTO +3248..324F ; Grapheme_Base # No [8] CIRCLED NUMBER TEN ON BLACK SQUARE..CIRCLED NUMBER EIGHTY ON BLACK SQUARE +3250 ; Grapheme_Base # So PARTNERSHIP SIGN 3251..325F ; Grapheme_Base # No [15] CIRCLED NUMBER TWENTY ONE..CIRCLED NUMBER THIRTY FIVE 3260..327F ; Grapheme_Base # So [32] CIRCLED HANGUL KIYEOK..KOREAN STANDARD SYMBOL 3280..3289 ; Grapheme_Base # No [10] CIRCLED IDEOGRAPH ONE..CIRCLED IDEOGRAPH TEN @@ -9028,7 +9515,7 @@ E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELE 
3300..33FF ; Grapheme_Base # So [256] SQUARE APAATO..SQUARE GAL 3400..4DB5 ; Grapheme_Base # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5 4DC0..4DFF ; Grapheme_Base # So [64] HEXAGRAM FOR THE CREATIVE HEAVEN..HEXAGRAM FOR BEFORE COMPLETION -4E00..9FCB ; Grapheme_Base # Lo [20940] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCB +4E00..9FCC ; Grapheme_Base # Lo [20941] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCC A000..A014 ; Grapheme_Base # Lo [21] YI SYLLABLE IT..YI SYLLABLE E A015 ; Grapheme_Base # Lm YI SYLLABLE WU A016..A48C ; Grapheme_Base # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR @@ -9060,8 +9547,9 @@ A771..A787 ; Grapheme_Base # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LET A788 ; Grapheme_Base # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A789..A78A ; Grapheme_Base # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN A78B..A78E ; Grapheme_Base # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT -A790..A791 ; Grapheme_Base # L& [2] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER N WITH DESCENDER -A7A0..A7A9 ; Grapheme_Base # L& [10] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN SMALL LETTER S WITH OBLIQUE STROKE +A790..A793 ; Grapheme_Base # L& [4] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER C WITH BAR +A7A0..A7AA ; Grapheme_Base # L& [11] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN CAPITAL LETTER H WITH HOOK +A7F8..A7F9 ; Grapheme_Base # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE A7FA ; Grapheme_Base # L& LATIN LETTER SMALL CAPITAL TURNED M A7FB..A801 ; Grapheme_Base # Lo [7] LATIN EPIGRAPHIC LETTER REVERSED F..SYLOTI NAGRI LETTER I A803..A805 ; Grapheme_Base # Lo [3] SYLOTI NAGRI LETTER U..SYLOTI NAGRI LETTER O @@ -9123,6 +9611,13 @@ AAC2 ; Grapheme_Base # Lo TAI VIET TONE MAI SONG AADB..AADC ; Grapheme_Base # Lo [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG AADD ; Grapheme_Base # Lm TAI VIET 
SYMBOL SAM AADE..AADF ; Grapheme_Base # Po [2] TAI VIET SYMBOL HO HOI..TAI VIET SYMBOL KOI KOI +AAE0..AAEA ; Grapheme_Base # Lo [11] MEETEI MAYEK LETTER E..MEETEI MAYEK LETTER SSA +AAEB ; Grapheme_Base # Mc MEETEI MAYEK VOWEL SIGN II +AAEE..AAEF ; Grapheme_Base # Mc [2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU +AAF0..AAF1 ; Grapheme_Base # Po [2] MEETEI MAYEK CHEIKHAN..MEETEI MAYEK AHANG KHUDAM +AAF2 ; Grapheme_Base # Lo MEETEI MAYEK ANJI +AAF3..AAF4 ; Grapheme_Base # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK +AAF5 ; Grapheme_Base # Mc MEETEI MAYEK VOWEL SIGN VISARGA AB01..AB06 ; Grapheme_Base # Lo [6] ETHIOPIC SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO AB09..AB0E ; Grapheme_Base # Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO AB11..AB16 ; Grapheme_Base # Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO @@ -9138,8 +9633,7 @@ ABF0..ABF9 ; Grapheme_Base # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK D AC00..D7A3 ; Grapheme_Base # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH D7B0..D7C6 ; Grapheme_Base # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E D7CB..D7FB ; Grapheme_Base # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH -F900..FA2D ; Grapheme_Base # Lo [302] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA2D -FA30..FA6D ; Grapheme_Base # Lo [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D +F900..FA6D ; Grapheme_Base # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D FA70..FAD9 ; Grapheme_Base # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 FB00..FB06 ; Grapheme_Base # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST FB13..FB17 ; Grapheme_Base # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH @@ -9263,8 +9757,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 1003F..1004D ; Grapheme_Base # Lo [15] LINEAR B 
SYLLABLE B020 ZO..LINEAR B SYLLABLE B091 TWO 10050..1005D ; Grapheme_Base # Lo [14] LINEAR B SYMBOL B018..LINEAR B SYMBOL B089 10080..100FA ; Grapheme_Base # Lo [123] LINEAR B IDEOGRAM B100 MAN..LINEAR B IDEOGRAM VESSEL B305 -10100..10101 ; Grapheme_Base # Po [2] AEGEAN WORD SEPARATOR LINE..AEGEAN WORD SEPARATOR DOT -10102 ; Grapheme_Base # So AEGEAN CHECK MARK +10100..10102 ; Grapheme_Base # Po [3] AEGEAN WORD SEPARATOR LINE..AEGEAN CHECK MARK 10107..10133 ; Grapheme_Base # No [45] AEGEAN NUMBER ONE..AEGEAN NUMBER NINETY THOUSAND 10137..1013F ; Grapheme_Base # So [9] AEGEAN WEIGHT BASE UNIT..AEGEAN MEASURE THIRD SUBUNIT 10140..10174 ; Grapheme_Base # Nl [53] GREEK ACROPHONIC ATTIC ONE QUARTER..GREEK ACROPHONIC STRATIAN FIFTY MNAS @@ -9303,6 +9796,8 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 1091F ; Grapheme_Base # Po PHOENICIAN WORD SEPARATOR 10920..10939 ; Grapheme_Base # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C 1093F ; Grapheme_Base # Po LYDIAN TRIANGULAR MARK +10980..109B7 ; Grapheme_Base # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA +109BE..109BF ; Grapheme_Base # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN 10A00 ; Grapheme_Base # Lo KHAROSHTHI LETTER A 10A10..10A13 ; Grapheme_Base # Lo [4] KHAROSHTHI LETTER KA..KHAROSHTHI LETTER GHA 10A15..10A17 ; Grapheme_Base # Lo [3] KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA @@ -9332,11 +9827,33 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 110B7..110B8 ; Grapheme_Base # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU 110BB..110BC ; Grapheme_Base # Po [2] KAITHI ABBREVIATION SIGN..KAITHI ENUMERATION SIGN 110BE..110C1 ; Grapheme_Base # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA +110D0..110E8 ; Grapheme_Base # Lo [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE +110F0..110F9 ; Grapheme_Base # Nd [10] SORA SOMPENG DIGIT ZERO..SORA SOMPENG DIGIT NINE +11103..11126 ; Grapheme_Base # Lo [36] CHAKMA 
LETTER AA..CHAKMA LETTER HAA +1112C ; Grapheme_Base # Mc CHAKMA VOWEL SIGN E +11136..1113F ; Grapheme_Base # Nd [10] CHAKMA DIGIT ZERO..CHAKMA DIGIT NINE +11140..11143 ; Grapheme_Base # Po [4] CHAKMA SECTION MARK..CHAKMA QUESTION MARK +11182 ; Grapheme_Base # Mc SHARADA SIGN VISARGA +11183..111B2 ; Grapheme_Base # Lo [48] SHARADA LETTER A..SHARADA LETTER HA +111B3..111B5 ; Grapheme_Base # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II +111BF..111C0 ; Grapheme_Base # Mc [2] SHARADA VOWEL SIGN AU..SHARADA SIGN VIRAMA +111C1..111C4 ; Grapheme_Base # Lo [4] SHARADA SIGN AVAGRAHA..SHARADA OM +111C5..111C8 ; Grapheme_Base # Po [4] SHARADA DANDA..SHARADA SEPARATOR +111D0..111D9 ; Grapheme_Base # Nd [10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE +11680..116AA ; Grapheme_Base # Lo [43] TAKRI LETTER A..TAKRI LETTER RRA +116AC ; Grapheme_Base # Mc TAKRI SIGN VISARGA +116AE..116AF ; Grapheme_Base # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II +116B6 ; Grapheme_Base # Mc TAKRI SIGN VIRAMA +116C0..116C9 ; Grapheme_Base # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE 12000..1236E ; Grapheme_Base # Lo [879] CUNEIFORM SIGN A..CUNEIFORM SIGN ZUM 12400..12462 ; Grapheme_Base # Nl [99] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN OLD ASSYRIAN ONE QUARTER 12470..12473 ; Grapheme_Base # Po [4] CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER..CUNEIFORM PUNCTUATION SIGN DIAGONAL TRICOLON 13000..1342E ; Grapheme_Base # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032 16800..16A38 ; Grapheme_Base # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ +16F00..16F44 ; Grapheme_Base # Lo [69] MIAO LETTER PA..MIAO LETTER HHA +16F50 ; Grapheme_Base # Lo MIAO LETTER NASALIZATION +16F51..16F7E ; Grapheme_Base # Mc [46] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN NG +16F93..16F9F ; Grapheme_Base # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 1B000..1B001 ; Grapheme_Base # Lo [2] KATAKANA LETTER ARCHAIC E..HIRAGANA LETTER ARCHAIC YE 1D000..1D0F5 ; 
Grapheme_Base # So [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO 1D100..1D126 ; Grapheme_Base # So [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2 @@ -9392,6 +9909,40 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 1D7C3 ; Grapheme_Base # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL 1D7C4..1D7CB ; Grapheme_Base # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA 1D7CE..1D7FF ; Grapheme_Base # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE +1EE00..1EE03 ; Grapheme_Base # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; Grapheme_Base # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; Grapheme_Base # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; Grapheme_Base # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; Grapheme_Base # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; Grapheme_Base # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; Grapheme_Base # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; Grapheme_Base # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; Grapheme_Base # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; Grapheme_Base # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; Grapheme_Base # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; Grapheme_Base # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; Grapheme_Base # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; Grapheme_Base # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; Grapheme_Base # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; Grapheme_Base # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; Grapheme_Base # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; Grapheme_Base # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; Grapheme_Base # Lo 
ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; Grapheme_Base # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; Grapheme_Base # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; Grapheme_Base # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; Grapheme_Base # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; Grapheme_Base # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; Grapheme_Base # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; Grapheme_Base # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; Grapheme_Base # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; Grapheme_Base # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; Grapheme_Base # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; Grapheme_Base # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; Grapheme_Base # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; Grapheme_Base # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; Grapheme_Base # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN +1EEF0..1EEF1 ; Grapheme_Base # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL 1F000..1F02B ; Grapheme_Base # So [44] MAHJONG TILE EAST WIND..MAHJONG TILE BACK 1F030..1F093 ; Grapheme_Base # So [100] DOMINO TILE HORIZONTAL BACK..DOMINO TILE VERTICAL-06-06 1F0A0..1F0AE ; Grapheme_Base # So [15] PLAYING CARD BACK..PLAYING CARD KING OF SPADES @@ -9400,7 +9951,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 1F0D1..1F0DF ; Grapheme_Base # So [15] PLAYING CARD ACE OF CLUBS..PLAYING CARD WHITE 
JOKER 1F100..1F10A ; Grapheme_Base # No [11] DIGIT ZERO FULL STOP..DIGIT NINE COMMA 1F110..1F12E ; Grapheme_Base # So [31] PARENTHESIZED LATIN CAPITAL LETTER A..CIRCLED WZ -1F130..1F169 ; Grapheme_Base # So [58] SQUARED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z +1F130..1F16B ; Grapheme_Base # So [60] SQUARED LATIN CAPITAL LETTER A..RAISED MD SIGN 1F170..1F19A ; Grapheme_Base # So [43] NEGATIVE SQUARED LATIN CAPITAL LETTER A..SQUARED VS 1F1E6..1F202 ; Grapheme_Base # So [29] REGIONAL INDICATOR SYMBOL LETTER A..SQUARED KATAKANA SA 1F210..1F23A ; Grapheme_Base # So [43] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-55B6 @@ -9418,19 +9969,9 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 1F442..1F4F7 ; Grapheme_Base # So [182] EAR..CAMERA 1F4F9..1F4FC ; Grapheme_Base # So [4] VIDEO CAMERA..VIDEOCASSETTE 1F500..1F53D ; Grapheme_Base # So [62] TWISTED RIGHTWARDS ARROWS..DOWN-POINTING SMALL RED TRIANGLE +1F540..1F543 ; Grapheme_Base # So [4] CIRCLED CROSS POMMEE..NOTCHED LEFT SEMICIRCLE WITH THREE DOTS 1F550..1F567 ; Grapheme_Base # So [24] CLOCK FACE ONE OCLOCK..CLOCK FACE TWELVE-THIRTY -1F5FB..1F5FF ; Grapheme_Base # So [5] MOUNT FUJI..MOYAI -1F601..1F610 ; Grapheme_Base # So [16] GRINNING FACE WITH SMILING EYES..NEUTRAL FACE -1F612..1F614 ; Grapheme_Base # So [3] UNAMUSED FACE..PENSIVE FACE -1F616 ; Grapheme_Base # So CONFOUNDED FACE -1F618 ; Grapheme_Base # So FACE THROWING A KISS -1F61A ; Grapheme_Base # So KISSING FACE WITH CLOSED EYES -1F61C..1F61E ; Grapheme_Base # So [3] FACE WITH STUCK-OUT TONGUE AND WINKING EYE..DISAPPOINTED FACE -1F620..1F625 ; Grapheme_Base # So [6] ANGRY FACE..DISAPPOINTED BUT RELIEVED FACE -1F628..1F62B ; Grapheme_Base # So [4] FEARFUL FACE..TIRED FACE -1F62D ; Grapheme_Base # So LOUDLY CRYING FACE -1F630..1F633 ; Grapheme_Base # So [4] FACE WITH OPEN MOUTH AND COLD SWEAT..FLUSHED FACE -1F635..1F640 ; Grapheme_Base # So [12] DIZZY FACE..WEARY CAT FACE +1F5FB..1F640 ; 
Grapheme_Base # So [70] MOUNT FUJI..WEARY CAT FACE 1F645..1F64F ; Grapheme_Base # So [11] FACE WITH NO GOOD GESTURE..PERSON WITH FOLDED HANDS 1F680..1F6C5 ; Grapheme_Base # So [70] ROCKET..LEFT LUGGAGE 1F700..1F773 ; Grapheme_Base # So [116] ALCHEMICAL SYMBOL FOR QUINTESSENCE..ALCHEMICAL SYMBOL FOR HALF OUNCE @@ -9439,7 +9980,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 2B740..2B81D ; Grapheme_Base # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D 2F800..2FA1D ; Grapheme_Base # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D -# Total code points: 108010 +# Total code points: 108660 # ================================================ @@ -9466,17 +10007,22 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 1A60 ; Grapheme_Link # Mn TAI THAM SIGN SAKOT 1B44 ; Grapheme_Link # Mc BALINESE ADEG ADEG 1BAA ; Grapheme_Link # Mc SUNDANESE SIGN PAMAAEH +1BAB ; Grapheme_Link # Mn SUNDANESE SIGN VIRAMA 1BF2..1BF3 ; Grapheme_Link # Mc [2] BATAK PANGOLAT..BATAK PANONGONAN 2D7F ; Grapheme_Link # Mn TIFINAGH CONSONANT JOINER A806 ; Grapheme_Link # Mn SYLOTI NAGRI SIGN HASANTA A8C4 ; Grapheme_Link # Mn SAURASHTRA SIGN VIRAMA A953 ; Grapheme_Link # Mc REJANG VIRAMA A9C0 ; Grapheme_Link # Mc JAVANESE PANGKON +AAF6 ; Grapheme_Link # Mn MEETEI MAYEK VIRAMA ABED ; Grapheme_Link # Mn MEETEI MAYEK APUN IYEK 10A3F ; Grapheme_Link # Mn KHAROSHTHI VIRAMA 11046 ; Grapheme_Link # Mn BRAHMI VIRAMA 110B9 ; Grapheme_Link # Mn KAITHI SIGN VIRAMA +11133..11134 ; Grapheme_Link # Mn [2] CHAKMA VIRAMA..CHAKMA MAAYYAA +111C0 ; Grapheme_Link # Mc SHARADA SIGN VIRAMA +116B6 ; Grapheme_Link # Mc TAKRI SIGN VIRAMA -# Total code points: 31 +# Total code points: 37 # EOF diff --git a/lib/unicore/DNormalizationProps.txt b/lib/unicore/DNormalizationProps.txt index e67276d090..2d71747767 100644 --- a/lib/unicore/DNormalizationProps.txt +++ b/lib/unicore/DNormalizationProps.txt @@ -1,8 +1,8 @@ -# 
DerivedNormalizationProps-6.0.0.txt -# Date: 2010-05-20, 15:14:12 GMT [MD] +# DerivedNormalizationProps-6.1.0.txt +# Date: 2011-07-26, 04:18:07 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ @@ -131,6 +131,7 @@ 33DD ; FC_NFKC; 0077 0062 # So SQUARE WB 33DE ; FC_NFKC; 0076 2215 006D # So SQUARE V OVER M 33DF ; FC_NFKC; 0061 2215 006D # So SQUARE A OVER M +A7F8 ; FC_NFKC; 0127 # Lm MODIFIER LETTER CAPITAL H WITH STROKE 1D400 ; FC_NFKC; 0061 # L& MATHEMATICAL BOLD CAPITAL A 1D401 ; FC_NFKC; 0062 # L& MATHEMATICAL BOLD CAPITAL B 1D402 ; FC_NFKC; 0063 # L& MATHEMATICAL BOLD CAPITAL C @@ -643,9 +644,11 @@ 1F14D ; FC_NFKC; 0073 0073 # So SQUARED SS 1F14E ; FC_NFKC; 0070 0070 0076 # So SQUARED PPV 1F14F ; FC_NFKC; 0077 0063 # So SQUARED WC +1F16A ; FC_NFKC; 006D 0063 # So RAISED MC SIGN +1F16B ; FC_NFKC; 006D 0064 # So RAISED MD SIGN 1F190 ; FC_NFKC; 0064 006A # So SQUARE DJ -# Total code points: 630 +# Total code points: 633 # ================================================ @@ -713,8 +716,7 @@ FA15..FA1E ; Full_Composition_Exclusion # Lo [10] CJK COMPATIBILITY IDEOGRAP FA20 ; Full_Composition_Exclusion # Lo CJK COMPATIBILITY IDEOGRAPH-FA20 FA22 ; Full_Composition_Exclusion # Lo CJK COMPATIBILITY IDEOGRAPH-FA22 FA25..FA26 ; Full_Composition_Exclusion # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA25..CJK COMPATIBILITY IDEOGRAPH-FA26 -FA2A..FA2D ; Full_Composition_Exclusion # Lo [4] CJK COMPATIBILITY IDEOGRAPH-FA2A..CJK COMPATIBILITY IDEOGRAPH-FA2D -FA30..FA6D ; Full_Composition_Exclusion # Lo [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D +FA2A..FA6D ; Full_Composition_Exclusion # Lo [68] CJK COMPATIBILITY IDEOGRAPH-FA2A..CJK COMPATIBILITY IDEOGRAPH-FA6D FA70..FAD9 ; Full_Composition_Exclusion # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY 
IDEOGRAPH-FAD9 FB1D ; Full_Composition_Exclusion # Lo HEBREW LETTER YOD WITH HIRIQ FB1F ; Full_Composition_Exclusion # Lo HEBREW LIGATURE YIDDISH YOD YOD PATAH @@ -728,7 +730,7 @@ FB46..FB4E ; Full_Composition_Exclusion # Lo [9] HEBREW LETTER TSADI WITH D 1D1BB..1D1C0 ; Full_Composition_Exclusion # So [6] MUSICAL SYMBOL MINIMA..MUSICAL SYMBOL FUSA BLACK 2F800..2FA1D ; Full_Composition_Exclusion # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D -# Total code points: 1118 +# Total code points: 1120 # ================================================ @@ -964,8 +966,7 @@ FA15..FA1E ; NFD_QC; N # Lo [10] CJK COMPATIBILITY IDEOGRAPH-FA15..CJK COMPA FA20 ; NFD_QC; N # Lo CJK COMPATIBILITY IDEOGRAPH-FA20 FA22 ; NFD_QC; N # Lo CJK COMPATIBILITY IDEOGRAPH-FA22 FA25..FA26 ; NFD_QC; N # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA25..CJK COMPATIBILITY IDEOGRAPH-FA26 -FA2A..FA2D ; NFD_QC; N # Lo [4] CJK COMPATIBILITY IDEOGRAPH-FA2A..CJK COMPATIBILITY IDEOGRAPH-FA2D -FA30..FA6D ; NFD_QC; N # Lo [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D +FA2A..FA6D ; NFD_QC; N # Lo [68] CJK COMPATIBILITY IDEOGRAPH-FA2A..CJK COMPATIBILITY IDEOGRAPH-FA6D FA70..FAD9 ; NFD_QC; N # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 FB1D ; NFD_QC; N # Lo HEBREW LETTER YOD WITH HIRIQ FB1F ; NFD_QC; N # Lo HEBREW LIGATURE YIDDISH YOD YOD PATAH @@ -978,11 +979,12 @@ FB46..FB4E ; NFD_QC; N # Lo [9] HEBREW LETTER TSADI WITH DAGESH..HEBREW LET 1109A ; NFD_QC; N # Lo KAITHI LETTER DDDHA 1109C ; NFD_QC; N # Lo KAITHI LETTER RHA 110AB ; NFD_QC; N # Lo KAITHI LETTER VA +1112E..1112F ; NFD_QC; N # Mn [2] CHAKMA VOWEL SIGN O..CHAKMA VOWEL SIGN AU 1D15E..1D164 ; NFD_QC; N # So [7] MUSICAL SYMBOL HALF NOTE..MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE 1D1BB..1D1C0 ; NFD_QC; N # So [6] MUSICAL SYMBOL MINIMA..MUSICAL SYMBOL FUSA BLACK 2F800..2FA1D ; NFD_QC; N # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D -# 
Total code points: 13221 +# Total code points: 13225 # ================================================ @@ -1058,8 +1060,7 @@ FA15..FA1E ; NFC_QC; N # Lo [10] CJK COMPATIBILITY IDEOGRAPH-FA15..CJK COMPA FA20 ; NFC_QC; N # Lo CJK COMPATIBILITY IDEOGRAPH-FA20 FA22 ; NFC_QC; N # Lo CJK COMPATIBILITY IDEOGRAPH-FA22 FA25..FA26 ; NFC_QC; N # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA25..CJK COMPATIBILITY IDEOGRAPH-FA26 -FA2A..FA2D ; NFC_QC; N # Lo [4] CJK COMPATIBILITY IDEOGRAPH-FA2A..CJK COMPATIBILITY IDEOGRAPH-FA2D -FA30..FA6D ; NFC_QC; N # Lo [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D +FA2A..FA6D ; NFC_QC; N # Lo [68] CJK COMPATIBILITY IDEOGRAPH-FA2A..CJK COMPATIBILITY IDEOGRAPH-FA6D FA70..FAD9 ; NFC_QC; N # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 FB1D ; NFC_QC; N # Lo HEBREW LETTER YOD WITH HIRIQ FB1F ; NFC_QC; N # Lo HEBREW LIGATURE YIDDISH YOD YOD PATAH @@ -1073,7 +1074,7 @@ FB46..FB4E ; NFC_QC; N # Lo [9] HEBREW LETTER TSADI WITH DAGESH..HEBREW LET 1D1BB..1D1C0 ; NFC_QC; N # So [6] MUSICAL SYMBOL MINIMA..MUSICAL SYMBOL FUSA BLACK 2F800..2FA1D ; NFC_QC; N # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D -# Total code points: 1118 +# Total code points: 1120 # ================================================ @@ -1114,8 +1115,9 @@ FB46..FB4E ; NFC_QC; N # Lo [9] HEBREW LETTER TSADI WITH DAGESH..HEBREW LET 1B35 ; NFC_QC; M # Mc BALINESE VOWEL SIGN TEDUNG 3099..309A ; NFC_QC; M # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK 110BA ; NFC_QC; M # Mn KAITHI SIGN NUKTA +11127 ; NFC_QC; M # Mn CHAKMA VOWEL SIGN A -# Total code points: 103 +# Total code points: 104 # ================================================ @@ -1132,14 +1134,14 @@ FB46..FB4E ; NFC_QC; N # Lo [9] HEBREW LETTER TSADI WITH DAGESH..HEBREW LET 00A0 ; NFKD_QC; N # Zs NO-BREAK SPACE 00A8 ; NFKD_QC; N # Sk DIAERESIS -00AA ; NFKD_QC; N # L& FEMININE ORDINAL 
INDICATOR +00AA ; NFKD_QC; N # Lo FEMININE ORDINAL INDICATOR 00AF ; NFKD_QC; N # Sk MACRON 00B2..00B3 ; NFKD_QC; N # No [2] SUPERSCRIPT TWO..SUPERSCRIPT THREE 00B4 ; NFKD_QC; N # Sk ACUTE ACCENT 00B5 ; NFKD_QC; N # L& MICRO SIGN 00B8 ; NFKD_QC; N # Sk CEDILLA 00B9 ; NFKD_QC; N # No SUPERSCRIPT ONE -00BA ; NFKD_QC; N # L& MASCULINE ORDINAL INDICATOR +00BA ; NFKD_QC; N # Lo MASCULINE ORDINAL INDICATOR 00BC..00BE ; NFKD_QC; N # No [3] VULGAR FRACTION ONE QUARTER..VULGAR FRACTION THREE QUARTERS 00C0..00C5 ; NFKD_QC; N # L& [6] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER A WITH RING ABOVE 00C7..00CF ; NFKD_QC; N # L& [9] LATIN CAPITAL LETTER C WITH CEDILLA..LATIN CAPITAL LETTER I WITH DIAERESIS @@ -1267,8 +1269,7 @@ FB46..FB4E ; NFC_QC; N # Lo [9] HEBREW LETTER TSADI WITH DAGESH..HEBREW LET 1D2C..1D2E ; NFKD_QC; N # Lm [3] MODIFIER LETTER CAPITAL A..MODIFIER LETTER CAPITAL B 1D30..1D3A ; NFKD_QC; N # Lm [11] MODIFIER LETTER CAPITAL D..MODIFIER LETTER CAPITAL N 1D3C..1D4D ; NFKD_QC; N # Lm [18] MODIFIER LETTER CAPITAL O..MODIFIER LETTER SMALL G -1D4F..1D61 ; NFKD_QC; N # Lm [19] MODIFIER LETTER SMALL K..MODIFIER LETTER SMALL CHI -1D62..1D6A ; NFKD_QC; N # L& [9] LATIN SUBSCRIPT SMALL LETTER I..GREEK SUBSCRIPT SMALL LETTER CHI +1D4F..1D6A ; NFKD_QC; N # Lm [28] MODIFIER LETTER SMALL K..GREEK SUBSCRIPT SMALL LETTER CHI 1D78 ; NFKD_QC; N # Lm MODIFIER LETTER CYRILLIC EN 1D9B..1DBF ; NFKD_QC; N # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA 1E00..1E9B ; NFKD_QC; N # L& [156] LATIN CAPITAL LETTER A WITH RING BELOW..LATIN SMALL LETTER LONG S WITH DOT ABOVE @@ -1383,8 +1384,7 @@ FB46..FB4E ; NFC_QC; N # Lo [9] HEBREW LETTER TSADI WITH DAGESH..HEBREW LET 2A0C ; NFKD_QC; N # Sm QUADRUPLE INTEGRAL OPERATOR 2A74..2A76 ; NFKD_QC; N # Sm [3] DOUBLE COLON EQUAL..THREE CONSECUTIVE EQUALS SIGNS 2ADC ; NFKD_QC; N # Sm FORKING -2C7C ; NFKD_QC; N # L& LATIN SUBSCRIPT SMALL LETTER J -2C7D ; NFKD_QC; N # Lm MODIFIER LETTER CAPITAL V +2C7C..2C7D ; 
NFKD_QC; N # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V 2D6F ; NFKD_QC; N # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK 2E9F ; NFKD_QC; N # So CJK RADICAL MOTHER 2EF3 ; NFKD_QC; N # So CJK RADICAL C-SIMPLIFIED TURTLE @@ -1455,6 +1455,7 @@ FB46..FB4E ; NFC_QC; N # Lo [9] HEBREW LETTER TSADI WITH DAGESH..HEBREW LET 32C0..32FE ; NFKD_QC; N # So [63] IDEOGRAPHIC TELEGRAPH SYMBOL FOR JANUARY..CIRCLED KATAKANA WO 3300..33FF ; NFKD_QC; N # So [256] SQUARE APAATO..SQUARE GAL A770 ; NFKD_QC; N # Lm MODIFIER LETTER US +A7F8..A7F9 ; NFKD_QC; N # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE AC00..D7A3 ; NFKD_QC; N # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH F900..FA0D ; NFKD_QC; N # Lo [270] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA0D FA10 ; NFKD_QC; N # Lo CJK COMPATIBILITY IDEOGRAPH-FA10 @@ -1463,8 +1464,7 @@ FA15..FA1E ; NFKD_QC; N # Lo [10] CJK COMPATIBILITY IDEOGRAPH-FA15..CJK COMP FA20 ; NFKD_QC; N # Lo CJK COMPATIBILITY IDEOGRAPH-FA20 FA22 ; NFKD_QC; N # Lo CJK COMPATIBILITY IDEOGRAPH-FA22 FA25..FA26 ; NFKD_QC; N # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA25..CJK COMPATIBILITY IDEOGRAPH-FA26 -FA2A..FA2D ; NFKD_QC; N # Lo [4] CJK COMPATIBILITY IDEOGRAPH-FA2A..CJK COMPATIBILITY IDEOGRAPH-FA2D -FA30..FA6D ; NFKD_QC; N # Lo [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D +FA2A..FA6D ; NFKD_QC; N # Lo [68] CJK COMPATIBILITY IDEOGRAPH-FA2A..CJK COMPATIBILITY IDEOGRAPH-FA6D FA70..FAD9 ; NFKD_QC; N # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 FB00..FB06 ; NFKD_QC; N # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST FB13..FB17 ; NFKD_QC; N # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH @@ -1580,6 +1580,7 @@ FFED..FFEE ; NFKD_QC; N # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CI 1109A ; NFKD_QC; N # Lo KAITHI LETTER DDDHA 1109C ; NFKD_QC; N # Lo KAITHI LETTER RHA 110AB ; NFKD_QC; N # Lo KAITHI 
LETTER VA +1112E..1112F ; NFKD_QC; N # Mn [2] CHAKMA VOWEL SIGN O..CHAKMA VOWEL SIGN AU 1D15E..1D164 ; NFKD_QC; N # So [7] MUSICAL SYMBOL HALF NOTE..MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE 1D1BB..1D1C0 ; NFKD_QC; N # So [6] MUSICAL SYMBOL MINIMA..MUSICAL SYMBOL FUSA BLACK 1D400..1D454 ; NFKD_QC; N # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G @@ -1623,9 +1624,43 @@ FFED..FFEE ; NFKD_QC; N # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CI 1D7C3 ; NFKD_QC; N # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL 1D7C4..1D7CB ; NFKD_QC; N # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA 1D7CE..1D7FF ; NFKD_QC; N # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE +1EE00..1EE03 ; NFKD_QC; N # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; NFKD_QC; N # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; NFKD_QC; N # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; NFKD_QC; N # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; NFKD_QC; N # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; NFKD_QC; N # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; NFKD_QC; N # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; NFKD_QC; N # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; NFKD_QC; N # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; NFKD_QC; N # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; NFKD_QC; N # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; NFKD_QC; N # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; NFKD_QC; N # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; NFKD_QC; N # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; NFKD_QC; N # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; NFKD_QC; N # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; NFKD_QC; N # Lo 
ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; NFKD_QC; N # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; NFKD_QC; N # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; NFKD_QC; N # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; NFKD_QC; N # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; NFKD_QC; N # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; NFKD_QC; N # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; NFKD_QC; N # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; NFKD_QC; N # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; NFKD_QC; N # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; NFKD_QC; N # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; NFKD_QC; N # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; NFKD_QC; N # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; NFKD_QC; N # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; NFKD_QC; N # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; NFKD_QC; N # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; NFKD_QC; N # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN 1F100..1F10A ; NFKD_QC; N # No [11] DIGIT ZERO FULL STOP..DIGIT NINE COMMA 1F110..1F12E ; NFKD_QC; N # So [31] PARENTHESIZED LATIN CAPITAL LETTER A..CIRCLED WZ 1F130..1F14F ; NFKD_QC; N # So [32] SQUARED LATIN CAPITAL LETTER A..SQUARED WC +1F16A..1F16B ; NFKD_QC; N # So [2] RAISED MC SIGN..RAISED MD SIGN 1F190 ; NFKD_QC; N # So SQUARE DJ 1F200..1F202 ; NFKD_QC; N # So [3] SQUARE HIRAGANA HOKA..SQUARED KATAKANA SA 1F210..1F23A ; NFKD_QC; N # So [43] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED 
IDEOGRAPH-55B6 @@ -1633,7 +1668,7 @@ FFED..FFEE ; NFKD_QC; N # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CI 1F250..1F251 ; NFKD_QC; N # So [2] CIRCLED IDEOGRAPH ADVANTAGE..CIRCLED IDEOGRAPH ACCEPT 2F800..2FA1D ; NFKD_QC; N # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D -# Total code points: 16731 +# Total code points: 16880 # ================================================ @@ -1650,14 +1685,14 @@ FFED..FFEE ; NFKD_QC; N # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CI 00A0 ; NFKC_QC; N # Zs NO-BREAK SPACE 00A8 ; NFKC_QC; N # Sk DIAERESIS -00AA ; NFKC_QC; N # L& FEMININE ORDINAL INDICATOR +00AA ; NFKC_QC; N # Lo FEMININE ORDINAL INDICATOR 00AF ; NFKC_QC; N # Sk MACRON 00B2..00B3 ; NFKC_QC; N # No [2] SUPERSCRIPT TWO..SUPERSCRIPT THREE 00B4 ; NFKC_QC; N # Sk ACUTE ACCENT 00B5 ; NFKC_QC; N # L& MICRO SIGN 00B8 ; NFKC_QC; N # Sk CEDILLA 00B9 ; NFKC_QC; N # No SUPERSCRIPT ONE -00BA ; NFKC_QC; N # L& MASCULINE ORDINAL INDICATOR +00BA ; NFKC_QC; N # Lo MASCULINE ORDINAL INDICATOR 00BC..00BE ; NFKC_QC; N # No [3] VULGAR FRACTION ONE QUARTER..VULGAR FRACTION THREE QUARTERS 0132..0133 ; NFKC_QC; N # L& [2] LATIN CAPITAL LIGATURE IJ..LATIN SMALL LIGATURE IJ 013F..0140 ; NFKC_QC; N # L& [2] LATIN CAPITAL LETTER L WITH MIDDLE DOT..LATIN SMALL LETTER L WITH MIDDLE DOT @@ -1712,8 +1747,7 @@ FFED..FFEE ; NFKD_QC; N # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CI 1D2C..1D2E ; NFKC_QC; N # Lm [3] MODIFIER LETTER CAPITAL A..MODIFIER LETTER CAPITAL B 1D30..1D3A ; NFKC_QC; N # Lm [11] MODIFIER LETTER CAPITAL D..MODIFIER LETTER CAPITAL N 1D3C..1D4D ; NFKC_QC; N # Lm [18] MODIFIER LETTER CAPITAL O..MODIFIER LETTER SMALL G -1D4F..1D61 ; NFKC_QC; N # Lm [19] MODIFIER LETTER SMALL K..MODIFIER LETTER SMALL CHI -1D62..1D6A ; NFKC_QC; N # L& [9] LATIN SUBSCRIPT SMALL LETTER I..GREEK SUBSCRIPT SMALL LETTER CHI +1D4F..1D6A ; NFKC_QC; N # Lm [28] MODIFIER LETTER SMALL K..GREEK SUBSCRIPT SMALL LETTER CHI 1D78 ; NFKC_QC; N # Lm MODIFIER LETTER CYRILLIC 
EN 1D9B..1DBF ; NFKC_QC; N # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA 1E9A..1E9B ; NFKC_QC; N # L& [2] LATIN SMALL LETTER A WITH RIGHT HALF RING..LATIN SMALL LETTER LONG S WITH DOT ABOVE @@ -1801,8 +1835,7 @@ FFED..FFEE ; NFKD_QC; N # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CI 2A0C ; NFKC_QC; N # Sm QUADRUPLE INTEGRAL OPERATOR 2A74..2A76 ; NFKC_QC; N # Sm [3] DOUBLE COLON EQUAL..THREE CONSECUTIVE EQUALS SIGNS 2ADC ; NFKC_QC; N # Sm FORKING -2C7C ; NFKC_QC; N # L& LATIN SUBSCRIPT SMALL LETTER J -2C7D ; NFKC_QC; N # Lm MODIFIER LETTER CAPITAL V +2C7C..2C7D ; NFKC_QC; N # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V 2D6F ; NFKC_QC; N # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK 2E9F ; NFKC_QC; N # So CJK RADICAL MOTHER 2EF3 ; NFKC_QC; N # So CJK RADICAL C-SIMPLIFIED TURTLE @@ -1828,6 +1861,7 @@ FFED..FFEE ; NFKD_QC; N # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CI 32C0..32FE ; NFKC_QC; N # So [63] IDEOGRAPHIC TELEGRAPH SYMBOL FOR JANUARY..CIRCLED KATAKANA WO 3300..33FF ; NFKC_QC; N # So [256] SQUARE APAATO..SQUARE GAL A770 ; NFKC_QC; N # Lm MODIFIER LETTER US +A7F8..A7F9 ; NFKC_QC; N # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE F900..FA0D ; NFKC_QC; N # Lo [270] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA0D FA10 ; NFKC_QC; N # Lo CJK COMPATIBILITY IDEOGRAPH-FA10 FA12 ; NFKC_QC; N # Lo CJK COMPATIBILITY IDEOGRAPH-FA12 @@ -1835,8 +1869,7 @@ FA15..FA1E ; NFKC_QC; N # Lo [10] CJK COMPATIBILITY IDEOGRAPH-FA15..CJK COMP FA20 ; NFKC_QC; N # Lo CJK COMPATIBILITY IDEOGRAPH-FA20 FA22 ; NFKC_QC; N # Lo CJK COMPATIBILITY IDEOGRAPH-FA22 FA25..FA26 ; NFKC_QC; N # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA25..CJK COMPATIBILITY IDEOGRAPH-FA26 -FA2A..FA2D ; NFKC_QC; N # Lo [4] CJK COMPATIBILITY IDEOGRAPH-FA2A..CJK COMPATIBILITY IDEOGRAPH-FA2D -FA30..FA6D ; NFKC_QC; N # Lo [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D +FA2A..FA6D ; NFKC_QC; N 
# Lo [68] CJK COMPATIBILITY IDEOGRAPH-FA2A..CJK COMPATIBILITY IDEOGRAPH-FA6D FA70..FAD9 ; NFKC_QC; N # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 FB00..FB06 ; NFKC_QC; N # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST FB13..FB17 ; NFKC_QC; N # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH @@ -1992,9 +2025,43 @@ FFED..FFEE ; NFKC_QC; N # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CI 1D7C3 ; NFKC_QC; N # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL 1D7C4..1D7CB ; NFKC_QC; N # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA 1D7CE..1D7FF ; NFKC_QC; N # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE +1EE00..1EE03 ; NFKC_QC; N # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; NFKC_QC; N # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; NFKC_QC; N # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; NFKC_QC; N # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; NFKC_QC; N # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; NFKC_QC; N # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; NFKC_QC; N # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; NFKC_QC; N # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; NFKC_QC; N # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; NFKC_QC; N # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; NFKC_QC; N # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; NFKC_QC; N # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; NFKC_QC; N # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; NFKC_QC; N # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; NFKC_QC; N # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; NFKC_QC; N # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; NFKC_QC; N # Lo ARABIC 
MATHEMATICAL TAILED KHAH +1EE59 ; NFKC_QC; N # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; NFKC_QC; N # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; NFKC_QC; N # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; NFKC_QC; N # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; NFKC_QC; N # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; NFKC_QC; N # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; NFKC_QC; N # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; NFKC_QC; N # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; NFKC_QC; N # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; NFKC_QC; N # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; NFKC_QC; N # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; NFKC_QC; N # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; NFKC_QC; N # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; NFKC_QC; N # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; NFKC_QC; N # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; NFKC_QC; N # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN 1F100..1F10A ; NFKC_QC; N # No [11] DIGIT ZERO FULL STOP..DIGIT NINE COMMA 1F110..1F12E ; NFKC_QC; N # So [31] PARENTHESIZED LATIN CAPITAL LETTER A..CIRCLED WZ 1F130..1F14F ; NFKC_QC; N # So [32] SQUARED LATIN CAPITAL LETTER A..SQUARED WC +1F16A..1F16B ; NFKC_QC; N # So [2] RAISED MC SIGN..RAISED MD SIGN 1F190 ; NFKC_QC; N # So SQUARE DJ 1F200..1F202 ; NFKC_QC; N # So [3] SQUARE HIRAGANA HOKA..SQUARED KATAKANA SA 1F210..1F23A ; NFKC_QC; N # So [43] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED 
IDEOGRAPH-55B6 @@ -2002,7 +2069,7 @@ FFED..FFEE ; NFKC_QC; N # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CI 1F250..1F251 ; NFKC_QC; N # So [2] CIRCLED IDEOGRAPH ADVANTAGE..CIRCLED IDEOGRAPH ACCEPT 2F800..2FA1D ; NFKC_QC; N # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D -# Total code points: 4640 +# Total code points: 4787 # ================================================ @@ -2043,8 +2110,9 @@ FFED..FFEE ; NFKC_QC; N # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CI 1B35 ; NFKC_QC; M # Mc BALINESE VOWEL SIGN TEDUNG 3099..309A ; NFKC_QC; M # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK 110BA ; NFKC_QC; M # Mn KAITHI SIGN NUKTA +11127 ; NFKC_QC; M # Mn CHAKMA VOWEL SIGN A -# Total code points: 103 +# Total code points: 104 # ================================================ @@ -2269,10 +2337,11 @@ FB46..FB4E ; Expands_On_NFD # Lo [9] HEBREW LETTER TSADI WITH DAGESH..HEBRE 1109A ; Expands_On_NFD # Lo KAITHI LETTER DDDHA 1109C ; Expands_On_NFD # Lo KAITHI LETTER RHA 110AB ; Expands_On_NFD # Lo KAITHI LETTER VA +1112E..1112F ; Expands_On_NFD # Mn [2] CHAKMA VOWEL SIGN O..CHAKMA VOWEL SIGN AU 1D15E..1D164 ; Expands_On_NFD # So [7] MUSICAL SYMBOL HALF NOTE..MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE 1D1BB..1D1C0 ; Expands_On_NFD # So [6] MUSICAL SYMBOL MINIMA..MUSICAL SYMBOL FUSA BLACK -# Total code points: 12206 +# Total code points: 12208 # ================================================ @@ -2617,18 +2686,20 @@ FFE3 ; Expands_On_NFKD # Sk FULLWIDTH MACRON 1109A ; Expands_On_NFKD # Lo KAITHI LETTER DDDHA 1109C ; Expands_On_NFKD # Lo KAITHI LETTER RHA 110AB ; Expands_On_NFKD # Lo KAITHI LETTER VA +1112E..1112F ; Expands_On_NFKD # Mn [2] CHAKMA VOWEL SIGN O..CHAKMA VOWEL SIGN AU 1D15E..1D164 ; Expands_On_NFKD # So [7] MUSICAL SYMBOL HALF NOTE..MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE 1D1BB..1D1C0 ; Expands_On_NFKD # So [6] MUSICAL SYMBOL MINIMA..MUSICAL SYMBOL FUSA 
BLACK 1F100..1F10A ; Expands_On_NFKD # No [11] DIGIT ZERO FULL STOP..DIGIT NINE COMMA 1F110..1F12A ; Expands_On_NFKD # So [27] PARENTHESIZED LATIN CAPITAL LETTER A..TORTOISE SHELL BRACKETED LATIN CAPITAL LETTER S 1F12D..1F12E ; Expands_On_NFKD # So [2] CIRCLED CD..CIRCLED WZ 1F14A..1F14F ; Expands_On_NFKD # So [6] SQUARED HV..SQUARED WC +1F16A..1F16B ; Expands_On_NFKD # So [2] RAISED MC SIGN..RAISED MD SIGN 1F190 ; Expands_On_NFKD # So SQUARE DJ 1F200..1F201 ; Expands_On_NFKD # So [2] SQUARE HIRAGANA HOKA..SQUARED KATAKANA KOKO 1F213 ; Expands_On_NFKD # So SQUARED KATAKANA DE 1F240..1F248 ; Expands_On_NFKD # So [9] TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557 -# Total code points: 13376 +# Total code points: 13380 # ================================================ @@ -2761,11 +2832,12 @@ FFE3 ; Expands_On_NFKC # Sk FULLWIDTH MACRON 1F110..1F12A ; Expands_On_NFKC # So [27] PARENTHESIZED LATIN CAPITAL LETTER A..TORTOISE SHELL BRACKETED LATIN CAPITAL LETTER S 1F12D..1F12E ; Expands_On_NFKC # So [2] CIRCLED CD..CIRCLED WZ 1F14A..1F14F ; Expands_On_NFKC # So [6] SQUARED HV..SQUARED WC +1F16A..1F16B ; Expands_On_NFKC # So [2] RAISED MC SIGN..RAISED MD SIGN 1F190 ; Expands_On_NFKC # So SQUARE DJ 1F200..1F201 ; Expands_On_NFKC # So [2] SQUARE HIRAGANA HOKA..SQUARED KATAKANA KOKO 1F240..1F248 ; Expands_On_NFKC # So [9] TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557 -# Total code points: 1233 +# Total code points: 1235 # ================================================ @@ -2810,7 +2882,7 @@ FFE3 ; Expands_On_NFKC # Sk FULLWIDTH MACRON 005A ; NFKC_CF; 007A # L& LATIN CAPITAL LETTER Z 00A0 ; NFKC_CF; 0020 # Zs NO-BREAK SPACE 00A8 ; NFKC_CF; 0020 0308 # Sk DIAERESIS -00AA ; NFKC_CF; 0061 # L& FEMININE ORDINAL INDICATOR +00AA ; NFKC_CF; 0061 # Lo FEMININE ORDINAL INDICATOR 00AD ; NFKC_CF; # Cf SOFT HYPHEN 00AF ; NFKC_CF; 0020 0304 # Sk MACRON 00B2 ; NFKC_CF; 0032 
# No SUPERSCRIPT TWO @@ -2819,7 +2891,7 @@ FFE3 ; Expands_On_NFKC # Sk FULLWIDTH MACRON 00B5 ; NFKC_CF; 03BC # L& MICRO SIGN 00B8 ; NFKC_CF; 0020 0327 # Sk CEDILLA 00B9 ; NFKC_CF; 0031 # No SUPERSCRIPT ONE -00BA ; NFKC_CF; 006F # L& MASCULINE ORDINAL INDICATOR +00BA ; NFKC_CF; 006F # Lo MASCULINE ORDINAL INDICATOR 00BC ; NFKC_CF; 0031 2044 0034 # No VULGAR FRACTION ONE QUARTER 00BD ; NFKC_CF; 0031 2044 0032 # No VULGAR FRACTION ONE HALF 00BE ; NFKC_CF; 0033 2044 0034 # No VULGAR FRACTION THREE QUARTERS @@ -3390,9 +3462,11 @@ FFE3 ; Expands_On_NFKC # Sk FULLWIDTH MACRON 10C3 ; NFKC_CF; 2D23 # L& GEORGIAN CAPITAL LETTER WE 10C4 ; NFKC_CF; 2D24 # L& GEORGIAN CAPITAL LETTER HAR 10C5 ; NFKC_CF; 2D25 # L& GEORGIAN CAPITAL LETTER HOE +10C7 ; NFKC_CF; 2D27 # L& GEORGIAN CAPITAL LETTER YN +10CD ; NFKC_CF; 2D2D # L& GEORGIAN CAPITAL LETTER AEN 10FC ; NFKC_CF; 10DC # Lm MODIFIER LETTER GEORGIAN NAR 115F..1160 ; NFKC_CF; # Lo [2] HANGUL CHOSEONG FILLER..HANGUL JUNGSEONG FILLER -17B4..17B5 ; NFKC_CF; # Cf [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA +17B4..17B5 ; NFKC_CF; # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA 180B..180D ; NFKC_CF; # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE 1D2C ; NFKC_CF; 0061 # Lm MODIFIER LETTER CAPITAL A 1D2D ; NFKC_CF; 00E6 # Lm MODIFIER LETTER CAPITAL AE @@ -3445,15 +3519,15 @@ FFE3 ; Expands_On_NFKC # Sk FULLWIDTH MACRON 1D5F ; NFKC_CF; 03B4 # Lm MODIFIER LETTER SMALL DELTA 1D60 ; NFKC_CF; 03C6 # Lm MODIFIER LETTER SMALL GREEK PHI 1D61 ; NFKC_CF; 03C7 # Lm MODIFIER LETTER SMALL CHI -1D62 ; NFKC_CF; 0069 # L& LATIN SUBSCRIPT SMALL LETTER I -1D63 ; NFKC_CF; 0072 # L& LATIN SUBSCRIPT SMALL LETTER R -1D64 ; NFKC_CF; 0075 # L& LATIN SUBSCRIPT SMALL LETTER U -1D65 ; NFKC_CF; 0076 # L& LATIN SUBSCRIPT SMALL LETTER V -1D66 ; NFKC_CF; 03B2 # L& GREEK SUBSCRIPT SMALL LETTER BETA -1D67 ; NFKC_CF; 03B3 # L& GREEK SUBSCRIPT SMALL LETTER GAMMA -1D68 ; NFKC_CF; 03C1 # L& GREEK SUBSCRIPT SMALL 
LETTER RHO -1D69 ; NFKC_CF; 03C6 # L& GREEK SUBSCRIPT SMALL LETTER PHI -1D6A ; NFKC_CF; 03C7 # L& GREEK SUBSCRIPT SMALL LETTER CHI +1D62 ; NFKC_CF; 0069 # Lm LATIN SUBSCRIPT SMALL LETTER I +1D63 ; NFKC_CF; 0072 # Lm LATIN SUBSCRIPT SMALL LETTER R +1D64 ; NFKC_CF; 0075 # Lm LATIN SUBSCRIPT SMALL LETTER U +1D65 ; NFKC_CF; 0076 # Lm LATIN SUBSCRIPT SMALL LETTER V +1D66 ; NFKC_CF; 03B2 # Lm GREEK SUBSCRIPT SMALL LETTER BETA +1D67 ; NFKC_CF; 03B3 # Lm GREEK SUBSCRIPT SMALL LETTER GAMMA +1D68 ; NFKC_CF; 03C1 # Lm GREEK SUBSCRIPT SMALL LETTER RHO +1D69 ; NFKC_CF; 03C6 # Lm GREEK SUBSCRIPT SMALL LETTER PHI +1D6A ; NFKC_CF; 03C7 # Lm GREEK SUBSCRIPT SMALL LETTER CHI 1D78 ; NFKC_CF; 043D # Lm MODIFIER LETTER CYRILLIC EN 1D9B ; NFKC_CF; 0252 # Lm MODIFIER LETTER SMALL TURNED ALPHA 1D9C ; NFKC_CF; 0063 # Lm MODIFIER LETTER SMALL C @@ -4148,7 +4222,7 @@ FFE3 ; Expands_On_NFKC # Sk FULLWIDTH MACRON 2C70 ; NFKC_CF; 0252 # L& LATIN CAPITAL LETTER TURNED ALPHA 2C72 ; NFKC_CF; 2C73 # L& LATIN CAPITAL LETTER W WITH HOOK 2C75 ; NFKC_CF; 2C76 # L& LATIN CAPITAL LETTER HALF H -2C7C ; NFKC_CF; 006A # L& LATIN SUBSCRIPT SMALL LETTER J +2C7C ; NFKC_CF; 006A # Lm LATIN SUBSCRIPT SMALL LETTER J 2C7D ; NFKC_CF; 0076 # Lm MODIFIER LETTER CAPITAL V 2C7E ; NFKC_CF; 023F # L& LATIN CAPITAL LETTER S WITH SWASH TAIL 2C7F ; NFKC_CF; 0240 # L& LATIN CAPITAL LETTER Z WITH SWASH TAIL @@ -4204,6 +4278,7 @@ FFE3 ; Expands_On_NFKC # Sk FULLWIDTH MACRON 2CE2 ; NFKC_CF; 2CE3 # L& COPTIC CAPITAL LETTER OLD NUBIAN WAU 2CEB ; NFKC_CF; 2CEC # L& COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI 2CED ; NFKC_CF; 2CEE # L& COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA +2CF2 ; NFKC_CF; 2CF3 # L& COPTIC CAPITAL LETTER BOHAIRIC KHEI 2D6F ; NFKC_CF; 2D61 # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK 2E9F ; NFKC_CF; 6BCD # So CJK RADICAL MOTHER 2EF3 ; NFKC_CF; 9F9F # So CJK RADICAL C-SIMPLIFIED TURTLE @@ -5124,11 +5199,15 @@ A786 ; NFKC_CF; A787 # L& LATIN CAPITAL LETTER INSULAR A78B ; NFKC_CF; A78C # L& LATIN CAPITAL LETTER 
SALTILLO A78D ; NFKC_CF; 0265 # L& LATIN CAPITAL LETTER TURNED H A790 ; NFKC_CF; A791 # L& LATIN CAPITAL LETTER N WITH DESCENDER +A792 ; NFKC_CF; A793 # L& LATIN CAPITAL LETTER C WITH BAR A7A0 ; NFKC_CF; A7A1 # L& LATIN CAPITAL LETTER G WITH OBLIQUE STROKE A7A2 ; NFKC_CF; A7A3 # L& LATIN CAPITAL LETTER K WITH OBLIQUE STROKE A7A4 ; NFKC_CF; A7A5 # L& LATIN CAPITAL LETTER N WITH OBLIQUE STROKE A7A6 ; NFKC_CF; A7A7 # L& LATIN CAPITAL LETTER R WITH OBLIQUE STROKE A7A8 ; NFKC_CF; A7A9 # L& LATIN CAPITAL LETTER S WITH OBLIQUE STROKE +A7AA ; NFKC_CF; 0266 # L& LATIN CAPITAL LETTER H WITH HOOK +A7F8 ; NFKC_CF; 0127 # Lm MODIFIER LETTER CAPITAL H WITH STROKE +A7F9 ; NFKC_CF; 0153 # Lm MODIFIER LETTER SMALL LIGATURE OE F900 ; NFKC_CF; 8C48 # Lo CJK COMPATIBILITY IDEOGRAPH-F900 F901 ; NFKC_CF; 66F4 # Lo CJK COMPATIBILITY IDEOGRAPH-F901 F902 ; NFKC_CF; 8ECA # Lo CJK COMPATIBILITY IDEOGRAPH-F902 @@ -5418,6 +5497,8 @@ FA2A ; NFKC_CF; 98EF # Lo CJK COMPATIBILITY IDEOGRAPH-F FA2B ; NFKC_CF; 98FC # Lo CJK COMPATIBILITY IDEOGRAPH-FA2B FA2C ; NFKC_CF; 9928 # Lo CJK COMPATIBILITY IDEOGRAPH-FA2C FA2D ; NFKC_CF; 9DB4 # Lo CJK COMPATIBILITY IDEOGRAPH-FA2D +FA2E ; NFKC_CF; 90DE # Lo CJK COMPATIBILITY IDEOGRAPH-FA2E +FA2F ; NFKC_CF; 96B7 # Lo CJK COMPATIBILITY IDEOGRAPH-FA2F FA30 ; NFKC_CF; 4FAE # Lo CJK COMPATIBILITY IDEOGRAPH-FA30 FA31 ; NFKC_CF; 50E7 # Lo CJK COMPATIBILITY IDEOGRAPH-FA31 FA32 ; NFKC_CF; 514D # Lo CJK COMPATIBILITY IDEOGRAPH-FA32 @@ -7507,6 +7588,147 @@ FFF0..FFF8 ; NFKC_CF; # Cn [9] <reserved-FFF0>..<reserved-FF 1D7FD ; NFKC_CF; 0037 # Nd MATHEMATICAL MONOSPACE DIGIT SEVEN 1D7FE ; NFKC_CF; 0038 # Nd MATHEMATICAL MONOSPACE DIGIT EIGHT 1D7FF ; NFKC_CF; 0039 # Nd MATHEMATICAL MONOSPACE DIGIT NINE +1EE00 ; NFKC_CF; 0627 # Lo ARABIC MATHEMATICAL ALEF +1EE01 ; NFKC_CF; 0628 # Lo ARABIC MATHEMATICAL BEH +1EE02 ; NFKC_CF; 062C # Lo ARABIC MATHEMATICAL JEEM +1EE03 ; NFKC_CF; 062F # Lo ARABIC MATHEMATICAL DAL +1EE05 ; NFKC_CF; 0648 # Lo ARABIC MATHEMATICAL WAW +1EE06 ; NFKC_CF; 
0632 # Lo ARABIC MATHEMATICAL ZAIN +1EE07 ; NFKC_CF; 062D # Lo ARABIC MATHEMATICAL HAH +1EE08 ; NFKC_CF; 0637 # Lo ARABIC MATHEMATICAL TAH +1EE09 ; NFKC_CF; 064A # Lo ARABIC MATHEMATICAL YEH +1EE0A ; NFKC_CF; 0643 # Lo ARABIC MATHEMATICAL KAF +1EE0B ; NFKC_CF; 0644 # Lo ARABIC MATHEMATICAL LAM +1EE0C ; NFKC_CF; 0645 # Lo ARABIC MATHEMATICAL MEEM +1EE0D ; NFKC_CF; 0646 # Lo ARABIC MATHEMATICAL NOON +1EE0E ; NFKC_CF; 0633 # Lo ARABIC MATHEMATICAL SEEN +1EE0F ; NFKC_CF; 0639 # Lo ARABIC MATHEMATICAL AIN +1EE10 ; NFKC_CF; 0641 # Lo ARABIC MATHEMATICAL FEH +1EE11 ; NFKC_CF; 0635 # Lo ARABIC MATHEMATICAL SAD +1EE12 ; NFKC_CF; 0642 # Lo ARABIC MATHEMATICAL QAF +1EE13 ; NFKC_CF; 0631 # Lo ARABIC MATHEMATICAL REH +1EE14 ; NFKC_CF; 0634 # Lo ARABIC MATHEMATICAL SHEEN +1EE15 ; NFKC_CF; 062A # Lo ARABIC MATHEMATICAL TEH +1EE16 ; NFKC_CF; 062B # Lo ARABIC MATHEMATICAL THEH +1EE17 ; NFKC_CF; 062E # Lo ARABIC MATHEMATICAL KHAH +1EE18 ; NFKC_CF; 0630 # Lo ARABIC MATHEMATICAL THAL +1EE19 ; NFKC_CF; 0636 # Lo ARABIC MATHEMATICAL DAD +1EE1A ; NFKC_CF; 0638 # Lo ARABIC MATHEMATICAL ZAH +1EE1B ; NFKC_CF; 063A # Lo ARABIC MATHEMATICAL GHAIN +1EE1C ; NFKC_CF; 066E # Lo ARABIC MATHEMATICAL DOTLESS BEH +1EE1D ; NFKC_CF; 06BA # Lo ARABIC MATHEMATICAL DOTLESS NOON +1EE1E ; NFKC_CF; 06A1 # Lo ARABIC MATHEMATICAL DOTLESS FEH +1EE1F ; NFKC_CF; 066F # Lo ARABIC MATHEMATICAL DOTLESS QAF +1EE21 ; NFKC_CF; 0628 # Lo ARABIC MATHEMATICAL INITIAL BEH +1EE22 ; NFKC_CF; 062C # Lo ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; NFKC_CF; 0647 # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; NFKC_CF; 062D # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29 ; NFKC_CF; 064A # Lo ARABIC MATHEMATICAL INITIAL YEH +1EE2A ; NFKC_CF; 0643 # Lo ARABIC MATHEMATICAL INITIAL KAF +1EE2B ; NFKC_CF; 0644 # Lo ARABIC MATHEMATICAL INITIAL LAM +1EE2C ; NFKC_CF; 0645 # Lo ARABIC MATHEMATICAL INITIAL MEEM +1EE2D ; NFKC_CF; 0646 # Lo ARABIC MATHEMATICAL INITIAL NOON +1EE2E ; NFKC_CF; 0633 # Lo ARABIC MATHEMATICAL INITIAL SEEN +1EE2F ; NFKC_CF; 
0639 # Lo ARABIC MATHEMATICAL INITIAL AIN +1EE30 ; NFKC_CF; 0641 # Lo ARABIC MATHEMATICAL INITIAL FEH +1EE31 ; NFKC_CF; 0635 # Lo ARABIC MATHEMATICAL INITIAL SAD +1EE32 ; NFKC_CF; 0642 # Lo ARABIC MATHEMATICAL INITIAL QAF +1EE34 ; NFKC_CF; 0634 # Lo ARABIC MATHEMATICAL INITIAL SHEEN +1EE35 ; NFKC_CF; 062A # Lo ARABIC MATHEMATICAL INITIAL TEH +1EE36 ; NFKC_CF; 062B # Lo ARABIC MATHEMATICAL INITIAL THEH +1EE37 ; NFKC_CF; 062E # Lo ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; NFKC_CF; 0636 # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; NFKC_CF; 063A # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; NFKC_CF; 062C # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; NFKC_CF; 062D # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; NFKC_CF; 064A # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; NFKC_CF; 0644 # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D ; NFKC_CF; 0646 # Lo ARABIC MATHEMATICAL TAILED NOON +1EE4E ; NFKC_CF; 0633 # Lo ARABIC MATHEMATICAL TAILED SEEN +1EE4F ; NFKC_CF; 0639 # Lo ARABIC MATHEMATICAL TAILED AIN +1EE51 ; NFKC_CF; 0635 # Lo ARABIC MATHEMATICAL TAILED SAD +1EE52 ; NFKC_CF; 0642 # Lo ARABIC MATHEMATICAL TAILED QAF +1EE54 ; NFKC_CF; 0634 # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; NFKC_CF; 062E # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; NFKC_CF; 0636 # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; NFKC_CF; 063A # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; NFKC_CF; 06BA # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; NFKC_CF; 066F # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61 ; NFKC_CF; 0628 # Lo ARABIC MATHEMATICAL STRETCHED BEH +1EE62 ; NFKC_CF; 062C # Lo ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; NFKC_CF; 0647 # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67 ; NFKC_CF; 062D # Lo ARABIC MATHEMATICAL STRETCHED HAH +1EE68 ; NFKC_CF; 0637 # Lo ARABIC MATHEMATICAL STRETCHED TAH +1EE69 ; NFKC_CF; 064A # Lo ARABIC MATHEMATICAL STRETCHED YEH +1EE6A ; NFKC_CF; 0643 # Lo ARABIC MATHEMATICAL STRETCHED KAF +1EE6C ; NFKC_CF; 0645 # Lo ARABIC MATHEMATICAL STRETCHED MEEM 
+1EE6D ; NFKC_CF; 0646 # Lo ARABIC MATHEMATICAL STRETCHED NOON +1EE6E ; NFKC_CF; 0633 # Lo ARABIC MATHEMATICAL STRETCHED SEEN +1EE6F ; NFKC_CF; 0639 # Lo ARABIC MATHEMATICAL STRETCHED AIN +1EE70 ; NFKC_CF; 0641 # Lo ARABIC MATHEMATICAL STRETCHED FEH +1EE71 ; NFKC_CF; 0635 # Lo ARABIC MATHEMATICAL STRETCHED SAD +1EE72 ; NFKC_CF; 0642 # Lo ARABIC MATHEMATICAL STRETCHED QAF +1EE74 ; NFKC_CF; 0634 # Lo ARABIC MATHEMATICAL STRETCHED SHEEN +1EE75 ; NFKC_CF; 062A # Lo ARABIC MATHEMATICAL STRETCHED TEH +1EE76 ; NFKC_CF; 062B # Lo ARABIC MATHEMATICAL STRETCHED THEH +1EE77 ; NFKC_CF; 062E # Lo ARABIC MATHEMATICAL STRETCHED KHAH +1EE79 ; NFKC_CF; 0636 # Lo ARABIC MATHEMATICAL STRETCHED DAD +1EE7A ; NFKC_CF; 0638 # Lo ARABIC MATHEMATICAL STRETCHED ZAH +1EE7B ; NFKC_CF; 063A # Lo ARABIC MATHEMATICAL STRETCHED GHAIN +1EE7C ; NFKC_CF; 066E # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; NFKC_CF; 06A1 # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80 ; NFKC_CF; 0627 # Lo ARABIC MATHEMATICAL LOOPED ALEF +1EE81 ; NFKC_CF; 0628 # Lo ARABIC MATHEMATICAL LOOPED BEH +1EE82 ; NFKC_CF; 062C # Lo ARABIC MATHEMATICAL LOOPED JEEM +1EE83 ; NFKC_CF; 062F # Lo ARABIC MATHEMATICAL LOOPED DAL +1EE84 ; NFKC_CF; 0647 # Lo ARABIC MATHEMATICAL LOOPED HEH +1EE85 ; NFKC_CF; 0648 # Lo ARABIC MATHEMATICAL LOOPED WAW +1EE86 ; NFKC_CF; 0632 # Lo ARABIC MATHEMATICAL LOOPED ZAIN +1EE87 ; NFKC_CF; 062D # Lo ARABIC MATHEMATICAL LOOPED HAH +1EE88 ; NFKC_CF; 0637 # Lo ARABIC MATHEMATICAL LOOPED TAH +1EE89 ; NFKC_CF; 064A # Lo ARABIC MATHEMATICAL LOOPED YEH +1EE8B ; NFKC_CF; 0644 # Lo ARABIC MATHEMATICAL LOOPED LAM +1EE8C ; NFKC_CF; 0645 # Lo ARABIC MATHEMATICAL LOOPED MEEM +1EE8D ; NFKC_CF; 0646 # Lo ARABIC MATHEMATICAL LOOPED NOON +1EE8E ; NFKC_CF; 0633 # Lo ARABIC MATHEMATICAL LOOPED SEEN +1EE8F ; NFKC_CF; 0639 # Lo ARABIC MATHEMATICAL LOOPED AIN +1EE90 ; NFKC_CF; 0641 # Lo ARABIC MATHEMATICAL LOOPED FEH +1EE91 ; NFKC_CF; 0635 # Lo ARABIC MATHEMATICAL LOOPED SAD +1EE92 ; NFKC_CF; 0642 # Lo ARABIC 
MATHEMATICAL LOOPED QAF +1EE93 ; NFKC_CF; 0631 # Lo ARABIC MATHEMATICAL LOOPED REH +1EE94 ; NFKC_CF; 0634 # Lo ARABIC MATHEMATICAL LOOPED SHEEN +1EE95 ; NFKC_CF; 062A # Lo ARABIC MATHEMATICAL LOOPED TEH +1EE96 ; NFKC_CF; 062B # Lo ARABIC MATHEMATICAL LOOPED THEH +1EE97 ; NFKC_CF; 062E # Lo ARABIC MATHEMATICAL LOOPED KHAH +1EE98 ; NFKC_CF; 0630 # Lo ARABIC MATHEMATICAL LOOPED THAL +1EE99 ; NFKC_CF; 0636 # Lo ARABIC MATHEMATICAL LOOPED DAD +1EE9A ; NFKC_CF; 0638 # Lo ARABIC MATHEMATICAL LOOPED ZAH +1EE9B ; NFKC_CF; 063A # Lo ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1 ; NFKC_CF; 0628 # Lo ARABIC MATHEMATICAL DOUBLE-STRUCK BEH +1EEA2 ; NFKC_CF; 062C # Lo ARABIC MATHEMATICAL DOUBLE-STRUCK JEEM +1EEA3 ; NFKC_CF; 062F # Lo ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5 ; NFKC_CF; 0648 # Lo ARABIC MATHEMATICAL DOUBLE-STRUCK WAW +1EEA6 ; NFKC_CF; 0632 # Lo ARABIC MATHEMATICAL DOUBLE-STRUCK ZAIN +1EEA7 ; NFKC_CF; 062D # Lo ARABIC MATHEMATICAL DOUBLE-STRUCK HAH +1EEA8 ; NFKC_CF; 0637 # Lo ARABIC MATHEMATICAL DOUBLE-STRUCK TAH +1EEA9 ; NFKC_CF; 064A # Lo ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB ; NFKC_CF; 0644 # Lo ARABIC MATHEMATICAL DOUBLE-STRUCK LAM +1EEAC ; NFKC_CF; 0645 # Lo ARABIC MATHEMATICAL DOUBLE-STRUCK MEEM +1EEAD ; NFKC_CF; 0646 # Lo ARABIC MATHEMATICAL DOUBLE-STRUCK NOON +1EEAE ; NFKC_CF; 0633 # Lo ARABIC MATHEMATICAL DOUBLE-STRUCK SEEN +1EEAF ; NFKC_CF; 0639 # Lo ARABIC MATHEMATICAL DOUBLE-STRUCK AIN +1EEB0 ; NFKC_CF; 0641 # Lo ARABIC MATHEMATICAL DOUBLE-STRUCK FEH +1EEB1 ; NFKC_CF; 0635 # Lo ARABIC MATHEMATICAL DOUBLE-STRUCK SAD +1EEB2 ; NFKC_CF; 0642 # Lo ARABIC MATHEMATICAL DOUBLE-STRUCK QAF +1EEB3 ; NFKC_CF; 0631 # Lo ARABIC MATHEMATICAL DOUBLE-STRUCK REH +1EEB4 ; NFKC_CF; 0634 # Lo ARABIC MATHEMATICAL DOUBLE-STRUCK SHEEN +1EEB5 ; NFKC_CF; 062A # Lo ARABIC MATHEMATICAL DOUBLE-STRUCK TEH +1EEB6 ; NFKC_CF; 062B # Lo ARABIC MATHEMATICAL DOUBLE-STRUCK THEH +1EEB7 ; NFKC_CF; 062E # Lo ARABIC MATHEMATICAL DOUBLE-STRUCK KHAH +1EEB8 ; NFKC_CF; 0630 # Lo ARABIC 
MATHEMATICAL DOUBLE-STRUCK THAL +1EEB9 ; NFKC_CF; 0636 # Lo ARABIC MATHEMATICAL DOUBLE-STRUCK DAD +1EEBA ; NFKC_CF; 0638 # Lo ARABIC MATHEMATICAL DOUBLE-STRUCK ZAH +1EEBB ; NFKC_CF; 063A # Lo ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN 1F100 ; NFKC_CF; 0030 002E # No DIGIT ZERO FULL STOP 1F101 ; NFKC_CF; 0030 002C # No DIGIT ZERO COMMA 1F102 ; NFKC_CF; 0031 002C # No DIGIT ONE COMMA @@ -7581,6 +7803,8 @@ FFF0..FFF8 ; NFKC_CF; # Cn [9] <reserved-FFF0>..<reserved-FF 1F14D ; NFKC_CF; 0073 0073 # So SQUARED SS 1F14E ; NFKC_CF; 0070 0070 0076 # So SQUARED PPV 1F14F ; NFKC_CF; 0077 0063 # So SQUARED WC +1F16A ; NFKC_CF; 006D 0063 # So RAISED MC SIGN +1F16B ; NFKC_CF; 006D 0064 # So RAISED MD SIGN 1F190 ; NFKC_CF; 0064 006A # So SQUARE DJ 1F200 ; NFKC_CF; 307B 304B # So SQUARE HIRAGANA HOKA 1F201 ; NFKC_CF; 30B3 30B3 # So SQUARED KATAKANA KOKO @@ -8179,7 +8403,7 @@ E0080..E00FF ; NFKC_CF; # Cn [128] <reserved-E0080>..<reserved-E E0100..E01EF ; NFKC_CF; # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 E01F0..E0FFF ; NFKC_CF; # Cn [3600] <reserved-E01F0>..<reserved-E0FFF> -# Total code points: 9792 +# Total code points: 9944 # ================================================ @@ -8190,7 +8414,7 @@ E01F0..E0FFF ; NFKC_CF; # Cn [3600] <reserved-E01F0>..<reserved- 0041..005A ; Changes_When_NFKC_Casefolded # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z 00A0 ; Changes_When_NFKC_Casefolded # Zs NO-BREAK SPACE 00A8 ; Changes_When_NFKC_Casefolded # Sk DIAERESIS -00AA ; Changes_When_NFKC_Casefolded # L& FEMININE ORDINAL INDICATOR +00AA ; Changes_When_NFKC_Casefolded # Lo FEMININE ORDINAL INDICATOR 00AD ; Changes_When_NFKC_Casefolded # Cf SOFT HYPHEN 00AF ; Changes_When_NFKC_Casefolded # Sk MACRON 00B2..00B3 ; Changes_When_NFKC_Casefolded # No [2] SUPERSCRIPT TWO..SUPERSCRIPT THREE @@ -8198,7 +8422,7 @@ E01F0..E0FFF ; NFKC_CF; # Cn [3600] <reserved-E01F0>..<reserved- 00B5 ; Changes_When_NFKC_Casefolded # L& MICRO SIGN 00B8 ; Changes_When_NFKC_Casefolded # Sk CEDILLA 00B9 
; Changes_When_NFKC_Casefolded # No SUPERSCRIPT ONE -00BA ; Changes_When_NFKC_Casefolded # L& MASCULINE ORDINAL INDICATOR +00BA ; Changes_When_NFKC_Casefolded # Lo MASCULINE ORDINAL INDICATOR 00BC..00BE ; Changes_When_NFKC_Casefolded # No [3] VULGAR FRACTION ONE QUARTER..VULGAR FRACTION THREE QUARTERS 00C0..00D6 ; Changes_When_NFKC_Casefolded # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS 00D8..00DF ; Changes_When_NFKC_Casefolded # L& [8] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER SHARP S @@ -8503,15 +8727,16 @@ E01F0..E0FFF ; NFKC_CF; # Cn [3600] <reserved-E01F0>..<reserved- 0FAC ; Changes_When_NFKC_Casefolded # Mn TIBETAN SUBJOINED LETTER DZHA 0FB9 ; Changes_When_NFKC_Casefolded # Mn TIBETAN SUBJOINED LETTER KSSA 10A0..10C5 ; Changes_When_NFKC_Casefolded # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; Changes_When_NFKC_Casefolded # L& GEORGIAN CAPITAL LETTER YN +10CD ; Changes_When_NFKC_Casefolded # L& GEORGIAN CAPITAL LETTER AEN 10FC ; Changes_When_NFKC_Casefolded # Lm MODIFIER LETTER GEORGIAN NAR 115F..1160 ; Changes_When_NFKC_Casefolded # Lo [2] HANGUL CHOSEONG FILLER..HANGUL JUNGSEONG FILLER -17B4..17B5 ; Changes_When_NFKC_Casefolded # Cf [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA +17B4..17B5 ; Changes_When_NFKC_Casefolded # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA 180B..180D ; Changes_When_NFKC_Casefolded # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE 1D2C..1D2E ; Changes_When_NFKC_Casefolded # Lm [3] MODIFIER LETTER CAPITAL A..MODIFIER LETTER CAPITAL B 1D30..1D3A ; Changes_When_NFKC_Casefolded # Lm [11] MODIFIER LETTER CAPITAL D..MODIFIER LETTER CAPITAL N 1D3C..1D4D ; Changes_When_NFKC_Casefolded # Lm [18] MODIFIER LETTER CAPITAL O..MODIFIER LETTER SMALL G -1D4F..1D61 ; Changes_When_NFKC_Casefolded # Lm [19] MODIFIER LETTER SMALL K..MODIFIER LETTER SMALL CHI -1D62..1D6A ; Changes_When_NFKC_Casefolded # L& [9] LATIN 
SUBSCRIPT SMALL LETTER I..GREEK SUBSCRIPT SMALL LETTER CHI +1D4F..1D6A ; Changes_When_NFKC_Casefolded # Lm [28] MODIFIER LETTER SMALL K..GREEK SUBSCRIPT SMALL LETTER CHI 1D78 ; Changes_When_NFKC_Casefolded # Lm MODIFIER LETTER CYRILLIC EN 1D9B..1DBF ; Changes_When_NFKC_Casefolded # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA 1E00 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER A WITH RING BELOW @@ -8749,8 +8974,7 @@ E01F0..E0FFF ; NFKC_CF; # Cn [3600] <reserved-E01F0>..<reserved- 2C6D..2C70 ; Changes_When_NFKC_Casefolded # L& [4] LATIN CAPITAL LETTER ALPHA..LATIN CAPITAL LETTER TURNED ALPHA 2C72 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER W WITH HOOK 2C75 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER HALF H -2C7C ; Changes_When_NFKC_Casefolded # L& LATIN SUBSCRIPT SMALL LETTER J -2C7D ; Changes_When_NFKC_Casefolded # Lm MODIFIER LETTER CAPITAL V +2C7C..2C7D ; Changes_When_NFKC_Casefolded # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V 2C7E..2C80 ; Changes_When_NFKC_Casefolded # L& [3] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC CAPITAL LETTER ALFA 2C82 ; Changes_When_NFKC_Casefolded # L& COPTIC CAPITAL LETTER VIDA 2C84 ; Changes_When_NFKC_Casefolded # L& COPTIC CAPITAL LETTER GAMMA @@ -8803,6 +9027,7 @@ E01F0..E0FFF ; NFKC_CF; # Cn [3600] <reserved-E01F0>..<reserved- 2CE2 ; Changes_When_NFKC_Casefolded # L& COPTIC CAPITAL LETTER OLD NUBIAN WAU 2CEB ; Changes_When_NFKC_Casefolded # L& COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI 2CED ; Changes_When_NFKC_Casefolded # L& COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA +2CF2 ; Changes_When_NFKC_Casefolded # L& COPTIC CAPITAL LETTER BOHAIRIC KHEI 2D6F ; Changes_When_NFKC_Casefolded # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK 2E9F ; Changes_When_NFKC_Casefolded # So CJK RADICAL MOTHER 2EF3 ; Changes_When_NFKC_Casefolded # So CJK RADICAL C-SIMPLIFIED TURTLE @@ -8911,11 +9136,14 @@ A786 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER 
INS A78B ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER SALTILLO A78D ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER TURNED H A790 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER N WITH DESCENDER +A792 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER C WITH BAR A7A0 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER G WITH OBLIQUE STROKE A7A2 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER K WITH OBLIQUE STROKE A7A4 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER N WITH OBLIQUE STROKE A7A6 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER R WITH OBLIQUE STROKE A7A8 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER S WITH OBLIQUE STROKE +A7AA ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER H WITH HOOK +A7F8..A7F9 ; Changes_When_NFKC_Casefolded # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE F900..FA0D ; Changes_When_NFKC_Casefolded # Lo [270] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA0D FA10 ; Changes_When_NFKC_Casefolded # Lo CJK COMPATIBILITY IDEOGRAPH-FA10 FA12 ; Changes_When_NFKC_Casefolded # Lo CJK COMPATIBILITY IDEOGRAPH-FA12 @@ -8923,8 +9151,7 @@ FA15..FA1E ; Changes_When_NFKC_Casefolded # Lo [10] CJK COMPATIBILITY IDEOGR FA20 ; Changes_When_NFKC_Casefolded # Lo CJK COMPATIBILITY IDEOGRAPH-FA20 FA22 ; Changes_When_NFKC_Casefolded # Lo CJK COMPATIBILITY IDEOGRAPH-FA22 FA25..FA26 ; Changes_When_NFKC_Casefolded # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA25..CJK COMPATIBILITY IDEOGRAPH-FA26 -FA2A..FA2D ; Changes_When_NFKC_Casefolded # Lo [4] CJK COMPATIBILITY IDEOGRAPH-FA2A..CJK COMPATIBILITY IDEOGRAPH-FA2D -FA30..FA6D ; Changes_When_NFKC_Casefolded # Lo [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D +FA2A..FA6D ; Changes_When_NFKC_Casefolded # Lo [68] CJK COMPATIBILITY IDEOGRAPH-FA2A..CJK COMPATIBILITY IDEOGRAPH-FA6D FA70..FAD9 ; Changes_When_NFKC_Casefolded # Lo [106] CJK COMPATIBILITY 
IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 FB00..FB06 ; Changes_When_NFKC_Casefolded # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST FB13..FB17 ; Changes_When_NFKC_Casefolded # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH @@ -9085,9 +9312,43 @@ FFF0..FFF8 ; Changes_When_NFKC_Casefolded # Cn [9] <reserved-FFF0>..<reserv 1D7C3 ; Changes_When_NFKC_Casefolded # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL 1D7C4..1D7CB ; Changes_When_NFKC_Casefolded # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA 1D7CE..1D7FF ; Changes_When_NFKC_Casefolded # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE +1EE00..1EE03 ; Changes_When_NFKC_Casefolded # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; Changes_When_NFKC_Casefolded # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; Changes_When_NFKC_Casefolded # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; Changes_When_NFKC_Casefolded # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; Changes_When_NFKC_Casefolded # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; Changes_When_NFKC_Casefolded # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; Changes_When_NFKC_Casefolded # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; Changes_When_NFKC_Casefolded # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; Changes_When_NFKC_Casefolded # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; Changes_When_NFKC_Casefolded # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; Changes_When_NFKC_Casefolded # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; Changes_When_NFKC_Casefolded # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; Changes_When_NFKC_Casefolded # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; Changes_When_NFKC_Casefolded # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC 
MATHEMATICAL TAILED AIN +1EE51..1EE52 ; Changes_When_NFKC_Casefolded # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; Changes_When_NFKC_Casefolded # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; Changes_When_NFKC_Casefolded # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; Changes_When_NFKC_Casefolded # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; Changes_When_NFKC_Casefolded # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; Changes_When_NFKC_Casefolded # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; Changes_When_NFKC_Casefolded # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; Changes_When_NFKC_Casefolded # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; Changes_When_NFKC_Casefolded # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; Changes_When_NFKC_Casefolded # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; Changes_When_NFKC_Casefolded # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; Changes_When_NFKC_Casefolded # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; Changes_When_NFKC_Casefolded # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; Changes_When_NFKC_Casefolded # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; Changes_When_NFKC_Casefolded # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; Changes_When_NFKC_Casefolded # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; Changes_When_NFKC_Casefolded # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; Changes_When_NFKC_Casefolded # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; Changes_When_NFKC_Casefolded # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC 
MATHEMATICAL DOUBLE-STRUCK GHAIN 1F100..1F10A ; Changes_When_NFKC_Casefolded # No [11] DIGIT ZERO FULL STOP..DIGIT NINE COMMA 1F110..1F12E ; Changes_When_NFKC_Casefolded # So [31] PARENTHESIZED LATIN CAPITAL LETTER A..CIRCLED WZ 1F130..1F14F ; Changes_When_NFKC_Casefolded # So [32] SQUARED LATIN CAPITAL LETTER A..SQUARED WC +1F16A..1F16B ; Changes_When_NFKC_Casefolded # So [2] RAISED MC SIGN..RAISED MD SIGN 1F190 ; Changes_When_NFKC_Casefolded # So SQUARE DJ 1F200..1F202 ; Changes_When_NFKC_Casefolded # So [3] SQUARE HIRAGANA HOKA..SQUARED KATAKANA SA 1F210..1F23A ; Changes_When_NFKC_Casefolded # So [43] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-55B6 @@ -9102,6 +9363,6 @@ E0080..E00FF ; Changes_When_NFKC_Casefolded # Cn [128] <reserved-E0080>..<reser E0100..E01EF ; Changes_When_NFKC_Casefolded # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 E01F0..E0FFF ; Changes_When_NFKC_Casefolded # Cn [3600] <reserved-E01F0>..<reserved-E0FFF> -# Total code points: 9792 +# Total code points: 9944 # EOF diff --git a/lib/unicore/EastAsianWidth.txt b/lib/unicore/EastAsianWidth.txt index d271d90d56..ea38eef618 100644 --- a/lib/unicore/EastAsianWidth.txt +++ b/lib/unicore/EastAsianWidth.txt @@ -1,12 +1,12 @@ -# EastAsianWidth-6.0.0.txt -# Date: 2010-08-17, 12:17:00 PDT [KW] +# EastAsianWidth-6.1.0.txt +# Date: 2011-09-19, 18:46:00 GMT [KW] # # East Asian Width Properties # # This file is an informative contributory data file in the # Unicode Character Database. # -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # # The format is two fields separated by a semicolon. 
@@ -1432,6 +1432,7 @@ 0587;N # ARMENIAN SMALL LIGATURE ECH YIWN 0589;N # ARMENIAN FULL STOP 058A;N # ARMENIAN HYPHEN +058F;N # ARMENIAN DRAM SIGN 0591;N # HEBREW ACCENT ETNAHTA 0592;N # HEBREW ACCENT SEGOL 0593;N # HEBREW ACCENT SHALSHELET @@ -1523,6 +1524,7 @@ 0601;N # ARABIC SIGN SANAH 0602;N # ARABIC FOOTNOTE MARKER 0603;N # ARABIC SIGN SAFHA +0604;N # ARABIC SIGN SAMVAT 0606;N # ARABIC-INDIC CUBE ROOT 0607;N # ARABIC-INDIC FOURTH ROOT 0608;N # ARABIC RAY @@ -2095,6 +2097,45 @@ 085A;N # MANDAIC VOCALIZATION MARK 085B;N # MANDAIC GEMINATION MARK 085E;N # MANDAIC PUNCTUATION +08A0;N # ARABIC LETTER BEH WITH SMALL V BELOW +08A2;N # ARABIC LETTER JEEM WITH TWO DOTS ABOVE +08A3;N # ARABIC LETTER TAH WITH TWO DOTS ABOVE +08A4;N # ARABIC LETTER FEH WITH DOT BELOW AND THREE DOTS ABOVE +08A5;N # ARABIC LETTER QAF WITH DOT BELOW +08A6;N # ARABIC LETTER LAM WITH DOUBLE BAR +08A7;N # ARABIC LETTER MEEM WITH THREE DOTS ABOVE +08A8;N # ARABIC LETTER YEH WITH TWO DOTS BELOW AND HAMZA ABOVE +08A9;N # ARABIC LETTER YEH WITH TWO DOTS BELOW AND DOT ABOVE +08AA;N # ARABIC LETTER REH WITH LOOP +08AB;N # ARABIC LETTER WAW WITH DOT WITHIN +08AC;N # ARABIC LETTER ROHINGYA YEH +08E4;N # ARABIC CURLY FATHA +08E5;N # ARABIC CURLY DAMMA +08E6;N # ARABIC CURLY KASRA +08E7;N # ARABIC CURLY FATHATAN +08E8;N # ARABIC CURLY DAMMATAN +08E9;N # ARABIC CURLY KASRATAN +08EA;N # ARABIC TONE ONE DOT ABOVE +08EB;N # ARABIC TONE TWO DOTS ABOVE +08EC;N # ARABIC TONE LOOP ABOVE +08ED;N # ARABIC TONE ONE DOT BELOW +08EE;N # ARABIC TONE TWO DOTS BELOW +08EF;N # ARABIC TONE LOOP BELOW +08F0;N # ARABIC OPEN FATHATAN +08F1;N # ARABIC OPEN DAMMATAN +08F2;N # ARABIC OPEN KASRATAN +08F3;N # ARABIC SMALL HIGH WAW +08F4;N # ARABIC FATHA WITH RING +08F5;N # ARABIC FATHA WITH DOT ABOVE +08F6;N # ARABIC KASRA WITH DOT BELOW +08F7;N # ARABIC LEFT ARROWHEAD ABOVE +08F8;N # ARABIC RIGHT ARROWHEAD ABOVE +08F9;N # ARABIC LEFT ARROWHEAD BELOW +08FA;N # ARABIC RIGHT ARROWHEAD BELOW +08FB;N # ARABIC DOUBLE RIGHT ARROWHEAD 
ABOVE +08FC;N # ARABIC DOUBLE RIGHT ARROWHEAD ABOVE WITH DOT +08FD;N # ARABIC RIGHT ARROWHEAD ABOVE WITH DOT +08FE;N # ARABIC DAMMA WITH DOT 0900;N # DEVANAGARI SIGN INVERTED CANDRABINDU 0901;N # DEVANAGARI SIGN CANDRABINDU 0902;N # DEVANAGARI SIGN ANUSVARA @@ -2475,6 +2516,7 @@ 0AED;N # GUJARATI DIGIT SEVEN 0AEE;N # GUJARATI DIGIT EIGHT 0AEF;N # GUJARATI DIGIT NINE +0AF0;N # GUJARATI ABBREVIATION SIGN 0AF1;N # GUJARATI RUPEE SIGN 0B01;N # ORIYA SIGN CANDRABINDU 0B02;N # ORIYA SIGN ANUSVARA @@ -3147,6 +3189,8 @@ 0ED9;N # LAO DIGIT NINE 0EDC;N # LAO HO NO 0EDD;N # LAO HO MO +0EDE;N # LAO LETTER KHMU GO +0EDF;N # LAO LETTER KHMU NYO 0F00;N # TIBETAN SYLLABLE OM 0F01;N # TIBETAN MARK GTER YIG MGO TRUNCATED A 0F02;N # TIBETAN MARK GTER YIG MGO -UM RNAM BCAD MA @@ -3556,6 +3600,8 @@ 10C3;N # GEORGIAN CAPITAL LETTER WE 10C4;N # GEORGIAN CAPITAL LETTER HAR 10C5;N # GEORGIAN CAPITAL LETTER HOE +10C7;N # GEORGIAN CAPITAL LETTER YN +10CD;N # GEORGIAN CAPITAL LETTER AEN 10D0;N # GEORGIAN LETTER AN 10D1;N # GEORGIAN LETTER BAN 10D2;N # GEORGIAN LETTER GAN @@ -3601,6 +3647,9 @@ 10FA;N # GEORGIAN LETTER AIN 10FB;N # GEORGIAN PARAGRAPH SEPARATOR 10FC;N # MODIFIER LETTER GEORGIAN NAR +10FD;N # GEORGIAN LETTER AEN +10FE;N # GEORGIAN LETTER HARD SIGN +10FF;N # GEORGIAN LETTER LABIAL SIGN 1100;W # HANGUL CHOSEONG KIYEOK 1101;W # HANGUL CHOSEONG SSANGKIYEOK 1102;W # HANGUL CHOSEONG NIEUN @@ -6034,6 +6083,9 @@ 1BA8;N # SUNDANESE VOWEL SIGN PAMEPET 1BA9;N # SUNDANESE VOWEL SIGN PANEULEUNG 1BAA;N # SUNDANESE SIGN PAMAAEH +1BAB;N # SUNDANESE SIGN VIRAMA +1BAC;N # SUNDANESE CONSONANT SIGN PASANGAN MA +1BAD;N # SUNDANESE CONSONANT SIGN PASANGAN WA 1BAE;N # SUNDANESE LETTER KHA 1BAF;N # SUNDANESE LETTER SYA 1BB0;N # SUNDANESE DIGIT ZERO @@ -6046,6 +6098,12 @@ 1BB7;N # SUNDANESE DIGIT SEVEN 1BB8;N # SUNDANESE DIGIT EIGHT 1BB9;N # SUNDANESE DIGIT NINE +1BBA;N # SUNDANESE AVAGRAHA +1BBB;N # SUNDANESE LETTER REU +1BBC;N # SUNDANESE LETTER LEU +1BBD;N # SUNDANESE LETTER BHA +1BBE;N # SUNDANESE 
LETTER FINAL K +1BBF;N # SUNDANESE LETTER FINAL M 1BC0;N # BATAK LETTER A 1BC1;N # BATAK LETTER SIMALUNGUN A 1BC2;N # BATAK LETTER HA @@ -6224,6 +6282,14 @@ 1C7D;N # OL CHIKI AHAD 1C7E;N # OL CHIKI PUNCTUATION MUCAAD 1C7F;N # OL CHIKI PUNCTUATION DOUBLE MUCAAD +1CC0;N # SUNDANESE PUNCTUATION BINDU SURYA +1CC1;N # SUNDANESE PUNCTUATION BINDU PANGLONG +1CC2;N # SUNDANESE PUNCTUATION BINDU PURNAMA +1CC3;N # SUNDANESE PUNCTUATION BINDU CAKRA +1CC4;N # SUNDANESE PUNCTUATION BINDU LEU SATANGA +1CC5;N # SUNDANESE PUNCTUATION BINDU KA SATANGA +1CC6;N # SUNDANESE PUNCTUATION BINDU DA SATANGA +1CC7;N # SUNDANESE PUNCTUATION BINDU BA SATANGA 1CD0;N # VEDIC TONE KARSHANA 1CD1;N # VEDIC TONE SHARA 1CD2;N # VEDIC TONE PRENKHA @@ -6259,6 +6325,10 @@ 1CF0;N # VEDIC SIGN RTHANG LONG ANUSVARA 1CF1;N # VEDIC SIGN ANUSVARA UBHAYATO MUKHA 1CF2;N # VEDIC SIGN ARDHAVISARGA +1CF3;N # VEDIC SIGN ROTATED ARDHAVISARGA +1CF4;N # VEDIC TONE CANDRA ABOVE +1CF5;N # VEDIC SIGN JIHVAMULIYA +1CF6;N # VEDIC SIGN UPADHMANIYA 1D00;N # LATIN LETTER SMALL CAPITAL A 1D01;N # LATIN LETTER SMALL CAPITAL AE 1D02;N # LATIN SMALL LETTER TURNED AE @@ -8865,7 +8935,9 @@ 27C8;N # REVERSE SOLIDUS PRECEDING SUBSET 27C9;N # SUPERSET PRECEDING SOLIDUS 27CA;N # VERTICAL BAR WITH HORIZONTAL STROKE +27CB;N # MATHEMATICAL RISING DIAGONAL 27CC;N # LONG DIVISION +27CD;N # MATHEMATICAL FALLING DIAGONAL 27CE;N # SQUARED LOGICAL AND 27CF;N # SQUARED LOGICAL OR 27D0;N # WHITE DIAMOND WITH CENTRED DOT @@ -10011,6 +10083,8 @@ 2CEF;N # COPTIC COMBINING NI ABOVE 2CF0;N # COPTIC COMBINING SPIRITUS ASPER 2CF1;N # COPTIC COMBINING SPIRITUS LENIS +2CF2;N # COPTIC CAPITAL LETTER BOHAIRIC KHEI +2CF3;N # COPTIC SMALL LETTER BOHAIRIC KHEI 2CF9;N # COPTIC OLD NUBIAN FULL STOP 2CFA;N # COPTIC OLD NUBIAN DIRECT QUESTION MARK 2CFB;N # COPTIC OLD NUBIAN INDIRECT QUESTION MARK @@ -10056,6 +10130,8 @@ 2D23;N # GEORGIAN SMALL LETTER WE 2D24;N # GEORGIAN SMALL LETTER HAR 2D25;N # GEORGIAN SMALL LETTER HOE +2D27;N # GEORGIAN SMALL LETTER YN 
+2D2D;N # GEORGIAN SMALL LETTER AEN 2D30;N # TIFINAGH LETTER YA 2D31;N # TIFINAGH LETTER YAB 2D32;N # TIFINAGH LETTER YABH @@ -10110,6 +10186,8 @@ 2D63;N # TIFINAGH LETTER YAZ 2D64;N # TIFINAGH LETTER TAWELLEMET YAZ 2D65;N # TIFINAGH LETTER YAZZ +2D66;N # TIFINAGH LETTER YE +2D67;N # TIFINAGH LETTER YO 2D6F;N # TIFINAGH MODIFIER LETTER LABIALIZATION MARK 2D70;N # TIFINAGH SEPARATOR MARK 2D7F;N # TIFINAGH CONSONANT JOINER @@ -10274,6 +10352,16 @@ 2E2F;N # VERTICAL TILDE 2E30;N # RING POINT 2E31;N # WORD SEPARATOR MIDDLE DOT +2E32;N # TURNED COMMA +2E33;N # RAISED DOT +2E34;N # RAISED COMMA +2E35;N # TURNED SEMICOLON +2E36;N # DAGGER WITH LEFT GUARD +2E37;N # DAGGER WITH RIGHT GUARD +2E38;N # TURNED DAGGER +2E39;N # TOP HALF SECTION SIGN +2E3A;N # TWO-EM DASH +2E3B;N # THREE-EM DASH 2E80;W # CJK RADICAL REPEAT 2E81;W # CJK RADICAL CLIFF 2E82;W # CJK RADICAL SECOND ONE @@ -11674,8 +11762,8 @@ 4DFD;N # HEXAGRAM FOR SMALL PREPONDERANCE 4DFE;N # HEXAGRAM FOR AFTER COMPLETION 4DFF;N # HEXAGRAM FOR BEFORE COMPLETION -4E00..9FCB;W # <CJK Ideograph, First>..<CJK Ideograph, Last> -9FCC..9FFF;W # <reserved-9FCC>..<reserved-9FFF> +4E00..9FCC;W # <CJK Ideograph, First>..<CJK Ideograph, Last> +9FCD..9FFF;W # <reserved-9FCD>..<reserved-9FFF> A000;W # YI SYLLABLE IT A001;W # YI SYLLABLE IX A002;W # YI SYLLABLE I @@ -13296,6 +13384,14 @@ A670;N # COMBINING CYRILLIC TEN MILLIONS SIGN A671;N # COMBINING CYRILLIC HUNDRED MILLIONS SIGN A672;N # COMBINING CYRILLIC THOUSAND MILLIONS SIGN A673;N # SLAVONIC ASTERISK +A674;N # COMBINING CYRILLIC LETTER UKRAINIAN IE +A675;N # COMBINING CYRILLIC LETTER I +A676;N # COMBINING CYRILLIC LETTER YI +A677;N # COMBINING CYRILLIC LETTER U +A678;N # COMBINING CYRILLIC LETTER HARD SIGN +A679;N # COMBINING CYRILLIC LETTER YERU +A67A;N # COMBINING CYRILLIC LETTER SOFT SIGN +A67B;N # COMBINING CYRILLIC LETTER OMEGA A67C;N # COMBINING CYRILLIC KAVYKA A67D;N # COMBINING CYRILLIC PAYEROK A67E;N # CYRILLIC KAVYKA @@ -13324,6 +13420,7 @@ A694;N # CYRILLIC 
CAPITAL LETTER HWE A695;N # CYRILLIC SMALL LETTER HWE A696;N # CYRILLIC CAPITAL LETTER SHWE A697;N # CYRILLIC SMALL LETTER SHWE +A69F;N # COMBINING CYRILLIC LETTER IOTIFIED E A6A0;N # BAMUM LETTER A A6A1;N # BAMUM LETTER KA A6A2;N # BAMUM LETTER U @@ -13557,6 +13654,8 @@ A78D;N # LATIN CAPITAL LETTER TURNED H A78E;N # LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A790;N # LATIN CAPITAL LETTER N WITH DESCENDER A791;N # LATIN SMALL LETTER N WITH DESCENDER +A792;N # LATIN CAPITAL LETTER C WITH BAR +A793;N # LATIN SMALL LETTER C WITH BAR A7A0;N # LATIN CAPITAL LETTER G WITH OBLIQUE STROKE A7A1;N # LATIN SMALL LETTER G WITH OBLIQUE STROKE A7A2;N # LATIN CAPITAL LETTER K WITH OBLIQUE STROKE @@ -13567,6 +13666,9 @@ A7A6;N # LATIN CAPITAL LETTER R WITH OBLIQUE STROKE A7A7;N # LATIN SMALL LETTER R WITH OBLIQUE STROKE A7A8;N # LATIN CAPITAL LETTER S WITH OBLIQUE STROKE A7A9;N # LATIN SMALL LETTER S WITH OBLIQUE STROKE +A7AA;N # LATIN CAPITAL LETTER H WITH HOOK +A7F8;N # MODIFIER LETTER CAPITAL H WITH STROKE +A7F9;N # MODIFIER LETTER SMALL LIGATURE OE A7FA;N # LATIN LETTER SMALL CAPITAL TURNED M A7FB;N # LATIN EPIGRAPHIC LETTER REVERSED F A7FC;N # LATIN EPIGRAPHIC LETTER REVERSED P @@ -14180,6 +14282,29 @@ AADC;N # TAI VIET SYMBOL NUENG AADD;N # TAI VIET SYMBOL SAM AADE;N # TAI VIET SYMBOL HO HOI AADF;N # TAI VIET SYMBOL KOI KOI +AAE0;N # MEETEI MAYEK LETTER E +AAE1;N # MEETEI MAYEK LETTER O +AAE2;N # MEETEI MAYEK LETTER CHA +AAE3;N # MEETEI MAYEK LETTER NYA +AAE4;N # MEETEI MAYEK LETTER TTA +AAE5;N # MEETEI MAYEK LETTER TTHA +AAE6;N # MEETEI MAYEK LETTER DDA +AAE7;N # MEETEI MAYEK LETTER DDHA +AAE8;N # MEETEI MAYEK LETTER NNA +AAE9;N # MEETEI MAYEK LETTER SHA +AAEA;N # MEETEI MAYEK LETTER SSA +AAEB;N # MEETEI MAYEK VOWEL SIGN II +AAEC;N # MEETEI MAYEK VOWEL SIGN UU +AAED;N # MEETEI MAYEK VOWEL SIGN AAI +AAEE;N # MEETEI MAYEK VOWEL SIGN AU +AAEF;N # MEETEI MAYEK VOWEL SIGN AAU +AAF0;N # MEETEI MAYEK CHEIKHAN +AAF1;N # MEETEI MAYEK AHANG KHUDAM +AAF2;N # MEETEI MAYEK ANJI 
+AAF3;N # MEETEI MAYEK SYLLABLE REPETITION MARK +AAF4;N # MEETEI MAYEK WORD REPETITION MARK +AAF5;N # MEETEI MAYEK VOWEL SIGN VISARGA +AAF6;N # MEETEI MAYEK VIRAMA AB01;N # ETHIOPIC SYLLABLE TTHU AB02;N # ETHIOPIC SYLLABLE TTHI AB03;N # ETHIOPIC SYLLABLE TTHAA @@ -14647,7 +14772,8 @@ FA2A;W # CJK COMPATIBILITY IDEOGRAPH-FA2A FA2B;W # CJK COMPATIBILITY IDEOGRAPH-FA2B FA2C;W # CJK COMPATIBILITY IDEOGRAPH-FA2C FA2D;W # CJK COMPATIBILITY IDEOGRAPH-FA2D -FA2E..FA2F;W # <reserved-FA2E>..<reserved-FA2F> +FA2E;W # CJK COMPATIBILITY IDEOGRAPH-FA2E +FA2F;W # CJK COMPATIBILITY IDEOGRAPH-FA2F FA30;W # CJK COMPATIBILITY IDEOGRAPH-FA30 FA31;W # CJK COMPATIBILITY IDEOGRAPH-FA31 FA32;W # CJK COMPATIBILITY IDEOGRAPH-FA32 @@ -16881,6 +17007,64 @@ FFFD;A # REPLACEMENT CHARACTER 10938;N # LYDIAN LETTER NN 10939;N # LYDIAN LETTER C 1093F;N # LYDIAN TRIANGULAR MARK +10980;N # MEROITIC HIEROGLYPHIC LETTER A +10981;N # MEROITIC HIEROGLYPHIC LETTER E +10982;N # MEROITIC HIEROGLYPHIC LETTER I +10983;N # MEROITIC HIEROGLYPHIC LETTER O +10984;N # MEROITIC HIEROGLYPHIC LETTER YA +10985;N # MEROITIC HIEROGLYPHIC LETTER WA +10986;N # MEROITIC HIEROGLYPHIC LETTER BA +10987;N # MEROITIC HIEROGLYPHIC LETTER BA-2 +10988;N # MEROITIC HIEROGLYPHIC LETTER PA +10989;N # MEROITIC HIEROGLYPHIC LETTER MA +1098A;N # MEROITIC HIEROGLYPHIC LETTER NA +1098B;N # MEROITIC HIEROGLYPHIC LETTER NA-2 +1098C;N # MEROITIC HIEROGLYPHIC LETTER NE +1098D;N # MEROITIC HIEROGLYPHIC LETTER NE-2 +1098E;N # MEROITIC HIEROGLYPHIC LETTER RA +1098F;N # MEROITIC HIEROGLYPHIC LETTER RA-2 +10990;N # MEROITIC HIEROGLYPHIC LETTER LA +10991;N # MEROITIC HIEROGLYPHIC LETTER KHA +10992;N # MEROITIC HIEROGLYPHIC LETTER HHA +10993;N # MEROITIC HIEROGLYPHIC LETTER SA +10994;N # MEROITIC HIEROGLYPHIC LETTER SA-2 +10995;N # MEROITIC HIEROGLYPHIC LETTER SE +10996;N # MEROITIC HIEROGLYPHIC LETTER KA +10997;N # MEROITIC HIEROGLYPHIC LETTER QA +10998;N # MEROITIC HIEROGLYPHIC LETTER TA +10999;N # MEROITIC HIEROGLYPHIC LETTER TA-2 +1099A;N # 
MEROITIC HIEROGLYPHIC LETTER TE +1099B;N # MEROITIC HIEROGLYPHIC LETTER TE-2 +1099C;N # MEROITIC HIEROGLYPHIC LETTER TO +1099D;N # MEROITIC HIEROGLYPHIC LETTER DA +1099E;N # MEROITIC HIEROGLYPHIC SYMBOL VIDJ +1099F;N # MEROITIC HIEROGLYPHIC SYMBOL VIDJ-2 +109A0;N # MEROITIC CURSIVE LETTER A +109A1;N # MEROITIC CURSIVE LETTER E +109A2;N # MEROITIC CURSIVE LETTER I +109A3;N # MEROITIC CURSIVE LETTER O +109A4;N # MEROITIC CURSIVE LETTER YA +109A5;N # MEROITIC CURSIVE LETTER WA +109A6;N # MEROITIC CURSIVE LETTER BA +109A7;N # MEROITIC CURSIVE LETTER PA +109A8;N # MEROITIC CURSIVE LETTER MA +109A9;N # MEROITIC CURSIVE LETTER NA +109AA;N # MEROITIC CURSIVE LETTER NE +109AB;N # MEROITIC CURSIVE LETTER RA +109AC;N # MEROITIC CURSIVE LETTER LA +109AD;N # MEROITIC CURSIVE LETTER KHA +109AE;N # MEROITIC CURSIVE LETTER HHA +109AF;N # MEROITIC CURSIVE LETTER SA +109B0;N # MEROITIC CURSIVE LETTER ARCHAIC SA +109B1;N # MEROITIC CURSIVE LETTER SE +109B2;N # MEROITIC CURSIVE LETTER KA +109B3;N # MEROITIC CURSIVE LETTER QA +109B4;N # MEROITIC CURSIVE LETTER TA +109B5;N # MEROITIC CURSIVE LETTER TE +109B6;N # MEROITIC CURSIVE LETTER TO +109B7;N # MEROITIC CURSIVE LETTER DA +109BE;N # MEROITIC CURSIVE LOGOGRAM RMT +109BF;N # MEROITIC CURSIVE LOGOGRAM IMN 10A00;N # KHAROSHTHI LETTER A 10A01;N # KHAROSHTHI VOWEL SIGN I 10A02;N # KHAROSHTHI VOWEL SIGN U @@ -17374,6 +17558,257 @@ FFFD;A # REPLACEMENT CHARACTER 110BF;N # KAITHI DOUBLE SECTION MARK 110C0;N # KAITHI DANDA 110C1;N # KAITHI DOUBLE DANDA +110D0;N # SORA SOMPENG LETTER SAH +110D1;N # SORA SOMPENG LETTER TAH +110D2;N # SORA SOMPENG LETTER BAH +110D3;N # SORA SOMPENG LETTER CAH +110D4;N # SORA SOMPENG LETTER DAH +110D5;N # SORA SOMPENG LETTER GAH +110D6;N # SORA SOMPENG LETTER MAH +110D7;N # SORA SOMPENG LETTER NGAH +110D8;N # SORA SOMPENG LETTER LAH +110D9;N # SORA SOMPENG LETTER NAH +110DA;N # SORA SOMPENG LETTER VAH +110DB;N # SORA SOMPENG LETTER PAH +110DC;N # SORA SOMPENG LETTER YAH +110DD;N # SORA SOMPENG LETTER RAH +110DE;N 
# SORA SOMPENG LETTER HAH +110DF;N # SORA SOMPENG LETTER KAH +110E0;N # SORA SOMPENG LETTER JAH +110E1;N # SORA SOMPENG LETTER NYAH +110E2;N # SORA SOMPENG LETTER AH +110E3;N # SORA SOMPENG LETTER EEH +110E4;N # SORA SOMPENG LETTER IH +110E5;N # SORA SOMPENG LETTER UH +110E6;N # SORA SOMPENG LETTER OH +110E7;N # SORA SOMPENG LETTER EH +110E8;N # SORA SOMPENG LETTER MAE +110F0;N # SORA SOMPENG DIGIT ZERO +110F1;N # SORA SOMPENG DIGIT ONE +110F2;N # SORA SOMPENG DIGIT TWO +110F3;N # SORA SOMPENG DIGIT THREE +110F4;N # SORA SOMPENG DIGIT FOUR +110F5;N # SORA SOMPENG DIGIT FIVE +110F6;N # SORA SOMPENG DIGIT SIX +110F7;N # SORA SOMPENG DIGIT SEVEN +110F8;N # SORA SOMPENG DIGIT EIGHT +110F9;N # SORA SOMPENG DIGIT NINE +11100;N # CHAKMA SIGN CANDRABINDU +11101;N # CHAKMA SIGN ANUSVARA +11102;N # CHAKMA SIGN VISARGA +11103;N # CHAKMA LETTER AA +11104;N # CHAKMA LETTER I +11105;N # CHAKMA LETTER U +11106;N # CHAKMA LETTER E +11107;N # CHAKMA LETTER KAA +11108;N # CHAKMA LETTER KHAA +11109;N # CHAKMA LETTER GAA +1110A;N # CHAKMA LETTER GHAA +1110B;N # CHAKMA LETTER NGAA +1110C;N # CHAKMA LETTER CAA +1110D;N # CHAKMA LETTER CHAA +1110E;N # CHAKMA LETTER JAA +1110F;N # CHAKMA LETTER JHAA +11110;N # CHAKMA LETTER NYAA +11111;N # CHAKMA LETTER TTAA +11112;N # CHAKMA LETTER TTHAA +11113;N # CHAKMA LETTER DDAA +11114;N # CHAKMA LETTER DDHAA +11115;N # CHAKMA LETTER NNAA +11116;N # CHAKMA LETTER TAA +11117;N # CHAKMA LETTER THAA +11118;N # CHAKMA LETTER DAA +11119;N # CHAKMA LETTER DHAA +1111A;N # CHAKMA LETTER NAA +1111B;N # CHAKMA LETTER PAA +1111C;N # CHAKMA LETTER PHAA +1111D;N # CHAKMA LETTER BAA +1111E;N # CHAKMA LETTER BHAA +1111F;N # CHAKMA LETTER MAA +11120;N # CHAKMA LETTER YYAA +11121;N # CHAKMA LETTER YAA +11122;N # CHAKMA LETTER RAA +11123;N # CHAKMA LETTER LAA +11124;N # CHAKMA LETTER WAA +11125;N # CHAKMA LETTER SAA +11126;N # CHAKMA LETTER HAA +11127;N # CHAKMA VOWEL SIGN A +11128;N # CHAKMA VOWEL SIGN I +11129;N # CHAKMA VOWEL SIGN II +1112A;N # CHAKMA VOWEL SIGN U 
+1112B;N # CHAKMA VOWEL SIGN UU +1112C;N # CHAKMA VOWEL SIGN E +1112D;N # CHAKMA VOWEL SIGN AI +1112E;N # CHAKMA VOWEL SIGN O +1112F;N # CHAKMA VOWEL SIGN AU +11130;N # CHAKMA VOWEL SIGN OI +11131;N # CHAKMA O MARK +11132;N # CHAKMA AU MARK +11133;N # CHAKMA VIRAMA +11134;N # CHAKMA MAAYYAA +11136;N # CHAKMA DIGIT ZERO +11137;N # CHAKMA DIGIT ONE +11138;N # CHAKMA DIGIT TWO +11139;N # CHAKMA DIGIT THREE +1113A;N # CHAKMA DIGIT FOUR +1113B;N # CHAKMA DIGIT FIVE +1113C;N # CHAKMA DIGIT SIX +1113D;N # CHAKMA DIGIT SEVEN +1113E;N # CHAKMA DIGIT EIGHT +1113F;N # CHAKMA DIGIT NINE +11140;N # CHAKMA SECTION MARK +11141;N # CHAKMA DANDA +11142;N # CHAKMA DOUBLE DANDA +11143;N # CHAKMA QUESTION MARK +11180;N # SHARADA SIGN CANDRABINDU +11181;N # SHARADA SIGN ANUSVARA +11182;N # SHARADA SIGN VISARGA +11183;N # SHARADA LETTER A +11184;N # SHARADA LETTER AA +11185;N # SHARADA LETTER I +11186;N # SHARADA LETTER II +11187;N # SHARADA LETTER U +11188;N # SHARADA LETTER UU +11189;N # SHARADA LETTER VOCALIC R +1118A;N # SHARADA LETTER VOCALIC RR +1118B;N # SHARADA LETTER VOCALIC L +1118C;N # SHARADA LETTER VOCALIC LL +1118D;N # SHARADA LETTER E +1118E;N # SHARADA LETTER AI +1118F;N # SHARADA LETTER O +11190;N # SHARADA LETTER AU +11191;N # SHARADA LETTER KA +11192;N # SHARADA LETTER KHA +11193;N # SHARADA LETTER GA +11194;N # SHARADA LETTER GHA +11195;N # SHARADA LETTER NGA +11196;N # SHARADA LETTER CA +11197;N # SHARADA LETTER CHA +11198;N # SHARADA LETTER JA +11199;N # SHARADA LETTER JHA +1119A;N # SHARADA LETTER NYA +1119B;N # SHARADA LETTER TTA +1119C;N # SHARADA LETTER TTHA +1119D;N # SHARADA LETTER DDA +1119E;N # SHARADA LETTER DDHA +1119F;N # SHARADA LETTER NNA +111A0;N # SHARADA LETTER TA +111A1;N # SHARADA LETTER THA +111A2;N # SHARADA LETTER DA +111A3;N # SHARADA LETTER DHA +111A4;N # SHARADA LETTER NA +111A5;N # SHARADA LETTER PA +111A6;N # SHARADA LETTER PHA +111A7;N # SHARADA LETTER BA +111A8;N # SHARADA LETTER BHA +111A9;N # SHARADA LETTER MA +111AA;N # SHARADA LETTER 
YA +111AB;N # SHARADA LETTER RA +111AC;N # SHARADA LETTER LA +111AD;N # SHARADA LETTER LLA +111AE;N # SHARADA LETTER VA +111AF;N # SHARADA LETTER SHA +111B0;N # SHARADA LETTER SSA +111B1;N # SHARADA LETTER SA +111B2;N # SHARADA LETTER HA +111B3;N # SHARADA VOWEL SIGN AA +111B4;N # SHARADA VOWEL SIGN I +111B5;N # SHARADA VOWEL SIGN II +111B6;N # SHARADA VOWEL SIGN U +111B7;N # SHARADA VOWEL SIGN UU +111B8;N # SHARADA VOWEL SIGN VOCALIC R +111B9;N # SHARADA VOWEL SIGN VOCALIC RR +111BA;N # SHARADA VOWEL SIGN VOCALIC L +111BB;N # SHARADA VOWEL SIGN VOCALIC LL +111BC;N # SHARADA VOWEL SIGN E +111BD;N # SHARADA VOWEL SIGN AI +111BE;N # SHARADA VOWEL SIGN O +111BF;N # SHARADA VOWEL SIGN AU +111C0;N # SHARADA SIGN VIRAMA +111C1;N # SHARADA SIGN AVAGRAHA +111C2;N # SHARADA SIGN JIHVAMULIYA +111C3;N # SHARADA SIGN UPADHMANIYA +111C4;N # SHARADA OM +111C5;N # SHARADA DANDA +111C6;N # SHARADA DOUBLE DANDA +111C7;N # SHARADA ABBREVIATION SIGN +111C8;N # SHARADA SEPARATOR +111D0;N # SHARADA DIGIT ZERO +111D1;N # SHARADA DIGIT ONE +111D2;N # SHARADA DIGIT TWO +111D3;N # SHARADA DIGIT THREE +111D4;N # SHARADA DIGIT FOUR +111D5;N # SHARADA DIGIT FIVE +111D6;N # SHARADA DIGIT SIX +111D7;N # SHARADA DIGIT SEVEN +111D8;N # SHARADA DIGIT EIGHT +111D9;N # SHARADA DIGIT NINE +11680;N # TAKRI LETTER A +11681;N # TAKRI LETTER AA +11682;N # TAKRI LETTER I +11683;N # TAKRI LETTER II +11684;N # TAKRI LETTER U +11685;N # TAKRI LETTER UU +11686;N # TAKRI LETTER E +11687;N # TAKRI LETTER AI +11688;N # TAKRI LETTER O +11689;N # TAKRI LETTER AU +1168A;N # TAKRI LETTER KA +1168B;N # TAKRI LETTER KHA +1168C;N # TAKRI LETTER GA +1168D;N # TAKRI LETTER GHA +1168E;N # TAKRI LETTER NGA +1168F;N # TAKRI LETTER CA +11690;N # TAKRI LETTER CHA +11691;N # TAKRI LETTER JA +11692;N # TAKRI LETTER JHA +11693;N # TAKRI LETTER NYA +11694;N # TAKRI LETTER TTA +11695;N # TAKRI LETTER TTHA +11696;N # TAKRI LETTER DDA +11697;N # TAKRI LETTER DDHA +11698;N # TAKRI LETTER NNA +11699;N # TAKRI LETTER TA +1169A;N # 
TAKRI LETTER THA +1169B;N # TAKRI LETTER DA +1169C;N # TAKRI LETTER DHA +1169D;N # TAKRI LETTER NA +1169E;N # TAKRI LETTER PA +1169F;N # TAKRI LETTER PHA +116A0;N # TAKRI LETTER BA +116A1;N # TAKRI LETTER BHA +116A2;N # TAKRI LETTER MA +116A3;N # TAKRI LETTER YA +116A4;N # TAKRI LETTER RA +116A5;N # TAKRI LETTER LA +116A6;N # TAKRI LETTER VA +116A7;N # TAKRI LETTER SHA +116A8;N # TAKRI LETTER SA +116A9;N # TAKRI LETTER HA +116AA;N # TAKRI LETTER RRA +116AB;N # TAKRI SIGN ANUSVARA +116AC;N # TAKRI SIGN VISARGA +116AD;N # TAKRI VOWEL SIGN AA +116AE;N # TAKRI VOWEL SIGN I +116AF;N # TAKRI VOWEL SIGN II +116B0;N # TAKRI VOWEL SIGN U +116B1;N # TAKRI VOWEL SIGN UU +116B2;N # TAKRI VOWEL SIGN E +116B3;N # TAKRI VOWEL SIGN AI +116B4;N # TAKRI VOWEL SIGN O +116B5;N # TAKRI VOWEL SIGN AU +116B6;N # TAKRI SIGN VIRAMA +116B7;N # TAKRI SIGN NUKTA +116C0;N # TAKRI DIGIT ZERO +116C1;N # TAKRI DIGIT ONE +116C2;N # TAKRI DIGIT TWO +116C3;N # TAKRI DIGIT THREE +116C4;N # TAKRI DIGIT FOUR +116C5;N # TAKRI DIGIT FIVE +116C6;N # TAKRI DIGIT SIX +116C7;N # TAKRI DIGIT SEVEN +116C8;N # TAKRI DIGIT EIGHT +116C9;N # TAKRI DIGIT NINE 12000;N # CUNEIFORM SIGN A 12001;N # CUNEIFORM SIGN A TIMES A 12002;N # CUNEIFORM SIGN A TIMES BAD @@ -19996,6 +20431,139 @@ FFFD;A # REPLACEMENT CHARACTER 16A36;N # BAMUM LETTER PHASE-F KPA 16A37;N # BAMUM LETTER PHASE-F SAMBA 16A38;N # BAMUM LETTER PHASE-F VUEQ +16F00;N # MIAO LETTER PA +16F01;N # MIAO LETTER BA +16F02;N # MIAO LETTER YI PA +16F03;N # MIAO LETTER PLA +16F04;N # MIAO LETTER MA +16F05;N # MIAO LETTER MHA +16F06;N # MIAO LETTER ARCHAIC MA +16F07;N # MIAO LETTER FA +16F08;N # MIAO LETTER VA +16F09;N # MIAO LETTER VFA +16F0A;N # MIAO LETTER TA +16F0B;N # MIAO LETTER DA +16F0C;N # MIAO LETTER YI TTA +16F0D;N # MIAO LETTER YI TA +16F0E;N # MIAO LETTER TTA +16F0F;N # MIAO LETTER DDA +16F10;N # MIAO LETTER NA +16F11;N # MIAO LETTER NHA +16F12;N # MIAO LETTER YI NNA +16F13;N # MIAO LETTER ARCHAIC NA +16F14;N # MIAO LETTER NNA +16F15;N # MIAO LETTER 
NNHA +16F16;N # MIAO LETTER LA +16F17;N # MIAO LETTER LYA +16F18;N # MIAO LETTER LHA +16F19;N # MIAO LETTER LHYA +16F1A;N # MIAO LETTER TLHA +16F1B;N # MIAO LETTER DLHA +16F1C;N # MIAO LETTER TLHYA +16F1D;N # MIAO LETTER DLHYA +16F1E;N # MIAO LETTER KA +16F1F;N # MIAO LETTER GA +16F20;N # MIAO LETTER YI KA +16F21;N # MIAO LETTER QA +16F22;N # MIAO LETTER QGA +16F23;N # MIAO LETTER NGA +16F24;N # MIAO LETTER NGHA +16F25;N # MIAO LETTER ARCHAIC NGA +16F26;N # MIAO LETTER HA +16F27;N # MIAO LETTER XA +16F28;N # MIAO LETTER GHA +16F29;N # MIAO LETTER GHHA +16F2A;N # MIAO LETTER TSSA +16F2B;N # MIAO LETTER DZZA +16F2C;N # MIAO LETTER NYA +16F2D;N # MIAO LETTER NYHA +16F2E;N # MIAO LETTER TSHA +16F2F;N # MIAO LETTER DZHA +16F30;N # MIAO LETTER YI TSHA +16F31;N # MIAO LETTER YI DZHA +16F32;N # MIAO LETTER REFORMED TSHA +16F33;N # MIAO LETTER SHA +16F34;N # MIAO LETTER SSA +16F35;N # MIAO LETTER ZHA +16F36;N # MIAO LETTER ZSHA +16F37;N # MIAO LETTER TSA +16F38;N # MIAO LETTER DZA +16F39;N # MIAO LETTER YI TSA +16F3A;N # MIAO LETTER SA +16F3B;N # MIAO LETTER ZA +16F3C;N # MIAO LETTER ZSA +16F3D;N # MIAO LETTER ZZA +16F3E;N # MIAO LETTER ZZSA +16F3F;N # MIAO LETTER ARCHAIC ZZA +16F40;N # MIAO LETTER ZZYA +16F41;N # MIAO LETTER ZZSYA +16F42;N # MIAO LETTER WA +16F43;N # MIAO LETTER AH +16F44;N # MIAO LETTER HHA +16F50;N # MIAO LETTER NASALIZATION +16F51;N # MIAO SIGN ASPIRATION +16F52;N # MIAO SIGN REFORMED VOICING +16F53;N # MIAO SIGN REFORMED ASPIRATION +16F54;N # MIAO VOWEL SIGN A +16F55;N # MIAO VOWEL SIGN AA +16F56;N # MIAO VOWEL SIGN AHH +16F57;N # MIAO VOWEL SIGN AN +16F58;N # MIAO VOWEL SIGN ANG +16F59;N # MIAO VOWEL SIGN O +16F5A;N # MIAO VOWEL SIGN OO +16F5B;N # MIAO VOWEL SIGN WO +16F5C;N # MIAO VOWEL SIGN W +16F5D;N # MIAO VOWEL SIGN E +16F5E;N # MIAO VOWEL SIGN EN +16F5F;N # MIAO VOWEL SIGN ENG +16F60;N # MIAO VOWEL SIGN OEY +16F61;N # MIAO VOWEL SIGN I +16F62;N # MIAO VOWEL SIGN IA +16F63;N # MIAO VOWEL SIGN IAN +16F64;N # MIAO VOWEL SIGN IANG +16F65;N # MIAO 
VOWEL SIGN IO +16F66;N # MIAO VOWEL SIGN IE +16F67;N # MIAO VOWEL SIGN II +16F68;N # MIAO VOWEL SIGN IU +16F69;N # MIAO VOWEL SIGN ING +16F6A;N # MIAO VOWEL SIGN U +16F6B;N # MIAO VOWEL SIGN UA +16F6C;N # MIAO VOWEL SIGN UAN +16F6D;N # MIAO VOWEL SIGN UANG +16F6E;N # MIAO VOWEL SIGN UU +16F6F;N # MIAO VOWEL SIGN UEI +16F70;N # MIAO VOWEL SIGN UNG +16F71;N # MIAO VOWEL SIGN Y +16F72;N # MIAO VOWEL SIGN YI +16F73;N # MIAO VOWEL SIGN AE +16F74;N # MIAO VOWEL SIGN AEE +16F75;N # MIAO VOWEL SIGN ERR +16F76;N # MIAO VOWEL SIGN ROUNDED ERR +16F77;N # MIAO VOWEL SIGN ER +16F78;N # MIAO VOWEL SIGN ROUNDED ER +16F79;N # MIAO VOWEL SIGN AI +16F7A;N # MIAO VOWEL SIGN EI +16F7B;N # MIAO VOWEL SIGN AU +16F7C;N # MIAO VOWEL SIGN OU +16F7D;N # MIAO VOWEL SIGN N +16F7E;N # MIAO VOWEL SIGN NG +16F8F;N # MIAO TONE RIGHT +16F90;N # MIAO TONE TOP RIGHT +16F91;N # MIAO TONE ABOVE +16F92;N # MIAO TONE BELOW +16F93;N # MIAO LETTER TONE-2 +16F94;N # MIAO LETTER TONE-3 +16F95;N # MIAO LETTER TONE-4 +16F96;N # MIAO LETTER TONE-5 +16F97;N # MIAO LETTER TONE-6 +16F98;N # MIAO LETTER TONE-7 +16F99;N # MIAO LETTER TONE-8 +16F9A;N # MIAO LETTER REFORMED TONE-1 +16F9B;N # MIAO LETTER REFORMED TONE-2 +16F9C;N # MIAO LETTER REFORMED TONE-4 +16F9D;N # MIAO LETTER REFORMED TONE-5 +16F9E;N # MIAO LETTER REFORMED TONE-6 +16F9F;N # MIAO LETTER REFORMED TONE-8 1B000;W # KATAKANA LETTER ARCHAIC E 1B001;W # HIRAGANA LETTER ARCHAIC YE 1D000;N # BYZANTINE MUSICAL SYMBOL PSILI @@ -21635,6 +22203,149 @@ FFFD;A # REPLACEMENT CHARACTER 1D7FD;N # MATHEMATICAL MONOSPACE DIGIT SEVEN 1D7FE;N # MATHEMATICAL MONOSPACE DIGIT EIGHT 1D7FF;N # MATHEMATICAL MONOSPACE DIGIT NINE +1EE00;N # ARABIC MATHEMATICAL ALEF +1EE01;N # ARABIC MATHEMATICAL BEH +1EE02;N # ARABIC MATHEMATICAL JEEM +1EE03;N # ARABIC MATHEMATICAL DAL +1EE05;N # ARABIC MATHEMATICAL WAW +1EE06;N # ARABIC MATHEMATICAL ZAIN +1EE07;N # ARABIC MATHEMATICAL HAH +1EE08;N # ARABIC MATHEMATICAL TAH +1EE09;N # ARABIC MATHEMATICAL YEH +1EE0A;N # ARABIC MATHEMATICAL KAF 
+1EE0B;N # ARABIC MATHEMATICAL LAM +1EE0C;N # ARABIC MATHEMATICAL MEEM +1EE0D;N # ARABIC MATHEMATICAL NOON +1EE0E;N # ARABIC MATHEMATICAL SEEN +1EE0F;N # ARABIC MATHEMATICAL AIN +1EE10;N # ARABIC MATHEMATICAL FEH +1EE11;N # ARABIC MATHEMATICAL SAD +1EE12;N # ARABIC MATHEMATICAL QAF +1EE13;N # ARABIC MATHEMATICAL REH +1EE14;N # ARABIC MATHEMATICAL SHEEN +1EE15;N # ARABIC MATHEMATICAL TEH +1EE16;N # ARABIC MATHEMATICAL THEH +1EE17;N # ARABIC MATHEMATICAL KHAH +1EE18;N # ARABIC MATHEMATICAL THAL +1EE19;N # ARABIC MATHEMATICAL DAD +1EE1A;N # ARABIC MATHEMATICAL ZAH +1EE1B;N # ARABIC MATHEMATICAL GHAIN +1EE1C;N # ARABIC MATHEMATICAL DOTLESS BEH +1EE1D;N # ARABIC MATHEMATICAL DOTLESS NOON +1EE1E;N # ARABIC MATHEMATICAL DOTLESS FEH +1EE1F;N # ARABIC MATHEMATICAL DOTLESS QAF +1EE21;N # ARABIC MATHEMATICAL INITIAL BEH +1EE22;N # ARABIC MATHEMATICAL INITIAL JEEM +1EE24;N # ARABIC MATHEMATICAL INITIAL HEH +1EE27;N # ARABIC MATHEMATICAL INITIAL HAH +1EE29;N # ARABIC MATHEMATICAL INITIAL YEH +1EE2A;N # ARABIC MATHEMATICAL INITIAL KAF +1EE2B;N # ARABIC MATHEMATICAL INITIAL LAM +1EE2C;N # ARABIC MATHEMATICAL INITIAL MEEM +1EE2D;N # ARABIC MATHEMATICAL INITIAL NOON +1EE2E;N # ARABIC MATHEMATICAL INITIAL SEEN +1EE2F;N # ARABIC MATHEMATICAL INITIAL AIN +1EE30;N # ARABIC MATHEMATICAL INITIAL FEH +1EE31;N # ARABIC MATHEMATICAL INITIAL SAD +1EE32;N # ARABIC MATHEMATICAL INITIAL QAF +1EE34;N # ARABIC MATHEMATICAL INITIAL SHEEN +1EE35;N # ARABIC MATHEMATICAL INITIAL TEH +1EE36;N # ARABIC MATHEMATICAL INITIAL THEH +1EE37;N # ARABIC MATHEMATICAL INITIAL KHAH +1EE39;N # ARABIC MATHEMATICAL INITIAL DAD +1EE3B;N # ARABIC MATHEMATICAL INITIAL GHAIN +1EE42;N # ARABIC MATHEMATICAL TAILED JEEM +1EE47;N # ARABIC MATHEMATICAL TAILED HAH +1EE49;N # ARABIC MATHEMATICAL TAILED YEH +1EE4B;N # ARABIC MATHEMATICAL TAILED LAM +1EE4D;N # ARABIC MATHEMATICAL TAILED NOON +1EE4E;N # ARABIC MATHEMATICAL TAILED SEEN +1EE4F;N # ARABIC MATHEMATICAL TAILED AIN +1EE51;N # ARABIC MATHEMATICAL TAILED SAD +1EE52;N # 
ARABIC MATHEMATICAL TAILED QAF +1EE54;N # ARABIC MATHEMATICAL TAILED SHEEN +1EE57;N # ARABIC MATHEMATICAL TAILED KHAH +1EE59;N # ARABIC MATHEMATICAL TAILED DAD +1EE5B;N # ARABIC MATHEMATICAL TAILED GHAIN +1EE5D;N # ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F;N # ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61;N # ARABIC MATHEMATICAL STRETCHED BEH +1EE62;N # ARABIC MATHEMATICAL STRETCHED JEEM +1EE64;N # ARABIC MATHEMATICAL STRETCHED HEH +1EE67;N # ARABIC MATHEMATICAL STRETCHED HAH +1EE68;N # ARABIC MATHEMATICAL STRETCHED TAH +1EE69;N # ARABIC MATHEMATICAL STRETCHED YEH +1EE6A;N # ARABIC MATHEMATICAL STRETCHED KAF +1EE6C;N # ARABIC MATHEMATICAL STRETCHED MEEM +1EE6D;N # ARABIC MATHEMATICAL STRETCHED NOON +1EE6E;N # ARABIC MATHEMATICAL STRETCHED SEEN +1EE6F;N # ARABIC MATHEMATICAL STRETCHED AIN +1EE70;N # ARABIC MATHEMATICAL STRETCHED FEH +1EE71;N # ARABIC MATHEMATICAL STRETCHED SAD +1EE72;N # ARABIC MATHEMATICAL STRETCHED QAF +1EE74;N # ARABIC MATHEMATICAL STRETCHED SHEEN +1EE75;N # ARABIC MATHEMATICAL STRETCHED TEH +1EE76;N # ARABIC MATHEMATICAL STRETCHED THEH +1EE77;N # ARABIC MATHEMATICAL STRETCHED KHAH +1EE79;N # ARABIC MATHEMATICAL STRETCHED DAD +1EE7A;N # ARABIC MATHEMATICAL STRETCHED ZAH +1EE7B;N # ARABIC MATHEMATICAL STRETCHED GHAIN +1EE7C;N # ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E;N # ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80;N # ARABIC MATHEMATICAL LOOPED ALEF +1EE81;N # ARABIC MATHEMATICAL LOOPED BEH +1EE82;N # ARABIC MATHEMATICAL LOOPED JEEM +1EE83;N # ARABIC MATHEMATICAL LOOPED DAL +1EE84;N # ARABIC MATHEMATICAL LOOPED HEH +1EE85;N # ARABIC MATHEMATICAL LOOPED WAW +1EE86;N # ARABIC MATHEMATICAL LOOPED ZAIN +1EE87;N # ARABIC MATHEMATICAL LOOPED HAH +1EE88;N # ARABIC MATHEMATICAL LOOPED TAH +1EE89;N # ARABIC MATHEMATICAL LOOPED YEH +1EE8B;N # ARABIC MATHEMATICAL LOOPED LAM +1EE8C;N # ARABIC MATHEMATICAL LOOPED MEEM +1EE8D;N # ARABIC MATHEMATICAL LOOPED NOON +1EE8E;N # ARABIC MATHEMATICAL LOOPED SEEN +1EE8F;N # ARABIC MATHEMATICAL LOOPED 
AIN +1EE90;N # ARABIC MATHEMATICAL LOOPED FEH +1EE91;N # ARABIC MATHEMATICAL LOOPED SAD +1EE92;N # ARABIC MATHEMATICAL LOOPED QAF +1EE93;N # ARABIC MATHEMATICAL LOOPED REH +1EE94;N # ARABIC MATHEMATICAL LOOPED SHEEN +1EE95;N # ARABIC MATHEMATICAL LOOPED TEH +1EE96;N # ARABIC MATHEMATICAL LOOPED THEH +1EE97;N # ARABIC MATHEMATICAL LOOPED KHAH +1EE98;N # ARABIC MATHEMATICAL LOOPED THAL +1EE99;N # ARABIC MATHEMATICAL LOOPED DAD +1EE9A;N # ARABIC MATHEMATICAL LOOPED ZAH +1EE9B;N # ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1;N # ARABIC MATHEMATICAL DOUBLE-STRUCK BEH +1EEA2;N # ARABIC MATHEMATICAL DOUBLE-STRUCK JEEM +1EEA3;N # ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5;N # ARABIC MATHEMATICAL DOUBLE-STRUCK WAW +1EEA6;N # ARABIC MATHEMATICAL DOUBLE-STRUCK ZAIN +1EEA7;N # ARABIC MATHEMATICAL DOUBLE-STRUCK HAH +1EEA8;N # ARABIC MATHEMATICAL DOUBLE-STRUCK TAH +1EEA9;N # ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB;N # ARABIC MATHEMATICAL DOUBLE-STRUCK LAM +1EEAC;N # ARABIC MATHEMATICAL DOUBLE-STRUCK MEEM +1EEAD;N # ARABIC MATHEMATICAL DOUBLE-STRUCK NOON +1EEAE;N # ARABIC MATHEMATICAL DOUBLE-STRUCK SEEN +1EEAF;N # ARABIC MATHEMATICAL DOUBLE-STRUCK AIN +1EEB0;N # ARABIC MATHEMATICAL DOUBLE-STRUCK FEH +1EEB1;N # ARABIC MATHEMATICAL DOUBLE-STRUCK SAD +1EEB2;N # ARABIC MATHEMATICAL DOUBLE-STRUCK QAF +1EEB3;N # ARABIC MATHEMATICAL DOUBLE-STRUCK REH +1EEB4;N # ARABIC MATHEMATICAL DOUBLE-STRUCK SHEEN +1EEB5;N # ARABIC MATHEMATICAL DOUBLE-STRUCK TEH +1EEB6;N # ARABIC MATHEMATICAL DOUBLE-STRUCK THEH +1EEB7;N # ARABIC MATHEMATICAL DOUBLE-STRUCK KHAH +1EEB8;N # ARABIC MATHEMATICAL DOUBLE-STRUCK THAL +1EEB9;N # ARABIC MATHEMATICAL DOUBLE-STRUCK DAD +1EEBA;N # ARABIC MATHEMATICAL DOUBLE-STRUCK ZAH +1EEBB;N # ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN +1EEF0;N # ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL +1EEF1;N # ARABIC MATHEMATICAL OPERATOR HAH WITH DAL 1F000;N # MAHJONG TILE EAST WIND 1F001;N # MAHJONG TILE SOUTH WIND 1F002;N # MAHJONG TILE WEST WIND @@ -21938,6 +22649,8 @@ 
FFFD;A # REPLACEMENT CHARACTER 1F167;A # NEGATIVE CIRCLED LATIN CAPITAL LETTER X 1F168;A # NEGATIVE CIRCLED LATIN CAPITAL LETTER Y 1F169;A # NEGATIVE CIRCLED LATIN CAPITAL LETTER Z +1F16A;N # RAISED MC SIGN +1F16B;N # RAISED MD SIGN 1F170;A # NEGATIVE SQUARED LATIN CAPITAL LETTER A 1F171;A # NEGATIVE SQUARED LATIN CAPITAL LETTER B 1F172;A # NEGATIVE SQUARED LATIN CAPITAL LETTER C @@ -22564,6 +23277,10 @@ FFFD;A # REPLACEMENT CHARACTER 1F53B;N # DOWN-POINTING RED TRIANGLE 1F53C;N # UP-POINTING SMALL RED TRIANGLE 1F53D;N # DOWN-POINTING SMALL RED TRIANGLE +1F540;N # CIRCLED CROSS POMMEE +1F541;N # CROSS POMMEE WITH HALF-CIRCLE BELOW +1F542;N # CROSS POMMEE +1F543;N # NOTCHED LEFT SEMICIRCLE WITH THREE DOTS 1F550;N # CLOCK FACE ONE OCLOCK 1F551;N # CLOCK FACE TWO OCLOCK 1F552;N # CLOCK FACE THREE OCLOCK @@ -22593,6 +23310,7 @@ FFFD;A # REPLACEMENT CHARACTER 1F5FD;N # STATUE OF LIBERTY 1F5FE;N # SILHOUETTE OF JAPAN 1F5FF;N # MOYAI +1F600;N # GRINNING FACE 1F601;N # GRINNING FACE WITH SMILING EYES 1F602;N # FACE WITH TEARS OF JOY 1F603;N # SMILING FACE WITH OPEN MOUTH @@ -22609,30 +23327,42 @@ FFFD;A # REPLACEMENT CHARACTER 1F60E;N # SMILING FACE WITH SUNGLASSES 1F60F;N # SMIRKING FACE 1F610;N # NEUTRAL FACE +1F611;N # EXPRESSIONLESS FACE 1F612;N # UNAMUSED FACE 1F613;N # FACE WITH COLD SWEAT 1F614;N # PENSIVE FACE +1F615;N # CONFUSED FACE 1F616;N # CONFOUNDED FACE +1F617;N # KISSING FACE 1F618;N # FACE THROWING A KISS +1F619;N # KISSING FACE WITH SMILING EYES 1F61A;N # KISSING FACE WITH CLOSED EYES +1F61B;N # FACE WITH STUCK-OUT TONGUE 1F61C;N # FACE WITH STUCK-OUT TONGUE AND WINKING EYE 1F61D;N # FACE WITH STUCK-OUT TONGUE AND TIGHTLY-CLOSED EYES 1F61E;N # DISAPPOINTED FACE +1F61F;N # WORRIED FACE 1F620;N # ANGRY FACE 1F621;N # POUTING FACE 1F622;N # CRYING FACE 1F623;N # PERSEVERING FACE 1F624;N # FACE WITH LOOK OF TRIUMPH 1F625;N # DISAPPOINTED BUT RELIEVED FACE +1F626;N # FROWNING FACE WITH OPEN MOUTH +1F627;N # ANGUISHED FACE 1F628;N # FEARFUL FACE 1F629;N # WEARY 
FACE 1F62A;N # SLEEPY FACE 1F62B;N # TIRED FACE +1F62C;N # GRIMACING FACE 1F62D;N # LOUDLY CRYING FACE +1F62E;N # FACE WITH OPEN MOUTH +1F62F;N # HUSHED FACE 1F630;N # FACE WITH OPEN MOUTH AND COLD SWEAT 1F631;N # FACE SCREAMING IN FEAR 1F632;N # ASTONISHED FACE 1F633;N # FLUSHED FACE +1F634;N # SLEEPING FACE 1F635;N # DIZZY FACE 1F636;N # FACE WITHOUT MOUTH 1F637;N # FACE WITH MEDICAL MASK @@ -22845,7 +23575,7 @@ FFFD;A # REPLACEMENT CHARACTER 20000..2A6D6;W # <CJK Ideograph Extension B, First>..<CJK Ideograph Extension B, Last> 2A6D7..2A6FF;W # <reserved-2A6D7>..<reserved-2A6FF> 2A700..2B734;W # <CJK Ideograph Extension C, First>..<CJK Ideograph Extension C, Last> -2B735..2F73F;W # <reserved-2B735>..<reserved-2F73F> +2B735..2B73F;W # <reserved-2B735>..<reserved-2B73F> 2B740..2B81D;W # <CJK Ideograph Extension D, First>..<CJK Ideograph Extension D, Last> 2B81E..2F7FF;W # <reserved-2B735>..<reserved-2F7FF> 2F800;W # CJK COMPATIBILITY IDEOGRAPH-2F800 diff --git a/lib/unicore/EmojiSources.txt b/lib/unicore/EmojiSources.txt index 6f7161e296..c360c5e253 100644 --- a/lib/unicore/EmojiSources.txt +++ b/lib/unicore/EmojiSources.txt @@ -1,8 +1,8 @@ -# EmojiSources-6.0.0.txt -# Date: 2010-04-24, 00:00:00 GMT [MS] +# EmojiSources-6.1.0.txt +# Date: 2011-08-30, 23:30:00 GMT [MS, KW] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ # @@ -15,6 +15,9 @@ # Note: It is possible that future versions of this file will include # additional data columns providing mappings for additional vendors. # +# Created for Unicode 6.0 by Marcus Scherer. +# Updated for Unicode 6.1 by Ken Whistler. +# # Format: Semicolon-delimited file with a fixed number of fields. # The number of fields may increase in the future. 
# diff --git a/lib/unicore/HangulSyllableType.txt b/lib/unicore/HangulSyllableType.txt index eaafd20b79..8b457daaba 100644 --- a/lib/unicore/HangulSyllableType.txt +++ b/lib/unicore/HangulSyllableType.txt @@ -1,8 +1,8 @@ -# HangulSyllableType-6.0.0.txt -# Date: 2010-05-18, 00:49:27 GMT [MD] +# HangulSyllableType-6.1.0.txt +# Date: 2011-08-25, 00:02:18 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ diff --git a/lib/unicore/Index.txt b/lib/unicore/Index.txt index 4dbd2ce602..50e1c9d5b9 100644 --- a/lib/unicore/Index.txt +++ b/lib/unicore/Index.txt @@ -249,6 +249,7 @@ Arabic Contextual Form Glyphs FB50 Arabic Contextual Form Glyphs FE80 ARABIC DATE SEPARATOR 060D ARABIC DECIMAL SEPARATOR 066B +Arabic Extended-A 08A0 Arabic Extensions 0671 ARABIC FULL STOP 06D4 Arabic Harakat 064B @@ -261,6 +262,7 @@ Arabic Letters, Extended 0671 Arabic Letters, Extended 0750 ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM FDFD Arabic Ligatures FBEA +Arabic Mathematical Alphabetic Symbols 1EE00 arabic phrase separator 066C Arabic Points, Glyphs for Spacing Forms of FE70 Arabic Presentation Forms-A FB50 @@ -769,7 +771,7 @@ cartesian product 00D7 CASTLE 26EB CAT 1F408 CAT FACE 1F431 -Cat faces 1F638 +Cat Faces 1F638 CAUTION SIGN 2621 cd 1F4BF CEDI SIGN 20B5 @@ -792,6 +794,7 @@ CENTRELINE LOW LINE FE4E CENTRELINE OVERLINE FE4A cgj 034F CHAINS 26D3 +Chakma 11100 Cham AA00 chandrakkala, malayalam 0D4D CHARACTER INTRODUCER, SINGLE 009A @@ -1224,6 +1227,7 @@ Cross Dingbats 2719 CROSS MARK 274C CROSS OF JERUSALEM 2629 CROSS OF LORRAINE 2628 +CROSS POMMEE 1F542 cross ratio 211E cross, constantine's 2627 CROSS, DOTTED 205C @@ -1339,8 +1343,10 @@ DASH, CIRCLED 229D DASH, EM 2014 DASH, EN 2013 DASH, FIGURE 2012 +dash, omission 2E3A dash, quotation 2015 DASH, SWUNG 2053 +DASH, TWO-EM 2E3A DASH, WAVE 
301C DASH, WAVY 3030 DASHED LOW LINE FE4D @@ -2227,6 +2233,7 @@ Greek, Diacritics for 0342 Greek, Precomposed Polytonic 1F00 GROUND, EARTH 23DA group lock 21F0 +group select (ISO 9995-7) 21E8 group separator 001D GROUP SEPARATOR, SYMBOL FOR 241D GUARANI SIGN 20B2 @@ -2987,6 +2994,7 @@ LESS-THAN, VERY MUCH 22D8 Letterlike Mathematical Symbols, Hebrew 2135 Letterlike Symbols 2100 level 2 lock 21EB +level 2 select (ISO 9995-7) 21E7 level 3 lock 21EF level 3 select 21EE LEZH, LATIN SMALL LETTER 026E @@ -3141,6 +3149,7 @@ MACRON, COMBINING DOUBLE 035E MACRON, MODIFIER LETTER 02C9 MACRON, MODIFIER LETTER LOW 02CD macron, spacing 00AF +Magnetic Ink Character Recognition, MICR 2446 MAGNIFYING GLASS, LEFT-POINTING 1F50D MAGNIFYING GLASS, RIGHT-POINTING 1F50E Mahjong Tiles 1F000 @@ -3181,6 +3190,8 @@ marker, line 2319 Markers, Go 2686 Marks, Combining Diacritical 0300 Marks, Combining Half FE20 +marque de commerce 1F16A +marque deposee 1F16B MARRIAGE SYMBOL 26AD mars 2642 MASCULINE ORDINAL INDICATOR 00BA @@ -3222,6 +3233,8 @@ Mathematical Symbols, Script 1D49C Mathematical Symbols-A, Miscellaneous 27C0 Mathematical Symbols-B, Miscellaneous 2980 MATRIX, HERMITIAN CONJUGATE 22B9 +MC SIGN, RAISED 1F16A +MD SIGN, RAISED 1F16B MEASURED ANGLE 2221 Measured Angles, Angles and 299B MEASURED BY 225E @@ -3238,6 +3251,7 @@ MEDIUM WHITE CIRCLE 26AA MEDIUM, END OF 0019 MEDIUM, SYMBOL FOR END OF 2419 Meetei Mayek ABC0 +Meetei Mayek Extensions AAE0 MEMBER, CONTAINS AS 220B MEMBER, DOES NOT CONTAIN AS 220C MEMBER, SMALL CONTAINS AS 220D @@ -3246,11 +3260,15 @@ MEMBERSHIP, Z NOTATION BAG 22FF MEN HOLDING HANDS, TWO 1F46C MERCURY 263F merge 2A07 +Meroitic Cursive 109A0 +Meroitic Hieroglyphs 10980 merpadi, tamil 0BF8 MESSAGE WAITING 0095 MESSAGE, PRIVACY 009E Metrical Symbols 23D1 mho 2127 +Miao 16F00 +MICR, Magnetic Ink Character Recognition 2446 MICRO SIGN 00B5 mid space 2005 MIDDLE DOT 00B7 @@ -3599,6 +3617,7 @@ OM, DEVANAGARI 0950 OM, TIBETAN SYLLABLE 0F00 omega pi 03D6 OMEGA, LATIN SMALL 
LETTER CLOSED 0277 +omission dash 2E3A ONE DOT LEADER 2024 ONE HALF, VULGAR FRACTION 00BD ONE QUARTER, VULGAR FRACTION 00BC @@ -3622,6 +3641,7 @@ opening curly bracket 007B opening parenthesis 0028 opening square bracket 005B OPERATING SYSTEM COMMAND 009D +operating system key (ISO 9995-7) 2318 Operators Supplement, Mathematical 2A00 Operators, Database Theory 27D5 Operators, Dotted Mathematical 2234 @@ -3728,6 +3748,7 @@ page up 21DE PAGE, NEXT 2398 PAGE, PREVIOUS 2397 Pahlavi 10B60 +Palaeotype Transliteration Symbols 2E32 PALATAL HOOK, LATIN SMALL LETTER T WITH 01AB PALATALIZED HOOK BELOW, COMBINING 0321 PALM BRANCH 2E19 @@ -4498,6 +4519,7 @@ SHADOWED WHITE CIRCLE 274D SHAMROCK 2618 shamrock 2663 Shapes, Geometric 25A0 +Sharada 11180 SHARP S, LATIN SMALL LETTER 00DF SHARP SIGN, MUSIC 266F Shavian 10450 @@ -4640,6 +4662,7 @@ SOLIDUS OVERLAY, COMBINING SHORT 0337 SOLIDUS, BIG 29F8 SOLIDUS, BIG REVERSE 29F9 SOLIDUS, REVERSE 005C +Sora Sompeng 110D0 sound 1F50A SOUND RECORDING COPYRIGHT 2117 SOURCE, INFORMATION 2139 @@ -4805,6 +4828,7 @@ SUN BEHIND CLOUD 26C5 SUN WITH RAYS, BLACK 2600 SUN WITH RAYS, WHITE 263C Sundanese 1B80 +Sundanese Supplement 1CC0 sunna, telugu 0C02 Superscript Digits 2070 Superscript Letter Diacritics, Latin Medieval 1DD3 @@ -4995,6 +5019,7 @@ Tai Xuan Jing Symbols 1D300 Tails, Fish 297C tainome japanese bullet 25C9 TAKE, PRESCRIPTION 211E +Takri 11680 Tamil 0B80 TAMIL AS ABOVE SIGN 0BF8 tamil aytham 0B83 @@ -5275,6 +5300,7 @@ TWO ASTERISKS ALIGNED VERTICALLY 2051 TWO DOT LEADER 2025 TWO DOT PUNCTUATION 205A TWO, SUPERSCRIPT 00B2 +TWO-EM DASH 2E3A U BAR, LATIN CAPITAL LETTER 0244 U BAR, LATIN SMALL LETTER 0289 U WITH ACUTE, LATIN CAPITAL LETTER 00DA @@ -5369,9 +5395,9 @@ URANUS 2645 uranus 26E2 urdu paragraph separator 203B URN, FUNERAL 26B1 -User interface Input Status Symbols 1F520 +User Interface Input Status Symbols 1F520 User Interface Symbols 1F500 -User interface Symbols 1F53A +User Interface Symbols 1F53A v above 030C V WITH DOT BELOW, 
LATIN SMALL LETTER 1E7F V WITH HOOK, LATIN CAPITAL LETTER 01B2 diff --git a/lib/unicore/IndicMatraCategory.txt b/lib/unicore/IndicMatraCategory.txt index c5f2e11e58..68cbd09350 100644 --- a/lib/unicore/IndicMatraCategory.txt +++ b/lib/unicore/IndicMatraCategory.txt @@ -1,8 +1,8 @@ -# IndicMatraCategory-6.0.0.txt -# Date: 2010-07-14, 15:03:00 PDT [KW] +# IndicMatraCategory-6.1.0.txt +# Date: 2011-08-31, 23:50:00 GMT [KW] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see UAX #44. # @@ -63,7 +63,7 @@ # Tagalog, Hanunoo, Buhid, Tagbanwa, Khmer, Limbu, New Tai Lue, # Buginese, Tai Tham, Balinese, Sundanese, Batak, Lepcha, # Syloti Nagri, Saurashtra, Rejang, Javanese, Cham, Tai Viet, -# Meetei Mayek, Karoshthi, Brahmi, Kaithi +# Meetei Mayek, Kharoshthi, Brahmi, Kaithi, Chakma, Sharada, Takri # # All characters for all other scripts not in that list # take the default value for this property. 
@@ -157,12 +157,17 @@ A9B4..A9B5 ; Right # Mc [2] JAVANESE VOWEL SIGN TARUNG..JAVANESE VOWEL SIGN AAB1 ; Right # Lo TAI VIET VOWEL AA AABA ; Right # Lo TAI VIET VOWEL UA AABD ; Right # Lo TAI VIET VOWEL AN +AAEF ; Right # Mc MEETEI MAYEK VOWEL SIGN AAU ABE3..ABE4 ; Right # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP ABE6..ABE7 ; Right # Mc [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP ABE9..ABEA ; Right # Mc [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEETEI MAYEK VOWEL SIGN NUNG 110B0 ; Right # Mc KAITHI VOWEL SIGN AA 110B2 ; Right # Mc KAITHI VOWEL SIGN II 110B7..110B8 ; Right # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU +111B3 ; Right # Mc SHARADA VOWEL SIGN AA +111B5 ; Right # Mc SHARADA VOWEL SIGN II +111C0 ; Right # Mc SHARADA SIGN VIRAMA +116AF ; Right # Mc TAKRI VOWEL SIGN II # Indic_Matra_Category=Left @@ -190,7 +195,12 @@ ABE9..ABEA ; Right # Mc [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEETEI MAYEK V 1C27..1C28 ; Left # Mc [2] LEPCHA VOWEL SIGN I..LEPCHA VOWEL SIGN O A9BA..A9BB ; Left # Mc [2] JAVANESE VOWEL SIGN TALING..JAVANESE VOWEL SIGN DIRGA MURE AA2F..AA30 ; Left # Mc [2] CHAM VOWEL SIGN O..CHAM VOWEL SIGN AI +AAEB ; Left # Mc MEETEI MAYEK VOWEL SIGN II +AAEE ; Left # Mc MEETEI MAYEK VOWEL SIGN AU 110B1 ; Left # Mc KAITHI VOWEL SIGN I +1112C ; Left # Mc CHAKMA VOWEL SIGN E +111B4 ; Left # Mc SHARADA VOWEL SIGN I +116AE ; Left # Mc TAKRI VOWEL SIGN I # Indic_Matra_Category=Visual_Order_Left @@ -289,12 +299,21 @@ AAB0 ; Top # Mn TAI VIET MAI KANG AAB2..AAB3 ; Top # Mn [2] TAI VIET VOWEL I..TAI VIET VOWEL UE AAB7..AAB8 ; Top # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA AABE ; Top # Mn TAI VIET VOWEL AM +AAED ; Top # Mn MEETEI MAYEK VOWEL SIGN AAI ABE5 ; Top # Mn MEETEI MAYEK VOWEL SIGN ANAP 10A05 ; Top # Mn KHAROSHTHI VOWEL SIGN E 11038..1103B ; Top # Mn [4] BRAHMI VOWEL SIGN AA..BRAHMI VOWEL SIGN II 11042..11045 ; Top # Mn [4] BRAHMI VOWEL SIGN E..BRAHMI VOWEL SIGN AU 11046 ; Top # Mn BRAHMI VIRAMA 110B5..110B6 ; 
Top # Mn [2] KAITHI VOWEL SIGN E..KAITHI VOWEL SIGN AI +11127..11129 ; Top # Mn [3] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN II +1112D ; Top # Mn CHAKMA VOWEL SIGN AI +11130 ; Top # Mn CHAKMA VOWEL SIGN OI +11134 ; Top # Mn CHAKMA MAAYYAA +111BC..111BE ; Top # Mn [3] SHARADA VOWEL SIGN E..SHARADA VOWEL SIGN O +116AD ; Top # Mn TAKRI VOWEL SIGN AA +116B2..116B5 ; Top # Mn [4] TAKRI VOWEL SIGN E..TAKRI VOWEL SIGN AU +116B6 ; Top # Mn TAKRI SIGN VIRAMA # Indic_Matra_Category=Bottom @@ -352,6 +371,7 @@ A9B8..A9B9 ; Bottom # Mn [2] JAVANESE VOWEL SIGN SUKU..JAVANESE VOWEL SIGN AA2D ; Bottom # Mn CHAM VOWEL SIGN U AA32 ; Bottom # Mn CHAM VOWEL SIGN UE AAB4 ; Bottom # Mn TAI VIET VOWEL U +AAEC ; Bottom # Mn MEETEI MAYEK VOWEL SIGN UU ABE8 ; Bottom # Mn MEETEI MAYEK VOWEL SIGN UNAP ABED ; Bottom # Mn MEETEI MAYEK APUN IYEK 10A02..10A03 ; Bottom # Mn [2] KHAROSHTHI VOWEL SIGN U..KHAROSHTHI VOWEL SIGN VOCALIC R @@ -359,6 +379,10 @@ ABED ; Bottom # Mn MEETEI MAYEK APUN IYEK 1103C..11041 ; Bottom # Mn [6] BRAHMI VOWEL SIGN U..BRAHMI VOWEL SIGN VOCALIC LL 110B3..110B4 ; Bottom # Mn [2] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN UU 110B9 ; Bottom # Mn KAITHI SIGN VIRAMA +1112A..1112B ; Bottom # Mn [2] CHAKMA VOWEL SIGN U..CHAKMA VOWEL SIGN UU +11131..11132 ; Bottom # Mn [2] CHAKMA O MARK..CHAKMA AU MARK +111B6..111BB ; Bottom # Mn [6] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN VOCALIC LL +116B0..116B1 ; Bottom # Mn [2] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN UU # Indic_Matra_Category=Top_And_Bottom @@ -367,6 +391,7 @@ ABED ; Bottom # Mn MEETEI MAYEK APUN IYEK 0F76..0F79 ; Top_And_Bottom # Mn [4] TIBETAN VOWEL SIGN VOCALIC R..TIBETAN VOWEL SIGN VOCALIC LL 0F81 ; Top_And_Bottom # Mn TIBETAN VOWEL SIGN REVERSED II 1B3C ; Top_And_Bottom # Mn BALINESE VOWEL SIGN LA LENGA +1112E..1112F ; Top_And_Bottom # Mn [2] CHAKMA VOWEL SIGN O..CHAKMA VOWEL SIGN AU # Indic_Matra_Category=Top_And_Right @@ -377,6 +402,7 @@ ABED ; Bottom # Mn MEETEI MAYEK APUN IYEK 0CCA..0CCB ; Top_And_Right # Mc [2] KANNADA 
VOWEL SIGN O..KANNADA VOWEL SIGN OO 1925..1926 ; Top_And_Right # Mc [2] LIMBU VOWEL SIGN OO..LIMBU VOWEL SIGN AU 1B43 ; Top_And_Right # Mc BALINESE VOWEL SIGN PEPET TEDUNG +111BF ; Top_And_Right # Mc SHARADA VOWEL SIGN AU # Indic_Matra_Category=Top_And_Left @@ -413,6 +439,8 @@ A9C0 ; Bottom_And_Right # Mc JAVANESE PANGKON 1039 ; Invisible # Mn MYANMAR SIGN VIRAMA 17D2 ; Invisible # Mn KHMER SIGN COENG 1A60 ; Invisible # Mn TAI THAM SIGN SAKOT +AAF6 ; Invisible # Mn MEETEI MAYEK VIRAMA 10A3F ; Invisible # Mn KHAROSHTHI VIRAMA +11133 ; Invisible # Mn CHAKMA VIRAMA # EOF diff --git a/lib/unicore/IndicSyllabicCategory.txt b/lib/unicore/IndicSyllabicCategory.txt index 674c4def38..9d771bacc0 100644 --- a/lib/unicore/IndicSyllabicCategory.txt +++ b/lib/unicore/IndicSyllabicCategory.txt @@ -1,8 +1,8 @@ -# IndicSyllabicCategory-6.0.0.txt -# Date: 2010-05-25, 11:45:00 PDT [KW] +# IndicSyllabicCategory-6.1.0.txt +# Date: 2011-08-31, 23:54:00 GMT [KW] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see UAX #44. 
# @@ -43,7 +43,7 @@ # Tagalog, Hanunoo, Buhid, Tagbanwa, Khmer, Limbu, Tai Le, New Tai Lue, # Buginese, Tai Tham, Balinese, Sundanese, Batak, Lepcha, # Syloti Nagri, Phags-Pa, Saurashtra, Kayah Li, Rejang, Javanese, Cham, Tai Viet, -# Meetei Mayek, Karoshthi, Brahmi, Kaithi +# Meetei Mayek, Kharoshthi, Brahmi, Kaithi, Chakma, Sharada, Takri # # All characters for all other scripts not in that list # take the default value for this property, unless they @@ -99,12 +99,18 @@ A880 ; Bindu # Mc SAURASHTRA SIGN ANUSVARA 11000 ; Bindu # Mc BRAHMI SIGN CANDRABINDU 11001 ; Bindu # Mn BRAHMI SIGN ANUSVARA 11080..11081 ; Bindu # Mn [2] KAITHI SIGN CANDRABINDU..KAITHI SIGN ANUSVARA +11100..11101 ; Bindu # Mn CHAKMA SIGN CANDRABINDU..CHAKMA SIGN ANUSVARA +11180..11181 ; Bindu # Mn SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +116AB ; Bindu # Mn TAKRI SIGN ANUSVARA # ================================================ # Indic_Syllabic_Category=Visarga # Visarga (-h) +# Includes specialized case for Sanskrit: ardhavisarga +# Excludes letters for jihvamuliya and upadhmaniya, which are +# related, but structured somewhat differently. 
# [Not derivable] @@ -122,11 +128,17 @@ A880 ; Bindu # Mc SAURASHTRA SIGN ANUSVARA 17C7 ; Visarga # Mc KHMER SIGN REAHMUK 1B04 ; Visarga # Mc BALINESE SIGN BISAH 1B82 ; Visarga # Mc SUNDANESE SIGN PANGWISAD +1CF2 ; Visarga # Mc VEDIC SIGN ARDHAVISARGA +1CF3 ; Visarga # Mc VEDIC SIGN ROTATED ARDHAVISARGA A881 ; Visarga # Mc SAURASHTRA SIGN VISARGA A983 ; Visarga # Mc JAVANESE SIGN WIGNYAN +AAF5 ; Visarga # Mc MEETEI MAYEK VOWEL SIGN VISARGA 10A0F ; Visarga # Mn KHAROSHTHI SIGN VISARGA 11002 ; Visarga # Mc BRAHMI SIGN VISARGA 11082 ; Visarga # Mc KAITHI SIGN VISARGA +11102 ; Visarga # Mn CHAKMA SIGN VISARGA +11182 ; Visarga # Mn SHARADA SIGN VISARGA +116AC ; Visarga # Mc TAKRI SIGN VISARGA # ================================================ @@ -145,6 +157,8 @@ A983 ; Visarga # Mc JAVANESE SIGN WIGNYAN 0D3D ; Avagraha # Lo MALAYALAM SIGN AVAGRAHA 0F85 ; Avagraha # Po TIBETAN MARK PALUTA 17DC ; Avagraha # Lo KHMER SIGN AVAKRAHASANYA +1BBA ; Avagraha # Lo SUNDANESE AVAGRAHA +111C1 ; Avagraha # Lo SHARADA SIGN AVAGRAHA # ================================================ @@ -165,6 +179,7 @@ A983 ; Visarga # Mc JAVANESE SIGN WIGNYAN 1C37 ; Nukta # Mn LEPCHA SIGN NUKTA A9B3 ; Nukta # Mn JAVANESE SIGN CECAK TELU 110BA ; Nukta # Mn KAITHI SIGN NUKTA +116B7 ; Nukta # Mn TAKRI SIGN NUKTA # ================================================ @@ -196,15 +211,20 @@ A9B3 ; Nukta # Mn JAVANESE SIGN CECAK TELU 1A60 ; Virama # Mn TAI THAM SIGN SAKOT 1B44 ; Virama # Mc BALINESE ADEG ADEG 1BAA ; Virama # Mc SUNDANESE SIGN PAMAAEH +1BAB ; Virama # Mc SUNDANESE SIGN VIRAMA 1BF2..1BF3 ; Virama # Mc [2] BATAK PANGOLAT..BATAK PANONGONAN A806 ; Virama # Mn SYLOTI NAGRI SIGN HASANTA A8C4 ; Virama # Mn SAURASHTRA SIGN VIRAMA A953 ; Virama # Mc REJANG VIRAMA A9C0 ; Virama # Mc JAVANESE PANGKON +AAF6 ; Virama # Mn MEETEI MAYEK VIRAMA ABED ; Virama # Mn MEETEI MAYEK APUN IYEK 10A3F ; Virama # Mn KHAROSHTHI VIRAMA 11046 ; Virama # Mn BRAHMI VIRAMA 110B9 ; Virama # Mn KAITHI SIGN VIRAMA +11133..11134 ; 
Virama # Mn CHAKMA VIRAMA..CHAKMA MAAYYAA +111C0 ; Virama # Mc SHARADA SIGN VIRAMA +116B6 ; Virama # Mn TAKRI SIGN VIRAMA # ================================================ @@ -265,8 +285,14 @@ A882..A891 ; Vowel_Independent # Lo [16] SAURASHTRA LETTER A..SAURASHTRA LET A984..A988 ; Vowel_Independent # Lo [5] JAVANESE LETTER A..JAVANESE LETTER U A98C..A98E ; Vowel_Independent # Lo [3] JAVANESE LETTER E..JAVANESE LETTER O AA00..AA05 ; Vowel_Independent # Lo [6] CHAM LETTER A..CHAM LETTER O +AAE0..AAE1 ; Vowel_Independent # Lo [2] MEETEI MAYEK LETTER E..MEETEI MAYEK LETTER O +ABCE..ABCF ; Vowel_Independent # Lo [2] MEETEI MAYEK LETTER UN..MEETEI MAYEK LETTER I +ABD1 ; Vowel_Independent # Lo MEETEI MAYEK LETTER ATIYA 11005..11012 ; Vowel_Independent # Lo [14] BRAHMI LETTER A..BRAHMI LETTER AU 11083..1108C ; Vowel_Independent # Lo [10] KAITHI LETTER A..KAITHI LETTER AU +11103..11106 ; Vowel_Independent # Lo [4] CHAKMA LETTER AA..CHAKMA LETTER E +11183..11190 ; Vowel_Independent # Lo [14] SHARADA LETTER A..SHARADA LETTER AU +11680..11689 ; Vowel_Independent # Lo [10] TAKRI LETTER A..TAKRI LETTER AU # ================================================ @@ -426,6 +452,7 @@ AAB5..AAB6 ; Vowel_Dependent # Lo [2] TAI VIET VOWEL E..TAI VIET VOWEL O AAB7..AAB8 ; Vowel_Dependent # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA AAB9..AABD ; Vowel_Dependent # Lo [5] TAI VIET VOWEL UEA..TAI VIET VOWEL AN AABE ; Vowel_Dependent # Mn TAI VIET VOWEL AM +AAEB..AAEF ; Vowel_Dependent # Mc [5] MEETEI MAYEK VOWEL SIGN II..MEETEI MAYEK VOWEL SIGN AAU ABE3..ABE4 ; Vowel_Dependent # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP ABE5 ; Vowel_Dependent # Mn MEETEI MAYEK VOWEL SIGN ANAP ABE6..ABE7 ; Vowel_Dependent # Mc [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP @@ -438,6 +465,9 @@ ABE9..ABEA ; Vowel_Dependent # Mc [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEET 110B0..110B2 ; Vowel_Dependent # Mc [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II 110B3..110B6 ; 
Vowel_Dependent # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI 110B7..110B8 ; Vowel_Dependent # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU +11127..11132 ; Vowel_Dependent # Mn [12] CHAKMA VOWEL SIGN A..CHAKMA AU MARK +111B3..111BF ; Vowel_Dependent # Mn [13] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN AU +116AD..116B5 ; Vowel_Dependent # Mn [9] TAKRI VOWEL SIGN AA..TAKRI VOWEL SIGN AU # ================================================ @@ -568,6 +598,7 @@ A926..A92A ; Vowel # Mn [5] KAYAH LI VOWEL UE..KAYAH LI VOWEL O 1B45..1B4B ; Consonant # Lo [7] BALINESE LETTER KAF SASAK..BALINESE LETTER ASYURA SASAK 1B8A..1BA0 ; Consonant # Lo [23] SUNDANESE LETTER KA..SUNDANESE LETTER HA 1BAE..1BAF ; Consonant # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA +1BBB..1BBD ; Consonant # Lo [3] SUNDANESE LETTER REU..SUNDANESE LETTER BHA 1BC0..1BE3 ; Consonant # Lo [36] BATAK LETTER A..BATAK LETTER MBA 1C00..1C23 ; Consonant # Lo [36] LEPCHA LETTER KA..LEPCHA LETTER A 1C4D..1C4F ; Consonant # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA @@ -587,13 +618,19 @@ AA60..AA6F ; Consonant # Lo [16] MYANMAR LETTER KHAMTI GA..MYANMAR LETTER KH AA71..AA73 ; Consonant # Lo [3] MYANMAR LETTER KHAMTI XA..MYAMNAR LETTER KHAMTI RA AA7A ; Consonant # Lo MYANMAR LETTER AITON RA AA80..AAAF ; Consonant # Lo [48] TAI VIET LETTER LOW KO..TAI VIET LETTER HIGH O -ABC0..ABDA ; Consonant # Lo [27] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER BHAM +AAE2..AAEA ; Consonant # Lo [9] MEETEI MAYEK LETTER CHA..MEETEI MAYEK LETTER SSA +ABC0..ABCD ; Consonant # Lo [14] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER HUK +ABD0 ; Consonant # Lo MEETEI MAYEK LETTER PHAM +ABD2..ABDA ; Consonant # Lo [9] MEETEI MAYEK LETTER GOK..MEETEI MAYEK LETTER BHAM 10A00 ; Consonant # Lo KHAROSHTHI LETTER A 10A10..10A13 ; Consonant # Lo [4] KHAROSHTHI LETTER KA..KHAROSHTHI LETTER GHA 10A15..10A17 ; Consonant # Lo [3] KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA 10A19..10A33 ; Consonant # Lo [27] KHAROSHTHI LETTER 
NYA..KHAROSHTHI LETTER TTTHA 11013..11037 ; Consonant # Lo [37] BRAHMI LETTER KA..BRAHMI LETTER OLD TAMIL NNNA 1108D..110AF ; Consonant # Lo [35] KAITHI LETTER KA..KAITHI LETTER HA +11107..11126 ; Consonant # Lo [32] CHAKMA LETTER KAA..CHAKMA LETTER HAA +11191..111B2 ; Consonant # Lo [34] SHARADA LETTER KA..SHARADA LETTER HA +1168A..116AA ; Consonant # Lo [34] TAKRI LETTER KA..TAKRI LETTER RRA # ================================================ @@ -633,6 +670,7 @@ A982 ; Consonant_Repha # Mn JAVANESE SIGN LAYAR 1929..192B ; Consonant_Subjoined # Mc [3] LIMBU SUBJOINED LETTER YA..LIMBU SUBJOINED LETTER WA 1BA1 ; Consonant_Subjoined # Mc SUNDANESE CONSONANT SIGN PAMINGKAL 1BA2..1BA3 ; Consonant_Subjoined # Mn [2] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE CONSONANT SIGN PANYIKU +1BAC..1BAD ; Consonant_Subjoined # Mc [2] SUNDANESE CONSONANT SIGN PASANGAN MA..SUNDANESE CONSONANT SIGN PASANGAN WA 1C24..1C25 ; Consonant_Subjoined # Mc [2] LEPCHA SUBJOINED LETTER YA..LEPCHA SUBJOINED LETTER RA A867..A868 ; Consonant_Subjoined # Lo [2] PHAGS-PA SUBJOINED LETTER WA..PHAGS-PA SUBJOINED LETTER YA A871 ; Consonant_Subjoined # Lo PHAGS-PA SUBJOINED LETTER RA @@ -672,6 +710,7 @@ AA35..AA36 ; Consonant_Medial # Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONA 19C1..19C7 ; Consonant_Final # Lo [7] NEW TAI LUE LETTER FINAL V..NEW TAI LUE LETTER FINAL B 1A57 ; Consonant_Final # Mc TAI THAM CONSONANT SIGN LA TANG LAI 1A58..1A5E ; Consonant_Final # Mn [7] TAI THAM SIGN MAI KANG LAI..TAI THAM CONSONANT SIGN SA +1BBE..1BBF ; Consonant_Final # Lo [2] SUNDANESE LETTER FINAL K..SUNDANESE LETTER FINAL M 1BF0..1BF1 ; Consonant_Final # Mn [2] BATAK CONSONANT SIGN NG..BATAK CONSONANT SIGN H 1C2D..1C33 ; Consonant_Final # Mn [7] LEPCHA CONSONANT SIGN K..LEPCHA CONSONANT SIGN T A8B4 ; Consonant_Final # Mc SAURASHTRA CONSONANT SIGN HAARU diff --git a/lib/unicore/Jamo.txt b/lib/unicore/Jamo.txt index b5df928191..3f325dee1f 100644 --- a/lib/unicore/Jamo.txt +++ b/lib/unicore/Jamo.txt @@ -1,22 +1,22 @@ 
-# Jamo-6.0.0.txt -# Date: 2010-05-19, 11:19:00 PDT [KW] +# Jamo-6.1.0.txt +# Date: 2011-06-22, 23:07:00 GMT [KW, LI] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ # -# This file defines the Jamo Short Name property. +# This file defines the Jamo_Short_Name property. # -# See Section 3.12 of The Unicode Standard, Version 6.0 +# See Section 3.12 of The Unicode Standard, Version 6.1 # for more information. # # Each line contains two fields, separated by a semicolon. # # The first field gives the code point, in 4-digit hexadecimal -# form, of a combining jamo character that participates in -# the algorithmic determination Hangul syllable character names. -# The second field gives the Jamo Short Name as a one-, two-, +# form, of a conjoining jamo character that participates in the +# algorithmic determination of Hangul syllable character names. +# The second field gives the Jamo_Short_Name as a one-, two-, # or three-character ASCII string (or in one case, for U+110B, # the null string). # diff --git a/lib/unicore/LineBreak.txt b/lib/unicore/LineBreak.txt index 10a6d0e5b2..98e9671f66 100644 --- a/lib/unicore/LineBreak.txt +++ b/lib/unicore/LineBreak.txt @@ -1,5 +1,5 @@ -# LineBreak-6.0.0.txt -# Date: 2010-08-18, 17:25:00 PDT [KW] +# LineBreak-6.1.0.txt +# Date: 2011-11-08, 20:25:00 GMT [KW] # # Line Break Properties # @@ -7,7 +7,7 @@ # Unicode Character Database. # It contains both normative and informative data. # -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # # The format is two fields separated by a semicolon. 
@@ -19,7 +19,7 @@ # Informative: # "XX", "OP", "CL", "CP", "QU", "NS", "EX", "SY", # "IS", "PR", "PO", "NU", "AL", "ID", "IN", "HY", -# "BB", "BA", "SA", "AI", "B2" +# "BB", "BA", "SA", "AI", "B2", "HL", "CJ" # - All code points, assigned and unassigned, that are not listed # explicitly are given the value "XX". # The unassigned code points that default to "ID" include ranges in the @@ -1439,6 +1439,7 @@ 0587;AL # ARMENIAN SMALL LIGATURE ECH YIWN 0589;IS # ARMENIAN FULL STOP 058A;BA # ARMENIAN HYPHEN +058F;PR # ARMENIAN DRAM SIGN 0591;CM # HEBREW ACCENT ETNAHTA 0592;CM # HEBREW ACCENT SEGOL 0593;CM # HEBREW ACCENT SHALSHELET @@ -1494,42 +1495,43 @@ 05C5;CM # HEBREW MARK LOWER DOT 05C6;EX # HEBREW PUNCTUATION NUN HAFUKHA 05C7;CM # HEBREW POINT QAMATS QATAN -05D0;AL # HEBREW LETTER ALEF -05D1;AL # HEBREW LETTER BET -05D2;AL # HEBREW LETTER GIMEL -05D3;AL # HEBREW LETTER DALET -05D4;AL # HEBREW LETTER HE -05D5;AL # HEBREW LETTER VAV -05D6;AL # HEBREW LETTER ZAYIN -05D7;AL # HEBREW LETTER HET -05D8;AL # HEBREW LETTER TET -05D9;AL # HEBREW LETTER YOD -05DA;AL # HEBREW LETTER FINAL KAF -05DB;AL # HEBREW LETTER KAF -05DC;AL # HEBREW LETTER LAMED -05DD;AL # HEBREW LETTER FINAL MEM -05DE;AL # HEBREW LETTER MEM -05DF;AL # HEBREW LETTER FINAL NUN -05E0;AL # HEBREW LETTER NUN -05E1;AL # HEBREW LETTER SAMEKH -05E2;AL # HEBREW LETTER AYIN -05E3;AL # HEBREW LETTER FINAL PE -05E4;AL # HEBREW LETTER PE -05E5;AL # HEBREW LETTER FINAL TSADI -05E6;AL # HEBREW LETTER TSADI -05E7;AL # HEBREW LETTER QOF -05E8;AL # HEBREW LETTER RESH -05E9;AL # HEBREW LETTER SHIN -05EA;AL # HEBREW LETTER TAV -05F0;AL # HEBREW LIGATURE YIDDISH DOUBLE VAV -05F1;AL # HEBREW LIGATURE YIDDISH VAV YOD -05F2;AL # HEBREW LIGATURE YIDDISH DOUBLE YOD +05D0;HL # HEBREW LETTER ALEF +05D1;HL # HEBREW LETTER BET +05D2;HL # HEBREW LETTER GIMEL +05D3;HL # HEBREW LETTER DALET +05D4;HL # HEBREW LETTER HE +05D5;HL # HEBREW LETTER VAV +05D6;HL # HEBREW LETTER ZAYIN +05D7;HL # HEBREW LETTER HET +05D8;HL # HEBREW LETTER TET 
+05D9;HL # HEBREW LETTER YOD +05DA;HL # HEBREW LETTER FINAL KAF +05DB;HL # HEBREW LETTER KAF +05DC;HL # HEBREW LETTER LAMED +05DD;HL # HEBREW LETTER FINAL MEM +05DE;HL # HEBREW LETTER MEM +05DF;HL # HEBREW LETTER FINAL NUN +05E0;HL # HEBREW LETTER NUN +05E1;HL # HEBREW LETTER SAMEKH +05E2;HL # HEBREW LETTER AYIN +05E3;HL # HEBREW LETTER FINAL PE +05E4;HL # HEBREW LETTER PE +05E5;HL # HEBREW LETTER FINAL TSADI +05E6;HL # HEBREW LETTER TSADI +05E7;HL # HEBREW LETTER QOF +05E8;HL # HEBREW LETTER RESH +05E9;HL # HEBREW LETTER SHIN +05EA;HL # HEBREW LETTER TAV +05F0;HL # HEBREW LIGATURE YIDDISH DOUBLE VAV +05F1;HL # HEBREW LIGATURE YIDDISH VAV YOD +05F2;HL # HEBREW LIGATURE YIDDISH DOUBLE YOD 05F3;AL # HEBREW PUNCTUATION GERESH 05F4;AL # HEBREW PUNCTUATION GERSHAYIM 0600;AL # ARABIC NUMBER SIGN 0601;AL # ARABIC SIGN SANAH 0602;AL # ARABIC FOOTNOTE MARKER 0603;AL # ARABIC SIGN SAFHA +0604;AL # ARABIC SIGN SAMVAT 0606;AL # ARABIC-INDIC CUBE ROOT 0607;AL # ARABIC-INDIC FOURTH ROOT 0608;AL # ARABIC RAY @@ -2102,6 +2104,45 @@ 085A;CM # MANDAIC VOCALIZATION MARK 085B;CM # MANDAIC GEMINATION MARK 085E;AL # MANDAIC PUNCTUATION +08A0;AL # ARABIC LETTER BEH WITH SMALL V BELOW +08A2;AL # ARABIC LETTER JEEM WITH TWO DOTS ABOVE +08A3;AL # ARABIC LETTER TAH WITH TWO DOTS ABOVE +08A4;AL # ARABIC LETTER FEH WITH DOT BELOW AND THREE DOTS ABOVE +08A5;AL # ARABIC LETTER QAF WITH DOT BELOW +08A6;AL # ARABIC LETTER LAM WITH DOUBLE BAR +08A7;AL # ARABIC LETTER MEEM WITH THREE DOTS ABOVE +08A8;AL # ARABIC LETTER YEH WITH TWO DOTS BELOW AND HAMZA ABOVE +08A9;AL # ARABIC LETTER YEH WITH TWO DOTS BELOW AND DOT ABOVE +08AA;AL # ARABIC LETTER REH WITH LOOP +08AB;AL # ARABIC LETTER WAW WITH DOT WITHIN +08AC;AL # ARABIC LETTER ROHINGYA YEH +08E4;CM # ARABIC CURLY FATHA +08E5;CM # ARABIC CURLY DAMMA +08E6;CM # ARABIC CURLY KASRA +08E7;CM # ARABIC CURLY FATHATAN +08E8;CM # ARABIC CURLY DAMMATAN +08E9;CM # ARABIC CURLY KASRATAN +08EA;CM # ARABIC TONE ONE DOT ABOVE +08EB;CM # ARABIC TONE TWO DOTS ABOVE 
+08EC;CM # ARABIC TONE LOOP ABOVE +08ED;CM # ARABIC TONE ONE DOT BELOW +08EE;CM # ARABIC TONE TWO DOTS BELOW +08EF;CM # ARABIC TONE LOOP BELOW +08F0;CM # ARABIC OPEN FATHATAN +08F1;CM # ARABIC OPEN DAMMATAN +08F2;CM # ARABIC OPEN KASRATAN +08F3;CM # ARABIC SMALL HIGH WAW +08F4;CM # ARABIC FATHA WITH RING +08F5;CM # ARABIC FATHA WITH DOT ABOVE +08F6;CM # ARABIC KASRA WITH DOT BELOW +08F7;CM # ARABIC LEFT ARROWHEAD ABOVE +08F8;CM # ARABIC RIGHT ARROWHEAD ABOVE +08F9;CM # ARABIC LEFT ARROWHEAD BELOW +08FA;CM # ARABIC RIGHT ARROWHEAD BELOW +08FB;CM # ARABIC DOUBLE RIGHT ARROWHEAD ABOVE +08FC;CM # ARABIC DOUBLE RIGHT ARROWHEAD ABOVE WITH DOT +08FD;CM # ARABIC RIGHT ARROWHEAD ABOVE WITH DOT +08FE;CM # ARABIC DAMMA WITH DOT 0900;CM # DEVANAGARI SIGN INVERTED CANDRABINDU 0901;CM # DEVANAGARI SIGN CANDRABINDU 0902;CM # DEVANAGARI SIGN ANUSVARA @@ -2482,6 +2523,7 @@ 0AED;NU # GUJARATI DIGIT SEVEN 0AEE;NU # GUJARATI DIGIT EIGHT 0AEF;NU # GUJARATI DIGIT NINE +0AF0;AL # GUJARATI ABBREVIATION SIGN 0AF1;PR # GUJARATI RUPEE SIGN 0B01;CM # ORIYA SIGN CANDRABINDU 0B02;CM # ORIYA SIGN ANUSVARA @@ -3154,6 +3196,8 @@ 0ED9;NU # LAO DIGIT NINE 0EDC;SA # LAO HO NO 0EDD;SA # LAO HO MO +0EDE;SA # LAO LETTER KHMU GO +0EDF;SA # LAO LETTER KHMU NYO 0F00;AL # TIBETAN SYLLABLE OM 0F01;BB # TIBETAN MARK GTER YIG MGO TRUNCATED A 0F02;BB # TIBETAN MARK GTER YIG MGO -UM RNAM BCAD MA @@ -3563,6 +3607,8 @@ 10C3;AL # GEORGIAN CAPITAL LETTER WE 10C4;AL # GEORGIAN CAPITAL LETTER HAR 10C5;AL # GEORGIAN CAPITAL LETTER HOE +10C7;AL # GEORGIAN CAPITAL LETTER YN +10CD;AL # GEORGIAN CAPITAL LETTER AEN 10D0;AL # GEORGIAN LETTER AN 10D1;AL # GEORGIAN LETTER BAN 10D2;AL # GEORGIAN LETTER GAN @@ -3608,6 +3654,9 @@ 10FA;AL # GEORGIAN LETTER AIN 10FB;AL # GEORGIAN PARAGRAPH SEPARATOR 10FC;AL # MODIFIER LETTER GEORGIAN NAR +10FD;AL # GEORGIAN LETTER AEN +10FE;AL # GEORGIAN LETTER HARD SIGN +10FF;AL # GEORGIAN LETTER LABIAL SIGN 1100;JL # HANGUL CHOSEONG KIYEOK 1101;JL # HANGUL CHOSEONG SSANGKIYEOK 1102;JL # HANGUL 
CHOSEONG NIEUN @@ -6041,6 +6090,9 @@ 1BA8;CM # SUNDANESE VOWEL SIGN PAMEPET 1BA9;CM # SUNDANESE VOWEL SIGN PANEULEUNG 1BAA;CM # SUNDANESE SIGN PAMAAEH +1BAB;CM # SUNDANESE SIGN VIRAMA +1BAC;CM # SUNDANESE CONSONANT SIGN PASANGAN MA +1BAD;CM # SUNDANESE CONSONANT SIGN PASANGAN WA 1BAE;AL # SUNDANESE LETTER KHA 1BAF;AL # SUNDANESE LETTER SYA 1BB0;NU # SUNDANESE DIGIT ZERO @@ -6053,6 +6105,12 @@ 1BB7;NU # SUNDANESE DIGIT SEVEN 1BB8;NU # SUNDANESE DIGIT EIGHT 1BB9;NU # SUNDANESE DIGIT NINE +1BBA;AL # SUNDANESE AVAGRAHA +1BBB;AL # SUNDANESE LETTER REU +1BBC;AL # SUNDANESE LETTER LEU +1BBD;AL # SUNDANESE LETTER BHA +1BBE;AL # SUNDANESE LETTER FINAL K +1BBF;AL # SUNDANESE LETTER FINAL M 1BC0;AL # BATAK LETTER A 1BC1;AL # BATAK LETTER SIMALUNGUN A 1BC2;AL # BATAK LETTER HA @@ -6231,6 +6289,14 @@ 1C7D;AL # OL CHIKI AHAD 1C7E;BA # OL CHIKI PUNCTUATION MUCAAD 1C7F;BA # OL CHIKI PUNCTUATION DOUBLE MUCAAD +1CC0;AL # SUNDANESE PUNCTUATION BINDU SURYA +1CC1;AL # SUNDANESE PUNCTUATION BINDU PANGLONG +1CC2;AL # SUNDANESE PUNCTUATION BINDU PURNAMA +1CC3;AL # SUNDANESE PUNCTUATION BINDU CAKRA +1CC4;AL # SUNDANESE PUNCTUATION BINDU LEU SATANGA +1CC5;AL # SUNDANESE PUNCTUATION BINDU KA SATANGA +1CC6;AL # SUNDANESE PUNCTUATION BINDU DA SATANGA +1CC7;AL # SUNDANESE PUNCTUATION BINDU BA SATANGA 1CD0;CM # VEDIC TONE KARSHANA 1CD1;CM # VEDIC TONE SHARA 1CD2;CM # VEDIC TONE PRENKHA @@ -6266,6 +6332,10 @@ 1CF0;AL # VEDIC SIGN RTHANG LONG ANUSVARA 1CF1;AL # VEDIC SIGN ANUSVARA UBHAYATO MUKHA 1CF2;CM # VEDIC SIGN ARDHAVISARGA +1CF3;CM # VEDIC SIGN ROTATED ARDHAVISARGA +1CF4;CM # VEDIC TONE CANDRA ABOVE +1CF5;AL # VEDIC SIGN JIHVAMULIYA +1CF6;AL # VEDIC SIGN UPADHMANIYA 1D00;AL # LATIN LETTER SMALL CAPITAL A 1D01;AL # LATIN LETTER SMALL CAPITAL AE 1D02;AL # LATIN SMALL LETTER TURNED AE @@ -8872,7 +8942,9 @@ 27C8;AL # REVERSE SOLIDUS PRECEDING SUBSET 27C9;AL # SUPERSET PRECEDING SOLIDUS 27CA;AL # VERTICAL BAR WITH HORIZONTAL STROKE +27CB;AL # MATHEMATICAL RISING DIAGONAL 27CC;AL # LONG DIVISION 
+27CD;AL # MATHEMATICAL FALLING DIAGONAL 27CE;AL # SQUARED LOGICAL AND 27CF;AL # SQUARED LOGICAL OR 27D0;AL # WHITE DIAMOND WITH CENTRED DOT @@ -10018,6 +10090,8 @@ 2CEF;CM # COPTIC COMBINING NI ABOVE 2CF0;CM # COPTIC COMBINING SPIRITUS ASPER 2CF1;CM # COPTIC COMBINING SPIRITUS LENIS +2CF2;AL # COPTIC CAPITAL LETTER BOHAIRIC KHEI +2CF3;AL # COPTIC SMALL LETTER BOHAIRIC KHEI 2CF9;EX # COPTIC OLD NUBIAN FULL STOP 2CFA;BA # COPTIC OLD NUBIAN DIRECT QUESTION MARK 2CFB;BA # COPTIC OLD NUBIAN INDIRECT QUESTION MARK @@ -10063,6 +10137,8 @@ 2D23;AL # GEORGIAN SMALL LETTER WE 2D24;AL # GEORGIAN SMALL LETTER HAR 2D25;AL # GEORGIAN SMALL LETTER HOE +2D27;AL # GEORGIAN SMALL LETTER YN +2D2D;AL # GEORGIAN SMALL LETTER AEN 2D30;AL # TIFINAGH LETTER YA 2D31;AL # TIFINAGH LETTER YAB 2D32;AL # TIFINAGH LETTER YABH @@ -10117,6 +10193,8 @@ 2D63;AL # TIFINAGH LETTER YAZ 2D64;AL # TIFINAGH LETTER TAWELLEMET YAZ 2D65;AL # TIFINAGH LETTER YAZZ +2D66;AL # TIFINAGH LETTER YE +2D67;AL # TIFINAGH LETTER YO 2D6F;AL # TIFINAGH MODIFIER LETTER LABIALIZATION MARK 2D70;BA # TIFINAGH SEPARATOR MARK 2D7F;CM # TIFINAGH CONSONANT JOINER @@ -10281,6 +10359,16 @@ 2E2F;AL # VERTICAL TILDE 2E30;BA # RING POINT 2E31;BA # WORD SEPARATOR MIDDLE DOT +2E32;AL # TURNED COMMA +2E33;BA # RAISED DOT +2E34;BA # RAISED COMMA +2E35;AL # TURNED SEMICOLON +2E36;AL # DAGGER WITH LEFT GUARD +2E37;AL # DAGGER WITH RIGHT GUARD +2E38;AL # TURNED DAGGER +2E39;AL # TOP HALF SECTION SIGN +2E3A;B2 # TWO-EM DASH +2E3B;B2 # THREE-EM DASH 2E80;ID # CJK RADICAL REPEAT 2E81;ID # CJK RADICAL CLIFF 2E82;ID # CJK RADICAL SECOND ONE @@ -10686,15 +10774,15 @@ 303D;ID # PART ALTERNATION MARK 303E;ID # IDEOGRAPHIC VARIATION INDICATOR 303F;ID # IDEOGRAPHIC HALF FILL SPACE -3041;NS # HIRAGANA LETTER SMALL A +3041;CJ # HIRAGANA LETTER SMALL A 3042;ID # HIRAGANA LETTER A -3043;NS # HIRAGANA LETTER SMALL I +3043;CJ # HIRAGANA LETTER SMALL I 3044;ID # HIRAGANA LETTER I -3045;NS # HIRAGANA LETTER SMALL U +3045;CJ # HIRAGANA LETTER SMALL U 
3046;ID # HIRAGANA LETTER U -3047;NS # HIRAGANA LETTER SMALL E +3047;CJ # HIRAGANA LETTER SMALL E 3048;ID # HIRAGANA LETTER E -3049;NS # HIRAGANA LETTER SMALL O +3049;CJ # HIRAGANA LETTER SMALL O 304A;ID # HIRAGANA LETTER O 304B;ID # HIRAGANA LETTER KA 304C;ID # HIRAGANA LETTER GA @@ -10720,7 +10808,7 @@ 3060;ID # HIRAGANA LETTER DA 3061;ID # HIRAGANA LETTER TI 3062;ID # HIRAGANA LETTER DI -3063;NS # HIRAGANA LETTER SMALL TU +3063;CJ # HIRAGANA LETTER SMALL TU 3064;ID # HIRAGANA LETTER TU 3065;ID # HIRAGANA LETTER DU 3066;ID # HIRAGANA LETTER TE @@ -10752,26 +10840,26 @@ 3080;ID # HIRAGANA LETTER MU 3081;ID # HIRAGANA LETTER ME 3082;ID # HIRAGANA LETTER MO -3083;NS # HIRAGANA LETTER SMALL YA +3083;CJ # HIRAGANA LETTER SMALL YA 3084;ID # HIRAGANA LETTER YA -3085;NS # HIRAGANA LETTER SMALL YU +3085;CJ # HIRAGANA LETTER SMALL YU 3086;ID # HIRAGANA LETTER YU -3087;NS # HIRAGANA LETTER SMALL YO +3087;CJ # HIRAGANA LETTER SMALL YO 3088;ID # HIRAGANA LETTER YO 3089;ID # HIRAGANA LETTER RA 308A;ID # HIRAGANA LETTER RI 308B;ID # HIRAGANA LETTER RU 308C;ID # HIRAGANA LETTER RE 308D;ID # HIRAGANA LETTER RO -308E;NS # HIRAGANA LETTER SMALL WA +308E;CJ # HIRAGANA LETTER SMALL WA 308F;ID # HIRAGANA LETTER WA 3090;ID # HIRAGANA LETTER WI 3091;ID # HIRAGANA LETTER WE 3092;ID # HIRAGANA LETTER WO 3093;ID # HIRAGANA LETTER N 3094;ID # HIRAGANA LETTER VU -3095;NS # HIRAGANA LETTER SMALL KA -3096;NS # HIRAGANA LETTER SMALL KE +3095;CJ # HIRAGANA LETTER SMALL KA +3096;CJ # HIRAGANA LETTER SMALL KE 3099;CM # COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK 309A;CM # COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK 309B;NS # KATAKANA-HIRAGANA VOICED SOUND MARK @@ -10780,15 +10868,15 @@ 309E;NS # HIRAGANA VOICED ITERATION MARK 309F;ID # HIRAGANA DIGRAPH YORI 30A0;NS # KATAKANA-HIRAGANA DOUBLE HYPHEN -30A1;NS # KATAKANA LETTER SMALL A +30A1;CJ # KATAKANA LETTER SMALL A 30A2;ID # KATAKANA LETTER A -30A3;NS # KATAKANA LETTER SMALL I +30A3;CJ # KATAKANA LETTER SMALL I 30A4;ID # KATAKANA LETTER 
I -30A5;NS # KATAKANA LETTER SMALL U +30A5;CJ # KATAKANA LETTER SMALL U 30A6;ID # KATAKANA LETTER U -30A7;NS # KATAKANA LETTER SMALL E +30A7;CJ # KATAKANA LETTER SMALL E 30A8;ID # KATAKANA LETTER E -30A9;NS # KATAKANA LETTER SMALL O +30A9;CJ # KATAKANA LETTER SMALL O 30AA;ID # KATAKANA LETTER O 30AB;ID # KATAKANA LETTER KA 30AC;ID # KATAKANA LETTER GA @@ -10814,7 +10902,7 @@ 30C0;ID # KATAKANA LETTER DA 30C1;ID # KATAKANA LETTER TI 30C2;ID # KATAKANA LETTER DI -30C3;NS # KATAKANA LETTER SMALL TU +30C3;CJ # KATAKANA LETTER SMALL TU 30C4;ID # KATAKANA LETTER TU 30C5;ID # KATAKANA LETTER DU 30C6;ID # KATAKANA LETTER TE @@ -10846,32 +10934,32 @@ 30E0;ID # KATAKANA LETTER MU 30E1;ID # KATAKANA LETTER ME 30E2;ID # KATAKANA LETTER MO -30E3;NS # KATAKANA LETTER SMALL YA +30E3;CJ # KATAKANA LETTER SMALL YA 30E4;ID # KATAKANA LETTER YA -30E5;NS # KATAKANA LETTER SMALL YU +30E5;CJ # KATAKANA LETTER SMALL YU 30E6;ID # KATAKANA LETTER YU -30E7;NS # KATAKANA LETTER SMALL YO +30E7;CJ # KATAKANA LETTER SMALL YO 30E8;ID # KATAKANA LETTER YO 30E9;ID # KATAKANA LETTER RA 30EA;ID # KATAKANA LETTER RI 30EB;ID # KATAKANA LETTER RU 30EC;ID # KATAKANA LETTER RE 30ED;ID # KATAKANA LETTER RO -30EE;NS # KATAKANA LETTER SMALL WA +30EE;CJ # KATAKANA LETTER SMALL WA 30EF;ID # KATAKANA LETTER WA 30F0;ID # KATAKANA LETTER WI 30F1;ID # KATAKANA LETTER WE 30F2;ID # KATAKANA LETTER WO 30F3;ID # KATAKANA LETTER N 30F4;ID # KATAKANA LETTER VU -30F5;NS # KATAKANA LETTER SMALL KA -30F6;NS # KATAKANA LETTER SMALL KE +30F5;CJ # KATAKANA LETTER SMALL KA +30F6;CJ # KATAKANA LETTER SMALL KE 30F7;ID # KATAKANA LETTER VA 30F8;ID # KATAKANA LETTER VI 30F9;ID # KATAKANA LETTER VE 30FA;ID # KATAKANA LETTER VO 30FB;NS # KATAKANA MIDDLE DOT -30FC;NS # KATAKANA-HIRAGANA PROLONGED SOUND MARK +30FC;CJ # KATAKANA-HIRAGANA PROLONGED SOUND MARK 30FD;NS # KATAKANA ITERATION MARK 30FE;NS # KATAKANA VOICED ITERATION MARK 30FF;ID # KATAKANA DIGRAPH KOTO @@ -11089,22 +11177,22 @@ 31E1;ID # CJK STROKE HZZZG 31E2;ID # CJK 
STROKE PG 31E3;ID # CJK STROKE Q -31F0;NS # KATAKANA LETTER SMALL KU -31F1;NS # KATAKANA LETTER SMALL SI -31F2;NS # KATAKANA LETTER SMALL SU -31F3;NS # KATAKANA LETTER SMALL TO -31F4;NS # KATAKANA LETTER SMALL NU -31F5;NS # KATAKANA LETTER SMALL HA -31F6;NS # KATAKANA LETTER SMALL HI -31F7;NS # KATAKANA LETTER SMALL HU -31F8;NS # KATAKANA LETTER SMALL HE -31F9;NS # KATAKANA LETTER SMALL HO -31FA;NS # KATAKANA LETTER SMALL MU -31FB;NS # KATAKANA LETTER SMALL RA -31FC;NS # KATAKANA LETTER SMALL RI -31FD;NS # KATAKANA LETTER SMALL RU -31FE;NS # KATAKANA LETTER SMALL RE -31FF;NS # KATAKANA LETTER SMALL RO +31F0;CJ # KATAKANA LETTER SMALL KU +31F1;CJ # KATAKANA LETTER SMALL SI +31F2;CJ # KATAKANA LETTER SMALL SU +31F3;CJ # KATAKANA LETTER SMALL TO +31F4;CJ # KATAKANA LETTER SMALL NU +31F5;CJ # KATAKANA LETTER SMALL HA +31F6;CJ # KATAKANA LETTER SMALL HI +31F7;CJ # KATAKANA LETTER SMALL HU +31F8;CJ # KATAKANA LETTER SMALL HE +31F9;CJ # KATAKANA LETTER SMALL HO +31FA;CJ # KATAKANA LETTER SMALL MU +31FB;CJ # KATAKANA LETTER SMALL RA +31FC;CJ # KATAKANA LETTER SMALL RI +31FD;CJ # KATAKANA LETTER SMALL RU +31FE;CJ # KATAKANA LETTER SMALL RE +31FF;CJ # KATAKANA LETTER SMALL RO 3200;ID # PARENTHESIZED HANGUL KIYEOK 3201;ID # PARENTHESIZED HANGUL NIEUN 3202;ID # PARENTHESIZED HANGUL TIKEUT @@ -11681,8 +11769,8 @@ 4DFD;AL # HEXAGRAM FOR SMALL PREPONDERANCE 4DFE;AL # HEXAGRAM FOR AFTER COMPLETION 4DFF;AL # HEXAGRAM FOR BEFORE COMPLETION -4E00..9FCB;ID # <CJK Ideograph, First>..<CJK Ideograph, Last> -9FCC..9FFF;ID # <reserved-9FCC>..<reserved-9FFF> +4E00..9FCC;ID # <CJK Ideograph, First>..<CJK Ideograph, Last> +9FCD..9FFF;ID # <reserved-9FCD>..<reserved-9FFF> A000;ID # YI SYLLABLE IT A001;ID # YI SYLLABLE IX A002;ID # YI SYLLABLE I @@ -13303,6 +13391,14 @@ A670;CM # COMBINING CYRILLIC TEN MILLIONS SIGN A671;CM # COMBINING CYRILLIC HUNDRED MILLIONS SIGN A672;CM # COMBINING CYRILLIC THOUSAND MILLIONS SIGN A673;AL # SLAVONIC ASTERISK +A674;CM # COMBINING CYRILLIC LETTER UKRAINIAN IE 
+A675;CM # COMBINING CYRILLIC LETTER I +A676;CM # COMBINING CYRILLIC LETTER YI +A677;CM # COMBINING CYRILLIC LETTER U +A678;CM # COMBINING CYRILLIC LETTER HARD SIGN +A679;CM # COMBINING CYRILLIC LETTER YERU +A67A;CM # COMBINING CYRILLIC LETTER SOFT SIGN +A67B;CM # COMBINING CYRILLIC LETTER OMEGA A67C;CM # COMBINING CYRILLIC KAVYKA A67D;CM # COMBINING CYRILLIC PAYEROK A67E;AL # CYRILLIC KAVYKA @@ -13331,6 +13427,7 @@ A694;AL # CYRILLIC CAPITAL LETTER HWE A695;AL # CYRILLIC SMALL LETTER HWE A696;AL # CYRILLIC CAPITAL LETTER SHWE A697;AL # CYRILLIC SMALL LETTER SHWE +A69F;CM # COMBINING CYRILLIC LETTER IOTIFIED E A6A0;AL # BAMUM LETTER A A6A1;AL # BAMUM LETTER KA A6A2;AL # BAMUM LETTER U @@ -13564,6 +13661,8 @@ A78D;AL # LATIN CAPITAL LETTER TURNED H A78E;AL # LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A790;AL # LATIN CAPITAL LETTER N WITH DESCENDER A791;AL # LATIN SMALL LETTER N WITH DESCENDER +A792;AL # LATIN CAPITAL LETTER C WITH BAR +A793;AL # LATIN SMALL LETTER C WITH BAR A7A0;AL # LATIN CAPITAL LETTER G WITH OBLIQUE STROKE A7A1;AL # LATIN SMALL LETTER G WITH OBLIQUE STROKE A7A2;AL # LATIN CAPITAL LETTER K WITH OBLIQUE STROKE @@ -13574,6 +13673,9 @@ A7A6;AL # LATIN CAPITAL LETTER R WITH OBLIQUE STROKE A7A7;AL # LATIN SMALL LETTER R WITH OBLIQUE STROKE A7A8;AL # LATIN CAPITAL LETTER S WITH OBLIQUE STROKE A7A9;AL # LATIN SMALL LETTER S WITH OBLIQUE STROKE +A7AA;AL # LATIN CAPITAL LETTER H WITH HOOK +A7F8;AL # MODIFIER LETTER CAPITAL H WITH STROKE +A7F9;AL # MODIFIER LETTER SMALL LIGATURE OE A7FA;AL # LATIN LETTER SMALL CAPITAL TURNED M A7FB;AL # LATIN EPIGRAPHIC LETTER REVERSED F A7FC;AL # LATIN EPIGRAPHIC LETTER REVERSED P @@ -14187,6 +14289,29 @@ AADC;SA # TAI VIET SYMBOL NUENG AADD;SA # TAI VIET SYMBOL SAM AADE;SA # TAI VIET SYMBOL HO HOI AADF;SA # TAI VIET SYMBOL KOI KOI +AAE0;AL # MEETEI MAYEK LETTER E +AAE1;AL # MEETEI MAYEK LETTER O +AAE2;AL # MEETEI MAYEK LETTER CHA +AAE3;AL # MEETEI MAYEK LETTER NYA +AAE4;AL # MEETEI MAYEK LETTER TTA +AAE5;AL # 
MEETEI MAYEK LETTER TTHA +AAE6;AL # MEETEI MAYEK LETTER DDA +AAE7;AL # MEETEI MAYEK LETTER DDHA +AAE8;AL # MEETEI MAYEK LETTER NNA +AAE9;AL # MEETEI MAYEK LETTER SHA +AAEA;AL # MEETEI MAYEK LETTER SSA +AAEB;CM # MEETEI MAYEK VOWEL SIGN II +AAEC;CM # MEETEI MAYEK VOWEL SIGN UU +AAED;CM # MEETEI MAYEK VOWEL SIGN AAI +AAEE;CM # MEETEI MAYEK VOWEL SIGN AU +AAEF;CM # MEETEI MAYEK VOWEL SIGN AAU +AAF0;BA # MEETEI MAYEK CHEIKHAN +AAF1;BA # MEETEI MAYEK AHANG KHUDAM +AAF2;AL # MEETEI MAYEK ANJI +AAF3;AL # MEETEI MAYEK SYLLABLE REPETITION MARK +AAF4;AL # MEETEI MAYEK WORD REPETITION MARK +AAF5;CM # MEETEI MAYEK VOWEL SIGN VISARGA +AAF6;CM # MEETEI MAYEK VIRAMA AB01;AL # ETHIOPIC SYLLABLE TTHU AB02;AL # ETHIOPIC SYLLABLE TTHI AB03;AL # ETHIOPIC SYLLABLE TTHAA @@ -15451,7 +15576,8 @@ FA2A;ID # CJK COMPATIBILITY IDEOGRAPH-FA2A FA2B;ID # CJK COMPATIBILITY IDEOGRAPH-FA2B FA2C;ID # CJK COMPATIBILITY IDEOGRAPH-FA2C FA2D;ID # CJK COMPATIBILITY IDEOGRAPH-FA2D -FA2E..FA2F;ID # <reserved-FA2E>..<reserved-FA2F> +FA2E;ID # CJK COMPATIBILITY IDEOGRAPH-FA2E +FA2F;ID # CJK COMPATIBILITY IDEOGRAPH-FA2F FA30;ID # CJK COMPATIBILITY IDEOGRAPH-FA30 FA31;ID # CJK COMPATIBILITY IDEOGRAPH-FA31 FA32;ID # CJK COMPATIBILITY IDEOGRAPH-FA32 @@ -15634,52 +15760,52 @@ FB14;AL # ARMENIAN SMALL LIGATURE MEN ECH FB15;AL # ARMENIAN SMALL LIGATURE MEN INI FB16;AL # ARMENIAN SMALL LIGATURE VEW NOW FB17;AL # ARMENIAN SMALL LIGATURE MEN XEH -FB1D;AL # HEBREW LETTER YOD WITH HIRIQ +FB1D;HL # HEBREW LETTER YOD WITH HIRIQ FB1E;CM # HEBREW POINT JUDEO-SPANISH VARIKA -FB1F;AL # HEBREW LIGATURE YIDDISH YOD YOD PATAH -FB20;AL # HEBREW LETTER ALTERNATIVE AYIN -FB21;AL # HEBREW LETTER WIDE ALEF -FB22;AL # HEBREW LETTER WIDE DALET -FB23;AL # HEBREW LETTER WIDE HE -FB24;AL # HEBREW LETTER WIDE KAF -FB25;AL # HEBREW LETTER WIDE LAMED -FB26;AL # HEBREW LETTER WIDE FINAL MEM -FB27;AL # HEBREW LETTER WIDE RESH -FB28;AL # HEBREW LETTER WIDE TAV +FB1F;HL # HEBREW LIGATURE YIDDISH YOD YOD PATAH +FB20;HL # HEBREW LETTER 
ALTERNATIVE AYIN +FB21;HL # HEBREW LETTER WIDE ALEF +FB22;HL # HEBREW LETTER WIDE DALET +FB23;HL # HEBREW LETTER WIDE HE +FB24;HL # HEBREW LETTER WIDE KAF +FB25;HL # HEBREW LETTER WIDE LAMED +FB26;HL # HEBREW LETTER WIDE FINAL MEM +FB27;HL # HEBREW LETTER WIDE RESH +FB28;HL # HEBREW LETTER WIDE TAV FB29;AL # HEBREW LETTER ALTERNATIVE PLUS SIGN -FB2A;AL # HEBREW LETTER SHIN WITH SHIN DOT -FB2B;AL # HEBREW LETTER SHIN WITH SIN DOT -FB2C;AL # HEBREW LETTER SHIN WITH DAGESH AND SHIN DOT -FB2D;AL # HEBREW LETTER SHIN WITH DAGESH AND SIN DOT -FB2E;AL # HEBREW LETTER ALEF WITH PATAH -FB2F;AL # HEBREW LETTER ALEF WITH QAMATS -FB30;AL # HEBREW LETTER ALEF WITH MAPIQ -FB31;AL # HEBREW LETTER BET WITH DAGESH -FB32;AL # HEBREW LETTER GIMEL WITH DAGESH -FB33;AL # HEBREW LETTER DALET WITH DAGESH -FB34;AL # HEBREW LETTER HE WITH MAPIQ -FB35;AL # HEBREW LETTER VAV WITH DAGESH -FB36;AL # HEBREW LETTER ZAYIN WITH DAGESH -FB38;AL # HEBREW LETTER TET WITH DAGESH -FB39;AL # HEBREW LETTER YOD WITH DAGESH -FB3A;AL # HEBREW LETTER FINAL KAF WITH DAGESH -FB3B;AL # HEBREW LETTER KAF WITH DAGESH -FB3C;AL # HEBREW LETTER LAMED WITH DAGESH -FB3E;AL # HEBREW LETTER MEM WITH DAGESH -FB40;AL # HEBREW LETTER NUN WITH DAGESH -FB41;AL # HEBREW LETTER SAMEKH WITH DAGESH -FB43;AL # HEBREW LETTER FINAL PE WITH DAGESH -FB44;AL # HEBREW LETTER PE WITH DAGESH -FB46;AL # HEBREW LETTER TSADI WITH DAGESH -FB47;AL # HEBREW LETTER QOF WITH DAGESH -FB48;AL # HEBREW LETTER RESH WITH DAGESH -FB49;AL # HEBREW LETTER SHIN WITH DAGESH -FB4A;AL # HEBREW LETTER TAV WITH DAGESH -FB4B;AL # HEBREW LETTER VAV WITH HOLAM -FB4C;AL # HEBREW LETTER BET WITH RAFE -FB4D;AL # HEBREW LETTER KAF WITH RAFE -FB4E;AL # HEBREW LETTER PE WITH RAFE -FB4F;AL # HEBREW LIGATURE ALEF LAMED +FB2A;HL # HEBREW LETTER SHIN WITH SHIN DOT +FB2B;HL # HEBREW LETTER SHIN WITH SIN DOT +FB2C;HL # HEBREW LETTER SHIN WITH DAGESH AND SHIN DOT +FB2D;HL # HEBREW LETTER SHIN WITH DAGESH AND SIN DOT +FB2E;HL # HEBREW LETTER ALEF WITH PATAH +FB2F;HL # HEBREW 
LETTER ALEF WITH QAMATS +FB30;HL # HEBREW LETTER ALEF WITH MAPIQ +FB31;HL # HEBREW LETTER BET WITH DAGESH +FB32;HL # HEBREW LETTER GIMEL WITH DAGESH +FB33;HL # HEBREW LETTER DALET WITH DAGESH +FB34;HL # HEBREW LETTER HE WITH MAPIQ +FB35;HL # HEBREW LETTER VAV WITH DAGESH +FB36;HL # HEBREW LETTER ZAYIN WITH DAGESH +FB38;HL # HEBREW LETTER TET WITH DAGESH +FB39;HL # HEBREW LETTER YOD WITH DAGESH +FB3A;HL # HEBREW LETTER FINAL KAF WITH DAGESH +FB3B;HL # HEBREW LETTER KAF WITH DAGESH +FB3C;HL # HEBREW LETTER LAMED WITH DAGESH +FB3E;HL # HEBREW LETTER MEM WITH DAGESH +FB40;HL # HEBREW LETTER NUN WITH DAGESH +FB41;HL # HEBREW LETTER SAMEKH WITH DAGESH +FB43;HL # HEBREW LETTER FINAL PE WITH DAGESH +FB44;HL # HEBREW LETTER PE WITH DAGESH +FB46;HL # HEBREW LETTER TSADI WITH DAGESH +FB47;HL # HEBREW LETTER QOF WITH DAGESH +FB48;HL # HEBREW LETTER RESH WITH DAGESH +FB49;HL # HEBREW LETTER SHIN WITH DAGESH +FB4A;HL # HEBREW LETTER TAV WITH DAGESH +FB4B;HL # HEBREW LETTER VAV WITH HOLAM +FB4C;HL # HEBREW LETTER BET WITH RAFE +FB4D;HL # HEBREW LETTER KAF WITH RAFE +FB4E;HL # HEBREW LETTER PE WITH RAFE +FB4F;HL # HEBREW LIGATURE ALEF LAMED FB50;AL # ARABIC LETTER ALEF WASLA ISOLATED FORM FB51;AL # ARABIC LETTER ALEF WASLA FINAL FORM FB52;AL # ARABIC LETTER BEEH ISOLATED FORM @@ -16625,16 +16751,16 @@ FF63;CL # HALFWIDTH RIGHT CORNER BRACKET FF64;CL # HALFWIDTH IDEOGRAPHIC COMMA FF65;NS # HALFWIDTH KATAKANA MIDDLE DOT FF66;AL # HALFWIDTH KATAKANA LETTER WO -FF67;NS # HALFWIDTH KATAKANA LETTER SMALL A -FF68;NS # HALFWIDTH KATAKANA LETTER SMALL I -FF69;NS # HALFWIDTH KATAKANA LETTER SMALL U -FF6A;NS # HALFWIDTH KATAKANA LETTER SMALL E -FF6B;NS # HALFWIDTH KATAKANA LETTER SMALL O -FF6C;NS # HALFWIDTH KATAKANA LETTER SMALL YA -FF6D;NS # HALFWIDTH KATAKANA LETTER SMALL YU -FF6E;NS # HALFWIDTH KATAKANA LETTER SMALL YO -FF6F;NS # HALFWIDTH KATAKANA LETTER SMALL TU -FF70;NS # HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK +FF67;CJ # HALFWIDTH KATAKANA LETTER SMALL A +FF68;CJ # HALFWIDTH 
KATAKANA LETTER SMALL I +FF69;CJ # HALFWIDTH KATAKANA LETTER SMALL U +FF6A;CJ # HALFWIDTH KATAKANA LETTER SMALL E +FF6B;CJ # HALFWIDTH KATAKANA LETTER SMALL O +FF6C;CJ # HALFWIDTH KATAKANA LETTER SMALL YA +FF6D;CJ # HALFWIDTH KATAKANA LETTER SMALL YU +FF6E;CJ # HALFWIDTH KATAKANA LETTER SMALL YO +FF6F;CJ # HALFWIDTH KATAKANA LETTER SMALL TU +FF70;CJ # HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK FF71;AL # HALFWIDTH KATAKANA LETTER A FF72;AL # HALFWIDTH KATAKANA LETTER I FF73;AL # HALFWIDTH KATAKANA LETTER U @@ -17685,6 +17811,64 @@ FFFD;AI # REPLACEMENT CHARACTER 10938;AL # LYDIAN LETTER NN 10939;AL # LYDIAN LETTER C 1093F;AL # LYDIAN TRIANGULAR MARK +10980;AL # MEROITIC HIEROGLYPHIC LETTER A +10981;AL # MEROITIC HIEROGLYPHIC LETTER E +10982;AL # MEROITIC HIEROGLYPHIC LETTER I +10983;AL # MEROITIC HIEROGLYPHIC LETTER O +10984;AL # MEROITIC HIEROGLYPHIC LETTER YA +10985;AL # MEROITIC HIEROGLYPHIC LETTER WA +10986;AL # MEROITIC HIEROGLYPHIC LETTER BA +10987;AL # MEROITIC HIEROGLYPHIC LETTER BA-2 +10988;AL # MEROITIC HIEROGLYPHIC LETTER PA +10989;AL # MEROITIC HIEROGLYPHIC LETTER MA +1098A;AL # MEROITIC HIEROGLYPHIC LETTER NA +1098B;AL # MEROITIC HIEROGLYPHIC LETTER NA-2 +1098C;AL # MEROITIC HIEROGLYPHIC LETTER NE +1098D;AL # MEROITIC HIEROGLYPHIC LETTER NE-2 +1098E;AL # MEROITIC HIEROGLYPHIC LETTER RA +1098F;AL # MEROITIC HIEROGLYPHIC LETTER RA-2 +10990;AL # MEROITIC HIEROGLYPHIC LETTER LA +10991;AL # MEROITIC HIEROGLYPHIC LETTER KHA +10992;AL # MEROITIC HIEROGLYPHIC LETTER HHA +10993;AL # MEROITIC HIEROGLYPHIC LETTER SA +10994;AL # MEROITIC HIEROGLYPHIC LETTER SA-2 +10995;AL # MEROITIC HIEROGLYPHIC LETTER SE +10996;AL # MEROITIC HIEROGLYPHIC LETTER KA +10997;AL # MEROITIC HIEROGLYPHIC LETTER QA +10998;AL # MEROITIC HIEROGLYPHIC LETTER TA +10999;AL # MEROITIC HIEROGLYPHIC LETTER TA-2 +1099A;AL # MEROITIC HIEROGLYPHIC LETTER TE +1099B;AL # MEROITIC HIEROGLYPHIC LETTER TE-2 +1099C;AL # MEROITIC HIEROGLYPHIC LETTER TO +1099D;AL # MEROITIC HIEROGLYPHIC LETTER DA 
+1099E;AL # MEROITIC HIEROGLYPHIC SYMBOL VIDJ +1099F;AL # MEROITIC HIEROGLYPHIC SYMBOL VIDJ-2 +109A0;AL # MEROITIC CURSIVE LETTER A +109A1;AL # MEROITIC CURSIVE LETTER E +109A2;AL # MEROITIC CURSIVE LETTER I +109A3;AL # MEROITIC CURSIVE LETTER O +109A4;AL # MEROITIC CURSIVE LETTER YA +109A5;AL # MEROITIC CURSIVE LETTER WA +109A6;AL # MEROITIC CURSIVE LETTER BA +109A7;AL # MEROITIC CURSIVE LETTER PA +109A8;AL # MEROITIC CURSIVE LETTER MA +109A9;AL # MEROITIC CURSIVE LETTER NA +109AA;AL # MEROITIC CURSIVE LETTER NE +109AB;AL # MEROITIC CURSIVE LETTER RA +109AC;AL # MEROITIC CURSIVE LETTER LA +109AD;AL # MEROITIC CURSIVE LETTER KHA +109AE;AL # MEROITIC CURSIVE LETTER HHA +109AF;AL # MEROITIC CURSIVE LETTER SA +109B0;AL # MEROITIC CURSIVE LETTER ARCHAIC SA +109B1;AL # MEROITIC CURSIVE LETTER SE +109B2;AL # MEROITIC CURSIVE LETTER KA +109B3;AL # MEROITIC CURSIVE LETTER QA +109B4;AL # MEROITIC CURSIVE LETTER TA +109B5;AL # MEROITIC CURSIVE LETTER TE +109B6;AL # MEROITIC CURSIVE LETTER TO +109B7;AL # MEROITIC CURSIVE LETTER DA +109BE;AL # MEROITIC CURSIVE LOGOGRAM RMT +109BF;AL # MEROITIC CURSIVE LOGOGRAM IMN 10A00;AL # KHAROSHTHI LETTER A 10A01;CM # KHAROSHTHI VOWEL SIGN I 10A02;CM # KHAROSHTHI VOWEL SIGN U @@ -18178,6 +18362,257 @@ FFFD;AI # REPLACEMENT CHARACTER 110BF;BA # KAITHI DOUBLE SECTION MARK 110C0;BA # KAITHI DANDA 110C1;BA # KAITHI DOUBLE DANDA +110D0;AL # SORA SOMPENG LETTER SAH +110D1;AL # SORA SOMPENG LETTER TAH +110D2;AL # SORA SOMPENG LETTER BAH +110D3;AL # SORA SOMPENG LETTER CAH +110D4;AL # SORA SOMPENG LETTER DAH +110D5;AL # SORA SOMPENG LETTER GAH +110D6;AL # SORA SOMPENG LETTER MAH +110D7;AL # SORA SOMPENG LETTER NGAH +110D8;AL # SORA SOMPENG LETTER LAH +110D9;AL # SORA SOMPENG LETTER NAH +110DA;AL # SORA SOMPENG LETTER VAH +110DB;AL # SORA SOMPENG LETTER PAH +110DC;AL # SORA SOMPENG LETTER YAH +110DD;AL # SORA SOMPENG LETTER RAH +110DE;AL # SORA SOMPENG LETTER HAH +110DF;AL # SORA SOMPENG LETTER KAH +110E0;AL # SORA SOMPENG LETTER JAH +110E1;AL # 
SORA SOMPENG LETTER NYAH +110E2;AL # SORA SOMPENG LETTER AH +110E3;AL # SORA SOMPENG LETTER EEH +110E4;AL # SORA SOMPENG LETTER IH +110E5;AL # SORA SOMPENG LETTER UH +110E6;AL # SORA SOMPENG LETTER OH +110E7;AL # SORA SOMPENG LETTER EH +110E8;AL # SORA SOMPENG LETTER MAE +110F0;NU # SORA SOMPENG DIGIT ZERO +110F1;NU # SORA SOMPENG DIGIT ONE +110F2;NU # SORA SOMPENG DIGIT TWO +110F3;NU # SORA SOMPENG DIGIT THREE +110F4;NU # SORA SOMPENG DIGIT FOUR +110F5;NU # SORA SOMPENG DIGIT FIVE +110F6;NU # SORA SOMPENG DIGIT SIX +110F7;NU # SORA SOMPENG DIGIT SEVEN +110F8;NU # SORA SOMPENG DIGIT EIGHT +110F9;NU # SORA SOMPENG DIGIT NINE +11100;CM # CHAKMA SIGN CANDRABINDU +11101;CM # CHAKMA SIGN ANUSVARA +11102;CM # CHAKMA SIGN VISARGA +11103;AL # CHAKMA LETTER AA +11104;AL # CHAKMA LETTER I +11105;AL # CHAKMA LETTER U +11106;AL # CHAKMA LETTER E +11107;AL # CHAKMA LETTER KAA +11108;AL # CHAKMA LETTER KHAA +11109;AL # CHAKMA LETTER GAA +1110A;AL # CHAKMA LETTER GHAA +1110B;AL # CHAKMA LETTER NGAA +1110C;AL # CHAKMA LETTER CAA +1110D;AL # CHAKMA LETTER CHAA +1110E;AL # CHAKMA LETTER JAA +1110F;AL # CHAKMA LETTER JHAA +11110;AL # CHAKMA LETTER NYAA +11111;AL # CHAKMA LETTER TTAA +11112;AL # CHAKMA LETTER TTHAA +11113;AL # CHAKMA LETTER DDAA +11114;AL # CHAKMA LETTER DDHAA +11115;AL # CHAKMA LETTER NNAA +11116;AL # CHAKMA LETTER TAA +11117;AL # CHAKMA LETTER THAA +11118;AL # CHAKMA LETTER DAA +11119;AL # CHAKMA LETTER DHAA +1111A;AL # CHAKMA LETTER NAA +1111B;AL # CHAKMA LETTER PAA +1111C;AL # CHAKMA LETTER PHAA +1111D;AL # CHAKMA LETTER BAA +1111E;AL # CHAKMA LETTER BHAA +1111F;AL # CHAKMA LETTER MAA +11120;AL # CHAKMA LETTER YYAA +11121;AL # CHAKMA LETTER YAA +11122;AL # CHAKMA LETTER RAA +11123;AL # CHAKMA LETTER LAA +11124;AL # CHAKMA LETTER WAA +11125;AL # CHAKMA LETTER SAA +11126;AL # CHAKMA LETTER HAA +11127;CM # CHAKMA VOWEL SIGN A +11128;CM # CHAKMA VOWEL SIGN I +11129;CM # CHAKMA VOWEL SIGN II +1112A;CM # CHAKMA VOWEL SIGN U +1112B;CM # CHAKMA VOWEL SIGN UU +1112C;CM # 
CHAKMA VOWEL SIGN E +1112D;CM # CHAKMA VOWEL SIGN AI +1112E;CM # CHAKMA VOWEL SIGN O +1112F;CM # CHAKMA VOWEL SIGN AU +11130;CM # CHAKMA VOWEL SIGN OI +11131;CM # CHAKMA O MARK +11132;CM # CHAKMA AU MARK +11133;CM # CHAKMA VIRAMA +11134;CM # CHAKMA MAAYYAA +11136;NU # CHAKMA DIGIT ZERO +11137;NU # CHAKMA DIGIT ONE +11138;NU # CHAKMA DIGIT TWO +11139;NU # CHAKMA DIGIT THREE +1113A;NU # CHAKMA DIGIT FOUR +1113B;NU # CHAKMA DIGIT FIVE +1113C;NU # CHAKMA DIGIT SIX +1113D;NU # CHAKMA DIGIT SEVEN +1113E;NU # CHAKMA DIGIT EIGHT +1113F;NU # CHAKMA DIGIT NINE +11140;BA # CHAKMA SECTION MARK +11141;BA # CHAKMA DANDA +11142;BA # CHAKMA DOUBLE DANDA +11143;BA # CHAKMA QUESTION MARK +11180;CM # SHARADA SIGN CANDRABINDU +11181;CM # SHARADA SIGN ANUSVARA +11182;CM # SHARADA SIGN VISARGA +11183;AL # SHARADA LETTER A +11184;AL # SHARADA LETTER AA +11185;AL # SHARADA LETTER I +11186;AL # SHARADA LETTER II +11187;AL # SHARADA LETTER U +11188;AL # SHARADA LETTER UU +11189;AL # SHARADA LETTER VOCALIC R +1118A;AL # SHARADA LETTER VOCALIC RR +1118B;AL # SHARADA LETTER VOCALIC L +1118C;AL # SHARADA LETTER VOCALIC LL +1118D;AL # SHARADA LETTER E +1118E;AL # SHARADA LETTER AI +1118F;AL # SHARADA LETTER O +11190;AL # SHARADA LETTER AU +11191;AL # SHARADA LETTER KA +11192;AL # SHARADA LETTER KHA +11193;AL # SHARADA LETTER GA +11194;AL # SHARADA LETTER GHA +11195;AL # SHARADA LETTER NGA +11196;AL # SHARADA LETTER CA +11197;AL # SHARADA LETTER CHA +11198;AL # SHARADA LETTER JA +11199;AL # SHARADA LETTER JHA +1119A;AL # SHARADA LETTER NYA +1119B;AL # SHARADA LETTER TTA +1119C;AL # SHARADA LETTER TTHA +1119D;AL # SHARADA LETTER DDA +1119E;AL # SHARADA LETTER DDHA +1119F;AL # SHARADA LETTER NNA +111A0;AL # SHARADA LETTER TA +111A1;AL # SHARADA LETTER THA +111A2;AL # SHARADA LETTER DA +111A3;AL # SHARADA LETTER DHA +111A4;AL # SHARADA LETTER NA +111A5;AL # SHARADA LETTER PA +111A6;AL # SHARADA LETTER PHA +111A7;AL # SHARADA LETTER BA +111A8;AL # SHARADA LETTER BHA +111A9;AL # SHARADA LETTER MA 
+111AA;AL # SHARADA LETTER YA +111AB;AL # SHARADA LETTER RA +111AC;AL # SHARADA LETTER LA +111AD;AL # SHARADA LETTER LLA +111AE;AL # SHARADA LETTER VA +111AF;AL # SHARADA LETTER SHA +111B0;AL # SHARADA LETTER SSA +111B1;AL # SHARADA LETTER SA +111B2;AL # SHARADA LETTER HA +111B3;CM # SHARADA VOWEL SIGN AA +111B4;CM # SHARADA VOWEL SIGN I +111B5;CM # SHARADA VOWEL SIGN II +111B6;CM # SHARADA VOWEL SIGN U +111B7;CM # SHARADA VOWEL SIGN UU +111B8;CM # SHARADA VOWEL SIGN VOCALIC R +111B9;CM # SHARADA VOWEL SIGN VOCALIC RR +111BA;CM # SHARADA VOWEL SIGN VOCALIC L +111BB;CM # SHARADA VOWEL SIGN VOCALIC LL +111BC;CM # SHARADA VOWEL SIGN E +111BD;CM # SHARADA VOWEL SIGN AI +111BE;CM # SHARADA VOWEL SIGN O +111BF;CM # SHARADA VOWEL SIGN AU +111C0;CM # SHARADA SIGN VIRAMA +111C1;AL # SHARADA SIGN AVAGRAHA +111C2;AL # SHARADA SIGN JIHVAMULIYA +111C3;AL # SHARADA SIGN UPADHMANIYA +111C4;AL # SHARADA OM +111C5;BA # SHARADA DANDA +111C6;BA # SHARADA DOUBLE DANDA +111C7;AL # SHARADA ABBREVIATION SIGN +111C8;BA # SHARADA SEPARATOR +111D0;NU # SHARADA DIGIT ZERO +111D1;NU # SHARADA DIGIT ONE +111D2;NU # SHARADA DIGIT TWO +111D3;NU # SHARADA DIGIT THREE +111D4;NU # SHARADA DIGIT FOUR +111D5;NU # SHARADA DIGIT FIVE +111D6;NU # SHARADA DIGIT SIX +111D7;NU # SHARADA DIGIT SEVEN +111D8;NU # SHARADA DIGIT EIGHT +111D9;NU # SHARADA DIGIT NINE +11680;AL # TAKRI LETTER A +11681;AL # TAKRI LETTER AA +11682;AL # TAKRI LETTER I +11683;AL # TAKRI LETTER II +11684;AL # TAKRI LETTER U +11685;AL # TAKRI LETTER UU +11686;AL # TAKRI LETTER E +11687;AL # TAKRI LETTER AI +11688;AL # TAKRI LETTER O +11689;AL # TAKRI LETTER AU +1168A;AL # TAKRI LETTER KA +1168B;AL # TAKRI LETTER KHA +1168C;AL # TAKRI LETTER GA +1168D;AL # TAKRI LETTER GHA +1168E;AL # TAKRI LETTER NGA +1168F;AL # TAKRI LETTER CA +11690;AL # TAKRI LETTER CHA +11691;AL # TAKRI LETTER JA +11692;AL # TAKRI LETTER JHA +11693;AL # TAKRI LETTER NYA +11694;AL # TAKRI LETTER TTA +11695;AL # TAKRI LETTER TTHA +11696;AL # TAKRI LETTER DDA +11697;AL 
# TAKRI LETTER DDHA +11698;AL # TAKRI LETTER NNA +11699;AL # TAKRI LETTER TA +1169A;AL # TAKRI LETTER THA +1169B;AL # TAKRI LETTER DA +1169C;AL # TAKRI LETTER DHA +1169D;AL # TAKRI LETTER NA +1169E;AL # TAKRI LETTER PA +1169F;AL # TAKRI LETTER PHA +116A0;AL # TAKRI LETTER BA +116A1;AL # TAKRI LETTER BHA +116A2;AL # TAKRI LETTER MA +116A3;AL # TAKRI LETTER YA +116A4;AL # TAKRI LETTER RA +116A5;AL # TAKRI LETTER LA +116A6;AL # TAKRI LETTER VA +116A7;AL # TAKRI LETTER SHA +116A8;AL # TAKRI LETTER SA +116A9;AL # TAKRI LETTER HA +116AA;AL # TAKRI LETTER RRA +116AB;CM # TAKRI SIGN ANUSVARA +116AC;CM # TAKRI SIGN VISARGA +116AD;CM # TAKRI VOWEL SIGN AA +116AE;CM # TAKRI VOWEL SIGN I +116AF;CM # TAKRI VOWEL SIGN II +116B0;CM # TAKRI VOWEL SIGN U +116B1;CM # TAKRI VOWEL SIGN UU +116B2;CM # TAKRI VOWEL SIGN E +116B3;CM # TAKRI VOWEL SIGN AI +116B4;CM # TAKRI VOWEL SIGN O +116B5;CM # TAKRI VOWEL SIGN AU +116B6;CM # TAKRI SIGN VIRAMA +116B7;CM # TAKRI SIGN NUKTA +116C0;NU # TAKRI DIGIT ZERO +116C1;NU # TAKRI DIGIT ONE +116C2;NU # TAKRI DIGIT TWO +116C3;NU # TAKRI DIGIT THREE +116C4;NU # TAKRI DIGIT FOUR +116C5;NU # TAKRI DIGIT FIVE +116C6;NU # TAKRI DIGIT SIX +116C7;NU # TAKRI DIGIT SEVEN +116C8;NU # TAKRI DIGIT EIGHT +116C9;NU # TAKRI DIGIT NINE 12000;AL # CUNEIFORM SIGN A 12001;AL # CUNEIFORM SIGN A TIMES A 12002;AL # CUNEIFORM SIGN A TIMES BAD @@ -20800,6 +21235,139 @@ FFFD;AI # REPLACEMENT CHARACTER 16A36;AL # BAMUM LETTER PHASE-F KPA 16A37;AL # BAMUM LETTER PHASE-F SAMBA 16A38;AL # BAMUM LETTER PHASE-F VUEQ +16F00;AL # MIAO LETTER PA +16F01;AL # MIAO LETTER BA +16F02;AL # MIAO LETTER YI PA +16F03;AL # MIAO LETTER PLA +16F04;AL # MIAO LETTER MA +16F05;AL # MIAO LETTER MHA +16F06;AL # MIAO LETTER ARCHAIC MA +16F07;AL # MIAO LETTER FA +16F08;AL # MIAO LETTER VA +16F09;AL # MIAO LETTER VFA +16F0A;AL # MIAO LETTER TA +16F0B;AL # MIAO LETTER DA +16F0C;AL # MIAO LETTER YI TTA +16F0D;AL # MIAO LETTER YI TA +16F0E;AL # MIAO LETTER TTA +16F0F;AL # MIAO LETTER DDA +16F10;AL # MIAO 
LETTER NA +16F11;AL # MIAO LETTER NHA +16F12;AL # MIAO LETTER YI NNA +16F13;AL # MIAO LETTER ARCHAIC NA +16F14;AL # MIAO LETTER NNA +16F15;AL # MIAO LETTER NNHA +16F16;AL # MIAO LETTER LA +16F17;AL # MIAO LETTER LYA +16F18;AL # MIAO LETTER LHA +16F19;AL # MIAO LETTER LHYA +16F1A;AL # MIAO LETTER TLHA +16F1B;AL # MIAO LETTER DLHA +16F1C;AL # MIAO LETTER TLHYA +16F1D;AL # MIAO LETTER DLHYA +16F1E;AL # MIAO LETTER KA +16F1F;AL # MIAO LETTER GA +16F20;AL # MIAO LETTER YI KA +16F21;AL # MIAO LETTER QA +16F22;AL # MIAO LETTER QGA +16F23;AL # MIAO LETTER NGA +16F24;AL # MIAO LETTER NGHA +16F25;AL # MIAO LETTER ARCHAIC NGA +16F26;AL # MIAO LETTER HA +16F27;AL # MIAO LETTER XA +16F28;AL # MIAO LETTER GHA +16F29;AL # MIAO LETTER GHHA +16F2A;AL # MIAO LETTER TSSA +16F2B;AL # MIAO LETTER DZZA +16F2C;AL # MIAO LETTER NYA +16F2D;AL # MIAO LETTER NYHA +16F2E;AL # MIAO LETTER TSHA +16F2F;AL # MIAO LETTER DZHA +16F30;AL # MIAO LETTER YI TSHA +16F31;AL # MIAO LETTER YI DZHA +16F32;AL # MIAO LETTER REFORMED TSHA +16F33;AL # MIAO LETTER SHA +16F34;AL # MIAO LETTER SSA +16F35;AL # MIAO LETTER ZHA +16F36;AL # MIAO LETTER ZSHA +16F37;AL # MIAO LETTER TSA +16F38;AL # MIAO LETTER DZA +16F39;AL # MIAO LETTER YI TSA +16F3A;AL # MIAO LETTER SA +16F3B;AL # MIAO LETTER ZA +16F3C;AL # MIAO LETTER ZSA +16F3D;AL # MIAO LETTER ZZA +16F3E;AL # MIAO LETTER ZZSA +16F3F;AL # MIAO LETTER ARCHAIC ZZA +16F40;AL # MIAO LETTER ZZYA +16F41;AL # MIAO LETTER ZZSYA +16F42;AL # MIAO LETTER WA +16F43;AL # MIAO LETTER AH +16F44;AL # MIAO LETTER HHA +16F50;AL # MIAO LETTER NASALIZATION +16F51;CM # MIAO SIGN ASPIRATION +16F52;CM # MIAO SIGN REFORMED VOICING +16F53;CM # MIAO SIGN REFORMED ASPIRATION +16F54;CM # MIAO VOWEL SIGN A +16F55;CM # MIAO VOWEL SIGN AA +16F56;CM # MIAO VOWEL SIGN AHH +16F57;CM # MIAO VOWEL SIGN AN +16F58;CM # MIAO VOWEL SIGN ANG +16F59;CM # MIAO VOWEL SIGN O +16F5A;CM # MIAO VOWEL SIGN OO +16F5B;CM # MIAO VOWEL SIGN WO +16F5C;CM # MIAO VOWEL SIGN W +16F5D;CM # MIAO VOWEL SIGN E +16F5E;CM # 
MIAO VOWEL SIGN EN +16F5F;CM # MIAO VOWEL SIGN ENG +16F60;CM # MIAO VOWEL SIGN OEY +16F61;CM # MIAO VOWEL SIGN I +16F62;CM # MIAO VOWEL SIGN IA +16F63;CM # MIAO VOWEL SIGN IAN +16F64;CM # MIAO VOWEL SIGN IANG +16F65;CM # MIAO VOWEL SIGN IO +16F66;CM # MIAO VOWEL SIGN IE +16F67;CM # MIAO VOWEL SIGN II +16F68;CM # MIAO VOWEL SIGN IU +16F69;CM # MIAO VOWEL SIGN ING +16F6A;CM # MIAO VOWEL SIGN U +16F6B;CM # MIAO VOWEL SIGN UA +16F6C;CM # MIAO VOWEL SIGN UAN +16F6D;CM # MIAO VOWEL SIGN UANG +16F6E;CM # MIAO VOWEL SIGN UU +16F6F;CM # MIAO VOWEL SIGN UEI +16F70;CM # MIAO VOWEL SIGN UNG +16F71;CM # MIAO VOWEL SIGN Y +16F72;CM # MIAO VOWEL SIGN YI +16F73;CM # MIAO VOWEL SIGN AE +16F74;CM # MIAO VOWEL SIGN AEE +16F75;CM # MIAO VOWEL SIGN ERR +16F76;CM # MIAO VOWEL SIGN ROUNDED ERR +16F77;CM # MIAO VOWEL SIGN ER +16F78;CM # MIAO VOWEL SIGN ROUNDED ER +16F79;CM # MIAO VOWEL SIGN AI +16F7A;CM # MIAO VOWEL SIGN EI +16F7B;CM # MIAO VOWEL SIGN AU +16F7C;CM # MIAO VOWEL SIGN OU +16F7D;CM # MIAO VOWEL SIGN N +16F7E;CM # MIAO VOWEL SIGN NG +16F8F;CM # MIAO TONE RIGHT +16F90;CM # MIAO TONE TOP RIGHT +16F91;CM # MIAO TONE ABOVE +16F92;CM # MIAO TONE BELOW +16F93;AL # MIAO LETTER TONE-2 +16F94;AL # MIAO LETTER TONE-3 +16F95;AL # MIAO LETTER TONE-4 +16F96;AL # MIAO LETTER TONE-5 +16F97;AL # MIAO LETTER TONE-6 +16F98;AL # MIAO LETTER TONE-7 +16F99;AL # MIAO LETTER TONE-8 +16F9A;AL # MIAO LETTER REFORMED TONE-1 +16F9B;AL # MIAO LETTER REFORMED TONE-2 +16F9C;AL # MIAO LETTER REFORMED TONE-4 +16F9D;AL # MIAO LETTER REFORMED TONE-5 +16F9E;AL # MIAO LETTER REFORMED TONE-6 +16F9F;AL # MIAO LETTER REFORMED TONE-8 1B000;ID # KATAKANA LETTER ARCHAIC E 1B001;ID # HIRAGANA LETTER ARCHAIC YE 1D000;AL # BYZANTINE MUSICAL SYMBOL PSILI @@ -22439,6 +23007,149 @@ FFFD;AI # REPLACEMENT CHARACTER 1D7FD;NU # MATHEMATICAL MONOSPACE DIGIT SEVEN 1D7FE;NU # MATHEMATICAL MONOSPACE DIGIT EIGHT 1D7FF;NU # MATHEMATICAL MONOSPACE DIGIT NINE +1EE00;AL # ARABIC MATHEMATICAL ALEF +1EE01;AL # ARABIC MATHEMATICAL BEH 
+1EE02;AL # ARABIC MATHEMATICAL JEEM +1EE03;AL # ARABIC MATHEMATICAL DAL +1EE05;AL # ARABIC MATHEMATICAL WAW +1EE06;AL # ARABIC MATHEMATICAL ZAIN +1EE07;AL # ARABIC MATHEMATICAL HAH +1EE08;AL # ARABIC MATHEMATICAL TAH +1EE09;AL # ARABIC MATHEMATICAL YEH +1EE0A;AL # ARABIC MATHEMATICAL KAF +1EE0B;AL # ARABIC MATHEMATICAL LAM +1EE0C;AL # ARABIC MATHEMATICAL MEEM +1EE0D;AL # ARABIC MATHEMATICAL NOON +1EE0E;AL # ARABIC MATHEMATICAL SEEN +1EE0F;AL # ARABIC MATHEMATICAL AIN +1EE10;AL # ARABIC MATHEMATICAL FEH +1EE11;AL # ARABIC MATHEMATICAL SAD +1EE12;AL # ARABIC MATHEMATICAL QAF +1EE13;AL # ARABIC MATHEMATICAL REH +1EE14;AL # ARABIC MATHEMATICAL SHEEN +1EE15;AL # ARABIC MATHEMATICAL TEH +1EE16;AL # ARABIC MATHEMATICAL THEH +1EE17;AL # ARABIC MATHEMATICAL KHAH +1EE18;AL # ARABIC MATHEMATICAL THAL +1EE19;AL # ARABIC MATHEMATICAL DAD +1EE1A;AL # ARABIC MATHEMATICAL ZAH +1EE1B;AL # ARABIC MATHEMATICAL GHAIN +1EE1C;AL # ARABIC MATHEMATICAL DOTLESS BEH +1EE1D;AL # ARABIC MATHEMATICAL DOTLESS NOON +1EE1E;AL # ARABIC MATHEMATICAL DOTLESS FEH +1EE1F;AL # ARABIC MATHEMATICAL DOTLESS QAF +1EE21;AL # ARABIC MATHEMATICAL INITIAL BEH +1EE22;AL # ARABIC MATHEMATICAL INITIAL JEEM +1EE24;AL # ARABIC MATHEMATICAL INITIAL HEH +1EE27;AL # ARABIC MATHEMATICAL INITIAL HAH +1EE29;AL # ARABIC MATHEMATICAL INITIAL YEH +1EE2A;AL # ARABIC MATHEMATICAL INITIAL KAF +1EE2B;AL # ARABIC MATHEMATICAL INITIAL LAM +1EE2C;AL # ARABIC MATHEMATICAL INITIAL MEEM +1EE2D;AL # ARABIC MATHEMATICAL INITIAL NOON +1EE2E;AL # ARABIC MATHEMATICAL INITIAL SEEN +1EE2F;AL # ARABIC MATHEMATICAL INITIAL AIN +1EE30;AL # ARABIC MATHEMATICAL INITIAL FEH +1EE31;AL # ARABIC MATHEMATICAL INITIAL SAD +1EE32;AL # ARABIC MATHEMATICAL INITIAL QAF +1EE34;AL # ARABIC MATHEMATICAL INITIAL SHEEN +1EE35;AL # ARABIC MATHEMATICAL INITIAL TEH +1EE36;AL # ARABIC MATHEMATICAL INITIAL THEH +1EE37;AL # ARABIC MATHEMATICAL INITIAL KHAH +1EE39;AL # ARABIC MATHEMATICAL INITIAL DAD +1EE3B;AL # ARABIC MATHEMATICAL INITIAL GHAIN +1EE42;AL # ARABIC 
MATHEMATICAL TAILED JEEM +1EE47;AL # ARABIC MATHEMATICAL TAILED HAH +1EE49;AL # ARABIC MATHEMATICAL TAILED YEH +1EE4B;AL # ARABIC MATHEMATICAL TAILED LAM +1EE4D;AL # ARABIC MATHEMATICAL TAILED NOON +1EE4E;AL # ARABIC MATHEMATICAL TAILED SEEN +1EE4F;AL # ARABIC MATHEMATICAL TAILED AIN +1EE51;AL # ARABIC MATHEMATICAL TAILED SAD +1EE52;AL # ARABIC MATHEMATICAL TAILED QAF +1EE54;AL # ARABIC MATHEMATICAL TAILED SHEEN +1EE57;AL # ARABIC MATHEMATICAL TAILED KHAH +1EE59;AL # ARABIC MATHEMATICAL TAILED DAD +1EE5B;AL # ARABIC MATHEMATICAL TAILED GHAIN +1EE5D;AL # ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F;AL # ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61;AL # ARABIC MATHEMATICAL STRETCHED BEH +1EE62;AL # ARABIC MATHEMATICAL STRETCHED JEEM +1EE64;AL # ARABIC MATHEMATICAL STRETCHED HEH +1EE67;AL # ARABIC MATHEMATICAL STRETCHED HAH +1EE68;AL # ARABIC MATHEMATICAL STRETCHED TAH +1EE69;AL # ARABIC MATHEMATICAL STRETCHED YEH +1EE6A;AL # ARABIC MATHEMATICAL STRETCHED KAF +1EE6C;AL # ARABIC MATHEMATICAL STRETCHED MEEM +1EE6D;AL # ARABIC MATHEMATICAL STRETCHED NOON +1EE6E;AL # ARABIC MATHEMATICAL STRETCHED SEEN +1EE6F;AL # ARABIC MATHEMATICAL STRETCHED AIN +1EE70;AL # ARABIC MATHEMATICAL STRETCHED FEH +1EE71;AL # ARABIC MATHEMATICAL STRETCHED SAD +1EE72;AL # ARABIC MATHEMATICAL STRETCHED QAF +1EE74;AL # ARABIC MATHEMATICAL STRETCHED SHEEN +1EE75;AL # ARABIC MATHEMATICAL STRETCHED TEH +1EE76;AL # ARABIC MATHEMATICAL STRETCHED THEH +1EE77;AL # ARABIC MATHEMATICAL STRETCHED KHAH +1EE79;AL # ARABIC MATHEMATICAL STRETCHED DAD +1EE7A;AL # ARABIC MATHEMATICAL STRETCHED ZAH +1EE7B;AL # ARABIC MATHEMATICAL STRETCHED GHAIN +1EE7C;AL # ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E;AL # ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80;AL # ARABIC MATHEMATICAL LOOPED ALEF +1EE81;AL # ARABIC MATHEMATICAL LOOPED BEH +1EE82;AL # ARABIC MATHEMATICAL LOOPED JEEM +1EE83;AL # ARABIC MATHEMATICAL LOOPED DAL +1EE84;AL # ARABIC MATHEMATICAL LOOPED HEH +1EE85;AL # ARABIC MATHEMATICAL LOOPED WAW 
+1EE86;AL # ARABIC MATHEMATICAL LOOPED ZAIN +1EE87;AL # ARABIC MATHEMATICAL LOOPED HAH +1EE88;AL # ARABIC MATHEMATICAL LOOPED TAH +1EE89;AL # ARABIC MATHEMATICAL LOOPED YEH +1EE8B;AL # ARABIC MATHEMATICAL LOOPED LAM +1EE8C;AL # ARABIC MATHEMATICAL LOOPED MEEM +1EE8D;AL # ARABIC MATHEMATICAL LOOPED NOON +1EE8E;AL # ARABIC MATHEMATICAL LOOPED SEEN +1EE8F;AL # ARABIC MATHEMATICAL LOOPED AIN +1EE90;AL # ARABIC MATHEMATICAL LOOPED FEH +1EE91;AL # ARABIC MATHEMATICAL LOOPED SAD +1EE92;AL # ARABIC MATHEMATICAL LOOPED QAF +1EE93;AL # ARABIC MATHEMATICAL LOOPED REH +1EE94;AL # ARABIC MATHEMATICAL LOOPED SHEEN +1EE95;AL # ARABIC MATHEMATICAL LOOPED TEH +1EE96;AL # ARABIC MATHEMATICAL LOOPED THEH +1EE97;AL # ARABIC MATHEMATICAL LOOPED KHAH +1EE98;AL # ARABIC MATHEMATICAL LOOPED THAL +1EE99;AL # ARABIC MATHEMATICAL LOOPED DAD +1EE9A;AL # ARABIC MATHEMATICAL LOOPED ZAH +1EE9B;AL # ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1;AL # ARABIC MATHEMATICAL DOUBLE-STRUCK BEH +1EEA2;AL # ARABIC MATHEMATICAL DOUBLE-STRUCK JEEM +1EEA3;AL # ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5;AL # ARABIC MATHEMATICAL DOUBLE-STRUCK WAW +1EEA6;AL # ARABIC MATHEMATICAL DOUBLE-STRUCK ZAIN +1EEA7;AL # ARABIC MATHEMATICAL DOUBLE-STRUCK HAH +1EEA8;AL # ARABIC MATHEMATICAL DOUBLE-STRUCK TAH +1EEA9;AL # ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB;AL # ARABIC MATHEMATICAL DOUBLE-STRUCK LAM +1EEAC;AL # ARABIC MATHEMATICAL DOUBLE-STRUCK MEEM +1EEAD;AL # ARABIC MATHEMATICAL DOUBLE-STRUCK NOON +1EEAE;AL # ARABIC MATHEMATICAL DOUBLE-STRUCK SEEN +1EEAF;AL # ARABIC MATHEMATICAL DOUBLE-STRUCK AIN +1EEB0;AL # ARABIC MATHEMATICAL DOUBLE-STRUCK FEH +1EEB1;AL # ARABIC MATHEMATICAL DOUBLE-STRUCK SAD +1EEB2;AL # ARABIC MATHEMATICAL DOUBLE-STRUCK QAF +1EEB3;AL # ARABIC MATHEMATICAL DOUBLE-STRUCK REH +1EEB4;AL # ARABIC MATHEMATICAL DOUBLE-STRUCK SHEEN +1EEB5;AL # ARABIC MATHEMATICAL DOUBLE-STRUCK TEH +1EEB6;AL # ARABIC MATHEMATICAL DOUBLE-STRUCK THEH +1EEB7;AL # ARABIC MATHEMATICAL DOUBLE-STRUCK KHAH +1EEB8;AL # ARABIC 
MATHEMATICAL DOUBLE-STRUCK THAL +1EEB9;AL # ARABIC MATHEMATICAL DOUBLE-STRUCK DAD +1EEBA;AL # ARABIC MATHEMATICAL DOUBLE-STRUCK ZAH +1EEBB;AL # ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN +1EEF0;AL # ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL +1EEF1;AL # ARABIC MATHEMATICAL OPERATOR HAH WITH DAL 1F000;AL # MAHJONG TILE EAST WIND 1F001;AL # MAHJONG TILE SOUTH WIND 1F002;AL # MAHJONG TILE WEST WIND @@ -22742,6 +23453,8 @@ FFFD;AI # REPLACEMENT CHARACTER 1F167;AI # NEGATIVE CIRCLED LATIN CAPITAL LETTER X 1F168;AI # NEGATIVE CIRCLED LATIN CAPITAL LETTER Y 1F169;AI # NEGATIVE CIRCLED LATIN CAPITAL LETTER Z +1F16A;AL # RAISED MC SIGN +1F16B;AL # RAISED MD SIGN 1F170;AI # NEGATIVE SQUARED LATIN CAPITAL LETTER A 1F171;AI # NEGATIVE SQUARED LATIN CAPITAL LETTER B 1F172;AI # NEGATIVE SQUARED LATIN CAPITAL LETTER C @@ -23368,6 +24081,10 @@ FFFD;AI # REPLACEMENT CHARACTER 1F53B;AL # DOWN-POINTING RED TRIANGLE 1F53C;AL # UP-POINTING SMALL RED TRIANGLE 1F53D;AL # DOWN-POINTING SMALL RED TRIANGLE +1F540;AL # CIRCLED CROSS POMMEE +1F541;AL # CROSS POMMEE WITH HALF-CIRCLE BELOW +1F542;AL # CROSS POMMEE +1F543;AL # NOTCHED LEFT SEMICIRCLE WITH THREE DOTS 1F550;AL # CLOCK FACE ONE OCLOCK 1F551;AL # CLOCK FACE TWO OCLOCK 1F552;AL # CLOCK FACE THREE OCLOCK @@ -23397,6 +24114,7 @@ FFFD;AI # REPLACEMENT CHARACTER 1F5FD;AL # STATUE OF LIBERTY 1F5FE;AL # SILHOUETTE OF JAPAN 1F5FF;AL # MOYAI +1F600;AL # GRINNING FACE 1F601;AL # GRINNING FACE WITH SMILING EYES 1F602;AL # FACE WITH TEARS OF JOY 1F603;AL # SMILING FACE WITH OPEN MOUTH @@ -23413,30 +24131,42 @@ FFFD;AI # REPLACEMENT CHARACTER 1F60E;AL # SMILING FACE WITH SUNGLASSES 1F60F;AL # SMIRKING FACE 1F610;AL # NEUTRAL FACE +1F611;AL # EXPRESSIONLESS FACE 1F612;AL # UNAMUSED FACE 1F613;AL # FACE WITH COLD SWEAT 1F614;AL # PENSIVE FACE +1F615;AL # CONFUSED FACE 1F616;AL # CONFOUNDED FACE +1F617;AL # KISSING FACE 1F618;AL # FACE THROWING A KISS +1F619;AL # KISSING FACE WITH SMILING EYES 1F61A;AL # KISSING FACE WITH CLOSED EYES 
+1F61B;AL # FACE WITH STUCK-OUT TONGUE 1F61C;AL # FACE WITH STUCK-OUT TONGUE AND WINKING EYE 1F61D;AL # FACE WITH STUCK-OUT TONGUE AND TIGHTLY-CLOSED EYES 1F61E;AL # DISAPPOINTED FACE +1F61F;AL # WORRIED FACE 1F620;AL # ANGRY FACE 1F621;AL # POUTING FACE 1F622;AL # CRYING FACE 1F623;AL # PERSEVERING FACE 1F624;AL # FACE WITH LOOK OF TRIUMPH 1F625;AL # DISAPPOINTED BUT RELIEVED FACE +1F626;AL # FROWNING FACE WITH OPEN MOUTH +1F627;AL # ANGUISHED FACE 1F628;AL # FEARFUL FACE 1F629;AL # WEARY FACE 1F62A;AL # SLEEPY FACE 1F62B;AL # TIRED FACE +1F62C;AL # GRIMACING FACE 1F62D;AL # LOUDLY CRYING FACE +1F62E;AL # FACE WITH OPEN MOUTH +1F62F;AL # HUSHED FACE 1F630;AL # FACE WITH OPEN MOUTH AND COLD SWEAT 1F631;AL # FACE SCREAMING IN FEAR 1F632;AL # ASTONISHED FACE 1F633;AL # FLUSHED FACE +1F634;AL # SLEEPING FACE 1F635;AL # DIZZY FACE 1F636;AL # FACE WITHOUT MOUTH 1F637;AL # FACE WITH MEDICAL MASK diff --git a/lib/unicore/NameAliases.txt b/lib/unicore/NameAliases.txt index caa462f4be..3992620096 100644 --- a/lib/unicore/NameAliases.txt +++ b/lib/unicore/NameAliases.txt @@ -1,40 +1,508 @@ -# NameAliases-6.0.0.txt -# Date: 2010-05-10, 11:58:00 PDT [KW] +# NameAliases-6.1.0.txt +# Date: 2012-01-03, 21:52:00 GMT [KW] # # This file is a normative contributory data file in the # Unicode Character Database. # -# Copyright (c) 2005-2010 Unicode, Inc. +# Copyright (c) 2005-2012 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # # This file defines the formal name aliases for Unicode characters. # # For informative aliases see NamesList.txt # +# The formal name aliases are divided into five types. +# +# 1. Corrections for serious problems in the character names +# 2. ISO 6429 names for C0 and C1 control functions, and other +# commonly occurring names for control codes +# 3. A few widely used alternate names for format characters +# 4. Several documented labels for C1 control code points which +# were never actually approved in any standard +# 5. 
Commonly occurring abbreviations (or acronyms) for control codes, +# format characters, spaces, and variation selectors +# +# The formal name aliases are part of the Unicode character namespace, which +# includes the character names and the names of named character sequences. +# The inclusion of ISO 6429 names and other commonly occurring names and +# abbreviations for control codes and format characters as formal name aliases +# is to help avoid name collisions between Unicode character names and the +# labels which commonly appear in text and/or in implementations such as regex, for +# control codes (which have no Unicode character name) or for format characters. +# # For documentation, see NamesList.html and http://www.unicode.org/reports/tr44/ # # FORMAT # -# Each line has two fields -# First field: Code point +# Each line has three fields, as described here: +# +# First field: Code point # Second field: Alias +# Third field: Type +# +# The Type labels used are: correction, control, alternate, figment, abbreviation +# +# Those Type labels can be mapped to other strings for display, if desired. # # In case multiple aliases are assigned, additional aliases -# would be provided on separate lines +# are provided on separate lines. Parsers of this data file should +# take note that the same code point can (and does) occur more than once. 
# #----------------------------------------------------------------- -01A2;LATIN CAPITAL LETTER GHA -01A3;LATIN SMALL LETTER GHA -0CDE;KANNADA LETTER LLLA -0E9D;LAO LETTER FO FON -0E9F;LAO LETTER FO FAY -0EA3;LAO LETTER RO -0EA5;LAO LETTER LO -0FD0;TIBETAN MARK BKA- SHOG GI MGO RGYAN -A015;YI SYLLABLE ITERATION MARK -FE18;PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRACKET -1D0C5;BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS -# Total code points: 11 +0000;NULL;control +0000;NUL;abbreviation +0001;START OF HEADING;control +0001;SOH;abbreviation +0002;START OF TEXT;control +0002;STX;abbreviation +0003;END OF TEXT;control +0003;ETX;abbreviation +0004;END OF TRANSMISSION;control +0004;EOT;abbreviation +0005;ENQUIRY;control +0005;ENQ;abbreviation +0006;ACKNOWLEDGE;control +0006;ACK;abbreviation + +# Note that no formal name alias for the ISO 6429 "BELL" is +# provided for U+0007, because of the existing name collision +# with U+1F514 BELL. + +0007;ALERT;control +0007;BEL;abbreviation +0008;BACKSPACE;control +0008;BS;abbreviation +0009;CHARACTER TABULATION;control +0009;HORIZONTAL TABULATION;control +0009;HT;abbreviation +0009;TAB;abbreviation +000A;LINE FEED;control +000A;NEW LINE;control +000A;END OF LINE;control +000A;LF;abbreviation +000A;NL;abbreviation +000A;EOL;abbreviation +000B;LINE TABULATION;control +000B;VERTICAL TABULATION;control +000B;VT;abbreviation +000C;FORM FEED;control +000C;FF;abbreviation +000D;CARRIAGE RETURN;control +000D;CR;abbreviation +000E;SHIFT OUT;control +000E;LOCKING-SHIFT ONE;control +000E;SO;abbreviation +000F;SHIFT IN;control +000F;LOCKING-SHIFT ZERO;control +000F;SI;abbreviation +0010;DATA LINK ESCAPE;control +0010;DLE;abbreviation +0011;DEVICE CONTROL ONE;control +0011;DC1;abbreviation +0012;DEVICE CONTROL TWO;control +0012;DC2;abbreviation +0013;DEVICE CONTROL THREE;control +0013;DC3;abbreviation +0014;DEVICE CONTROL FOUR;control +0014;DC4;abbreviation +0015;NEGATIVE ACKNOWLEDGE;control +0015;NAK;abbreviation 
+0016;SYNCHRONOUS IDLE;control +0016;SYN;abbreviation +0017;END OF TRANSMISSION BLOCK;control +0017;ETB;abbreviation +0018;CANCEL;control +0018;CAN;abbreviation +0019;END OF MEDIUM;control +0019;EOM;abbreviation +001A;SUBSTITUTE;control +001A;SUB;abbreviation +001B;ESCAPE;control +001B;ESC;abbreviation +001C;INFORMATION SEPARATOR FOUR;control +001C;FILE SEPARATOR;control +001C;FS;abbreviation +001D;INFORMATION SEPARATOR THREE;control +001D;GROUP SEPARATOR;control +001D;GS;abbreviation +001E;INFORMATION SEPARATOR TWO;control +001E;RECORD SEPARATOR;control +001E;RS;abbreviation +001F;INFORMATION SEPARATOR ONE;control +001F;UNIT SEPARATOR;control +001F;US;abbreviation +0020;SP;abbreviation +007F;DELETE;control +007F;DEL;abbreviation +0080;PADDING CHARACTER;figment +0080;PAD;abbreviation +0081;HIGH OCTET PRESET;figment +0081;HOP;abbreviation +0082;BREAK PERMITTED HERE;control +0082;BPH;abbreviation +0083;NO BREAK HERE;control +0083;NBH;abbreviation +0084;INDEX;control +0084;IND;abbreviation +0085;NEXT LINE;control +0085;NEL;abbreviation +0086;START OF SELECTED AREA;control +0086;SSA;abbreviation +0087;END OF SELECTED AREA;control +0087;ESA;abbreviation +0088;CHARACTER TABULATION SET;control +0088;HORIZONTAL TABULATION SET;control +0088;HTS;abbreviation +0089;CHARACTER TABULATION WITH JUSTIFICATION;control +0089;HORIZONTAL TABULATION WITH JUSTIFICATION;control +0089;HTJ;abbreviation +008A;LINE TABULATION SET;control +008A;VERTICAL TABULATION SET;control +008A;VTS;abbreviation +008B;PARTIAL LINE FORWARD;control +008B;PARTIAL LINE DOWN;control +008B;PLD;abbreviation +008C;PARTIAL LINE BACKWARD;control +008C;PARTIAL LINE UP;control +008C;PLU;abbreviation +008D;REVERSE LINE FEED;control +008D;REVERSE INDEX;control +008D;RI;abbreviation +008E;SINGLE SHIFT TWO;control +008E;SINGLE-SHIFT-2;control +008E;SS2;abbreviation +008F;SINGLE SHIFT THREE;control +008F;SINGLE-SHIFT-3;control +008F;SS3;abbreviation +0090;DEVICE CONTROL STRING;control +0090;DCS;abbreviation +0091;PRIVATE 
USE ONE;control +0091;PRIVATE USE-1;control +0091;PU1;abbreviation +0092;PRIVATE USE TWO;control +0092;PRIVATE USE-2;control +0092;PU2;abbreviation +0093;SET TRANSMIT STATE;control +0093;STS;abbreviation +0094;CANCEL CHARACTER;control +0094;CCH;abbreviation +0095;MESSAGE WAITING;control +0095;MW;abbreviation +0096;START OF GUARDED AREA;control +0096;START OF PROTECTED AREA;control +0096;SPA;abbreviation +0097;END OF GUARDED AREA;control +0097;END OF PROTECTED AREA;control +0097;EPA;abbreviation +0098;START OF STRING;control +0098;SOS;abbreviation +0099;SINGLE GRAPHIC CHARACTER INTRODUCER;figment +0099;SGC;abbreviation +009A;SINGLE CHARACTER INTRODUCER;control +009A;SCI;abbreviation +009B;CONTROL SEQUENCE INTRODUCER;control +009B;CSI;abbreviation +009C;STRING TERMINATOR;control +009C;ST;abbreviation +009D;OPERATING SYSTEM COMMAND;control +009D;OSC;abbreviation +009E;PRIVACY MESSAGE;control +009E;PM;abbreviation +009F;APPLICATION PROGRAM COMMAND;control +009F;APC;abbreviation +00A0;NBSP;abbreviation +00AD;SHY;abbreviation +01A2;LATIN CAPITAL LETTER GHA;correction +01A3;LATIN SMALL LETTER GHA;correction +034F;CGJ;abbreviation +0CDE;KANNADA LETTER LLLA;correction +0E9D;LAO LETTER FO FON;correction +0E9F;LAO LETTER FO FAY;correction +0EA3;LAO LETTER RO;correction +0EA5;LAO LETTER LO;correction +0FD0;TIBETAN MARK BKA- SHOG GI MGO RGYAN;correction +180B;FVS1;abbreviation +180C;FVS2;abbreviation +180D;FVS3;abbreviation +180E;MVS;abbreviation +200B;ZWSP;abbreviation +200C;ZWNJ;abbreviation +200D;ZWJ;abbreviation +200E;LRM;abbreviation +200F;RLM;abbreviation +202A;LRE;abbreviation +202B;RLE;abbreviation +202C;PDF;abbreviation +202D;LRO;abbreviation +202E;RLO;abbreviation +202F;NNBSP;abbreviation +205F;MMSP;abbreviation +2060;WJ;abbreviation +2118;WEIERSTRASS ELLIPTIC FUNCTION;correction +2448;MICR ON US SYMBOL;correction +2449;MICR DASH SYMBOL;correction +A015;YI SYLLABLE ITERATION MARK;correction +FE18;PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRACKET;correction 
+FE00;VS1;abbreviation +FE01;VS2;abbreviation +FE02;VS3;abbreviation +FE03;VS4;abbreviation +FE04;VS5;abbreviation +FE05;VS6;abbreviation +FE06;VS7;abbreviation +FE07;VS8;abbreviation +FE08;VS9;abbreviation +FE09;VS10;abbreviation +FE0A;VS11;abbreviation +FE0B;VS12;abbreviation +FE0C;VS13;abbreviation +FE0D;VS14;abbreviation +FE0E;VS15;abbreviation +FE0F;VS16;abbreviation +FEFF;BYTE ORDER MARK;alternate +FEFF;BOM;abbreviation +FEFF;ZWNBSP;abbreviation +1D0C5;BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS;correction +E0100;VS17;abbreviation +E0101;VS18;abbreviation +E0102;VS19;abbreviation +E0103;VS20;abbreviation +E0104;VS21;abbreviation +E0105;VS22;abbreviation +E0106;VS23;abbreviation +E0107;VS24;abbreviation +E0108;VS25;abbreviation +E0109;VS26;abbreviation +E010A;VS27;abbreviation +E010B;VS28;abbreviation +E010C;VS29;abbreviation +E010D;VS30;abbreviation +E010E;VS31;abbreviation +E010F;VS32;abbreviation +E0110;VS33;abbreviation +E0111;VS34;abbreviation +E0112;VS35;abbreviation +E0113;VS36;abbreviation +E0114;VS37;abbreviation +E0115;VS38;abbreviation +E0116;VS39;abbreviation +E0117;VS40;abbreviation +E0118;VS41;abbreviation +E0119;VS42;abbreviation +E011A;VS43;abbreviation +E011B;VS44;abbreviation +E011C;VS45;abbreviation +E011D;VS46;abbreviation +E011E;VS47;abbreviation +E011F;VS48;abbreviation +E0120;VS49;abbreviation +E0121;VS50;abbreviation +E0122;VS51;abbreviation +E0123;VS52;abbreviation +E0124;VS53;abbreviation +E0125;VS54;abbreviation +E0126;VS55;abbreviation +E0127;VS56;abbreviation +E0128;VS57;abbreviation +E0129;VS58;abbreviation +E012A;VS59;abbreviation +E012B;VS60;abbreviation +E012C;VS61;abbreviation +E012D;VS62;abbreviation +E012E;VS63;abbreviation +E012F;VS64;abbreviation +E0130;VS65;abbreviation +E0131;VS66;abbreviation +E0132;VS67;abbreviation +E0133;VS68;abbreviation +E0134;VS69;abbreviation +E0135;VS70;abbreviation +E0136;VS71;abbreviation +E0137;VS72;abbreviation +E0138;VS73;abbreviation +E0139;VS74;abbreviation 
+E013A;VS75;abbreviation +E013B;VS76;abbreviation +E013C;VS77;abbreviation +E013D;VS78;abbreviation +E013E;VS79;abbreviation +E013F;VS80;abbreviation +E0140;VS81;abbreviation +E0141;VS82;abbreviation +E0142;VS83;abbreviation +E0143;VS84;abbreviation +E0144;VS85;abbreviation +E0145;VS86;abbreviation +E0146;VS87;abbreviation +E0147;VS88;abbreviation +E0148;VS89;abbreviation +E0149;VS90;abbreviation +E014A;VS91;abbreviation +E014B;VS92;abbreviation +E014C;VS93;abbreviation +E014D;VS94;abbreviation +E014E;VS95;abbreviation +E014F;VS96;abbreviation +E0150;VS97;abbreviation +E0151;VS98;abbreviation +E0152;VS99;abbreviation +E0153;VS100;abbreviation +E0154;VS101;abbreviation +E0155;VS102;abbreviation +E0156;VS103;abbreviation +E0157;VS104;abbreviation +E0158;VS105;abbreviation +E0159;VS106;abbreviation +E015A;VS107;abbreviation +E015B;VS108;abbreviation +E015C;VS109;abbreviation +E015D;VS110;abbreviation +E015E;VS111;abbreviation +E015F;VS112;abbreviation +E0160;VS113;abbreviation +E0161;VS114;abbreviation +E0162;VS115;abbreviation +E0163;VS116;abbreviation +E0164;VS117;abbreviation +E0165;VS118;abbreviation +E0166;VS119;abbreviation +E0167;VS120;abbreviation +E0168;VS121;abbreviation +E0169;VS122;abbreviation +E016A;VS123;abbreviation +E016B;VS124;abbreviation +E016C;VS125;abbreviation +E016D;VS126;abbreviation +E016E;VS127;abbreviation +E016F;VS128;abbreviation +E0170;VS129;abbreviation +E0171;VS130;abbreviation +E0172;VS131;abbreviation +E0173;VS132;abbreviation +E0174;VS133;abbreviation +E0175;VS134;abbreviation +E0176;VS135;abbreviation +E0177;VS136;abbreviation +E0178;VS137;abbreviation +E0179;VS138;abbreviation +E017A;VS139;abbreviation +E017B;VS140;abbreviation +E017C;VS141;abbreviation +E017D;VS142;abbreviation +E017E;VS143;abbreviation +E017F;VS144;abbreviation +E0180;VS145;abbreviation +E0181;VS146;abbreviation +E0182;VS147;abbreviation +E0183;VS148;abbreviation +E0184;VS149;abbreviation +E0185;VS150;abbreviation +E0186;VS151;abbreviation 
+E0187;VS152;abbreviation +E0188;VS153;abbreviation +E0189;VS154;abbreviation +E018A;VS155;abbreviation +E018B;VS156;abbreviation +E018C;VS157;abbreviation +E018D;VS158;abbreviation +E018E;VS159;abbreviation +E018F;VS160;abbreviation +E0190;VS161;abbreviation +E0191;VS162;abbreviation +E0192;VS163;abbreviation +E0193;VS164;abbreviation +E0194;VS165;abbreviation +E0195;VS166;abbreviation +E0196;VS167;abbreviation +E0197;VS168;abbreviation +E0198;VS169;abbreviation +E0199;VS170;abbreviation +E019A;VS171;abbreviation +E019B;VS172;abbreviation +E019C;VS173;abbreviation +E019D;VS174;abbreviation +E019E;VS175;abbreviation +E019F;VS176;abbreviation +E01A0;VS177;abbreviation +E01A1;VS178;abbreviation +E01A2;VS179;abbreviation +E01A3;VS180;abbreviation +E01A4;VS181;abbreviation +E01A5;VS182;abbreviation +E01A6;VS183;abbreviation +E01A7;VS184;abbreviation +E01A8;VS185;abbreviation +E01A9;VS186;abbreviation +E01AA;VS187;abbreviation +E01AB;VS188;abbreviation +E01AC;VS189;abbreviation +E01AD;VS190;abbreviation +E01AE;VS191;abbreviation +E01AF;VS192;abbreviation +E01B0;VS193;abbreviation +E01B1;VS194;abbreviation +E01B2;VS195;abbreviation +E01B3;VS196;abbreviation +E01B4;VS197;abbreviation +E01B5;VS198;abbreviation +E01B6;VS199;abbreviation +E01B7;VS200;abbreviation +E01B8;VS201;abbreviation +E01B9;VS202;abbreviation +E01BA;VS203;abbreviation +E01BB;VS204;abbreviation +E01BC;VS205;abbreviation +E01BD;VS206;abbreviation +E01BE;VS207;abbreviation +E01BF;VS208;abbreviation +E01C0;VS209;abbreviation +E01C1;VS210;abbreviation +E01C2;VS211;abbreviation +E01C3;VS212;abbreviation +E01C4;VS213;abbreviation +E01C5;VS214;abbreviation +E01C6;VS215;abbreviation +E01C7;VS216;abbreviation +E01C8;VS217;abbreviation +E01C9;VS218;abbreviation +E01CA;VS219;abbreviation +E01CB;VS220;abbreviation +E01CC;VS221;abbreviation +E01CD;VS222;abbreviation +E01CE;VS223;abbreviation +E01CF;VS224;abbreviation +E01D0;VS225;abbreviation +E01D1;VS226;abbreviation +E01D2;VS227;abbreviation 
+E01D3;VS228;abbreviation +E01D4;VS229;abbreviation +E01D5;VS230;abbreviation +E01D6;VS231;abbreviation +E01D7;VS232;abbreviation +E01D8;VS233;abbreviation +E01D9;VS234;abbreviation +E01DA;VS235;abbreviation +E01DB;VS236;abbreviation +E01DC;VS237;abbreviation +E01DD;VS238;abbreviation +E01DE;VS239;abbreviation +E01DF;VS240;abbreviation +E01E0;VS241;abbreviation +E01E1;VS242;abbreviation +E01E2;VS243;abbreviation +E01E3;VS244;abbreviation +E01E4;VS245;abbreviation +E01E5;VS246;abbreviation +E01E6;VS247;abbreviation +E01E7;VS248;abbreviation +E01E8;VS249;abbreviation +E01E9;VS250;abbreviation +E01EA;VS251;abbreviation +E01EB;VS252;abbreviation +E01EC;VS253;abbreviation +E01ED;VS254;abbreviation +E01EE;VS255;abbreviation +E01EF;VS256;abbreviation # EOF diff --git a/lib/unicore/NamedSequences.txt b/lib/unicore/NamedSequences.txt index 0c270410df..e14c39505b 100644 --- a/lib/unicore/NamedSequences.txt +++ b/lib/unicore/NamedSequences.txt @@ -1,8 +1,8 @@ -# NamedSequences-6.0.0.txt -# Date: 2010-05-18, 10:48:00 PDT [KW] +# NamedSequences-6.1.0.txt +# Date: 2011-07-26, 19:47:00 GMT [KW] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. 
# For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ # @@ -431,6 +431,13 @@ TAMIL SYLLABLE KSSAU; 0B95 0BCD 0BB7 0BCC TAMIL SYLLABLE SHRII; 0BB6 0BCD 0BB0 0BC0 +# Sinhala medial consonants and "reph" form +# Provisional 2010-05-13, Approved 2011-08-05 + +SINHALA CONSONANT SIGN YANSAYA;0DCA 200D 0DBA +SINHALA CONSONANT SIGN RAKAARAANSAYA;0DCA 200D 0DBB +SINHALA CONSONANT SIGN REPAYA;0DBB 0DCA 200D + GEORGIAN LETTER U-BRJGU;10E3 0302 KHMER CONSONANT SIGN COENG KA;17D2 1780 KHMER CONSONANT SIGN COENG KHA;17D2 1781 diff --git a/lib/unicore/NamedSqProv.txt b/lib/unicore/NamedSqProv.txt index 9658de8451..c7561948f8 100644 --- a/lib/unicore/NamedSqProv.txt +++ b/lib/unicore/NamedSqProv.txt @@ -1,8 +1,8 @@ -# NamedSequencesProv-6.0.0.txt -# Date: 2010-05-18, 10:49:00 PDT [KW] +# NamedSequencesProv-6.1.0.txt +# Date: 2011-07-26, 19:46:00 GMT [KW] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ # @@ -34,12 +34,7 @@ # Provisional entries for NamedSequences.txt. -# Sinhala medial consonants and "reph" form -# Added to provisional named sequences, 2010-05-13 - -SINHALA CONSONANT SIGN YANSAYA;0DCA 200D 0DBA -SINHALA CONSONANT SIGN RAKAARAANSAYA;0DCA 200D 0DBB -SINHALA CONSONANT SIGN REPAYA;0DBB 0DCA 200D +# There are currently no provisional named sequences. # ================================================ diff --git a/lib/unicore/NamesList.txt b/lib/unicore/NamesList.txt index 4f698c7339..19ecbdd55a 100644 --- a/lib/unicore/NamesList.txt +++ b/lib/unicore/NamesList.txt @@ -1,13 +1,13 @@ -@@@ The Unicode Standard 6.0 -@@@+ U60M100817.lst - Final Unicode 6.0 names list. +@@@ The Unicode Standard 6.1 +@@@+ U61M111117.lst + Final Unicode 6.1 names list. 
This file is semi-automatically derived from UnicodeData.txt and a set of manually created annotations using a script to select or suppress information from the data file. The rules used for this process are aimed at readability for the human reader, at the expense of some details; therefore, this file should not be parsed for machine-readable information. -@+ Copyright (c) 1991-2010 Unicode, Inc. +@+ Copyright (c) 1991-2012 Unicode, Inc. For terms of use, see http://www.unicode.org/terms_of_use.html @@ 0000 C0 Controls and Basic Latin (Basic Latin) 007F @@+ @@ -120,7 +120,7 @@ 0024 DOLLAR SIGN = milreis, escudo * glyph may have one or two vertical bars - * other currency symbol characters: 20A0-20B8 + * other currency symbol characters: 20A0-20B9 x (currency sign - 00A4) x (heavy dollar sign - 1F4B2) 0025 PERCENT SIGN @@ -466,7 +466,7 @@ x (lira sign - 20A4) x (roman semuncia sign - 10192) 00A4 CURRENCY SIGN - * other currency symbol characters: 20A0-20B5 + * other currency symbol characters: 20A0-20B9 x (dollar sign - 0024) 00A5 YEN SIGN = yuan sign @@ -551,6 +551,7 @@ x (bullet operator - 2219) x (dot operator - 22C5) x (word separator middle dot - 2E31) + x (raised dot - 2E33) x (katakana middle dot - 30FB) 00B8 CEDILLA * this is a spacing character @@ -657,7 +658,7 @@ = Eszett * German * uppercase is "SS" - * in origin a ligature of 017F and 0073 + * typographically the glyph for this character can be based on a ligature of 017F with either 0073 or with an old-style glyph for 007A (the latter similar in appearance to 0292). Both forms exist interchangeably today. 
x (greek small letter beta - 03B2) x (latin capital letter sharp s - 1E9E) 00E0 LATIN SMALL LETTER A WITH GRAVE @@ -848,6 +849,7 @@ * there are three major glyph variants : 0067 0327 0124 LATIN CAPITAL LETTER H WITH CIRCUMFLEX + * lowercase in Nawdm is 0266 : 0048 0302 0125 LATIN SMALL LETTER H WITH CIRCUMFLEX * Esperanto @@ -1747,10 +1749,12 @@ * uppercase is A78D 0266 LATIN SMALL LETTER H WITH HOOK * breathy-voiced glottal fricative + * uppercase is A7AA + * uppercase in Nawdm is 0124 x (modifier letter small h with hook - 02B1) 0267 LATIN SMALL LETTER HENG WITH HOOK * voiceless coarticulated velar and palatoalveolar fricative - * "tj" or "kj" or "sj" in some Swedish dialects + * "sj" in some Swedish dialects 0268 LATIN SMALL LETTER I WITH STROKE = barred i, i bar * high central unrounded vowel @@ -3405,6 +3409,8 @@ x (colon - 003A) 058A ARMENIAN HYPHEN = yentamna +@ Currency symbol +058F ARMENIAN DRAM SIGN @@ 0590 Hebrew 05FF @ Cantillation marks 0591 HEBREW ACCENT ETNAHTA @@ -3550,6 +3556,8 @@ 0601 ARABIC SIGN SANAH 0602 ARABIC FOOTNOTE MARKER 0603 ARABIC SIGN SAFHA +0604 ARABIC SIGN SAMVAT + * used for writing Samvat era dates in Urdu @ Radix symbols 0606 ARABIC-INDIC CUBE ROOT x (cube root - 221B) @@ -3568,6 +3576,7 @@ 060C ARABIC COMMA * also used with Thaana and Syriac in modern text x (comma - 002C) + x (turned comma - 2E32) 060D ARABIC DATE SEPARATOR @ Poetic marks 060E ARABIC POETIC VERSE SIGN @@ -3602,6 +3611,7 @@ 061B ARABIC SEMICOLON * also used with Thaana and Syriac in modern text x (semicolon - 003B) + x (turned semicolon - 2E35) 061E ARABIC TRIPLE DOT PUNCTUATION MARK 061F ARABIC QUESTION MARK * also used with Thaana and Syriac in modern text @@ -3610,6 +3620,7 @@ @ Addition for Kashmiri 0620 ARABIC LETTER KASHMIRI YEH @ Based on ISO 8859-6 +@+ Arabic letter names follow romanization conventions derived from ISO 8859-6. These differ from the Literary Arabic pronunciation of the letter names. 
For example, U+0628 ARABIC LETTER BEH has a Literary Arabic pronunciation of ba'. 0621 ARABIC LETTER HAMZA x (modifier letter right half ring - 02BE) 0622 ARABIC LETTER ALEF WITH MADDA ABOVE @@ -3666,7 +3677,11 @@ 0648 ARABIC LETTER WAW 0649 ARABIC LETTER ALEF MAKSURA * represents YEH-shaped letter with no dots in any positional form + * not intended for use in combination with 0654 + x (arabic letter yeh with hamza above - 0626) 064A ARABIC LETTER YEH + * loses its dots when used in combination with 0654 + * retains its dots when used in combination with other combining marks @ Points from ISO 8859-6 064B ARABIC FATHATAN 064C ARABIC DAMMATAN @@ -3683,6 +3698,8 @@ @ Combining maddah and hamza 0653 ARABIC MADDAH ABOVE 0654 ARABIC HAMZA ABOVE + * not restricted to hamza semantics + * may also occur as a diacritic forming new letters 0655 ARABIC HAMZA BELOW @ Other combining marks 0656 ARABIC SUBSCRIPT ALEF @@ -3739,8 +3756,12 @@ * Koranic Arabic 0672 ARABIC LETTER ALEF WITH WAVY HAMZA ABOVE * Baluchi, Kashmiri +@ Deprecated letter 0673 ARABIC LETTER ALEF WITH WAVY HAMZA BELOW * Kashmiri + * this character is deprecated and its use is strongly discouraged + * use the sequence 0627 065F instead +@ Extended Arabic letters 0674 ARABIC LETTER HIGH HAMZA * Kazakh * forms digraphs @@ -4461,6 +4482,60 @@ 085B MANDAIC GEMINATION MARK @ Punctuation 085E MANDAIC PUNCTUATION +@@ 08A0 Arabic Extended-A 08FF +@ Extended Arabic letters for African languages +08A0 ARABIC LETTER BEH WITH SMALL V BELOW +08A2 ARABIC LETTER JEEM WITH TWO DOTS ABOVE +08A3 ARABIC LETTER TAH WITH TWO DOTS ABOVE +08A4 ARABIC LETTER FEH WITH DOT BELOW AND THREE DOTS ABOVE +08A5 ARABIC LETTER QAF WITH DOT BELOW +08A6 ARABIC LETTER LAM WITH DOUBLE BAR +08A7 ARABIC LETTER MEEM WITH THREE DOTS ABOVE +08A8 ARABIC LETTER YEH WITH TWO DOTS BELOW AND HAMZA ABOVE +08A9 ARABIC LETTER YEH WITH TWO DOTS BELOW AND DOT ABOVE +@ Dependent consonants for Rohingya +08AA ARABIC LETTER REH WITH LOOP + = bottya-reh +08AB ARABIC 
LETTER WAW WITH DOT WITHIN + = nota-wa +08AC ARABIC LETTER ROHINGYA YEH + = bottya-yeh +@ Extended vowel signs for Rohingya +08E4 ARABIC CURLY FATHA +08E5 ARABIC CURLY DAMMA +08E6 ARABIC CURLY KASRA +08E7 ARABIC CURLY FATHATAN +08E8 ARABIC CURLY DAMMATAN +08E9 ARABIC CURLY KASRATAN +@ Tone marks for Rohingya +08EA ARABIC TONE ONE DOT ABOVE +08EB ARABIC TONE TWO DOTS ABOVE +08EC ARABIC TONE LOOP ABOVE +08ED ARABIC TONE ONE DOT BELOW +08EE ARABIC TONE TWO DOTS BELOW +08EF ARABIC TONE LOOP BELOW +@ Koranic annotation signs +08F0 ARABIC OPEN FATHATAN + = successive fathatan +08F1 ARABIC OPEN DAMMATAN + = successive dammatan +08F2 ARABIC OPEN KASRATAN + = successive kasratan +08F3 ARABIC SMALL HIGH WAW +@ Extended vowel signs for African languages +08F4 ARABIC FATHA WITH RING +08F5 ARABIC FATHA WITH DOT ABOVE +08F6 ARABIC KASRA WITH DOT BELOW + * also used in Philippine languages +08F7 ARABIC LEFT ARROWHEAD ABOVE +08F8 ARABIC RIGHT ARROWHEAD ABOVE +08F9 ARABIC LEFT ARROWHEAD BELOW +08FA ARABIC RIGHT ARROWHEAD BELOW +08FB ARABIC DOUBLE RIGHT ARROWHEAD ABOVE +08FC ARABIC DOUBLE RIGHT ARROWHEAD ABOVE WITH DOT +08FD ARABIC RIGHT ARROWHEAD ABOVE WITH DOT +@ Extended vowel sign for Philippine languages +08FE ARABIC DAMMA WITH DOT @@ 0900 Devanagari 097F @@+ @ Various signs @@ -4591,6 +4666,7 @@ x (combining grave accent - 0300) 0954 DEVANAGARI ACUTE ACCENT x (combining acute accent - 0301) +@ Dependent vowel sign 0955 DEVANAGARI VOWEL SIGN CANDRA LONG E * used in transliteration of Avestan @ Dependent vowel signs for Kashmiri @@ -4775,14 +4851,14 @@ 09ED BENGALI DIGIT SEVEN 09EE BENGALI DIGIT EIGHT 09EF BENGALI DIGIT NINE -@ Bengali-specific additions +@ Additions for Assamese 09F0 BENGALI LETTER RA WITH MIDDLE DIAGONAL - * Assamese 09F1 BENGALI LETTER RA WITH LOWER DIAGONAL = bengali letter va with lower diagonal (1.0) - * Assamese +@ Currency signs 09F2 BENGALI RUPEE MARK 09F3 BENGALI RUPEE SIGN +@ Historic symbols for fractional values 09F4 BENGALI CURRENCY NUMERATOR ONE * 
not in current usage 09F5 BENGALI CURRENCY NUMERATOR TWO @@ -4792,7 +4868,9 @@ 09F7 BENGALI CURRENCY NUMERATOR FOUR 09F8 BENGALI CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR 09F9 BENGALI CURRENCY DENOMINATOR SIXTEEN +@ Sign 09FA BENGALI ISSHAR +@ Currency sign 09FB BENGALI GANDA MARK @@ 0A00 Gurmukhi 0A7F @ Various signs @@ -5015,8 +5093,11 @@ 0AED GUJARATI DIGIT SEVEN 0AEE GUJARATI DIGIT EIGHT 0AEF GUJARATI DIGIT NINE +@ Abbreviation sign +0AF0 GUJARATI ABBREVIATION SIGN @ Currency sign 0AF1 GUJARATI RUPEE SIGN + * preferred spelling is 0AB0 0AC2 0AF0 @@ 0B00 Oriya 0B7F @ Various signs 0B01 ORIYA SIGN CANDRABINDU @@ -5133,8 +5214,9 @@ 0B6D ORIYA DIGIT SEVEN 0B6E ORIYA DIGIT EIGHT 0B6F ORIYA DIGIT NINE -@ Oriya-specific additions +@ Sign 0B70 ORIYA ISSHAR +@ Additional consonant 0B71 ORIYA LETTER WA x (oriya letter o - 0B13) x (oriya letter va - 0B35) @@ -5483,12 +5565,9 @@ 0CEF KANNADA DIGIT NINE @ Signs used in Sanskrit 0CF1 KANNADA SIGN JIHVAMULIYA - * marks a velar fricative occurring only before unvoiced velar stops - x (tibetan sign lce tsa can - 0F88) + x (vedic sign jihvamuliya - 1CF5) 0CF2 KANNADA SIGN UPADHMANIYA - * marks a bilabial fricative occurring only before unvoiced labial stops - x (tibetan sign mchu can - 0F89) - x (vedic sign ardhavisarga - 1CF2) + x (vedic sign upadhmaniya - 1CF6) @@ 0D00 Malayalam 0D7F @ Various signs 0D02 MALAYALAM SIGN ANUSVARA @@ -6075,6 +6154,9 @@ # 0EAB 0E99 0EDD LAO HO MO # 0EAB 0EA1 +@ Consonants for Khmu +0EDE LAO LETTER KHMU GO +0EDF LAO LETTER KHMU NYO @@ 0F00 Tibetan 0FFF @+ The Tibetan script is called the Bodhi script in Bhutan. 
@ Syllable @@ -6290,10 +6372,10 @@ 0F87 TIBETAN SIGN YANG RTAGS @ Transliteration head letters 0F88 TIBETAN SIGN LCE TSA CAN - x (kannada sign jihvamuliya - 0CF1) + x (vedic sign jihvamuliya - 1CF5) x (mongolian letter ali gali damaru - 1882) 0F89 TIBETAN SIGN MCHU CAN - x (kannada sign upadhmaniya - 0CF2) + x (vedic sign upadhmaniya - 1CF6) x (mongolian letter ali gali inverted ubadama - 1884) 0F8A TIBETAN SIGN GRU CAN RGYINGS * always followed by 0F82 @@ -6663,6 +6745,9 @@ 10C3 GEORGIAN CAPITAL LETTER WE 10C4 GEORGIAN CAPITAL LETTER HAR 10C5 GEORGIAN CAPITAL LETTER HOE +@ Additional letters for Ossetian +10C7 GEORGIAN CAPITAL LETTER YN +10CD GEORGIAN CAPITAL LETTER AEN @ Mkhedruli @+ This is the modern secular alphabet, which is caseless. 10D0 GEORGIAN LETTER AN @@ -6716,6 +6801,10 @@ @ Modifier letter 10FC MODIFIER LETTER GEORGIAN NAR # <super> 10DC +@ Additional letters for Ossetian and Abkhaz +10FD GEORGIAN LETTER AEN +10FE GEORGIAN LETTER HARD SIGN +10FF GEORGIAN LETTER LABIAL SIGN @@ 1100 Hangul Jamo 11FF @+ The aliases in this block represent the Jamo short names. 
@ Initial consonants @@ -6739,15 +6828,21 @@ = BB 1109 HANGUL CHOSEONG SIOS = S + * voiceless lenis alveolar fricative 110A HANGUL CHOSEONG SSANGSIOS = SS + * voiceless fortis alveolar fricative 110B HANGUL CHOSEONG IEUNG + * zero sound 110C HANGUL CHOSEONG CIEUC = J + * voiceless or voiced lenis alveolar affricate 110D HANGUL CHOSEONG SSANGCIEUC = JJ + * voiceless unaspirated fortis alveolar affricate 110E HANGUL CHOSEONG CHIEUCH = C + * voiceless aspirated alveolar affricate 110F HANGUL CHOSEONG KHIEUKH = K 1110 HANGUL CHOSEONG THIEUTH @@ -6756,6 +6851,7 @@ = P 1112 HANGUL CHOSEONG HIEUH = H + * voiceless glottal fricative 1113 HANGUL CHOSEONG NIEUN-KIYEOK 1114 HANGUL CHOSEONG SSANGNIEUN 1115 HANGUL CHOSEONG NIEUN-TIKEUT @@ -6798,10 +6894,15 @@ 113A HANGUL CHOSEONG SIOS-PHIEUPH 113B HANGUL CHOSEONG SIOS-HIEUH 113C HANGUL CHOSEONG CHITUEUMSIOS + * voiceless lenis dental fricative 113D HANGUL CHOSEONG CHITUEUMSSANGSIOS + * voiceless fortis dental fricative 113E HANGUL CHOSEONG CEONGCHIEUMSIOS + * voiceless lenis retroflex fricative 113F HANGUL CHOSEONG CEONGCHIEUMSSANGSIOS + * voiceless fortis retroflex fricative 1140 HANGUL CHOSEONG PANSIOS + * voiced alveolar fricative 1141 HANGUL CHOSEONG IEUNG-KIYEOK 1142 HANGUL CHOSEONG IEUNG-TIKEUT 1143 HANGUL CHOSEONG IEUNG-MIEUM @@ -6814,19 +6915,27 @@ 114A HANGUL CHOSEONG IEUNG-THIEUTH 114B HANGUL CHOSEONG IEUNG-PHIEUPH 114C HANGUL CHOSEONG YESIEUNG + * velar nasal consonant 114D HANGUL CHOSEONG CIEUC-IEUNG 114E HANGUL CHOSEONG CHITUEUMCIEUC + * voiceless or voiced lenis dental affricate 114F HANGUL CHOSEONG CHITUEUMSSANGCIEUC + * voiceless unaspirated fortis dental affricate 1150 HANGUL CHOSEONG CEONGCHIEUMCIEUC + * voiceless or voiced lenis retroflex affricate 1151 HANGUL CHOSEONG CEONGCHIEUMSSANGCIEUC + * voiceless unaspirated fortis retroflex affricate 1152 HANGUL CHOSEONG CHIEUCH-KHIEUKH 1153 HANGUL CHOSEONG CHIEUCH-HIEUH 1154 HANGUL CHOSEONG CHITUEUMCHIEUCH + * voiceless aspirated dental affricate 1155 HANGUL CHOSEONG 
CEONGCHIEUMCHIEUCH + * voiceless aspirated retroflex affricate 1156 HANGUL CHOSEONG PHIEUPH-PIEUP 1157 HANGUL CHOSEONG KAPYEOUNPHIEUPH 1158 HANGUL CHOSEONG SSANGHIEUH 1159 HANGUL CHOSEONG YEORINHIEUH + * glottal stop 115A HANGUL CHOSEONG KIYEOK-TIKEUT 115B HANGUL CHOSEONG NIEUN-SIOS 115C HANGUL CHOSEONG NIEUN-CIEUC @@ -6918,6 +7027,7 @@ 119C HANGUL JUNGSEONG I-EU 119D HANGUL JUNGSEONG I-ARAEA 119E HANGUL JUNGSEONG ARAEA + * rounded open-mid back vowel 119F HANGUL JUNGSEONG ARAEA-EO 11A0 HANGUL JUNGSEONG ARAEA-U 11A1 HANGUL JUNGSEONG ARAEA-I @@ -6970,6 +7080,7 @@ = SS 11BC HANGUL JONGSEONG IEUNG = NG + * velar nasal consonant 11BD HANGUL JONGSEONG CIEUC = J 11BE HANGUL JONGSEONG CHIEUCH @@ -8599,7 +8710,7 @@ * 17B1 is the normal variant of this vowel 17B3 KHMER INDEPENDENT VOWEL QAU @ Inherent vowels -@+ These are for phonetic transcription to distinguish Indic language inherent vowels from Khmer inherent vowels. These characters are included solely for compatibility with particular applications; their use in other contexts is discouraged. +@+ These are invisible combining marks for phonetic transcription to distinguish Indic language inherent vowels from Khmer inherent vowels. These characters are included solely for compatibility with particular applications; their use in other contexts is discouraged. 
17B4 KHMER VOWEL INHERENT AQ 17B5 KHMER VOWEL INHERENT AA @ Dependent vowel signs @@ -9338,6 +9449,8 @@ 1A2C TAI THAM LETTER NYA 1A2D TAI THAM LETTER RATA 1A2E TAI THAM LETTER HIGH RATHA + * an alternative glyph with the upper part shaped like 1A33 is used in Thailand and Laos + * contrast the sequence 1A2D 1A5B 1A2F TAI THAM LETTER DA 1A30 TAI THAM LETTER LOW RATHA 1A31 TAI THAM LETTER RANA @@ -9744,10 +9857,17 @@ = e 1BA9 SUNDANESE VOWEL SIGN PANEULEUNG = eu -@ Virama +@ Viramas 1BAA SUNDANESE SIGN PAMAAEH = virama * does not form conjuncts +1BAB SUNDANESE SIGN VIRAMA + * forms conjuncts in older orthography +@ Consonant signs +1BAC SUNDANESE CONSONANT SIGN PASANGAN MA + = subjoined ma +1BAD SUNDANESE CONSONANT SIGN PASANGAN WA + = subjoined wa @ Additional consonants 1BAE SUNDANESE LETTER KHA 1BAF SUNDANESE LETTER SYA @@ -9762,11 +9882,21 @@ 1BB7 SUNDANESE DIGIT SEVEN 1BB8 SUNDANESE DIGIT EIGHT 1BB9 SUNDANESE DIGIT NINE +@ Sign +1BBA SUNDANESE AVAGRAHA +@ Historic letters +1BBB SUNDANESE LETTER REU + * vocalic r +1BBC SUNDANESE LETTER LEU + * vocalic l +1BBD SUNDANESE LETTER BHA +1BBE SUNDANESE LETTER FINAL K +1BBF SUNDANESE LETTER FINAL M @@ 1BC0 Batak 1BFF @ Letters @+ Annotations for letters indicate different usage among the various alphabets sharing the Batak script. 
1BC0 BATAK LETTER A - * letter a or ha for Karo and Pakpak + * letter a or ha for Karo and Pakpak 1BC1 BATAK LETTER SIMALUNGUN A 1BC2 BATAK LETTER HA * Toba letter ha or ka @@ -9989,6 +10119,20 @@ @ Punctuation 1C7E OL CHIKI PUNCTUATION MUCAAD 1C7F OL CHIKI PUNCTUATION DOUBLE MUCAAD +@@ 1CC0 Sundanese Supplement 1CCF +@ Punctuation +1CC0 SUNDANESE PUNCTUATION BINDU SURYA + * sun +1CC1 SUNDANESE PUNCTUATION BINDU PANGLONG + * half moon +1CC2 SUNDANESE PUNCTUATION BINDU PURNAMA + * full moon +1CC3 SUNDANESE PUNCTUATION BINDU CAKRA + * wheel +1CC4 SUNDANESE PUNCTUATION BINDU LEU SATANGA +1CC5 SUNDANESE PUNCTUATION BINDU KA SATANGA +1CC6 SUNDANESE PUNCTUATION BINDU DA SATANGA +1CC7 SUNDANESE PUNCTUATION BINDU BA SATANGA @@ 1CD0 Vedic Extensions 1CFF @ Tone marks for the Samaveda 1CD0 VEDIC TONE KARSHANA @@ -10074,8 +10218,18 @@ @+ Ardhavisarga denotes the sounds jihvamuliya and upadhmaniya (velar and bilabial voiceless fricatives) in Sanskrit. Its use is not limited to Vedic. 1CF2 VEDIC SIGN ARDHAVISARGA = vaidika jihvaamuuliiya upadhmaaniiya +1CF3 VEDIC SIGN ROTATED ARDHAVISARGA +@ Sign for Yajurvedic +1CF4 VEDIC TONE CANDRA ABOVE +@ Signs +1CF5 VEDIC SIGN JIHVAMULIYA + * marks a velar fricative occurring only before unvoiced velar stops x (kannada sign jihvamuliya - 0CF1) + x (tibetan sign lce tsa can - 0F88) +1CF6 VEDIC SIGN UPADHMANIYA + * marks a bilabial fricative occurring only before unvoiced labial stops x (kannada sign upadhmaniya - 0CF2) + x (tibetan sign mchu can - 0F89) @@ 1D00 Phonetic Extensions 1D7F @+ These are non-IPA phonetic extensions, mostly for the Uralic Phonetic Alphabet (UPA). @+ The small capitals, superscript, and subscript forms are for phonetic representations where style variations are semantically important. 
@@ -11594,6 +11748,7 @@ 2013 EN DASH 2014 EM DASH * may be used in pairs to offset parenthetical text + x (two-em dash - 2E3A) x (katakana-hiragana prolonged sound mark - 30FC) 2015 HORIZONTAL BAR = quotation dash @@ -11608,6 +11763,8 @@ x (low line - 005F) x (combining double low line - 0333) # 0020 0333 +@ Quotation marks and apostrophe +@+ Use of quotation marks differs by language. The character names cannot reflect actual usage for all languages. 2018 LEFT SINGLE QUOTATION MARK = single turned comma quotation mark * this is the preferred character (as opposed to 201B) @@ -11646,8 +11803,10 @@ 201F DOUBLE HIGH-REVERSED-9 QUOTATION MARK = double reversed comma quotation mark * has same semantic as 201C, but differs in appearance +@ General punctuation 2020 DAGGER = obelisk, obelus, long cross + x (turned dagger - 2E38) 2021 DOUBLE DAGGER = diesis, double obelisk 2022 BULLET @@ -11674,6 +11833,7 @@ x (presentation form for vertical horizontal ellipsis - FE19) # 002E 002E 002E 2027 HYPHENATION POINT + * visible symbol used to indicate correct positions for word breaking, as in dic·tion·ar·ies @ Format characters 2028 LINE SEPARATOR * may be used to represent this semantic unambiguously @@ -11733,6 +11893,7 @@ 2038 CARET x (up arrowhead - 2303) x (modifier letter low circumflex accent - A788) +@ Quotation marks 2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK = left pointing single guillemet * usually opening, sometimes closing @@ -11745,6 +11906,7 @@ x (greater-than sign - 003E) x (right-pointing angle bracket - 232A) x (right angle bracket - 3009) +@ General punctuation 203B REFERENCE MARK = Japanese kome = Urdu paragraph separator @@ -12061,8 +12223,8 @@ x (box drawings down single and horizontal double - 2564) x (postal mark - 3012) 20B9 INDIAN RUPEE SIGN - * official Rupee currency sign for India - * contrasts with script-specific Rupee signs and abbreviations + * official rupee currency sign for India + * contrasts with script-specific rupee signs and 
abbreviations x (devanagari letter ra - 0930) @@ 20D0 Combining Diacritical Marks for Symbols 20FF @ Combining diacritical marks for symbols @@ -12205,7 +12367,7 @@ x (copyright sign - 00A9) x (circled latin capital letter p - 24C5) 2118 SCRIPT CAPITAL P - = Weierstrass elliptic function + % WEIERSTRASS ELLIPTIC FUNCTION * actually this has the form of a lowercase calligraphic p, despite its name 2119 DOUBLE-STRUCK CAPITAL P # <font> 0050 latin capital letter p @@ -12640,16 +12802,18 @@ x (north east white arrow - 2B00) 21E7 UPWARDS WHITE ARROW = shift + = level 2 select (ISO 9995-7) 21E8 RIGHTWARDS WHITE ARROW + = group select (ISO 9995-7) 21E9 DOWNWARDS WHITE ARROW 21EA UPWARDS WHITE ARROW FROM BAR = caps lock 21EB UPWARDS WHITE ARROW ON PEDESTAL = level 2 lock 21EC UPWARDS WHITE ARROW ON PEDESTAL WITH HORIZONTAL BAR - = caps lock + = capitals (caps) lock 21ED UPWARDS WHITE ARROW ON PEDESTAL WITH VERTICAL BAR - = numerics lock + = numeric lock 21EE UPWARDS WHITE DOUBLE ARROW = level 3 select 21EF UPWARDS WHITE DOUBLE ARROW ON PEDESTAL @@ -12750,8 +12914,11 @@ * generic division operator x (solidus - 002F) x (fraction slash - 2044) + x (mathematical rising diagonal - 27CB) 2216 SET MINUS x (reverse solidus - 005C) + x (mathematical falling diagonal - 27CD) + x (reverse solidus operator - 29F5) 2217 ASTERISK OPERATOR x (asterisk - 002A) 2218 RING OPERATOR @@ -12781,7 +12948,7 @@ 2221 MEASURED ANGLE 2222 SPHERICAL ANGLE = angle arc -@ Operators +@ Relations 2223 DIVIDES = such that = APL stile @@ -13263,6 +13430,7 @@ x (equal and parallel to - 22D5) 2318 PLACE OF INTEREST SIGN = command key (1.0) + = operating system key (ISO 9995-7) 2319 TURNED NOT SIGN = line marker @ User interface symbols @@ -13686,21 +13854,28 @@ @+ * from ISO 2047 x (arabic question mark - 061F) @@ 2440 Optical Character Recognition 245F -@ OCR +@ OCR-A 2440 OCR HOOK 2441 OCR CHAIR 2442 OCR FORK 2443 OCR INVERTED FORK 2444 OCR BELT BUCKLE 2445 OCR BOW TIE + = unique asterisk x (bowtie - 22C8) 
+@ MICR +@+ These magnetic ink character recognition symbols are used on checks. They are derived from the E-13B font and are standardized in ISO 1004:1995. The Unicode character names include several misnomers. 2446 OCR BRANCH BANK IDENTIFICATION = transit 2447 OCR AMOUNT OF CHECK + = amount 2448 OCR DASH + % MICR ON US SYMBOL = on us 2449 OCR CUSTOMER ACCOUNT NUMBER + % MICR DASH SYMBOL = dash +@ OCR 244A OCR DOUBLE BACKSLASH @@ 2460 Enclosed Alphanumerics 24FF @ Circled numbers @@ -14249,6 +14424,7 @@ 25A8 SQUARE WITH UPPER RIGHT TO LOWER LEFT FILL 25A9 SQUARE WITH DIAGONAL CROSSHATCH FILL 25AA BLACK SMALL SQUARE + = square bullet x (black very small square - 2B1D) 25AB WHITE SMALL SQUARE x (white very small square - 2B1E) @@ -14710,6 +14886,7 @@ = legal term, jurisprudence 2697 ALEMBIC = chemical term, chemistry + x (alchemical symbol for retort - 1F76D) 2698 FLOWER = botanical term x (flower punctuation mark - 2055) @@ -15246,12 +15423,21 @@ 27CA VERTICAL BAR WITH HORIZONTAL STROKE x (parallel with horizontal stroke - 2AF2) x (triple vertical bar with horizontal stroke - 2AF5) +@ Miscellaneous symbol +27CB MATHEMATICAL RISING DIAGONAL + = \diagup + x (division slash - 2215) @ Division operator 27CC LONG DIVISION * graphically extends over the dividend x (division sign - 00F7) x (division slash - 2215) x (square root - 221A) +@ Miscellaneous symbol +27CD MATHEMATICAL FALLING DIAGONAL + = \diagdown + x (set minus - 2216) + x (reverse solidus operator - 29F5) @ Operators 27CE SQUARED LOGICAL AND = box min @@ -15318,6 +15504,7 @@ 27E5 WHITE SQUARE WITH RIGHTWARDS TICK = will always be (modal operator) @ Mathematical brackets +@+ These bracket characters are also used as punctuation outside of a mathematical context. 
27E6 MATHEMATICAL LEFT WHITE SQUARE BRACKET = z notation left bag bracket x (left white square bracket - 301A) @@ -16032,6 +16219,7 @@ 29F5 REVERSE SOLIDUS OPERATOR x (reverse solidus - 005C) x (set minus - 2216) + x (mathematical falling diagonal - 27CD) 29F6 SOLIDUS WITH OVERBAR 29F7 REVERSE SOLIDUS WITH HORIZONTAL STROKE x (apl functional symbol backslash bar - 2340) @@ -16911,6 +17099,11 @@ 2CF1 COPTIC COMBINING SPIRITUS LENIS x (combining comma above - 0313) x (combining cyrillic psili pneumata - 0486) +@ Bohairic Coptic letters +2CF2 COPTIC CAPITAL LETTER BOHAIRIC KHEI + x (coptic capital letter khei - 03E6) +2CF3 COPTIC SMALL LETTER BOHAIRIC KHEI + x (coptic small letter khei - 03E7) @ Old Nubian punctuation 2CF9 COPTIC OLD NUBIAN FULL STOP 2CFA COPTIC OLD NUBIAN DIRECT QUESTION MARK @@ -16962,6 +17155,9 @@ 2D23 GEORGIAN SMALL LETTER WE 2D24 GEORGIAN SMALL LETTER HAR 2D25 GEORGIAN SMALL LETTER HOE +@ Additional letters for Ossetian +2D27 GEORGIAN SMALL LETTER YN +2D2D GEORGIAN SMALL LETTER AEN @@ 2D30 Tifinagh 2D7F @ Letters 2D30 TIFINAGH LETTER YA @@ -17022,6 +17218,8 @@ 2D64 TIFINAGH LETTER TAWELLEMET YAZ = harpoon yaz 2D65 TIFINAGH LETTER YAZZ +2D66 TIFINAGH LETTER YE +2D67 TIFINAGH LETTER YO @ Modifier letter 2D6F TIFINAGH MODIFIER LETTER LABIALIZATION MARK = tamatart @@ -17265,6 +17463,36 @@ 2E31 WORD SEPARATOR MIDDLE DOT * used in Avestan, Samaritan, ... 
x (middle dot - 00B7) +@ Palaeotype transliteration symbol +2E32 TURNED COMMA + * indicates nasalization + x (arabic comma - 060C) +@ Historic punctuation +2E33 RAISED DOT + * glyph position intermediate between 002E and 00B7 + x (full stop - 002E) + x (middle dot - 00B7) +2E34 RAISED COMMA + x (comma - 002C) +@ Palaeotype transliteration symbols +2E35 TURNED SEMICOLON + * indicates sudden glottal closure + x (arabic semicolon - 061B) +2E36 DAGGER WITH LEFT GUARD + * indicates retracted pronunciation +2E37 DAGGER WITH RIGHT GUARD + * indicates advanced pronunciation +2E38 TURNED DAGGER + * indicates retroflex pronunciation + x (dagger - 2020) +2E39 TOP HALF SECTION SIGN + * indicates pronunciation on one side of the mouth only + x (section sign - 00A7) +@ Dashes +2E3A TWO-EM DASH + = omission dash + x (em dash - 2014) +2E3B THREE-EM DASH @@ 2E80 CJK Radicals Supplement 2EFF @ CJK radicals supplement 2E80 CJK RADICAL REPEAT @@ -18499,6 +18727,7 @@ 3146 HANGUL LETTER SSANGSIOS # 110A hangul choseong ssangsios 3147 HANGUL LETTER IEUNG + * zero sound as initial or velar nasal consonant as final # 110B hangul choseong ieung 3148 HANGUL LETTER CIEUC # 110C hangul choseong cieuc @@ -18513,6 +18742,7 @@ 314D HANGUL LETTER PHIEUPH # 1111 hangul choseong phieuph 314E HANGUL LETTER HIEUH + * voiceless glottal fricative # 1112 hangul choseong hieuh 314F HANGUL LETTER A # 1161 hangul jungseong a @@ -18614,12 +18844,13 @@ 317E HANGUL LETTER SIOS-CIEUC # 1136 hangul choseong sios-cieuc 317F HANGUL LETTER PANSIOS + * voiced alveolar fricative # 1140 hangul choseong pansios 3180 HANGUL LETTER SSANGIEUNG = ssangyesieung # 1147 hangul choseong ssangieung 3181 HANGUL LETTER YESIEUNG - * old velar nasal + * velar nasal consonant # 114C hangul choseong yesieung 3182 HANGUL LETTER YESIEUNG-SIOS # 11F1 hangul jongseong yesieung-sios @@ -18630,7 +18861,7 @@ 3185 HANGUL LETTER SSANGHIEUH # 1158 hangul choseong ssanghieuh 3186 HANGUL LETTER YEORINHIEUH - * old glottal stop + * glottal stop # 
1159 hangul choseong yeorinhieuh 3187 HANGUL LETTER YO-YA # 1184 hangul jungseong yo-ya @@ -18645,6 +18876,7 @@ 318C HANGUL LETTER YU-I # 1194 hangul jungseong yu-i 318D HANGUL LETTER ARAEA + * rounded open-mid back vowel # 119E hangul jungseong araea 318E HANGUL LETTER ARAEAE # 11A1 hangul jungseong araea-i @@ -19770,6 +20002,8 @@ # <square> 0047 0048 007A 3394 SQUARE THZ # <square> 0054 0048 007A +@ Abbreviations involving liter symbols +@+ The glyphs for these squared abbreviations may use the SI symbol for liter, "l" or "L", instead of a script l. 3395 SQUARE MU L # <square> 03BC 2113 3396 SQUARE ML @@ -19778,6 +20012,7 @@ # <square> 0064 2113 3398 SQUARE KL # <square> 006B 2113 +@ Squared Latin abbreviations 3399 SQUARE FM # <square> 0066 006D 339A SQUARE NM @@ -20054,7 +20289,7 @@ 4DFD HEXAGRAM FOR SMALL PREPONDERANCE 4DFE HEXAGRAM FOR AFTER COMPLETION 4DFF HEXAGRAM FOR BEFORE COMPLETION -@@ 4E00 CJK Unified Ideographs 9FCB +@@ 4E00 CJK Unified Ideographs 9FCC @@ A000 Yi Syllables A48F @@+ @ Syllables @@ -21767,6 +22002,14 @@ A672 COMBINING CYRILLIC THOUSAND MILLIONS SIGN @ Punctuation mark A673 SLAVONIC ASTERISK @ Combining marks for Old Cyrillic +A674 COMBINING CYRILLIC LETTER UKRAINIAN IE +A675 COMBINING CYRILLIC LETTER I +A676 COMBINING CYRILLIC LETTER YI +A677 COMBINING CYRILLIC LETTER U +A678 COMBINING CYRILLIC LETTER HARD SIGN +A679 COMBINING CYRILLIC LETTER YERU +A67A COMBINING CYRILLIC LETTER SOFT SIGN +A67B COMBINING CYRILLIC LETTER OMEGA A67C COMBINING CYRILLIC KAVYKA * indicates an alternative reading to part of a word x (combining breve - 0306) @@ -21805,6 +22048,8 @@ A694 CYRILLIC CAPITAL LETTER HWE A695 CYRILLIC SMALL LETTER HWE A696 CYRILLIC CAPITAL LETTER SHWE A697 CYRILLIC SMALL LETTER SHWE +@ Combining mark for Old Cyrillic +A69F COMBINING CYRILLIC LETTER IOTIFIED E @@ A6A0 Bamum A6FF @ Syllables A6A0 BAMUM LETTER A @@ -22086,7 +22331,7 @@ A78C LATIN SMALL LETTER SALTILLO x (latin letter glottal stop - 0294) x (modifier letter apostrophe - 
02BC) x (modifier letter glottal stop - 02C0) -@ African letter +@ Additional letter A78D LATIN CAPITAL LETTER TURNED H * used in the Dan/Gio orthography in Liberia * lowercase is 0265 @@ -22094,9 +22339,14 @@ A78D LATIN CAPITAL LETTER TURNED H A78E LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT * voiceless lateral retroflex fricative * used to transcribe Toda -@ Janalif letters +@ Additional letters A790 LATIN CAPITAL LETTER N WITH DESCENDER A791 LATIN SMALL LETTER N WITH DESCENDER + * Janalif +A792 LATIN CAPITAL LETTER C WITH BAR + = Cambrian symbol +A793 LATIN SMALL LETTER C WITH BAR + * Nanai @ Latvian letters for pre-1921 orthography A7A0 LATIN CAPITAL LETTER G WITH OBLIQUE STROKE A7A1 LATIN SMALL LETTER G WITH OBLIQUE STROKE @@ -22110,6 +22360,17 @@ A7A8 LATIN CAPITAL LETTER S WITH OBLIQUE STROKE A7A9 LATIN SMALL LETTER S WITH OBLIQUE STROKE * also used in pre-1950 Lower Sorbian orthography x (latin small letter long s with diagonal stroke - 1E9C) +@ Additional letter +A7AA LATIN CAPITAL LETTER H WITH HOOK + * lowercase is 0266 + * used in Chad +@ Additions for Extended IPA +A7F8 MODIFIER LETTER CAPITAL H WITH STROKE + * faucalized + # <super> 0126 +A7F9 MODIFIER LETTER SMALL LIGATURE OE + * labialized: open-rounded + # <super> 0153 @ Addition for UPA A7FA LATIN LETTER SMALL CAPITAL TURNED M @ Ancient Roman epigraphic letters @@ -23002,6 +23263,46 @@ AADE TAI VIET SYMBOL HO HOI * marks start of text in songs and poems AADF TAI VIET SYMBOL KOI KOI * marks end of text in songs and poems +@@ AAE0 Meetei Mayek Extensions AAFF +@+ The characters in this block are extensions for historical orthographies of Meetei and are not specified in the Manipuri Government order No. 1/2/78-SS/E. 
+@ Independent vowel signs +AAE0 MEETEI MAYEK LETTER E +AAE1 MEETEI MAYEK LETTER O +@ Consonants +AAE2 MEETEI MAYEK LETTER CHA +AAE3 MEETEI MAYEK LETTER NYA +AAE4 MEETEI MAYEK LETTER TTA +AAE5 MEETEI MAYEK LETTER TTHA +AAE6 MEETEI MAYEK LETTER DDA +AAE7 MEETEI MAYEK LETTER DDHA +AAE8 MEETEI MAYEK LETTER NNA +AAE9 MEETEI MAYEK LETTER SHA +AAEA MEETEI MAYEK LETTER SSA +@ Dependent vowel signs +AAEB MEETEI MAYEK VOWEL SIGN II +AAEC MEETEI MAYEK VOWEL SIGN UU +AAED MEETEI MAYEK VOWEL SIGN AAI +AAEE MEETEI MAYEK VOWEL SIGN AU +AAEF MEETEI MAYEK VOWEL SIGN AAU +@ Punctuation +AAF0 MEETEI MAYEK CHEIKHAN + = danda +AAF1 MEETEI MAYEK AHANG KHUDAM + = question mark +@ Sign +AAF2 MEETEI MAYEK ANJI + * a philosophical sign + x (devanagari om - 0950) +@ Repetition marks +@+ These marks have fallen into disuse. +AAF3 MEETEI MAYEK SYLLABLE REPETITION MARK +AAF4 MEETEI MAYEK WORD REPETITION MARK +@ Sign +AAF5 MEETEI MAYEK VOWEL SIGN VISARGA +@ Virama +AAF6 MEETEI MAYEK VIRAMA + * used to form conjuncts in historical orthographies + x (myanmar sign virama - 1039) @@ AB00 Ethiopic Extended-A AB2F @ Gamo-Gofa-Dawro and Basketo AB01 ETHIOPIC SYLLABLE TTHU @@ -23238,6 +23539,7 @@ D7FB HANGUL JONGSEONG PHIEUPH-THIEUTH @@ F900 CJK Compatibility Ideographs FAFF @@+ @+ This block, despite its name, contains a number of unified CJK ideographs. Those characters are individually identified by annotations. +@+ Subheaders identifying sources for subranges do not indicate required usage or preclude mappings to other sources. For example, many pronunciation variants from KS X 1001:1998 are also mapped to a J source. 
@ Pronunciation variants from KS X 1001:1998 F900 CJK COMPATIBILITY IDEOGRAPH-F900 : 8C48 @@ -23847,6 +24149,11 @@ FA2C CJK COMPATIBILITY IDEOGRAPH-FA2C : 9928 FA2D CJK COMPATIBILITY IDEOGRAPH-FA2D : 9DB4 +@ Korean compatibility ideographs +FA2E CJK COMPATIBILITY IDEOGRAPH-FA2E + : 90DE +FA2F CJK COMPATIBILITY IDEOGRAPH-FA2F + : 96B7 @ JIS X 0213 compatibility ideographs FA30 CJK COMPATIBILITY IDEOGRAPH-FA30 : 4FAE @@ -25589,7 +25896,7 @@ FE0C VARIATION SELECTOR-13 FE0D VARIATION SELECTOR-14 FE0E VARIATION SELECTOR-15 FE0F VARIATION SELECTOR-16 -@@ FE10 Vertical forms FE1F +@@ FE10 Vertical Forms FE1F @+ These characters are compatibility characters needed to map to GB 18030. @ Glyphs for vertical variants FE10 PRESENTATION FORM FOR VERTICAL COMMA @@ -27624,6 +27931,102 @@ FFFF <not a character> 10939 LYDIAN LETTER C @ Punctuation 1093F LYDIAN TRIANGULAR MARK +@@ 10980 Meroitic Hieroglyphs 1099F +@ Vowel letters +10980 MEROITIC HIEROGLYPHIC LETTER A + x (egyptian hieroglyph a001 - 13000) +10981 MEROITIC HIEROGLYPHIC LETTER E + x (egyptian hieroglyph h006 - 13184) +10982 MEROITIC HIEROGLYPHIC LETTER I + x (egyptian hieroglyph a026 - 1301E) +10983 MEROITIC HIEROGLYPHIC LETTER O + x (egyptian hieroglyph f001 - 130FE) +@ Consonant letters +10984 MEROITIC HIEROGLYPHIC LETTER YA + x (egyptian hieroglyph m017a - 131CC) +10985 MEROITIC HIEROGLYPHIC LETTER WA + x (egyptian hieroglyph v004 - 1336F) +10986 MEROITIC HIEROGLYPHIC LETTER BA + x (egyptian hieroglyph e011 - 130DE) +10987 MEROITIC HIEROGLYPHIC LETTER BA-2 + x (egyptian hieroglyph d058 - 130C0) +10988 MEROITIC HIEROGLYPHIC LETTER PA + x (egyptian hieroglyph q003 - 132AA) +10989 MEROITIC HIEROGLYPHIC LETTER MA + x (egyptian hieroglyph g017 - 13153) +1098A MEROITIC HIEROGLYPHIC LETTER NA + x (egyptian hieroglyph n035 - 13216) +1098B MEROITIC HIEROGLYPHIC LETTER NA-2 +1098C MEROITIC HIEROGLYPHIC LETTER NE + x (egyptian hieroglyph m022a - 131D2) +1098D MEROITIC HIEROGLYPHIC LETTER NE-2 +1098E MEROITIC HIEROGLYPHIC 
LETTER RA + x (egyptian hieroglyph d021 - 1308B) +1098F MEROITIC HIEROGLYPHIC LETTER RA-2 +10990 MEROITIC HIEROGLYPHIC LETTER LA + x (egyptian hieroglyph e023 - 130ED) +10991 MEROITIC HIEROGLYPHIC LETTER KHA + x (egyptian hieroglyph aa001 - 1340D) +10992 MEROITIC HIEROGLYPHIC LETTER HHA + x (egyptian hieroglyph w011 - 133BC) +10993 MEROITIC HIEROGLYPHIC LETTER SA + x (egyptian hieroglyph m008 - 131B7) +10994 MEROITIC HIEROGLYPHIC LETTER SA-2 + x (egyptian hieroglyph o034 - 13283) +10995 MEROITIC HIEROGLYPHIC LETTER SE + x (egyptian hieroglyph o034 - 13283) +10996 MEROITIC HIEROGLYPHIC LETTER KA + x (egyptian hieroglyph g038 - 1316C) +10997 MEROITIC HIEROGLYPHIC LETTER QA + x (egyptian hieroglyph n029 - 1320E) +10998 MEROITIC HIEROGLYPHIC LETTER TA + x (egyptian hieroglyph v013 - 1337F) +10999 MEROITIC HIEROGLYPHIC LETTER TA-2 + x (egyptian hieroglyph n016 - 131FE) +1099A MEROITIC HIEROGLYPHIC LETTER TE + x (egyptian hieroglyph n016 - 131FE) + x (egyptian hieroglyph o004 - 13254) +1099B MEROITIC HIEROGLYPHIC LETTER TE-2 + x (egyptian hieroglyph o004 - 13254) +1099C MEROITIC HIEROGLYPHIC LETTER TO + x (egyptian hieroglyph n021 - 13205) +1099D MEROITIC HIEROGLYPHIC LETTER DA + x (egyptian hieroglyph d006 - 1307B) +@ Symbols +1099E MEROITIC HIEROGLYPHIC SYMBOL VIDJ + x (ankh - 2625) + x (egyptian hieroglyph s034 - 132F9) +1099F MEROITIC HIEROGLYPHIC SYMBOL VIDJ-2 +@@ 109A0 Meroitic Cursive 109FF +@ Vowel letters +109A0 MEROITIC CURSIVE LETTER A +109A1 MEROITIC CURSIVE LETTER E +109A2 MEROITIC CURSIVE LETTER I +109A3 MEROITIC CURSIVE LETTER O +@ Consonant letters +109A4 MEROITIC CURSIVE LETTER YA +109A5 MEROITIC CURSIVE LETTER WA +109A6 MEROITIC CURSIVE LETTER BA +109A7 MEROITIC CURSIVE LETTER PA +109A8 MEROITIC CURSIVE LETTER MA +109A9 MEROITIC CURSIVE LETTER NA +109AA MEROITIC CURSIVE LETTER NE +109AB MEROITIC CURSIVE LETTER RA +109AC MEROITIC CURSIVE LETTER LA +109AD MEROITIC CURSIVE LETTER KHA +109AE MEROITIC CURSIVE LETTER HHA +109AF MEROITIC CURSIVE LETTER SA 
+109B0 MEROITIC CURSIVE LETTER ARCHAIC SA +109B1 MEROITIC CURSIVE LETTER SE +109B2 MEROITIC CURSIVE LETTER KA +109B3 MEROITIC CURSIVE LETTER QA +109B4 MEROITIC CURSIVE LETTER TA +109B5 MEROITIC CURSIVE LETTER TE +109B6 MEROITIC CURSIVE LETTER TO +109B7 MEROITIC CURSIVE LETTER DA +@ Logograms +109BE MEROITIC CURSIVE LOGOGRAM RMT +109BF MEROITIC CURSIVE LOGOGRAM IMN @@ 10A00 Kharoshthi 10A5F @ Vowels 10A00 KHAROSHTHI LETTER A @@ -28255,6 +28658,346 @@ FFFF <not a character> * paragraph delimiter 110C0 KAITHI DANDA 110C1 KAITHI DOUBLE DANDA +@@ 110D0 Sora Sompeng 110FF +@ Consonants +110D0 SORA SOMPENG LETTER SAH +110D1 SORA SOMPENG LETTER TAH +110D2 SORA SOMPENG LETTER BAH +110D3 SORA SOMPENG LETTER CAH +110D4 SORA SOMPENG LETTER DAH +110D5 SORA SOMPENG LETTER GAH +110D6 SORA SOMPENG LETTER MAH +110D7 SORA SOMPENG LETTER NGAH +110D8 SORA SOMPENG LETTER LAH +110D9 SORA SOMPENG LETTER NAH +110DA SORA SOMPENG LETTER VAH +110DB SORA SOMPENG LETTER PAH +110DC SORA SOMPENG LETTER YAH +110DD SORA SOMPENG LETTER RAH +110DE SORA SOMPENG LETTER HAH +110DF SORA SOMPENG LETTER KAH +110E0 SORA SOMPENG LETTER JAH +110E1 SORA SOMPENG LETTER NYAH +@ Vowels +110E2 SORA SOMPENG LETTER AH +110E3 SORA SOMPENG LETTER EEH +110E4 SORA SOMPENG LETTER IH +110E5 SORA SOMPENG LETTER UH +110E6 SORA SOMPENG LETTER OH +110E7 SORA SOMPENG LETTER EH +@ Other letter +110E8 SORA SOMPENG LETTER MAE +@ Digits +110F0 SORA SOMPENG DIGIT ZERO +110F1 SORA SOMPENG DIGIT ONE +110F2 SORA SOMPENG DIGIT TWO +110F3 SORA SOMPENG DIGIT THREE +110F4 SORA SOMPENG DIGIT FOUR +110F5 SORA SOMPENG DIGIT FIVE +110F6 SORA SOMPENG DIGIT SIX +110F7 SORA SOMPENG DIGIT SEVEN +110F8 SORA SOMPENG DIGIT EIGHT +110F9 SORA SOMPENG DIGIT NINE +@@ 11100 Chakma 1114F +@ Various signs +11100 CHAKMA SIGN CANDRABINDU + = caanaphupudaa +11101 CHAKMA SIGN ANUSVARA + = ekaphudaa +11102 CHAKMA SIGN VISARGA + = dviphudaa +@ Independent vowels +11103 CHAKMA LETTER AA + = pichapujhaa aa +11104 CHAKMA LETTER I + = delabhaangagaa i +11105 CHAKMA 
LETTER U + = bacacu u +11106 CHAKMA LETTER E + = lejaubaa e +@ Consonants +11107 CHAKMA LETTER KAA + = cucyaangyaa kaa +11108 CHAKMA LETTER KHAA + = grajaangyaa khaa +11109 CHAKMA LETTER GAA + = caandyaa gaa +1110A CHAKMA LETTER GHAA + = tinaddaalyaa ghaa +1110B CHAKMA LETTER NGAA + = cilaama ngaa +1110C CHAKMA LETTER CAA + = dvibhalyaa caa +1110D CHAKMA LETTER CHAA + = majaraa chaa +1110E CHAKMA LETTER JAA + = dvipadalaa haa +1110F CHAKMA LETTER JHAA + = uraauraa jhaa +11110 CHAKMA LETTER NYAA + = silaacyaa nyaa +11111 CHAKMA LETTER TTAA + = dviyaadaat ttaa +11112 CHAKMA LETTER TTHAA + = phudaadviyaat tthaa +11113 CHAKMA LETTER DDAA + = aadudaangaat ddaa +11114 CHAKMA LETTER DDHAA + = lejabharaat ddhaa +11115 CHAKMA LETTER NNAA + = pettttuyaa nnaa +11116 CHAKMA LETTER TAA + = ghangadaat taa +11117 CHAKMA LETTER THAA + = jagadaat thaa +11118 CHAKMA LETTER DAA + = dolaniit daa +11119 CHAKMA LETTER DHAA + = talamuyaat dhaa +1111A CHAKMA LETTER NAA + = phaarabaanyaa naa +1111B CHAKMA LETTER PAA + = paalyaa paa +1111C CHAKMA LETTER PHAA + = ubaraphudaa phaa +1111D CHAKMA LETTER BAA + = ubaramuyaa baa +1111E CHAKMA LETTER BHAA + = ciraddaalyaa bhaa +1111F CHAKMA LETTER MAA + = bugatpadalaa maa +11120 CHAKMA LETTER YYAA + = cimayyaa yyaa +11121 CHAKMA LETTER YAA + = jilyaa yaa +11122 CHAKMA LETTER RAA + = dvidaayyaa raa +11123 CHAKMA LETTER LAA + = talamuyaa laa +11124 CHAKMA LETTER WAA + = bajhonyaa waa +11125 CHAKMA LETTER SAA + = bhudibukyaa saa +11126 CHAKMA LETTER HAA + = ubaramuyaa haa +@ Dependent vowel signs +11127 CHAKMA VOWEL SIGN A + = ubaratulyaa a +11128 CHAKMA VOWEL SIGN I + = bahryaa i +11129 CHAKMA VOWEL SIGN II + = baaniiphadaa ii +1112A CHAKMA VOWEL SIGN U + = ekattaana u +1112B CHAKMA VOWEL SIGN UU + = dvittaana uu +1112C CHAKMA VOWEL SIGN E + = ekaara e +1112D CHAKMA VOWEL SIGN AI + = delabhaanga ai +1112E CHAKMA VOWEL SIGN O + = okaara o + : 11131 11127 +1112F CHAKMA VOWEL SIGN AU + = aukaara au + : 11132 11127 +11130 CHAKMA VOWEL SIGN OI + = oikaara 
oi +11131 CHAKMA O MARK +11132 CHAKMA AU MARK +@ Various signs +11133 CHAKMA VIRAMA + * used to form conjuncts + x (myanmar sign virama - 1039) +11134 CHAKMA MAAYYAA + * killer + x (myanmar sign asat - 103A) +@ Digits +11136 CHAKMA DIGIT ZERO +11137 CHAKMA DIGIT ONE +11138 CHAKMA DIGIT TWO +11139 CHAKMA DIGIT THREE +1113A CHAKMA DIGIT FOUR +1113B CHAKMA DIGIT FIVE +1113C CHAKMA DIGIT SIX +1113D CHAKMA DIGIT SEVEN +1113E CHAKMA DIGIT EIGHT +1113F CHAKMA DIGIT NINE +@ Punctuation +11140 CHAKMA SECTION MARK + = phulacihna +11141 CHAKMA DANDA + = ekacilyaa +11142 CHAKMA DOUBLE DANDA + = dvicilyaa +11143 CHAKMA QUESTION MARK + = pujhaar +@@ 11180 Sharada 111DF +@ Various signs +11180 SHARADA SIGN CANDRABINDU +11181 SHARADA SIGN ANUSVARA +11182 SHARADA SIGN VISARGA +@ Independent vowels +11183 SHARADA LETTER A +11184 SHARADA LETTER AA +11185 SHARADA LETTER I +11186 SHARADA LETTER II +11187 SHARADA LETTER U +11188 SHARADA LETTER UU +11189 SHARADA LETTER VOCALIC R +1118A SHARADA LETTER VOCALIC RR +1118B SHARADA LETTER VOCALIC L +1118C SHARADA LETTER VOCALIC LL +1118D SHARADA LETTER E +1118E SHARADA LETTER AI +1118F SHARADA LETTER O +11190 SHARADA LETTER AU +@ Consonants +11191 SHARADA LETTER KA +11192 SHARADA LETTER KHA +11193 SHARADA LETTER GA +11194 SHARADA LETTER GHA +11195 SHARADA LETTER NGA +11196 SHARADA LETTER CA +11197 SHARADA LETTER CHA +11198 SHARADA LETTER JA +11199 SHARADA LETTER JHA +1119A SHARADA LETTER NYA +1119B SHARADA LETTER TTA +1119C SHARADA LETTER TTHA +1119D SHARADA LETTER DDA +1119E SHARADA LETTER DDHA +1119F SHARADA LETTER NNA +111A0 SHARADA LETTER TA +111A1 SHARADA LETTER THA +111A2 SHARADA LETTER DA +111A3 SHARADA LETTER DHA +111A4 SHARADA LETTER NA +111A5 SHARADA LETTER PA +111A6 SHARADA LETTER PHA +111A7 SHARADA LETTER BA +111A8 SHARADA LETTER BHA +111A9 SHARADA LETTER MA +111AA SHARADA LETTER YA +111AB SHARADA LETTER RA +111AC SHARADA LETTER LA +111AD SHARADA LETTER LLA +111AE SHARADA LETTER VA +111AF SHARADA LETTER SHA +111B0 SHARADA LETTER 
SSA +111B1 SHARADA LETTER SA +111B2 SHARADA LETTER HA +@ Dependent vowel signs +111B3 SHARADA VOWEL SIGN AA +111B4 SHARADA VOWEL SIGN I +111B5 SHARADA VOWEL SIGN II +111B6 SHARADA VOWEL SIGN U +111B7 SHARADA VOWEL SIGN UU +111B8 SHARADA VOWEL SIGN VOCALIC R +111B9 SHARADA VOWEL SIGN VOCALIC RR +111BA SHARADA VOWEL SIGN VOCALIC L +111BB SHARADA VOWEL SIGN VOCALIC LL +111BC SHARADA VOWEL SIGN E +111BD SHARADA VOWEL SIGN AI +111BE SHARADA VOWEL SIGN O +111BF SHARADA VOWEL SIGN AU +@ Virama +111C0 SHARADA SIGN VIRAMA +@ Various signs +111C1 SHARADA SIGN AVAGRAHA +111C2 SHARADA SIGN JIHVAMULIYA +111C3 SHARADA SIGN UPADHMANIYA +111C4 SHARADA OM +@ Punctuation +111C5 SHARADA DANDA +111C6 SHARADA DOUBLE DANDA +111C7 SHARADA ABBREVIATION SIGN +111C8 SHARADA SEPARATOR +@ Digits +111D0 SHARADA DIGIT ZERO +111D1 SHARADA DIGIT ONE +111D2 SHARADA DIGIT TWO +111D3 SHARADA DIGIT THREE +111D4 SHARADA DIGIT FOUR +111D5 SHARADA DIGIT FIVE +111D6 SHARADA DIGIT SIX +111D7 SHARADA DIGIT SEVEN +111D8 SHARADA DIGIT EIGHT +111D9 SHARADA DIGIT NINE +@@ 11680 Takri 116CF +@ Independent vowels +11680 TAKRI LETTER A +11681 TAKRI LETTER AA +11682 TAKRI LETTER I +11683 TAKRI LETTER II +11684 TAKRI LETTER U +11685 TAKRI LETTER UU +11686 TAKRI LETTER E +11687 TAKRI LETTER AI +11688 TAKRI LETTER O +11689 TAKRI LETTER AU +@ Consonants +1168A TAKRI LETTER KA +1168B TAKRI LETTER KHA +1168C TAKRI LETTER GA +1168D TAKRI LETTER GHA +1168E TAKRI LETTER NGA +1168F TAKRI LETTER CA +11690 TAKRI LETTER CHA +11691 TAKRI LETTER JA +11692 TAKRI LETTER JHA +11693 TAKRI LETTER NYA +11694 TAKRI LETTER TTA +11695 TAKRI LETTER TTHA +11696 TAKRI LETTER DDA +11697 TAKRI LETTER DDHA +11698 TAKRI LETTER NNA +11699 TAKRI LETTER TA +1169A TAKRI LETTER THA +1169B TAKRI LETTER DA +1169C TAKRI LETTER DHA +1169D TAKRI LETTER NA +1169E TAKRI LETTER PA +1169F TAKRI LETTER PHA +116A0 TAKRI LETTER BA +116A1 TAKRI LETTER BHA +116A2 TAKRI LETTER MA +116A3 TAKRI LETTER YA +116A4 TAKRI LETTER RA +116A5 TAKRI LETTER LA +116A6 TAKRI 
LETTER VA +116A7 TAKRI LETTER SHA +116A8 TAKRI LETTER SA +116A9 TAKRI LETTER HA +116AA TAKRI LETTER RRA +@ Various signs +116AB TAKRI SIGN ANUSVARA +116AC TAKRI SIGN VISARGA +@ Dependent vowel signs +116AD TAKRI VOWEL SIGN AA +116AE TAKRI VOWEL SIGN I +116AF TAKRI VOWEL SIGN II +116B0 TAKRI VOWEL SIGN U +116B1 TAKRI VOWEL SIGN UU +116B2 TAKRI VOWEL SIGN E +116B3 TAKRI VOWEL SIGN AI +116B4 TAKRI VOWEL SIGN O +116B5 TAKRI VOWEL SIGN AU +@ Virama +116B6 TAKRI SIGN VIRAMA +@ Nukta +116B7 TAKRI SIGN NUKTA +@ Digits +116C0 TAKRI DIGIT ZERO +116C1 TAKRI DIGIT ONE +116C2 TAKRI DIGIT TWO +116C3 TAKRI DIGIT THREE +116C4 TAKRI DIGIT FOUR +116C5 TAKRI DIGIT FIVE +116C6 TAKRI DIGIT SIX +116C7 TAKRI DIGIT SEVEN +116C8 TAKRI DIGIT EIGHT +116C9 TAKRI DIGIT NINE @@ 12000 Cuneiform 123FF @ Signs 12000 CUNEIFORM SIGN A @@ -30499,7 +31242,7 @@ FFFF <not a character> 1342D EGYPTIAN HIEROGLYPH AA031 1342E EGYPTIAN HIEROGLYPH AA032 @@ 16800 Bamum Supplement 16A3F -@ Characters found through Phase A +@ Characters found through Phase A 16800 BAMUM LETTER PHASE-A NGKUE MFON 16801 BAMUM LETTER PHASE-A GBIEE FON 16802 BAMUM LETTER PHASE-A PON MFON PIPAEMGBIEE @@ -30587,7 +31330,7 @@ FFFF <not a character> 16854 BAMUM LETTER PHASE-A NEN 16855 BAMUM LETTER PHASE-A NAQ 16856 BAMUM LETTER PHASE-A MBAQ -@ Characters found through Phase B +@ Characters found through Phase B 16857 BAMUM LETTER PHASE-B NSHUET 16858 BAMUM LETTER PHASE-B TU MAEMGBIEE 16859 BAMUM LETTER PHASE-B SIEE @@ -30645,7 +31388,7 @@ FFFF <not a character> 1688C BAMUM LETTER PHASE-B MA 1688D BAMUM LETTER PHASE-B KIQ 1688E BAMUM LETTER PHASE-B NGOM -@ Characters found through Phase C +@ Characters found through Phase C 1688F BAMUM LETTER PHASE-C NGKUE MAEMBA 16890 BAMUM LETTER PHASE-C NZA 16891 BAMUM LETTER PHASE-C YUM @@ -30745,7 +31488,7 @@ FFFF <not a character> 168EE BAMUM LETTER PHASE-C PIN 168EF BAMUM LETTER PHASE-C PEN 168F0 BAMUM LETTER PHASE-C TET -@ Characters found through Phase D +@ Characters found through Phase D 
168F1 BAMUM LETTER PHASE-D MBUO 168F2 BAMUM LETTER PHASE-D WAP 168F3 BAMUM LETTER PHASE-D NJI @@ -30870,7 +31613,7 @@ FFFF <not a character> 16964 BAMUM LETTER PHASE-D SAQ 16965 BAMUM LETTER PHASE-D FAA * used before 169B8 for faamae '8' in Phases A-D -@ Characters found through Phase E +@ Characters found through Phase E 16966 BAMUM LETTER PHASE-E NDAP * i in Phase F 16967 BAMUM LETTER PHASE-E TOON @@ -31045,7 +31788,7 @@ FFFF <not a character> 16A01 BAMUM LETTER PHASE-E FAQ 16A02 BAMUM LETTER PHASE-E GHOM * used after 169F9 for koghom '10' in Phases A-D -@ Characters found through Phase F +@ Characters found through Phase F 16A03 BAMUM LETTER PHASE-F KA 16A04 BAMUM LETTER PHASE-F U 16A05 BAMUM LETTER PHASE-F KU @@ -31101,6 +31844,194 @@ FFFF <not a character> 16A37 BAMUM LETTER PHASE-F SAMBA 16A38 BAMUM LETTER PHASE-F VUEQ * used after 169F9 for kovue '9' in Phases A-D +@@ 16F00 Miao 16F9F +@ Consonant onsets +16F00 MIAO LETTER PA + * used for ba in Dry Yi +16F01 MIAO LETTER BA +16F02 MIAO LETTER YI PA + * used for pa in Dry Yi +16F03 MIAO LETTER PLA + * used in Sichuan Hmong +16F04 MIAO LETTER MA +16F05 MIAO LETTER MHA +16F06 MIAO LETTER ARCHAIC MA + * used in Pollard's early orthography +16F07 MIAO LETTER FA +16F08 MIAO LETTER VA +16F09 MIAO LETTER VFA + * used in Black Yi +16F0A MIAO LETTER TA + * used for da in Dry Yi +16F0B MIAO LETTER DA +16F0C MIAO LETTER YI TTA + * used in Hei Yi +16F0D MIAO LETTER YI TA + * used for ta in Dry Yi +16F0E MIAO LETTER TTA +16F0F MIAO LETTER DDA +16F10 MIAO LETTER NA +16F11 MIAO LETTER NHA +16F12 MIAO LETTER YI NNA + * used in Hei Yi +16F13 MIAO LETTER ARCHAIC NA + * used in Pollard's early orthography +16F14 MIAO LETTER NNA +16F15 MIAO LETTER NNHA +16F16 MIAO LETTER LA +16F17 MIAO LETTER LYA + * used in Black Yi +16F18 MIAO LETTER LHA +16F19 MIAO LETTER LHYA + * used in Black Yi +16F1A MIAO LETTER TLHA +16F1B MIAO LETTER DLHA +16F1C MIAO LETTER TLHYA +16F1D MIAO LETTER DLHYA +16F1E MIAO LETTER KA + * used for ga in Dry Yi 
+16F1F MIAO LETTER GA +16F20 MIAO LETTER YI KA + * used for ka in Dry Yi +16F21 MIAO LETTER QA +16F22 MIAO LETTER QGA +16F23 MIAO LETTER NGA +16F24 MIAO LETTER NGHA +16F25 MIAO LETTER ARCHAIC NGA + * used in Pollard's early orthography +16F26 MIAO LETTER HA +16F27 MIAO LETTER XA +@+ * archaic character used in a post-1949 reformed orthography +16F28 MIAO LETTER GHA +16F29 MIAO LETTER GHHA +16F2A MIAO LETTER TSSA +16F2B MIAO LETTER DZZA +16F2C MIAO LETTER NYA +16F2D MIAO LETTER NYHA +16F2E MIAO LETTER TSHA + * used for dzha in Dry Yi +16F2F MIAO LETTER DZHA +16F30 MIAO LETTER YI TSHA + * used for tsha in Dry Yi +16F31 MIAO LETTER YI DZHA + * used in Hei Yi +16F32 MIAO LETTER REFORMED TSHA +@+ * archaic character used in a post-1949 reformed orthography +16F33 MIAO LETTER SHA +16F34 MIAO LETTER SSA +16F35 MIAO LETTER ZHA + * used in Black Yi +16F36 MIAO LETTER ZSHA + * used in Black Yi +16F37 MIAO LETTER TSA + * used for dza in Dry Yi +16F38 MIAO LETTER DZA +16F39 MIAO LETTER YI TSA + * used for tsa in Dry Yi +16F3A MIAO LETTER SA +16F3B MIAO LETTER ZA +16F3C MIAO LETTER ZSA + * used in Black Yi +16F3D MIAO LETTER ZZA +16F3E MIAO LETTER ZZSA + * used in Black Yi +16F3F MIAO LETTER ARCHAIC ZZA + * used in Pollard's early orthography +16F40 MIAO LETTER ZZYA + * used in Black Yi +16F41 MIAO LETTER ZZSYA + * used in Black Yi +16F42 MIAO LETTER WA +16F43 MIAO LETTER AH + * glottal stop +16F44 MIAO LETTER HHA + * used in Black Yi +@ Modifiers +16F50 MIAO LETTER NASALIZATION +16F51 MIAO SIGN ASPIRATION +16F52 MIAO SIGN REFORMED VOICING +@+ * archaic character used in a post-1949 reformed orthography +16F53 MIAO SIGN REFORMED ASPIRATION +@+ * archaic character used in a post-1949 reformed orthography +@ Vowels and finals +16F54 MIAO VOWEL SIGN A +16F55 MIAO VOWEL SIGN AA + * used in Eastern Lisu +16F56 MIAO VOWEL SIGN AHH + * used in Gan Yi +16F57 MIAO VOWEL SIGN AN +16F58 MIAO VOWEL SIGN ANG + * also used for aw +16F59 MIAO VOWEL SIGN O +16F5A MIAO VOWEL SIGN OO +16F5B MIAO 
VOWEL SIGN WO + * used in Hei Yi +16F5C MIAO VOWEL SIGN W +16F5D MIAO VOWEL SIGN E +16F5E MIAO VOWEL SIGN EN +16F5F MIAO VOWEL SIGN ENG +16F60 MIAO VOWEL SIGN OEY +16F61 MIAO VOWEL SIGN I +16F62 MIAO VOWEL SIGN IA +16F63 MIAO VOWEL SIGN IAN +16F64 MIAO VOWEL SIGN IANG + * also used for iaw +16F65 MIAO VOWEL SIGN IO +16F66 MIAO VOWEL SIGN IE +16F67 MIAO VOWEL SIGN II + * used in Eastern Lisu +16F68 MIAO VOWEL SIGN IU +16F69 MIAO VOWEL SIGN ING + * also used for in +16F6A MIAO VOWEL SIGN U +16F6B MIAO VOWEL SIGN UA +16F6C MIAO VOWEL SIGN UAN +16F6D MIAO VOWEL SIGN UANG + * also used for uaw +16F6E MIAO VOWEL SIGN UU + * used in Eastern Lisu +16F6F MIAO VOWEL SIGN UEI +16F70 MIAO VOWEL SIGN UNG +16F71 MIAO VOWEL SIGN Y +16F72 MIAO VOWEL SIGN YI +16F73 MIAO VOWEL SIGN AE +16F74 MIAO VOWEL SIGN AEE + * used in Eastern Lisu +16F75 MIAO VOWEL SIGN ERR +16F76 MIAO VOWEL SIGN ROUNDED ERR + * used in Eastern Lisu +16F77 MIAO VOWEL SIGN ER +16F78 MIAO VOWEL SIGN ROUNDED ER + * used in Eastern Lisu +16F79 MIAO VOWEL SIGN AI +16F7A MIAO VOWEL SIGN EI +16F7B MIAO VOWEL SIGN AU +16F7C MIAO VOWEL SIGN OU +16F7D MIAO VOWEL SIGN N +16F7E MIAO VOWEL SIGN NG +@ Positioning tone marks +@+ These are used to position the vowel off of the baseline position to indicate a changed tone. +16F8F MIAO TONE RIGHT +16F90 MIAO TONE TOP RIGHT +16F91 MIAO TONE ABOVE +16F92 MIAO TONE BELOW +@ Baseline tone marks +@+ These are used in Chuxiong Ahmao instead of the positioning tone marks. +16F93 MIAO LETTER TONE-2 +16F94 MIAO LETTER TONE-3 +16F95 MIAO LETTER TONE-4 +16F96 MIAO LETTER TONE-5 +16F97 MIAO LETTER TONE-6 +16F98 MIAO LETTER TONE-7 +16F99 MIAO LETTER TONE-8 +@ Archaic baseline tone marks +@+ These are archaic characters used in a post-1949 reformed orthography. 
+16F9A MIAO LETTER REFORMED TONE-1 +16F9B MIAO LETTER REFORMED TONE-2 +16F9C MIAO LETTER REFORMED TONE-4 +16F9D MIAO LETTER REFORMED TONE-5 +16F9E MIAO LETTER REFORMED TONE-6 +16F9F MIAO LETTER REFORMED TONE-8 @@ 1B000 Kana Supplement 1B0FF @ Historic Katakana 1B000 KATAKANA LETTER ARCHAIC E @@ -34056,6 +34987,355 @@ FFFF <not a character> # <font> 0038 digit eight 1D7FF MATHEMATICAL MONOSPACE DIGIT NINE # <font> 0039 digit nine +@@ 1EE00 Arabic Mathematical Alphabetic Symbols 1EEFF +@ Isolated symbols +1EE00 ARABIC MATHEMATICAL ALEF + x (arabic letter alef isolated form - FE8D) + # <font> 0627 arabic letter alef +1EE01 ARABIC MATHEMATICAL BEH + x (arabic letter beh isolated form - FE8F) + # <font> 0628 arabic letter beh +1EE02 ARABIC MATHEMATICAL JEEM + x (arabic letter jeem isolated form - FE9D) + # <font> 062C arabic letter jeem +1EE03 ARABIC MATHEMATICAL DAL + x (arabic letter dal isolated form - FEA9) + # <font> 062F arabic letter dal +1EE05 ARABIC MATHEMATICAL WAW + x (arabic letter waw isolated form - FEED) + # <font> 0648 arabic letter waw +1EE06 ARABIC MATHEMATICAL ZAIN + x (arabic letter zain isolated form - FEAF) + # <font> 0632 arabic letter zain +1EE07 ARABIC MATHEMATICAL HAH + x (arabic letter hah isolated form - FEA1) + # <font> 062D arabic letter hah +1EE08 ARABIC MATHEMATICAL TAH + x (arabic letter tah isolated form - FEC1) + # <font> 0637 arabic letter tah +1EE09 ARABIC MATHEMATICAL YEH + x (arabic letter yeh isolated form - FEF1) + # <font> 064A arabic letter yeh +1EE0A ARABIC MATHEMATICAL KAF + x (arabic letter kaf isolated form - FED9) + # <font> 0643 arabic letter kaf +1EE0B ARABIC MATHEMATICAL LAM + x (arabic letter lam isolated form - FEDD) + # <font> 0644 arabic letter lam +1EE0C ARABIC MATHEMATICAL MEEM + x (arabic letter meem isolated form - FEE1) + # <font> 0645 arabic letter meem +1EE0D ARABIC MATHEMATICAL NOON + x (arabic letter noon isolated form - FEE5) + # <font> 0646 arabic letter noon +1EE0E ARABIC MATHEMATICAL SEEN + x (arabic 
letter seen isolated form - FEB1) + # <font> 0633 arabic letter seen +1EE0F ARABIC MATHEMATICAL AIN + x (arabic letter ain isolated form - FEC9) + # <font> 0639 arabic letter ain +1EE10 ARABIC MATHEMATICAL FEH + x (arabic letter feh isolated form - FED1) + # <font> 0641 arabic letter feh +1EE11 ARABIC MATHEMATICAL SAD + x (arabic letter sad isolated form - FEB9) + # <font> 0635 arabic letter sad +1EE12 ARABIC MATHEMATICAL QAF + x (arabic letter qaf isolated form - FED5) + # <font> 0642 arabic letter qaf +1EE13 ARABIC MATHEMATICAL REH + x (arabic letter reh isolated form - FEAD) + # <font> 0631 arabic letter reh +1EE14 ARABIC MATHEMATICAL SHEEN + x (arabic letter sheen isolated form - FEB5) + # <font> 0634 arabic letter sheen +1EE15 ARABIC MATHEMATICAL TEH + x (arabic letter teh isolated form - FE95) + # <font> 062A arabic letter teh +1EE16 ARABIC MATHEMATICAL THEH + x (arabic letter theh isolated form - FE99) + # <font> 062B arabic letter theh +1EE17 ARABIC MATHEMATICAL KHAH + x (arabic letter khah isolated form - FEA5) + # <font> 062E arabic letter khah +1EE18 ARABIC MATHEMATICAL THAL + x (arabic letter thal isolated form - FEAB) + # <font> 0630 arabic letter thal +1EE19 ARABIC MATHEMATICAL DAD + x (arabic letter dad isolated form - FEBD) + # <font> 0636 arabic letter dad +1EE1A ARABIC MATHEMATICAL ZAH + x (arabic letter zah isolated form - FEC5) + # <font> 0638 arabic letter zah +1EE1B ARABIC MATHEMATICAL GHAIN + x (arabic letter ghain isolated form - FECD) + # <font> 063A arabic letter ghain +1EE1C ARABIC MATHEMATICAL DOTLESS BEH + x (arabic letter dotless beh - 066E) + # <font> 066E arabic letter dotless beh +1EE1D ARABIC MATHEMATICAL DOTLESS NOON + x (arabic letter noon ghunna isolated form - FB9E) + # <font> 06BA arabic letter noon ghunna +1EE1E ARABIC MATHEMATICAL DOTLESS FEH + x (arabic letter dotless feh - 06A1) + # <font> 06A1 arabic letter dotless feh +1EE1F ARABIC MATHEMATICAL DOTLESS QAF + x (arabic letter dotless qaf - 066F) + # <font> 066F arabic 
letter dotless qaf +@ Initial symbols +1EE21 ARABIC MATHEMATICAL INITIAL BEH + x (arabic letter beh initial form - FE91) + # <font> 0628 arabic letter beh +1EE22 ARABIC MATHEMATICAL INITIAL JEEM + x (arabic letter jeem initial form - FE9F) + # <font> 062C arabic letter jeem +1EE24 ARABIC MATHEMATICAL INITIAL HEH + x (arabic letter heh initial form - FEEB) + # <font> 0647 arabic letter heh +1EE27 ARABIC MATHEMATICAL INITIAL HAH + x (arabic letter hah initial form - FEA3) + # <font> 062D arabic letter hah +1EE29 ARABIC MATHEMATICAL INITIAL YEH + x (arabic letter yeh initial form - FEF3) + # <font> 064A arabic letter yeh +1EE2A ARABIC MATHEMATICAL INITIAL KAF + x (arabic letter kaf initial form - FEDB) + # <font> 0643 arabic letter kaf +1EE2B ARABIC MATHEMATICAL INITIAL LAM + x (arabic letter lam initial form - FEDF) + # <font> 0644 arabic letter lam +1EE2C ARABIC MATHEMATICAL INITIAL MEEM + x (arabic letter meem initial form - FEE3) + # <font> 0645 arabic letter meem +1EE2D ARABIC MATHEMATICAL INITIAL NOON + x (arabic letter noon initial form - FEE7) + # <font> 0646 arabic letter noon +1EE2E ARABIC MATHEMATICAL INITIAL SEEN + x (arabic letter seen initial form - FEB3) + # <font> 0633 arabic letter seen +1EE2F ARABIC MATHEMATICAL INITIAL AIN + x (arabic letter ain initial form - FECB) + # <font> 0639 arabic letter ain +1EE30 ARABIC MATHEMATICAL INITIAL FEH + x (arabic letter feh initial form - FED3) + # <font> 0641 arabic letter feh +1EE31 ARABIC MATHEMATICAL INITIAL SAD + x (arabic letter sad initial form - FEBB) + # <font> 0635 arabic letter sad +1EE32 ARABIC MATHEMATICAL INITIAL QAF + x (arabic letter qaf initial form - FED7) + # <font> 0642 arabic letter qaf +1EE34 ARABIC MATHEMATICAL INITIAL SHEEN + x (arabic letter sheen initial form - FEB7) + # <font> 0634 arabic letter sheen +1EE35 ARABIC MATHEMATICAL INITIAL TEH + x (arabic letter teh initial form - FE97) + # <font> 062A arabic letter teh +1EE36 ARABIC MATHEMATICAL INITIAL THEH + x (arabic letter theh initial 
form - FE9B) + # <font> 062B arabic letter theh +1EE37 ARABIC MATHEMATICAL INITIAL KHAH + x (arabic letter khah initial form - FEA7) + # <font> 062E arabic letter khah +1EE39 ARABIC MATHEMATICAL INITIAL DAD + x (arabic letter dad initial form - FEBF) + # <font> 0636 arabic letter dad +1EE3B ARABIC MATHEMATICAL INITIAL GHAIN + x (arabic letter ghain initial form - FECF) + # <font> 063A arabic letter ghain +@ Tailed symbols +1EE42 ARABIC MATHEMATICAL TAILED JEEM + # <font> 062C arabic letter jeem +1EE47 ARABIC MATHEMATICAL TAILED HAH + # <font> 062D arabic letter hah +1EE49 ARABIC MATHEMATICAL TAILED YEH + # <font> 064A arabic letter yeh +1EE4B ARABIC MATHEMATICAL TAILED LAM + # <font> 0644 arabic letter lam +1EE4D ARABIC MATHEMATICAL TAILED NOON + # <font> 0646 arabic letter noon +1EE4E ARABIC MATHEMATICAL TAILED SEEN + # <font> 0633 arabic letter seen +1EE4F ARABIC MATHEMATICAL TAILED AIN + # <font> 0639 arabic letter ain +1EE51 ARABIC MATHEMATICAL TAILED SAD + # <font> 0635 arabic letter sad +1EE52 ARABIC MATHEMATICAL TAILED QAF + # <font> 0642 arabic letter qaf +1EE54 ARABIC MATHEMATICAL TAILED SHEEN + # <font> 0634 arabic letter sheen +1EE57 ARABIC MATHEMATICAL TAILED KHAH + # <font> 062E arabic letter khah +1EE59 ARABIC MATHEMATICAL TAILED DAD + # <font> 0636 arabic letter dad +1EE5B ARABIC MATHEMATICAL TAILED GHAIN + # <font> 063A arabic letter ghain +1EE5D ARABIC MATHEMATICAL TAILED DOTLESS NOON + # <font> 06BA arabic letter noon ghunna +1EE5F ARABIC MATHEMATICAL TAILED DOTLESS QAF + # <font> 066F arabic letter dotless qaf +@ Stretched symbols +1EE61 ARABIC MATHEMATICAL STRETCHED BEH + # <font> 0628 arabic letter beh +1EE62 ARABIC MATHEMATICAL STRETCHED JEEM + # <font> 062C arabic letter jeem +1EE64 ARABIC MATHEMATICAL STRETCHED HEH + # <font> 0647 arabic letter heh +1EE67 ARABIC MATHEMATICAL STRETCHED HAH + # <font> 062D arabic letter hah +1EE68 ARABIC MATHEMATICAL STRETCHED TAH + # <font> 0637 arabic letter tah +1EE69 ARABIC MATHEMATICAL STRETCHED YEH + # 
<font> 064A arabic letter yeh +1EE6A ARABIC MATHEMATICAL STRETCHED KAF + # <font> 0643 arabic letter kaf +1EE6C ARABIC MATHEMATICAL STRETCHED MEEM + # <font> 0645 arabic letter meem +1EE6D ARABIC MATHEMATICAL STRETCHED NOON + # <font> 0646 arabic letter noon +1EE6E ARABIC MATHEMATICAL STRETCHED SEEN + # <font> 0633 arabic letter seen +1EE6F ARABIC MATHEMATICAL STRETCHED AIN + # <font> 0639 arabic letter ain +1EE70 ARABIC MATHEMATICAL STRETCHED FEH + # <font> 0641 arabic letter feh +1EE71 ARABIC MATHEMATICAL STRETCHED SAD + # <font> 0635 arabic letter sad +1EE72 ARABIC MATHEMATICAL STRETCHED QAF + # <font> 0642 arabic letter qaf +1EE74 ARABIC MATHEMATICAL STRETCHED SHEEN + # <font> 0634 arabic letter sheen +1EE75 ARABIC MATHEMATICAL STRETCHED TEH + # <font> 062A arabic letter teh +1EE76 ARABIC MATHEMATICAL STRETCHED THEH + # <font> 062B arabic letter theh +1EE77 ARABIC MATHEMATICAL STRETCHED KHAH + # <font> 062E arabic letter khah +1EE79 ARABIC MATHEMATICAL STRETCHED DAD + # <font> 0636 arabic letter dad +1EE7A ARABIC MATHEMATICAL STRETCHED ZAH + # <font> 0638 arabic letter zah +1EE7B ARABIC MATHEMATICAL STRETCHED GHAIN + # <font> 063A arabic letter ghain +1EE7C ARABIC MATHEMATICAL STRETCHED DOTLESS BEH + # <font> 066E arabic letter dotless beh +1EE7E ARABIC MATHEMATICAL STRETCHED DOTLESS FEH + # <font> 06A1 arabic letter dotless feh +@ Looped symbols +1EE80 ARABIC MATHEMATICAL LOOPED ALEF + # <font> 0627 arabic letter alef +1EE81 ARABIC MATHEMATICAL LOOPED BEH + # <font> 0628 arabic letter beh +1EE82 ARABIC MATHEMATICAL LOOPED JEEM + # <font> 062C arabic letter jeem +1EE83 ARABIC MATHEMATICAL LOOPED DAL + # <font> 062F arabic letter dal +1EE84 ARABIC MATHEMATICAL LOOPED HEH + # <font> 0647 arabic letter heh +1EE85 ARABIC MATHEMATICAL LOOPED WAW + # <font> 0648 arabic letter waw +1EE86 ARABIC MATHEMATICAL LOOPED ZAIN + # <font> 0632 arabic letter zain +1EE87 ARABIC MATHEMATICAL LOOPED HAH + # <font> 062D arabic letter hah +1EE88 ARABIC MATHEMATICAL LOOPED TAH + # 
<font> 0637 arabic letter tah +1EE89 ARABIC MATHEMATICAL LOOPED YEH + # <font> 064A arabic letter yeh +1EE8B ARABIC MATHEMATICAL LOOPED LAM + # <font> 0644 arabic letter lam +1EE8C ARABIC MATHEMATICAL LOOPED MEEM + # <font> 0645 arabic letter meem +1EE8D ARABIC MATHEMATICAL LOOPED NOON + # <font> 0646 arabic letter noon +1EE8E ARABIC MATHEMATICAL LOOPED SEEN + # <font> 0633 arabic letter seen +1EE8F ARABIC MATHEMATICAL LOOPED AIN + # <font> 0639 arabic letter ain +1EE90 ARABIC MATHEMATICAL LOOPED FEH + # <font> 0641 arabic letter feh +1EE91 ARABIC MATHEMATICAL LOOPED SAD + # <font> 0635 arabic letter sad +1EE92 ARABIC MATHEMATICAL LOOPED QAF + # <font> 0642 arabic letter qaf +1EE93 ARABIC MATHEMATICAL LOOPED REH + # <font> 0631 arabic letter reh +1EE94 ARABIC MATHEMATICAL LOOPED SHEEN + # <font> 0634 arabic letter sheen +1EE95 ARABIC MATHEMATICAL LOOPED TEH + # <font> 062A arabic letter teh +1EE96 ARABIC MATHEMATICAL LOOPED THEH + # <font> 062B arabic letter theh +1EE97 ARABIC MATHEMATICAL LOOPED KHAH + # <font> 062E arabic letter khah +1EE98 ARABIC MATHEMATICAL LOOPED THAL + # <font> 0630 arabic letter thal +1EE99 ARABIC MATHEMATICAL LOOPED DAD + # <font> 0636 arabic letter dad +1EE9A ARABIC MATHEMATICAL LOOPED ZAH + # <font> 0638 arabic letter zah +1EE9B ARABIC MATHEMATICAL LOOPED GHAIN + # <font> 063A arabic letter ghain +@ Double-struck symbols +1EEA1 ARABIC MATHEMATICAL DOUBLE-STRUCK BEH + # <font> 0628 arabic letter beh +1EEA2 ARABIC MATHEMATICAL DOUBLE-STRUCK JEEM + # <font> 062C arabic letter jeem +1EEA3 ARABIC MATHEMATICAL DOUBLE-STRUCK DAL + # <font> 062F arabic letter dal +1EEA5 ARABIC MATHEMATICAL DOUBLE-STRUCK WAW + # <font> 0648 arabic letter waw +1EEA6 ARABIC MATHEMATICAL DOUBLE-STRUCK ZAIN + # <font> 0632 arabic letter zain +1EEA7 ARABIC MATHEMATICAL DOUBLE-STRUCK HAH + # <font> 062D arabic letter hah +1EEA8 ARABIC MATHEMATICAL DOUBLE-STRUCK TAH + # <font> 0637 arabic letter tah +1EEA9 ARABIC MATHEMATICAL DOUBLE-STRUCK YEH + # <font> 064A arabic 
letter yeh +1EEAB ARABIC MATHEMATICAL DOUBLE-STRUCK LAM + # <font> 0644 arabic letter lam +1EEAC ARABIC MATHEMATICAL DOUBLE-STRUCK MEEM + # <font> 0645 arabic letter meem +1EEAD ARABIC MATHEMATICAL DOUBLE-STRUCK NOON + # <font> 0646 arabic letter noon +1EEAE ARABIC MATHEMATICAL DOUBLE-STRUCK SEEN + # <font> 0633 arabic letter seen +1EEAF ARABIC MATHEMATICAL DOUBLE-STRUCK AIN + # <font> 0639 arabic letter ain +1EEB0 ARABIC MATHEMATICAL DOUBLE-STRUCK FEH + # <font> 0641 arabic letter feh +1EEB1 ARABIC MATHEMATICAL DOUBLE-STRUCK SAD + # <font> 0635 arabic letter sad +1EEB2 ARABIC MATHEMATICAL DOUBLE-STRUCK QAF + # <font> 0642 arabic letter qaf +1EEB3 ARABIC MATHEMATICAL DOUBLE-STRUCK REH + # <font> 0631 arabic letter reh +1EEB4 ARABIC MATHEMATICAL DOUBLE-STRUCK SHEEN + # <font> 0634 arabic letter sheen +1EEB5 ARABIC MATHEMATICAL DOUBLE-STRUCK TEH + # <font> 062A arabic letter teh +1EEB6 ARABIC MATHEMATICAL DOUBLE-STRUCK THEH + # <font> 062B arabic letter theh +1EEB7 ARABIC MATHEMATICAL DOUBLE-STRUCK KHAH + # <font> 062E arabic letter khah +1EEB8 ARABIC MATHEMATICAL DOUBLE-STRUCK THAL + # <font> 0630 arabic letter thal +1EEB9 ARABIC MATHEMATICAL DOUBLE-STRUCK DAD + # <font> 0636 arabic letter dad +1EEBA ARABIC MATHEMATICAL DOUBLE-STRUCK ZAH + # <font> 0638 arabic letter zah +1EEBB ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN + # <font> 063A arabic letter ghain +@ Stretching operators +@+ The following operators stretch based on the width of the text that is displayed below or above them. 
+1EEF0 ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL + * used in Arabic mathematics to denote summation + * stretched at the tatweel + x (n-ary summation - 2211) +1EEF1 ARABIC MATHEMATICAL OPERATOR HAH WITH DAL + * used in Persian mathematics to denote limits + * stretched between the hah and the dal @@ 1F000 Mahjong Tiles 1F02F @ Prevailing wind tiles 1F000 MAHJONG TILE EAST WIND @@ -34238,7 +35518,8 @@ FFFF <not a character> 1F092 DOMINO TILE VERTICAL-06-05 1F093 DOMINO TILE VERTICAL-06-06 @@ 1F0A0 Playing Cards 1F0FF -@+ These characters are used to represent the 52-card and 56-card variants of modern playing cards, as well as the 56-card Minor Arcana of the Western Tarot. +@+ These characters are used to represent the 52-card and 56-card variants of modern playing cards, as well as the 56-card Minor Arcana of the Western Tarot. The glyphs shown in the charts have only a symbolic and schematic equivalence to particular varieties of actual playing cards. +@ Back of card 1F0A0 PLAYING CARD BACK @ Spades or swords 1F0A1 PLAYING CARD ACE OF SPADES @@ -34293,6 +35574,7 @@ FFFF <not a character> 1F0CC PLAYING CARD KNIGHT OF DIAMONDS 1F0CD PLAYING CARD QUEEN OF DIAMONDS 1F0CE PLAYING CARD KING OF DIAMONDS +@ Joker 1F0CF PLAYING CARD BLACK JOKER @ Clubs or wands 1F0D1 PLAYING CARD ACE OF CLUBS @@ -34309,6 +35591,7 @@ FFFF <not a character> 1F0DC PLAYING CARD KNIGHT OF CLUBS 1F0DD PLAYING CARD QUEEN OF CLUBS 1F0DE PLAYING CARD KING OF CLUBS +@ Joker 1F0DF PLAYING CARD WHITE JOKER * may also be red @@ 1F100 Enclosed Alphanumeric Supplement 1F1FF @@ -34533,6 +35816,17 @@ FFFF <not a character> 1F167 NEGATIVE CIRCLED LATIN CAPITAL LETTER X 1F168 NEGATIVE CIRCLED LATIN CAPITAL LETTER Y 1F169 NEGATIVE CIRCLED LATIN CAPITAL LETTER Z +@ Raised squared Latin sequences +1F16A RAISED MC SIGN + = marque de commerce + * used in Canada + x (trade mark sign - 2122) + # <super> 004D 0043 +1F16B RAISED MD SIGN + = marque déposée + * used in Canada + x (registered sign - 00AE) 
+ # <super> 004D 0044 @ White on black squared Latin letters @+ The square edges may be slightly rounded. 1F170 NEGATIVE SQUARED LATIN CAPITAL LETTER A @@ -35330,6 +36624,7 @@ FFFF <not a character> @ Comic style symbols 1F4A0 DIAMOND SHAPE WITH A DOT INSIDE = kawaii, cute + * meaning of cuteness is based on association of glyph with shape of a flower x (white diamond with centred dot - 27D0) 1F4A1 ELECTRIC LIGHT BULB = idea @@ -35565,6 +36860,16 @@ FFFF <not a character> 1F53D DOWN-POINTING SMALL RED TRIANGLE = play arrow down x (black down-pointing small triangle - 25BE) +@ Religious symbols +1F540 CIRCLED CROSS POMMEE + * Orthodox typikon symbol for great feast service +1F541 CROSS POMMEE WITH HALF-CIRCLE BELOW + * Orthodox typikon symbol for vigil service +1F542 CROSS POMMEE + * Orthodox typikon symbol for Polyeleos + x (four teardrop-spoked asterisk - 2722) +1F543 NOTCHED LEFT SEMICIRCLE WITH THREE DOTS + * Orthodox typikon symbol for lower rank feast @ Clock face symbols 1F550 CLOCK FACE ONE OCLOCK x (watch - 231A) @@ -35602,6 +36907,7 @@ FFFF <not a character> @@ 1F600 Emoticons 1F64F @+ The emoticons have been organized by mouth shape to make it easier to locate the different characters in the code chart. 
@ Faces +1F600 GRINNING FACE 1F601 GRINNING FACE WITH SMILING EYES 1F602 FACE WITH TEARS OF JOY 1F603 SMILING FACE WITH OPEN MOUTH @@ -35620,33 +36926,45 @@ FFFF <not a character> 1F60F SMIRKING FACE 1F610 NEUTRAL FACE * used for the West Wind in some Mahjong annotation +1F611 EXPRESSIONLESS FACE 1F612 UNAMUSED FACE 1F613 FACE WITH COLD SWEAT 1F614 PENSIVE FACE +1F615 CONFUSED FACE 1F616 CONFOUNDED FACE +1F617 KISSING FACE 1F618 FACE THROWING A KISS +1F619 KISSING FACE WITH SMILING EYES 1F61A KISSING FACE WITH CLOSED EYES +1F61B FACE WITH STUCK-OUT TONGUE 1F61C FACE WITH STUCK-OUT TONGUE AND WINKING EYE * kidding, not serious 1F61D FACE WITH STUCK-OUT TONGUE AND TIGHTLY-CLOSED EYES * kidding, not serious 1F61E DISAPPOINTED FACE x (white frowning face - 2639) +1F61F WORRIED FACE 1F620 ANGRY FACE 1F621 POUTING FACE 1F622 CRYING FACE 1F623 PERSEVERING FACE 1F624 FACE WITH LOOK OF TRIUMPH 1F625 DISAPPOINTED BUT RELIEVED FACE +1F626 FROWNING FACE WITH OPEN MOUTH +1F627 ANGUISHED FACE 1F628 FEARFUL FACE 1F629 WEARY FACE 1F62A SLEEPY FACE 1F62B TIRED FACE +1F62C GRIMACING FACE 1F62D LOUDLY CRYING FACE +1F62E FACE WITH OPEN MOUTH +1F62F HUSHED FACE 1F630 FACE WITH OPEN MOUTH AND COLD SWEAT 1F631 FACE SCREAMING IN FEAR 1F632 ASTONISHED FACE 1F633 FLUSHED FACE +1F634 SLEEPING FACE 1F635 DIZZY FACE 1F636 FACE WITHOUT MOUTH * used for the South Wind in some Mahjong annotation @@ -35981,6 +37299,7 @@ FFFF <not a character> 1F76C ALCHEMICAL SYMBOL FOR BATH OF VAPOURS = balneum vaporis 1F76D ALCHEMICAL SYMBOL FOR RETORT + x (alembic - 2697) @ Time 1F76E ALCHEMICAL SYMBOL FOR HOUR x (hourglass - 231B) diff --git a/lib/unicore/NormalizationCorrections.txt b/lib/unicore/NormalizationCorrections.txt index 9c9c2e4420..61800b82ad 100644 --- a/lib/unicore/NormalizationCorrections.txt +++ b/lib/unicore/NormalizationCorrections.txt @@ -1,14 +1,14 @@ -# NormalizationCorrections-6.0.0.txt -# Date: 2010-05-19, 11:21:00 PDT [KW] +# NormalizationCorrections-6.1.0.txt +# Date: 2011-06-23, 
00:46:00 GMT [KW, LI] # # This file is a normative contributory data file in the # Unicode Character Database. # -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # -# The normalization stabilization policy of the Unicode -# Consortium ordinarily precludes any change to the decomposition +# The normalization stability policy of the Unicode Consortium +# ordinarily precludes any change to the decomposition # for any character, once established in a relevant version # of the UnicodeData.txt data file. However, under certain # exceptional (and rare) conditions, an error in a decomposition diff --git a/lib/unicore/PropList.txt b/lib/unicore/PropList.txt index eeeb81845e..f9dcb2ae74 100644 --- a/lib/unicore/PropList.txt +++ b/lib/unicore/PropList.txt @@ -1,8 +1,8 @@ -# PropList-6.0.0.txt -# Date: 2010-08-19, 00:48:28 GMT [MD] +# PropList-6.1.0.txt +# Date: 2011-11-30, 01:49:54 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. 
# For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ @@ -50,6 +50,7 @@ 2212 ; Dash # Sm MINUS SIGN 2E17 ; Dash # Pd DOUBLE OBLIQUE HYPHEN 2E1A ; Dash # Pd HYPHEN WITH DIAERESIS +2E3A..2E3B ; Dash # Pd [2] TWO-EM DASH..THREE-EM DASH 301C ; Dash # Pd WAVE DASH 3030 ; Dash # Pd WAVY DASH 30A0 ; Dash # Pd KATAKANA-HIRAGANA DOUBLE HYPHEN @@ -58,7 +59,7 @@ FE58 ; Dash # Pd SMALL EM DASH FE63 ; Dash # Pd SMALL HYPHEN-MINUS FF0D ; Dash # Pd FULLWIDTH HYPHEN-MINUS -# Total code points: 25 +# Total code points: 27 # ================================================ @@ -158,6 +159,7 @@ A92F ; Terminal_Punctuation # Po KAYAH LI SIGN SHYA A9C7..A9C9 ; Terminal_Punctuation # Po [3] JAVANESE PADA PANGKAT..JAVANESE PADA LUNGSI AA5D..AA5F ; Terminal_Punctuation # Po [3] CHAM PUNCTUATION DANDA..CHAM PUNCTUATION TRIPLE DANDA AADF ; Terminal_Punctuation # Po TAI VIET SYMBOL KOI KOI +AAF0..AAF1 ; Terminal_Punctuation # Po [2] MEETEI MAYEK CHEIKHAN..MEETEI MAYEK AHANG KHUDAM ABEB ; Terminal_Punctuation # Po MEETEI MAYEK CHEIKHEI FE50..FE52 ; Terminal_Punctuation # Po [3] SMALL COMMA..SMALL FULL STOP FE54..FE57 ; Terminal_Punctuation # Po [4] SMALL SEMICOLON..SMALL EXCLAMATION MARK @@ -175,9 +177,11 @@ FF64 ; Terminal_Punctuation # Po HALFWIDTH IDEOGRAPHIC COMMA 10B3A..10B3F ; Terminal_Punctuation # Po [6] TINY TWO DOTS OVER ONE DOT PUNCTUATION..LARGE ONE RING OVER TWO RINGS PUNCTUATION 11047..1104D ; Terminal_Punctuation # Po [7] BRAHMI DANDA..BRAHMI PUNCTUATION LOTUS 110BE..110C1 ; Terminal_Punctuation # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA +11141..11143 ; Terminal_Punctuation # Po [3] CHAKMA DANDA..CHAKMA QUESTION MARK +111C5..111C6 ; Terminal_Punctuation # Po [2] SHARADA DANDA..SHARADA DOUBLE DANDA 12470..12473 ; Terminal_Punctuation # Po [4] CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER..CUNEIFORM PUNCTUATION SIGN DIAGONAL TRICOLON -# Total code points: 169 +# Total code points: 176 # 
================================================ @@ -320,8 +324,41 @@ FF3E ; Other_Math # Sk FULLWIDTH CIRCUMFLEX ACCENT 1D7AA..1D7C2 ; Other_Math # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA 1D7C4..1D7CB ; Other_Math # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA 1D7CE..1D7FF ; Other_Math # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE - -# Total code points: 1217 +1EE00..1EE03 ; Other_Math # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; Other_Math # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; Other_Math # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; Other_Math # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; Other_Math # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; Other_Math # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; Other_Math # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; Other_Math # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; Other_Math # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; Other_Math # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; Other_Math # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; Other_Math # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; Other_Math # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; Other_Math # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; Other_Math # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; Other_Math # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; Other_Math # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; Other_Math # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; Other_Math # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; Other_Math # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; Other_Math # Lo ARABIC MATHEMATICAL TAILED DOTLESS 
QAF +1EE61..1EE62 ; Other_Math # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; Other_Math # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; Other_Math # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; Other_Math # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; Other_Math # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; Other_Math # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; Other_Math # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; Other_Math # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; Other_Math # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; Other_Math # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; Other_Math # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; Other_Math # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN + +# Total code points: 1358 # ================================================ @@ -365,6 +402,8 @@ FF41..FF46 ; Hex_Digit # L& [6] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L 081B..0823 ; Other_Alphabetic # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A 0825..0827 ; Other_Alphabetic # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U 0829..082C ; Other_Alphabetic # Mn [4] SAMARITAN VOWEL SIGN LONG I..SAMARITAN VOWEL SIGN SUKUN +08E4..08E9 ; Other_Alphabetic # Mn [6] ARABIC CURLY FATHA..ARABIC CURLY KASRATAN +08F0..08FE ; Other_Alphabetic # Mn [15] ARABIC OPEN FATHATAN..ARABIC DAMMA WITH DOT 0900..0902 ; Other_Alphabetic # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA 0903 ; Other_Alphabetic # Mc DEVANAGARI SIGN VISARGA 093A ; 
Other_Alphabetic # Mn DEVANAGARI VOWEL SIGN OE @@ -525,6 +564,7 @@ FF41..FF46 ; Hex_Digit # L& [6] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L 1BA2..1BA5 ; Other_Alphabetic # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU 1BA6..1BA7 ; Other_Alphabetic # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG 1BA8..1BA9 ; Other_Alphabetic # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG +1BAC..1BAD ; Other_Alphabetic # Mc [2] SUNDANESE CONSONANT SIGN PASANGAN MA..SUNDANESE CONSONANT SIGN PASANGAN WA 1BE7 ; Other_Alphabetic # Mc BATAK VOWEL SIGN E 1BE8..1BE9 ; Other_Alphabetic # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE 1BEA..1BEC ; Other_Alphabetic # Mc [3] BATAK VOWEL SIGN I..BATAK VOWEL SIGN O @@ -534,9 +574,11 @@ FF41..FF46 ; Hex_Digit # L& [6] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L 1C24..1C2B ; Other_Alphabetic # Mc [8] LEPCHA SUBJOINED LETTER YA..LEPCHA VOWEL SIGN UU 1C2C..1C33 ; Other_Alphabetic # Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T 1C34..1C35 ; Other_Alphabetic # Mc [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG -1CF2 ; Other_Alphabetic # Mc VEDIC SIGN ARDHAVISARGA +1CF2..1CF3 ; Other_Alphabetic # Mc [2] VEDIC SIGN ARDHAVISARGA..VEDIC SIGN ROTATED ARDHAVISARGA 24B6..24E9 ; Other_Alphabetic # So [52] CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN SMALL LETTER Z 2DE0..2DFF ; Other_Alphabetic # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS +A674..A67B ; Other_Alphabetic # Mn [8] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC LETTER OMEGA +A69F ; Other_Alphabetic # Mn COMBINING CYRILLIC LETTER IOTIFIED E A823..A824 ; Other_Alphabetic # Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I A825..A826 ; Other_Alphabetic # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E A827 ; Other_Alphabetic # Mc SYLOTI NAGRI VOWEL SIGN OO @@ -564,6 +606,10 @@ AAB0 ; Other_Alphabetic # Mn TAI VIET MAI KANG 
AAB2..AAB4 ; Other_Alphabetic # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U AAB7..AAB8 ; Other_Alphabetic # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA AABE ; Other_Alphabetic # Mn TAI VIET VOWEL AM +AAEB ; Other_Alphabetic # Mc MEETEI MAYEK VOWEL SIGN II +AAEC..AAED ; Other_Alphabetic # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI +AAEE..AAEF ; Other_Alphabetic # Mc [2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU +AAF5 ; Other_Alphabetic # Mc MEETEI MAYEK VOWEL SIGN VISARGA ABE3..ABE4 ; Other_Alphabetic # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP ABE5 ; Other_Alphabetic # Mn MEETEI MAYEK VOWEL SIGN ANAP ABE6..ABE7 ; Other_Alphabetic # Mc [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP @@ -581,8 +627,23 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA 110B0..110B2 ; Other_Alphabetic # Mc [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II 110B3..110B6 ; Other_Alphabetic # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI 110B7..110B8 ; Other_Alphabetic # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU - -# Total code points: 795 +11100..11102 ; Other_Alphabetic # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11127..1112B ; Other_Alphabetic # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112C ; Other_Alphabetic # Mc CHAKMA VOWEL SIGN E +1112D..11132 ; Other_Alphabetic # Mn [6] CHAKMA VOWEL SIGN AI..CHAKMA AU MARK +11180..11181 ; Other_Alphabetic # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +11182 ; Other_Alphabetic # Mc SHARADA SIGN VISARGA +111B3..111B5 ; Other_Alphabetic # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II +111B6..111BE ; Other_Alphabetic # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +111BF ; Other_Alphabetic # Mc SHARADA VOWEL SIGN AU +116AB ; Other_Alphabetic # Mn TAKRI SIGN ANUSVARA +116AC ; Other_Alphabetic # Mc TAKRI SIGN VISARGA +116AD ; Other_Alphabetic # Mn TAKRI VOWEL SIGN AA +116AE..116AF ; Other_Alphabetic # Mc [2] TAKRI 
VOWEL SIGN I..TAKRI VOWEL SIGN II +116B0..116B5 ; Other_Alphabetic # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +16F51..16F7E ; Other_Alphabetic # Mc [46] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN NG + +# Total code points: 922 # ================================================ @@ -591,16 +652,15 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA 3021..3029 ; Ideographic # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE 3038..303A ; Ideographic # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY 3400..4DB5 ; Ideographic # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5 -4E00..9FCB ; Ideographic # Lo [20940] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCB -F900..FA2D ; Ideographic # Lo [302] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA2D -FA30..FA6D ; Ideographic # Lo [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D +4E00..9FCC ; Ideographic # Lo [20941] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCC +F900..FA6D ; Ideographic # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 20000..2A6D6 ; Ideographic # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6 2A700..2B734 ; Ideographic # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734 2B740..2B81D ; Ideographic # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D 2F800..2FA1D ; Ideographic # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D -# Total code points: 75630 +# Total code points: 75633 # ================================================ @@ -645,6 +705,7 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM 07EB..07F3 ; Diacritic # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE 07F4..07F5 ; Diacritic # Lm [2] NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE 
0818..0819 ; Diacritic # Mn [2] SAMARITAN MARK OCCLUSION..SAMARITAN MARK DAGESH +08E4..08FE ; Diacritic # Mn [27] ARABIC CURLY FATHA..ARABIC DAMMA WITH DOT 093C ; Diacritic # Mn DEVANAGARI SIGN NUKTA 094D ; Diacritic # Mn DEVANAGARI SIGN VIRAMA 0951..0954 ; Diacritic # Mn [4] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI ACUTE ACCENT @@ -689,6 +750,7 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM 1B44 ; Diacritic # Mc BALINESE ADEG ADEG 1B6B..1B73 ; Diacritic # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG 1BAA ; Diacritic # Mc SUNDANESE SIGN PAMAAEH +1BAB ; Diacritic # Mn SUNDANESE SIGN VIRAMA 1C36..1C37 ; Diacritic # Mn [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA 1C78..1C7D ; Diacritic # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD 1CD0..1CD2 ; Diacritic # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA @@ -697,8 +759,8 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM 1CE1 ; Diacritic # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA 1CE2..1CE8 ; Diacritic # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL 1CED ; Diacritic # Mn VEDIC SIGN TIRYAK -1D2C..1D61 ; Diacritic # Lm [54] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI -1D62..1D6A ; Diacritic # L& [9] LATIN SUBSCRIPT SMALL LETTER I..GREEK SUBSCRIPT SMALL LETTER CHI +1CF4 ; Diacritic # Mn VEDIC TONE CANDRA ABOVE +1D2C..1D6A ; Diacritic # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI 1DC4..1DCF ; Diacritic # Mn [12] COMBINING MACRON-ACUTE..COMBINING ZIGZAG BELOW 1DFD..1DFF ; Diacritic # Mn [3] COMBINING ALMOST EQUAL TO BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW 1FBD ; Diacritic # Sk GREEK KORONIS @@ -709,7 +771,8 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM 1FFD..1FFE ; Diacritic # Sk [2] GREEK OXIA..GREEK DASIA 2CEF..2CF1 ; Diacritic # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS 2E2F ; 
Diacritic # Lm VERTICAL TILDE -302A..302F ; Diacritic # Mn [6] IDEOGRAPHIC LEVEL TONE MARK..HANGUL DOUBLE DOT TONE MARK +302A..302D ; Diacritic # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK +302E..302F ; Diacritic # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK 3099..309A ; Diacritic # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK 309B..309C ; Diacritic # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK 30FC ; Diacritic # Lm KATAKANA-HIRAGANA PROLONGED SOUND MARK @@ -720,6 +783,7 @@ A6F0..A6F1 ; Diacritic # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINI A717..A71F ; Diacritic # Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK A720..A721 ; Diacritic # Sk [2] MODIFIER LETTER STRESS AND HIGH TONE..MODIFIER LETTER STRESS AND LOW TONE A788 ; Diacritic # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT +A7F8..A7F9 ; Diacritic # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE A8C4 ; Diacritic # Mn SAURASHTRA SIGN VIRAMA A8E0..A8F1 ; Diacritic # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA A92B..A92D ; Diacritic # Mn [3] KAYAH LI TONE PLOPHU..KAYAH LI TONE CALYA PLOPHU @@ -732,6 +796,7 @@ AABF ; Diacritic # Mn TAI VIET TONE MAI EK AAC0 ; Diacritic # Lo TAI VIET TONE MAI NUENG AAC1 ; Diacritic # Mn TAI VIET TONE MAI THO AAC2 ; Diacritic # Lo TAI VIET TONE MAI SONG +AAF6 ; Diacritic # Mn MEETEI MAYEK VIRAMA ABEC ; Diacritic # Mc MEETEI MAYEK LUM IYEK ABED ; Diacritic # Mn MEETEI MAYEK APUN IYEK FB1E ; Diacritic # Mn HEBREW POINT JUDEO-SPANISH VARIKA @@ -742,13 +807,19 @@ FF70 ; Diacritic # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND FF9E..FF9F ; Diacritic # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK FFE3 ; Diacritic # Sk FULLWIDTH MACRON 110B9..110BA ; Diacritic # Mn [2] KAITHI SIGN VIRAMA..KAITHI 
SIGN NUKTA +11133..11134 ; Diacritic # Mn [2] CHAKMA VIRAMA..CHAKMA MAAYYAA +111C0 ; Diacritic # Mc SHARADA SIGN VIRAMA +116B6 ; Diacritic # Mc TAKRI SIGN VIRAMA +116B7 ; Diacritic # Mn TAKRI SIGN NUKTA +16F8F..16F92 ; Diacritic # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW +16F93..16F9F ; Diacritic # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 1D167..1D169 ; Diacritic # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 1D16D..1D172 ; Diacritic # Mc [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5 1D17B..1D182 ; Diacritic # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE 1D185..1D18B ; Diacritic # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE 1D1AA..1D1AD ; Diacritic # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO -# Total code points: 639 +# Total code points: 693 # ================================================ @@ -758,6 +829,7 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON 07FA ; Extender # Lm NKO LAJANYALAN 0E46 ; Extender # Lm THAI CHARACTER MAIYAMOK 0EC6 ; Extender # Lm LAO KO LA +180A ; Extender # Po MONGOLIAN NIRUGU 1843 ; Extender # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN 1AA7 ; Extender # Lm TAI THAM SIGN MAI YAMOK 1C36 ; Extender # Mn LEPCHA SIGN RAN @@ -771,27 +843,33 @@ A60C ; Extender # Lm VAI SYLLABLE LENGTHENER A9CF ; Extender # Lm JAVANESE PANGRANGKEP AA70 ; Extender # Lm MYANMAR MODIFIER LETTER KHAMTI REDUPLICATION AADD ; Extender # Lm TAI VIET SYMBOL SAM +AAF3..AAF4 ; Extender # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK FF70 ; Extender # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK -# Total code points: 28 +# Total code points: 31 # ================================================ +00AA ; Other_Lowercase # Lo FEMININE ORDINAL INDICATOR +00BA ; Other_Lowercase # Lo MASCULINE ORDINAL INDICATOR 02B0..02B8 ; Other_Lowercase # Lm [9] MODIFIER LETTER SMALL 
H..MODIFIER LETTER SMALL Y 02C0..02C1 ; Other_Lowercase # Lm [2] MODIFIER LETTER GLOTTAL STOP..MODIFIER LETTER REVERSED GLOTTAL STOP 02E0..02E4 ; Other_Lowercase # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP 0345 ; Other_Lowercase # Mn COMBINING GREEK YPOGEGRAMMENI 037A ; Other_Lowercase # Lm GREEK YPOGEGRAMMENI -1D2C..1D61 ; Other_Lowercase # Lm [54] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI +1D2C..1D6A ; Other_Lowercase # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI 1D78 ; Other_Lowercase # Lm MODIFIER LETTER CYRILLIC EN 1D9B..1DBF ; Other_Lowercase # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA -2090..2094 ; Other_Lowercase # Lm [5] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER SCHWA +2071 ; Other_Lowercase # Lm SUPERSCRIPT LATIN SMALL LETTER I +207F ; Other_Lowercase # Lm SUPERSCRIPT LATIN SMALL LETTER N +2090..209C ; Other_Lowercase # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T 2170..217F ; Other_Lowercase # Nl [16] SMALL ROMAN NUMERAL ONE..SMALL ROMAN NUMERAL ONE THOUSAND 24D0..24E9 ; Other_Lowercase # So [26] CIRCLED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z -2C7D ; Other_Lowercase # Lm MODIFIER LETTER CAPITAL V +2C7C..2C7D ; Other_Lowercase # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V A770 ; Other_Lowercase # Lm MODIFIER LETTER US +A7F8..A7F9 ; Other_Lowercase # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE -# Total code points: 159 +# Total code points: 183 # ================================================ @@ -838,11 +916,12 @@ FFFFE..FFFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-FFFFE>..<noncha 0DCF ; Other_Grapheme_Extend # Mc SINHALA VOWEL SIGN AELA-PILLA 0DDF ; Other_Grapheme_Extend # Mc SINHALA VOWEL SIGN GAYANUKITTA 200C..200D ; Other_Grapheme_Extend # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER +302E..302F ; Other_Grapheme_Extend # Mc 
[2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK FF9E..FF9F ; Other_Grapheme_Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK 1D165 ; Other_Grapheme_Extend # Mc MUSICAL SYMBOL COMBINING STEM 1D16E..1D172 ; Other_Grapheme_Extend # Mc [5] MUSICAL SYMBOL COMBINING FLAG-1..MUSICAL SYMBOL COMBINING FLAG-5 -# Total code points: 23 +# Total code points: 25 # ================================================ @@ -868,7 +947,7 @@ FF9E..FF9F ; Other_Grapheme_Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND # ================================================ 3400..4DB5 ; Unified_Ideograph # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5 -4E00..9FCB ; Unified_Ideograph # Lo [20940] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCB +4E00..9FCC ; Unified_Ideograph # Lo [20941] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCC FA0E..FA0F ; Unified_Ideograph # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA0E..CJK COMPATIBILITY IDEOGRAPH-FA0F FA11 ; Unified_Ideograph # Lo CJK COMPATIBILITY IDEOGRAPH-FA11 FA13..FA14 ; Unified_Ideograph # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA13..CJK COMPATIBILITY IDEOGRAPH-FA14 @@ -880,12 +959,13 @@ FA27..FA29 ; Unified_Ideograph # Lo [3] CJK COMPATIBILITY IDEOGRAPH-FA27..C 2A700..2B734 ; Unified_Ideograph # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734 2B740..2B81D ; Unified_Ideograph # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D -# Total code points: 74616 +# Total code points: 74617 # ================================================ 034F ; Other_Default_Ignorable_Code_Point # Mn COMBINING GRAPHEME JOINER 115F..1160 ; Other_Default_Ignorable_Code_Point # Lo [2] HANGUL CHOSEONG FILLER..HANGUL JUNGSEONG FILLER +17B4..17B5 ; Other_Default_Ignorable_Code_Point # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA 2065..2069 ; Other_Default_Ignorable_Code_Point # Cn [5] <reserved-2065>..<reserved-2069> 3164 ; 
Other_Default_Ignorable_Code_Point # Lo HANGUL FILLER FFA0 ; Other_Default_Ignorable_Code_Point # Lo HALFWIDTH HANGUL FILLER @@ -895,7 +975,7 @@ E0002..E001F ; Other_Default_Ignorable_Code_Point # Cn [30] <reserved-E0002>.. E0080..E00FF ; Other_Default_Ignorable_Code_Point # Cn [128] <reserved-E0080>..<reserved-E00FF> E01F0..E0FFF ; Other_Default_Ignorable_Code_Point # Cn [3600] <reserved-E01F0>..<reserved-E0FFF> -# Total code points: 3778 +# Total code points: 3780 # ================================================ @@ -923,7 +1003,7 @@ E0020..E007F ; Deprecated # Cf [96] TAG SPACE..CANCEL TAG 03F3 ; Soft_Dotted # L& GREEK LETTER YOT 0456 ; Soft_Dotted # L& CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I 0458 ; Soft_Dotted # L& CYRILLIC SMALL LETTER JE -1D62 ; Soft_Dotted # L& LATIN SUBSCRIPT SMALL LETTER I +1D62 ; Soft_Dotted # Lm LATIN SUBSCRIPT SMALL LETTER I 1D96 ; Soft_Dotted # L& LATIN SMALL LETTER I WITH RETROFLEX HOOK 1DA4 ; Soft_Dotted # Lm MODIFIER LETTER SMALL I WITH STROKE 1DA8 ; Soft_Dotted # Lm MODIFIER LETTER SMALL J WITH CROSSED-TAIL @@ -931,7 +1011,7 @@ E0020..E007F ; Deprecated # Cf [96] TAG SPACE..CANCEL TAG 1ECB ; Soft_Dotted # L& LATIN SMALL LETTER I WITH DOT BELOW 2071 ; Soft_Dotted # Lm SUPERSCRIPT LATIN SMALL LETTER I 2148..2149 ; Soft_Dotted # L& [2] DOUBLE-STRUCK ITALIC SMALL I..DOUBLE-STRUCK ITALIC SMALL J -2C7C ; Soft_Dotted # L& LATIN SUBSCRIPT SMALL LETTER J +2C7C ; Soft_Dotted # Lm LATIN SUBSCRIPT SMALL LETTER J 1D422..1D423 ; Soft_Dotted # L& [2] MATHEMATICAL BOLD SMALL I..MATHEMATICAL BOLD SMALL J 1D456..1D457 ; Soft_Dotted # L& [2] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL ITALIC SMALL J 1D48A..1D48B ; Soft_Dotted # L& [2] MATHEMATICAL BOLD ITALIC SMALL I..MATHEMATICAL BOLD ITALIC SMALL J @@ -1014,6 +1094,7 @@ A8CE..A8CF ; STerm # Po [2] SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA A92F ; STerm # Po KAYAH LI SIGN SHYA A9C8..A9C9 ; STerm # Po [2] JAVANESE PADA LINGSA..JAVANESE PADA LUNGSI AA5D..AA5F ; STerm # Po [3] CHAM PUNCTUATION 
DANDA..CHAM PUNCTUATION TRIPLE DANDA +AAF0..AAF1 ; STerm # Po [2] MEETEI MAYEK CHEIKHAN..MEETEI MAYEK AHANG KHUDAM ABEB ; STerm # Po MEETEI MAYEK CHEIKHEI FE52 ; STerm # Po SMALL FULL STOP FE56..FE57 ; STerm # Po [2] SMALL QUESTION MARK..SMALL EXCLAMATION MARK @@ -1024,8 +1105,10 @@ FF61 ; STerm # Po HALFWIDTH IDEOGRAPHIC FULL STOP 10A56..10A57 ; STerm # Po [2] KHAROSHTHI PUNCTUATION DANDA..KHAROSHTHI PUNCTUATION DOUBLE DANDA 11047..11048 ; STerm # Po [2] BRAHMI DANDA..BRAHMI DOUBLE DANDA 110BE..110C1 ; STerm # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA +11141..11143 ; STerm # Po [3] CHAKMA DANDA..CHAKMA QUESTION MARK +111C5..111C6 ; STerm # Po [2] SHARADA DANDA..SHARADA DOUBLE DANDA -# Total code points: 76 +# Total code points: 83 # ================================================ @@ -1072,14 +1155,15 @@ E0100..E01EF ; Variation_Selector # Mn [240] VARIATION SELECTOR-17..VARIATION S 007E ; Pattern_Syntax # Sm TILDE 00A1 ; Pattern_Syntax # Po INVERTED EXCLAMATION MARK 00A2..00A5 ; Pattern_Syntax # Sc [4] CENT SIGN..YEN SIGN -00A6..00A7 ; Pattern_Syntax # So [2] BROKEN BAR..SECTION SIGN +00A6 ; Pattern_Syntax # So BROKEN BAR +00A7 ; Pattern_Syntax # Po SECTION SIGN 00A9 ; Pattern_Syntax # So COPYRIGHT SIGN 00AB ; Pattern_Syntax # Pi LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 00AC ; Pattern_Syntax # Sm NOT SIGN 00AE ; Pattern_Syntax # So REGISTERED SIGN 00B0 ; Pattern_Syntax # So DEGREE SIGN 00B1 ; Pattern_Syntax # Sm PLUS-MINUS SIGN -00B6 ; Pattern_Syntax # So PILCROW SIGN +00B6 ; Pattern_Syntax # Po PILCROW SIGN 00BB ; Pattern_Syntax # Pf RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK 00BF ; Pattern_Syntax # Po INVERTED QUESTION MARK 00D7 ; Pattern_Syntax # Sm MULTIPLICATION SIGN @@ -1173,11 +1257,7 @@ E0100..E01EF ; Variation_Selector # Mn [240] VARIATION SELECTOR-17..VARIATION S 27C0..27C4 ; Pattern_Syntax # Sm [5] THREE DIMENSIONAL ANGLE..OPEN SUPERSET 27C5 ; Pattern_Syntax # Ps LEFT S-SHAPED BAG DELIMITER 27C6 ; Pattern_Syntax # Pe RIGHT S-SHAPED BAG DELIMITER 
-27C7..27CA ; Pattern_Syntax # Sm [4] OR WITH DOT INSIDE..VERTICAL BAR WITH HORIZONTAL STROKE -27CB ; Pattern_Syntax # Cn <reserved-27CB> -27CC ; Pattern_Syntax # Sm LONG DIVISION -27CD ; Pattern_Syntax # Cn <reserved-27CD> -27CE..27E5 ; Pattern_Syntax # Sm [24] SQUARED LOGICAL AND..WHITE SQUARE WITH RIGHTWARDS TICK +27C7..27E5 ; Pattern_Syntax # Sm [31] OR WITH DOT INSIDE..WHITE SQUARE WITH RIGHTWARDS TICK 27E6 ; Pattern_Syntax # Ps MATHEMATICAL LEFT WHITE SQUARE BRACKET 27E7 ; Pattern_Syntax # Pe MATHEMATICAL RIGHT WHITE SQUARE BRACKET 27E8 ; Pattern_Syntax # Ps MATHEMATICAL LEFT ANGLE BRACKET @@ -1260,8 +1340,9 @@ E0100..E01EF ; Variation_Selector # Mn [240] VARIATION SELECTOR-17..VARIATION S 2E29 ; Pattern_Syntax # Pe RIGHT DOUBLE PARENTHESIS 2E2A..2E2E ; Pattern_Syntax # Po [5] TWO DOTS OVER ONE DOT PUNCTUATION..REVERSED QUESTION MARK 2E2F ; Pattern_Syntax # Lm VERTICAL TILDE -2E30..2E31 ; Pattern_Syntax # Po [2] RING POINT..WORD SEPARATOR MIDDLE DOT -2E32..2E7F ; Pattern_Syntax # Cn [78] <reserved-2E32>..<reserved-2E7F> +2E30..2E39 ; Pattern_Syntax # Po [10] RING POINT..TOP HALF SECTION SIGN +2E3A..2E3B ; Pattern_Syntax # Pd [2] TWO-EM DASH..THREE-EM DASH +2E3C..2E7F ; Pattern_Syntax # Cn [68] <reserved-2E3C>..<reserved-2E7F> 3001..3003 ; Pattern_Syntax # Po [3] IDEOGRAPHIC COMMA..DITTO MARK 3008 ; Pattern_Syntax # Ps LEFT ANGLE BRACKET 3009 ; Pattern_Syntax # Pe RIGHT ANGLE BRACKET diff --git a/lib/unicore/PropValueAliases.txt b/lib/unicore/PropValueAliases.txt index 819e800e13..2f7bde28ec 100644 --- a/lib/unicore/PropValueAliases.txt +++ b/lib/unicore/PropValueAliases.txt @@ -1,15 +1,14 @@ -# PropertyValueAliases-6.0.0.txt -# Date: 2010-07-17, 22:44:06 GMT [MD] +# PropertyValueAliases-6.1.0.txt +# Date: 2011-12-07, 23:40:57 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. 
# For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ # # This file contains aliases for property values used in the UCD. # These names can be used for XML formats of UCD data, for regular-expression # property tests, and other programmatic textual descriptions of Unicode data. -# For information on which properties are normative, see UCD.html. # # The names may be translated in appropriate environments, and additional # aliases may be useful. @@ -23,7 +22,6 @@ # property value name is used. # # Second Field: The second field is an abbreviated name. -# If there is no abbreviated name available, the field is marked with "n/a". # # Third Field: The third field is a long name. # @@ -35,7 +33,7 @@ # Loose matching should be applied to all property names and property values, with # the exception of String Property values. With loose matching of property names and # values, the case distinctions, whitespace, and '_' are ignored. For Numeric Property -# values, numeric equivalences are applied: thus "01.00" is equivalent to "1". +# values, numeric equivalencies are applied: thus "01.00" is equivalent to "1". # # NOTE: Property value names are NOT unique across properties. 
For example: # @@ -57,61 +55,62 @@ # ASCII_Hex_Digit (AHex) -AHex; N ; No ; F ; False -AHex; Y ; Yes ; T ; True +AHex; N ; No ; F ; False +AHex; Y ; Yes ; T ; True # Age (age) -age; n/a ; 1.1 -age; n/a ; 2.0 -age; n/a ; 2.1 -age; n/a ; 3.0 -age; n/a ; 3.1 -age; n/a ; 3.2 -age; n/a ; 4.0 -age; n/a ; 4.1 -age; n/a ; 5.0 -age; n/a ; 5.1 -age; n/a ; 5.2 -age; n/a ; 6.0 -age; n/a ; unassigned +age; 1.1 ; V1_1 +age; 2.0 ; V2_0 +age; 2.1 ; V2_1 +age; 3.0 ; V3_0 +age; 3.1 ; V3_1 +age; 3.2 ; V3_2 +age; 4.0 ; V4_0 +age; 4.1 ; V4_1 +age; 5.0 ; V5_0 +age; 5.1 ; V5_1 +age; 5.2 ; V5_2 +age; 6.0 ; V6_0 +age; 6.1 ; V6_1 +age; NA ; Unassigned # Alphabetic (Alpha) -Alpha; N ; No ; F ; False -Alpha; Y ; Yes ; T ; True +Alpha; N ; No ; F ; False +Alpha; Y ; Yes ; T ; True # Bidi_Class (bc) -bc ; AL ; Arabic_Letter -bc ; AN ; Arabic_Number -bc ; B ; Paragraph_Separator -bc ; BN ; Boundary_Neutral -bc ; CS ; Common_Separator -bc ; EN ; European_Number -bc ; ES ; European_Separator -bc ; ET ; European_Terminator -bc ; L ; Left_To_Right -bc ; LRE ; Left_To_Right_Embedding -bc ; LRO ; Left_To_Right_Override -bc ; NSM ; Nonspacing_Mark -bc ; ON ; Other_Neutral -bc ; PDF ; Pop_Directional_Format -bc ; R ; Right_To_Left -bc ; RLE ; Right_To_Left_Embedding -bc ; RLO ; Right_To_Left_Override -bc ; S ; Segment_Separator -bc ; WS ; White_Space +bc ; AL ; Arabic_Letter +bc ; AN ; Arabic_Number +bc ; B ; Paragraph_Separator +bc ; BN ; Boundary_Neutral +bc ; CS ; Common_Separator +bc ; EN ; European_Number +bc ; ES ; European_Separator +bc ; ET ; European_Terminator +bc ; L ; Left_To_Right +bc ; LRE ; Left_To_Right_Embedding +bc ; LRO ; Left_To_Right_Override +bc ; NSM ; Nonspacing_Mark +bc ; ON ; Other_Neutral +bc ; PDF ; Pop_Directional_Format +bc ; R ; Right_To_Left +bc ; RLE ; Right_To_Left_Embedding +bc ; RLO ; Right_To_Left_Override +bc ; S ; Segment_Separator +bc ; WS ; White_Space # Bidi_Control (Bidi_C) -Bidi_C; N ; No ; F ; False -Bidi_C; Y ; Yes ; T ; True +Bidi_C; N ; No ; F ; False 
+Bidi_C; Y ; Yes ; T ; True # Bidi_Mirrored (Bidi_M) -Bidi_M; N ; No ; F ; False -Bidi_M; Y ; Yes ; T ; True +Bidi_M; N ; No ; F ; False +Bidi_M; Y ; Yes ; T ; True # Bidi_Mirroring_Glyph (bmg) @@ -119,239 +118,286 @@ Bidi_M; Y ; Yes ; T # Block (blk) -blk; n/a ; Aegean_Numbers -blk; n/a ; Alchemical_Symbols -blk; n/a ; Alphabetic_Presentation_Forms -blk; n/a ; Ancient_Greek_Musical_Notation -blk; n/a ; Ancient_Greek_Numbers -blk; n/a ; Ancient_Symbols -blk; n/a ; Arabic -blk; n/a ; Arabic_Presentation_Forms_A ; Arabic_Presentation_Forms-A -blk; n/a ; Arabic_Presentation_Forms_B -blk; n/a ; Arabic_Supplement -blk; n/a ; Armenian -blk; n/a ; Arrows -blk; n/a ; Avestan -blk; n/a ; Balinese -blk; n/a ; Bamum -blk; n/a ; Bamum_Supplement -blk; n/a ; Basic_Latin ; ASCII -blk; n/a ; Batak -blk; n/a ; Bengali -blk; n/a ; Block_Elements -blk; n/a ; Bopomofo -blk; n/a ; Bopomofo_Extended -blk; n/a ; Box_Drawing -blk; n/a ; Brahmi -blk; n/a ; Braille_Patterns -blk; n/a ; Buginese -blk; n/a ; Buhid -blk; n/a ; Byzantine_Musical_Symbols -blk; n/a ; Carian -blk; n/a ; Cham -blk; n/a ; Cherokee -blk; n/a ; CJK_Compatibility -blk; n/a ; CJK_Compatibility_Forms -blk; n/a ; CJK_Compatibility_Ideographs -blk; n/a ; CJK_Compatibility_Ideographs_Supplement -blk; n/a ; CJK_Radicals_Supplement -blk; n/a ; CJK_Strokes -blk; n/a ; CJK_Symbols_And_Punctuation -blk; n/a ; CJK_Unified_Ideographs -blk; n/a ; CJK_Unified_Ideographs_Extension_A -blk; n/a ; CJK_Unified_Ideographs_Extension_B -blk; n/a ; CJK_Unified_Ideographs_Extension_C -blk; n/a ; CJK_Unified_Ideographs_Extension_D -blk; n/a ; Combining_Diacritical_Marks -blk; n/a ; Combining_Diacritical_Marks_For_Symbols; Combining_Marks_For_Symbols -blk; n/a ; Combining_Diacritical_Marks_Supplement -blk; n/a ; Combining_Half_Marks -blk; n/a ; Common_Indic_Number_Forms -blk; n/a ; Control_Pictures -blk; n/a ; Coptic -blk; n/a ; Counting_Rod_Numerals -blk; n/a ; Cuneiform -blk; n/a ; Cuneiform_Numbers_And_Punctuation -blk; n/a ; 
Currency_Symbols -blk; n/a ; Cypriot_Syllabary -blk; n/a ; Cyrillic -blk; n/a ; Cyrillic_Extended_A -blk; n/a ; Cyrillic_Extended_B -blk; n/a ; Cyrillic_Supplement ; Cyrillic_Supplementary -blk; n/a ; Deseret -blk; n/a ; Devanagari -blk; n/a ; Devanagari_Extended -blk; n/a ; Dingbats -blk; n/a ; Domino_Tiles -blk; n/a ; Egyptian_Hieroglyphs -blk; n/a ; Emoticons -blk; n/a ; Enclosed_Alphanumeric_Supplement -blk; n/a ; Enclosed_Alphanumerics -blk; n/a ; Enclosed_CJK_Letters_And_Months -blk; n/a ; Enclosed_Ideographic_Supplement -blk; n/a ; Ethiopic -blk; n/a ; Ethiopic_Extended -blk; n/a ; Ethiopic_Extended_A -blk; n/a ; Ethiopic_Supplement -blk; n/a ; General_Punctuation -blk; n/a ; Geometric_Shapes -blk; n/a ; Georgian -blk; n/a ; Georgian_Supplement -blk; n/a ; Glagolitic -blk; n/a ; Gothic -blk; n/a ; Greek_And_Coptic ; Greek -blk; n/a ; Greek_Extended -blk; n/a ; Gujarati -blk; n/a ; Gurmukhi -blk; n/a ; Halfwidth_And_Fullwidth_Forms -blk; n/a ; Hangul_Compatibility_Jamo -blk; n/a ; Hangul_Jamo -blk; n/a ; Hangul_Jamo_Extended_A -blk; n/a ; Hangul_Jamo_Extended_B -blk; n/a ; Hangul_Syllables -blk; n/a ; Hanunoo -blk; n/a ; Hebrew -blk; n/a ; High_Private_Use_Surrogates -blk; n/a ; High_Surrogates -blk; n/a ; Hiragana -blk; n/a ; Ideographic_Description_Characters -blk; n/a ; Imperial_Aramaic -blk; n/a ; Inscriptional_Pahlavi -blk; n/a ; Inscriptional_Parthian -blk; n/a ; IPA_Extensions -blk; n/a ; Javanese -blk; n/a ; Kaithi -blk; n/a ; Kana_Supplement -blk; n/a ; Kanbun -blk; n/a ; Kangxi_Radicals -blk; n/a ; Kannada -blk; n/a ; Katakana -blk; n/a ; Katakana_Phonetic_Extensions -blk; n/a ; Kayah_Li -blk; n/a ; Kharoshthi -blk; n/a ; Khmer -blk; n/a ; Khmer_Symbols -blk; n/a ; Lao -blk; n/a ; Latin_1_Supplement ; Latin_1 -blk; n/a ; Latin_Extended_A -blk; n/a ; Latin_Extended_Additional -blk; n/a ; Latin_Extended_B -blk; n/a ; Latin_Extended_C -blk; n/a ; Latin_Extended_D -blk; n/a ; Lepcha -blk; n/a ; Letterlike_Symbols -blk; n/a ; Limbu -blk; n/a ; 
Linear_B_Ideograms -blk; n/a ; Linear_B_Syllabary -blk; n/a ; Lisu -blk; n/a ; Low_Surrogates -blk; n/a ; Lycian -blk; n/a ; Lydian -blk; n/a ; Mahjong_Tiles -blk; n/a ; Malayalam -blk; n/a ; Mandaic -blk; n/a ; Mathematical_Alphanumeric_Symbols -blk; n/a ; Mathematical_Operators -blk; n/a ; Meetei_Mayek -blk; n/a ; Miscellaneous_Mathematical_Symbols_A -blk; n/a ; Miscellaneous_Mathematical_Symbols_B -blk; n/a ; Miscellaneous_Symbols -blk; n/a ; Miscellaneous_Symbols_And_Arrows -blk; n/a ; Miscellaneous_Symbols_And_Pictographs -blk; n/a ; Miscellaneous_Technical -blk; n/a ; Modifier_Tone_Letters -blk; n/a ; Mongolian -blk; n/a ; Musical_Symbols -blk; n/a ; Myanmar -blk; n/a ; Myanmar_Extended_A -blk; n/a ; New_Tai_Lue -blk; n/a ; NKo -blk; n/a ; No_Block -blk; n/a ; Number_Forms -blk; n/a ; Ogham -blk; n/a ; Ol_Chiki -blk; n/a ; Old_Italic -blk; n/a ; Old_Persian -blk; n/a ; Old_South_Arabian -blk; n/a ; Old_Turkic -blk; n/a ; Optical_Character_Recognition -blk; n/a ; Oriya -blk; n/a ; Osmanya -blk; n/a ; Phags_Pa -blk; n/a ; Phaistos_Disc -blk; n/a ; Phoenician -blk; n/a ; Phonetic_Extensions -blk; n/a ; Phonetic_Extensions_Supplement -blk; n/a ; Playing_Cards -blk; n/a ; Private_Use_Area ; Private_Use -blk; n/a ; Rejang -blk; n/a ; Rumi_Numeral_Symbols -blk; n/a ; Runic -blk; n/a ; Samaritan -blk; n/a ; Saurashtra -blk; n/a ; Shavian -blk; n/a ; Sinhala -blk; n/a ; Small_Form_Variants -blk; n/a ; Spacing_Modifier_Letters -blk; n/a ; Specials -blk; n/a ; Sundanese -blk; n/a ; Superscripts_And_Subscripts -blk; n/a ; Supplemental_Arrows_A -blk; n/a ; Supplemental_Arrows_B -blk; n/a ; Supplemental_Mathematical_Operators -blk; n/a ; Supplemental_Punctuation -blk; n/a ; Supplementary_Private_Use_Area_A -blk; n/a ; Supplementary_Private_Use_Area_B -blk; n/a ; Syloti_Nagri -blk; n/a ; Syriac -blk; n/a ; Tagalog -blk; n/a ; Tagbanwa -blk; n/a ; Tags -blk; n/a ; Tai_Le -blk; n/a ; Tai_Tham -blk; n/a ; Tai_Viet -blk; n/a ; Tai_Xuan_Jing_Symbols -blk; n/a ; Tamil -blk; n/a ; 
Telugu -blk; n/a ; Thaana -blk; n/a ; Thai -blk; n/a ; Tibetan -blk; n/a ; Tifinagh -blk; n/a ; Transport_And_Map_Symbols -blk; n/a ; Ugaritic -blk; n/a ; Unified_Canadian_Aboriginal_Syllabics; Canadian_Syllabics -blk; n/a ; Unified_Canadian_Aboriginal_Syllabics_Extended -blk; n/a ; Vai -blk; n/a ; Variation_Selectors -blk; n/a ; Variation_Selectors_Supplement -blk; n/a ; Vedic_Extensions -blk; n/a ; Vertical_Forms -blk; n/a ; Yi_Radicals -blk; n/a ; Yi_Syllables -blk; n/a ; Yijing_Hexagram_Symbols +blk; Aegean_Numbers ; Aegean_Numbers +blk; Alchemical ; Alchemical_Symbols +blk; Alphabetic_PF ; Alphabetic_Presentation_Forms +blk; Ancient_Greek_Music ; Ancient_Greek_Musical_Notation +blk; Ancient_Greek_Numbers ; Ancient_Greek_Numbers +blk; Ancient_Symbols ; Ancient_Symbols +blk; Arabic ; Arabic +blk; Arabic_Ext_A ; Arabic_Extended_A +blk; Arabic_Math ; Arabic_Mathematical_Alphabetic_Symbols +blk; Arabic_PF_A ; Arabic_Presentation_Forms_A ; Arabic_Presentation_Forms-A +blk; Arabic_PF_B ; Arabic_Presentation_Forms_B +blk; Arabic_Sup ; Arabic_Supplement +blk; Armenian ; Armenian +blk; Arrows ; Arrows +blk; ASCII ; Basic_Latin +blk; Avestan ; Avestan +blk; Balinese ; Balinese +blk; Bamum ; Bamum +blk; Bamum_Sup ; Bamum_Supplement +blk; Batak ; Batak +blk; Bengali ; Bengali +blk; Block_Elements ; Block_Elements +blk; Bopomofo ; Bopomofo +blk; Bopomofo_Ext ; Bopomofo_Extended +blk; Box_Drawing ; Box_Drawing +blk; Brahmi ; Brahmi +blk; Braille ; Braille_Patterns +blk; Buginese ; Buginese +blk; Buhid ; Buhid +blk; Byzantine_Music ; Byzantine_Musical_Symbols +blk; Carian ; Carian +blk; Chakma ; Chakma +blk; Cham ; Cham +blk; Cherokee ; Cherokee +blk; CJK ; CJK_Unified_Ideographs +blk; CJK_Compat ; CJK_Compatibility +blk; CJK_Compat_Forms ; CJK_Compatibility_Forms +blk; CJK_Compat_Ideographs ; CJK_Compatibility_Ideographs +blk; CJK_Compat_Ideographs_Sup ; CJK_Compatibility_Ideographs_Supplement +blk; CJK_Ext_A ; CJK_Unified_Ideographs_Extension_A +blk; CJK_Ext_B ; 
CJK_Unified_Ideographs_Extension_B +blk; CJK_Ext_C ; CJK_Unified_Ideographs_Extension_C +blk; CJK_Ext_D ; CJK_Unified_Ideographs_Extension_D +blk; CJK_Radicals_Sup ; CJK_Radicals_Supplement +blk; CJK_Strokes ; CJK_Strokes +blk; CJK_Symbols ; CJK_Symbols_And_Punctuation +blk; Compat_Jamo ; Hangul_Compatibility_Jamo +blk; Control_Pictures ; Control_Pictures +blk; Coptic ; Coptic +blk; Counting_Rod ; Counting_Rod_Numerals +blk; Cuneiform ; Cuneiform +blk; Cuneiform_Numbers ; Cuneiform_Numbers_And_Punctuation +blk; Currency_Symbols ; Currency_Symbols +blk; Cypriot_Syllabary ; Cypriot_Syllabary +blk; Cyrillic ; Cyrillic +blk; Cyrillic_Ext_A ; Cyrillic_Extended_A +blk; Cyrillic_Ext_B ; Cyrillic_Extended_B +blk; Cyrillic_Sup ; Cyrillic_Supplement ; Cyrillic_Supplementary +blk; Deseret ; Deseret +blk; Devanagari ; Devanagari +blk; Devanagari_Ext ; Devanagari_Extended +blk; Diacriticals ; Combining_Diacritical_Marks +blk; Diacriticals_For_Symbols ; Combining_Diacritical_Marks_For_Symbols; Combining_Marks_For_Symbols +blk; Diacriticals_Sup ; Combining_Diacritical_Marks_Supplement +blk; Dingbats ; Dingbats +blk; Domino ; Domino_Tiles +blk; Egyptian_Hieroglyphs ; Egyptian_Hieroglyphs +blk; Emoticons ; Emoticons +blk; Enclosed_Alphanum ; Enclosed_Alphanumerics +blk; Enclosed_Alphanum_Sup ; Enclosed_Alphanumeric_Supplement +blk; Enclosed_CJK ; Enclosed_CJK_Letters_And_Months +blk; Enclosed_Ideographic_Sup ; Enclosed_Ideographic_Supplement +blk; Ethiopic ; Ethiopic +blk; Ethiopic_Ext ; Ethiopic_Extended +blk; Ethiopic_Ext_A ; Ethiopic_Extended_A +blk; Ethiopic_Sup ; Ethiopic_Supplement +blk; Geometric_Shapes ; Geometric_Shapes +blk; Georgian ; Georgian +blk; Georgian_Sup ; Georgian_Supplement +blk; Glagolitic ; Glagolitic +blk; Gothic ; Gothic +blk; Greek ; Greek_And_Coptic +blk; Greek_Ext ; Greek_Extended +blk; Gujarati ; Gujarati +blk; Gurmukhi ; Gurmukhi +blk; Half_And_Full_Forms ; Halfwidth_And_Fullwidth_Forms +blk; Half_Marks ; Combining_Half_Marks +blk; Hangul ; 
Hangul_Syllables +blk; Hanunoo ; Hanunoo +blk; Hebrew ; Hebrew +blk; High_PU_Surrogates ; High_Private_Use_Surrogates +blk; High_Surrogates ; High_Surrogates +blk; Hiragana ; Hiragana +blk; IDC ; Ideographic_Description_Characters +blk; Imperial_Aramaic ; Imperial_Aramaic +blk; Indic_Number_Forms ; Common_Indic_Number_Forms +blk; Inscriptional_Pahlavi ; Inscriptional_Pahlavi +blk; Inscriptional_Parthian ; Inscriptional_Parthian +blk; IPA_Ext ; IPA_Extensions +blk; Jamo ; Hangul_Jamo +blk; Jamo_Ext_A ; Hangul_Jamo_Extended_A +blk; Jamo_Ext_B ; Hangul_Jamo_Extended_B +blk; Javanese ; Javanese +blk; Kaithi ; Kaithi +blk; Kana_Sup ; Kana_Supplement +blk; Kanbun ; Kanbun +blk; Kangxi ; Kangxi_Radicals +blk; Kannada ; Kannada +blk; Katakana ; Katakana +blk; Katakana_Ext ; Katakana_Phonetic_Extensions +blk; Kayah_Li ; Kayah_Li +blk; Kharoshthi ; Kharoshthi +blk; Khmer ; Khmer +blk; Khmer_Symbols ; Khmer_Symbols +blk; Lao ; Lao +blk; Latin_1_Sup ; Latin_1_Supplement ; Latin_1 +blk; Latin_Ext_A ; Latin_Extended_A +blk; Latin_Ext_Additional ; Latin_Extended_Additional +blk; Latin_Ext_B ; Latin_Extended_B +blk; Latin_Ext_C ; Latin_Extended_C +blk; Latin_Ext_D ; Latin_Extended_D +blk; Lepcha ; Lepcha +blk; Letterlike_Symbols ; Letterlike_Symbols +blk; Limbu ; Limbu +blk; Linear_B_Ideograms ; Linear_B_Ideograms +blk; Linear_B_Syllabary ; Linear_B_Syllabary +blk; Lisu ; Lisu +blk; Low_Surrogates ; Low_Surrogates +blk; Lycian ; Lycian +blk; Lydian ; Lydian +blk; Mahjong ; Mahjong_Tiles +blk; Malayalam ; Malayalam +blk; Mandaic ; Mandaic +blk; Math_Alphanum ; Mathematical_Alphanumeric_Symbols +blk; Math_Operators ; Mathematical_Operators +blk; Meetei_Mayek ; Meetei_Mayek +blk; Meetei_Mayek_Ext ; Meetei_Mayek_Extensions +blk; Meroitic_Cursive ; Meroitic_Cursive +blk; Meroitic_Hieroglyphs ; Meroitic_Hieroglyphs +blk; Miao ; Miao +blk; Misc_Arrows ; Miscellaneous_Symbols_And_Arrows +blk; Misc_Math_Symbols_A ; Miscellaneous_Mathematical_Symbols_A +blk; Misc_Math_Symbols_B ; 
Miscellaneous_Mathematical_Symbols_B +blk; Misc_Pictographs ; Miscellaneous_Symbols_And_Pictographs +blk; Misc_Symbols ; Miscellaneous_Symbols +blk; Misc_Technical ; Miscellaneous_Technical +blk; Modifier_Letters ; Spacing_Modifier_Letters +blk; Modifier_Tone_Letters ; Modifier_Tone_Letters +blk; Mongolian ; Mongolian +blk; Music ; Musical_Symbols +blk; Myanmar ; Myanmar +blk; Myanmar_Ext_A ; Myanmar_Extended_A +blk; NB ; No_Block +blk; New_Tai_Lue ; New_Tai_Lue +blk; NKo ; NKo +blk; Number_Forms ; Number_Forms +blk; OCR ; Optical_Character_Recognition +blk; Ogham ; Ogham +blk; Ol_Chiki ; Ol_Chiki +blk; Old_Italic ; Old_Italic +blk; Old_Persian ; Old_Persian +blk; Old_South_Arabian ; Old_South_Arabian +blk; Old_Turkic ; Old_Turkic +blk; Oriya ; Oriya +blk; Osmanya ; Osmanya +blk; Phags_Pa ; Phags_Pa +blk; Phaistos ; Phaistos_Disc +blk; Phoenician ; Phoenician +blk; Phonetic_Ext ; Phonetic_Extensions +blk; Phonetic_Ext_Sup ; Phonetic_Extensions_Supplement +blk; Playing_Cards ; Playing_Cards +blk; PUA ; Private_Use_Area ; Private_Use +blk; Punctuation ; General_Punctuation +blk; Rejang ; Rejang +blk; Rumi ; Rumi_Numeral_Symbols +blk; Runic ; Runic +blk; Samaritan ; Samaritan +blk; Saurashtra ; Saurashtra +blk; Sharada ; Sharada +blk; Shavian ; Shavian +blk; Sinhala ; Sinhala +blk; Small_Forms ; Small_Form_Variants +blk; Sora_Sompeng ; Sora_Sompeng +blk; Specials ; Specials +blk; Sundanese ; Sundanese +blk; Sundanese_Sup ; Sundanese_Supplement +blk; Sup_Arrows_A ; Supplemental_Arrows_A +blk; Sup_Arrows_B ; Supplemental_Arrows_B +blk; Sup_Math_Operators ; Supplemental_Mathematical_Operators +blk; Sup_PUA_A ; Supplementary_Private_Use_Area_A +blk; Sup_PUA_B ; Supplementary_Private_Use_Area_B +blk; Sup_Punctuation ; Supplemental_Punctuation +blk; Super_And_Sub ; Superscripts_And_Subscripts +blk; Syloti_Nagri ; Syloti_Nagri +blk; Syriac ; Syriac +blk; Tagalog ; Tagalog +blk; Tagbanwa ; Tagbanwa +blk; Tags ; Tags +blk; Tai_Le ; Tai_Le +blk; Tai_Tham ; Tai_Tham +blk; 
Tai_Viet ; Tai_Viet +blk; Tai_Xuan_Jing ; Tai_Xuan_Jing_Symbols +blk; Takri ; Takri +blk; Tamil ; Tamil +blk; Telugu ; Telugu +blk; Thaana ; Thaana +blk; Thai ; Thai +blk; Tibetan ; Tibetan +blk; Tifinagh ; Tifinagh +blk; Transport_And_Map ; Transport_And_Map_Symbols +blk; UCAS ; Unified_Canadian_Aboriginal_Syllabics; Canadian_Syllabics +blk; UCAS_Ext ; Unified_Canadian_Aboriginal_Syllabics_Extended +blk; Ugaritic ; Ugaritic +blk; Vai ; Vai +blk; Vedic_Ext ; Vedic_Extensions +blk; Vertical_Forms ; Vertical_Forms +blk; VS ; Variation_Selectors +blk; VS_Sup ; Variation_Selectors_Supplement +blk; Yi_Radicals ; Yi_Radicals +blk; Yi_Syllables ; Yi_Syllables +blk; Yijing ; Yijing_Hexagram_Symbols # Canonical_Combining_Class (ccc) -ccc; 0; NR ; Not_Reordered -ccc; 1; OV ; Overlay -ccc; 7; NK ; Nukta -ccc; 8; KV ; Kana_Voicing -ccc; 9; VR ; Virama -ccc; 200; ATBL ; Attached_Below_Left -ccc; 202; ATB ; Attached_Below -ccc; 214; ATA ; Attached_Above -ccc; 216; ATAR ; Attached_Above_Right -ccc; 218; BL ; Below_Left -ccc; 220; B ; Below -ccc; 222; BR ; Below_Right -ccc; 224; L ; Left -ccc; 226; R ; Right -ccc; 228; AL ; Above_Left -ccc; 230; A ; Above -ccc; 232; AR ; Above_Right -ccc; 233; DB ; Double_Below -ccc; 234; DA ; Double_Above -ccc; 240; IS ; Iota_Subscript +ccc; 0; NR ; Not_Reordered +ccc; 1; OV ; Overlay +ccc; 7; NK ; Nukta +ccc; 8; KV ; Kana_Voicing +ccc; 9; VR ; Virama +ccc; 10; CCC10 ; CCC10 +ccc; 11; CCC11 ; CCC11 +ccc; 12; CCC12 ; CCC12 +ccc; 13; CCC13 ; CCC13 +ccc; 14; CCC14 ; CCC14 +ccc; 15; CCC15 ; CCC15 +ccc; 16; CCC16 ; CCC16 +ccc; 17; CCC17 ; CCC17 +ccc; 18; CCC18 ; CCC18 +ccc; 19; CCC19 ; CCC19 +ccc; 20; CCC20 ; CCC20 +ccc; 21; CCC21 ; CCC21 +ccc; 22; CCC22 ; CCC22 +ccc; 23; CCC23 ; CCC23 +ccc; 24; CCC24 ; CCC24 +ccc; 25; CCC25 ; CCC25 +ccc; 26; CCC26 ; CCC26 +ccc; 27; CCC27 ; CCC27 +ccc; 28; CCC28 ; CCC28 +ccc; 29; CCC29 ; CCC29 +ccc; 30; CCC30 ; CCC30 +ccc; 31; CCC31 ; CCC31 +ccc; 32; CCC32 ; CCC32 +ccc; 33; CCC33 ; CCC33 +ccc; 34; CCC34 ; CCC34 +ccc; 
35; CCC35 ; CCC35 +ccc; 36; CCC36 ; CCC36 +ccc; 84; CCC84 ; CCC84 +ccc; 91; CCC91 ; CCC91 +ccc; 103; CCC103 ; CCC103 +ccc; 107; CCC107 ; CCC107 +ccc; 118; CCC118 ; CCC118 +ccc; 122; CCC122 ; CCC122 +ccc; 129; CCC129 ; CCC129 +ccc; 130; CCC130 ; CCC130 +ccc; 132; CCC133 ; CCC133 +ccc; 200; ATBL ; Attached_Below_Left +ccc; 202; ATB ; Attached_Below +ccc; 214; ATA ; Attached_Above +ccc; 216; ATAR ; Attached_Above_Right +ccc; 218; BL ; Below_Left +ccc; 220; B ; Below +ccc; 222; BR ; Below_Right +ccc; 224; L ; Left +ccc; 226; R ; Right +ccc; 228; AL ; Above_Left +ccc; 230; A ; Above +ccc; 232; AR ; Above_Right +ccc; 233; DB ; Double_Below +ccc; 234; DA ; Double_Above +ccc; 240; IS ; Iota_Subscript # Case_Folding (cf) @@ -359,53 +405,53 @@ ccc; 240; IS ; Iota_Subscript # Case_Ignorable (CI) -CI ; N ; No ; F ; False -CI ; Y ; Yes ; T ; True +CI ; N ; No ; F ; False +CI ; Y ; Yes ; T ; True # Cased (Cased) -Cased; N ; No ; F ; False -Cased; Y ; Yes ; T ; True +Cased; N ; No ; F ; False +Cased; Y ; Yes ; T ; True # Changes_When_Casefolded (CWCF) -CWCF; N ; No ; F ; False -CWCF; Y ; Yes ; T ; True +CWCF; N ; No ; F ; False +CWCF; Y ; Yes ; T ; True # Changes_When_Casemapped (CWCM) -CWCM; N ; No ; F ; False -CWCM; Y ; Yes ; T ; True +CWCM; N ; No ; F ; False +CWCM; Y ; Yes ; T ; True # Changes_When_Lowercased (CWL) -CWL; N ; No ; F ; False -CWL; Y ; Yes ; T ; True +CWL; N ; No ; F ; False +CWL; Y ; Yes ; T ; True # Changes_When_NFKC_Casefolded (CWKCF) -CWKCF; N ; No ; F ; False -CWKCF; Y ; Yes ; T ; True +CWKCF; N ; No ; F ; False +CWKCF; Y ; Yes ; T ; True # Changes_When_Titlecased (CWT) -CWT; N ; No ; F ; False -CWT; Y ; Yes ; T ; True +CWT; N ; No ; F ; False +CWT; Y ; Yes ; T ; True # Changes_When_Uppercased (CWU) -CWU; N ; No ; F ; False -CWU; Y ; Yes ; T ; True +CWU; N ; No ; F ; False +CWU; Y ; Yes ; T ; True # Composition_Exclusion (CE) -CE ; N ; No ; F ; False -CE ; Y ; Yes ; T ; True +CE ; N ; No ; F ; False +CE ; Y ; Yes ; T ; True # Dash (Dash) -Dash; N ; No ; F ; 
False -Dash; Y ; Yes ; T ; True +Dash; N ; No ; F ; False +Dash; Y ; Yes ; T ; True # Decomposition_Mapping (dm) @@ -413,73 +459,73 @@ Dash; Y ; Yes ; T # Decomposition_Type (dt) -dt ; Can ; Canonical ; can -dt ; Com ; Compat ; com -dt ; Enc ; Circle ; enc -dt ; Fin ; Final ; fin -dt ; Font ; font -dt ; Fra ; Fraction ; fra -dt ; Init ; Initial ; init -dt ; Iso ; Isolated ; iso -dt ; Med ; Medial ; med -dt ; Nar ; Narrow ; nar -dt ; Nb ; Nobreak ; nb -dt ; None ; none -dt ; Sml ; Small ; sml -dt ; Sqr ; Square ; sqr -dt ; Sub ; sub -dt ; Sup ; Super ; sup -dt ; Vert ; Vertical ; vert -dt ; Wide ; wide +dt ; Can ; Canonical ; can +dt ; Com ; Compat ; com +dt ; Enc ; Circle ; enc +dt ; Fin ; Final ; fin +dt ; Font ; Font ; font +dt ; Fra ; Fraction ; fra +dt ; Init ; Initial ; init +dt ; Iso ; Isolated ; iso +dt ; Med ; Medial ; med +dt ; Nar ; Narrow ; nar +dt ; Nb ; Nobreak ; nb +dt ; None ; None ; none +dt ; Sml ; Small ; sml +dt ; Sqr ; Square ; sqr +dt ; Sub ; Sub ; sub +dt ; Sup ; Super ; sup +dt ; Vert ; Vertical ; vert +dt ; Wide ; Wide ; wide # Default_Ignorable_Code_Point (DI) -DI ; N ; No ; F ; False -DI ; Y ; Yes ; T ; True +DI ; N ; No ; F ; False +DI ; Y ; Yes ; T ; True # Deprecated (Dep) -Dep; N ; No ; F ; False -Dep; Y ; Yes ; T ; True +Dep; N ; No ; F ; False +Dep; Y ; Yes ; T ; True # Diacritic (Dia) -Dia; N ; No ; F ; False -Dia; Y ; Yes ; T ; True +Dia; N ; No ; F ; False +Dia; Y ; Yes ; T ; True # East_Asian_Width (ea) -ea ; A ; Ambiguous -ea ; F ; Fullwidth -ea ; H ; Halfwidth -ea ; N ; Neutral -ea ; Na ; Narrow -ea ; W ; Wide +ea ; A ; Ambiguous +ea ; F ; Fullwidth +ea ; H ; Halfwidth +ea ; N ; Neutral +ea ; Na ; Narrow +ea ; W ; Wide # Expands_On_NFC (XO_NFC) -XO_NFC; N ; No ; F ; False -XO_NFC; Y ; Yes ; T ; True +XO_NFC; N ; No ; F ; False +XO_NFC; Y ; Yes ; T ; True # Expands_On_NFD (XO_NFD) -XO_NFD; N ; No ; F ; False -XO_NFD; Y ; Yes ; T ; True +XO_NFD; N ; No ; F ; False +XO_NFD; Y ; Yes ; T ; True # Expands_On_NFKC (XO_NFKC) -XO_NFKC; 
N ; No ; F ; False -XO_NFKC; Y ; Yes ; T ; True +XO_NFKC; N ; No ; F ; False +XO_NFKC; Y ; Yes ; T ; True # Expands_On_NFKD (XO_NFKD) -XO_NFKD; N ; No ; F ; False -XO_NFKD; Y ; Yes ; T ; True +XO_NFKD; N ; No ; F ; False +XO_NFKD; Y ; Yes ; T ; True # Extender (Ext) -Ext; N ; No ; F ; False -Ext; Y ; Yes ; T ; True +Ext; N ; No ; F ; False +Ext; Y ; Yes ; T ; True # FC_NFKC_Closure (FC_NFKC) @@ -487,118 +533,118 @@ Ext; Y ; Yes ; T # Full_Composition_Exclusion (Comp_Ex) -Comp_Ex; N ; No ; F ; False -Comp_Ex; Y ; Yes ; T ; True +Comp_Ex; N ; No ; F ; False +Comp_Ex; Y ; Yes ; T ; True # General_Category (gc) -gc ; C ; Other # Cc | Cf | Cn | Co | Cs -gc ; Cc ; Control ; cntrl -gc ; Cf ; Format -gc ; Cn ; Unassigned -gc ; Co ; Private_Use -gc ; Cs ; Surrogate -gc ; L ; Letter # Ll | Lm | Lo | Lt | Lu -gc ; LC ; Cased_Letter # Ll | Lt | Lu -gc ; Ll ; Lowercase_Letter -gc ; Lm ; Modifier_Letter -gc ; Lo ; Other_Letter -gc ; Lt ; Titlecase_Letter -gc ; Lu ; Uppercase_Letter -gc ; M ; Mark # Mc | Me | Mn -gc ; Mc ; Spacing_Mark -gc ; Me ; Enclosing_Mark -gc ; Mn ; Nonspacing_Mark -gc ; N ; Number # Nd | Nl | No -gc ; Nd ; Decimal_Number ; digit -gc ; Nl ; Letter_Number -gc ; No ; Other_Number -gc ; P ; Punctuation ; punct # Pc | Pd | Pe | Pf | Pi | Po | Ps -gc ; Pc ; Connector_Punctuation -gc ; Pd ; Dash_Punctuation -gc ; Pe ; Close_Punctuation -gc ; Pf ; Final_Punctuation -gc ; Pi ; Initial_Punctuation -gc ; Po ; Other_Punctuation -gc ; Ps ; Open_Punctuation -gc ; S ; Symbol # Sc | Sk | Sm | So -gc ; Sc ; Currency_Symbol -gc ; Sk ; Modifier_Symbol -gc ; Sm ; Math_Symbol -gc ; So ; Other_Symbol -gc ; Z ; Separator # Zl | Zp | Zs -gc ; Zl ; Line_Separator -gc ; Zp ; Paragraph_Separator -gc ; Zs ; Space_Separator +gc ; C ; Other # Cc | Cf | Cn | Co | Cs +gc ; Cc ; Control ; cntrl +gc ; Cf ; Format +gc ; Cn ; Unassigned +gc ; Co ; Private_Use +gc ; Cs ; Surrogate +gc ; L ; Letter # Ll | Lm | Lo | Lt | Lu +gc ; LC ; Cased_Letter # Ll | Lt | Lu +gc ; Ll ; Lowercase_Letter +gc 
; Lm ; Modifier_Letter +gc ; Lo ; Other_Letter +gc ; Lt ; Titlecase_Letter +gc ; Lu ; Uppercase_Letter +gc ; M ; Mark ; Combining_Mark # Mc | Me | Mn +gc ; Mc ; Spacing_Mark +gc ; Me ; Enclosing_Mark +gc ; Mn ; Nonspacing_Mark +gc ; N ; Number # Nd | Nl | No +gc ; Nd ; Decimal_Number ; digit +gc ; Nl ; Letter_Number +gc ; No ; Other_Number +gc ; P ; Punctuation ; punct # Pc | Pd | Pe | Pf | Pi | Po | Ps +gc ; Pc ; Connector_Punctuation +gc ; Pd ; Dash_Punctuation +gc ; Pe ; Close_Punctuation +gc ; Pf ; Final_Punctuation +gc ; Pi ; Initial_Punctuation +gc ; Po ; Other_Punctuation +gc ; Ps ; Open_Punctuation +gc ; S ; Symbol # Sc | Sk | Sm | So +gc ; Sc ; Currency_Symbol +gc ; Sk ; Modifier_Symbol +gc ; Sm ; Math_Symbol +gc ; So ; Other_Symbol +gc ; Z ; Separator # Zl | Zp | Zs +gc ; Zl ; Line_Separator +gc ; Zp ; Paragraph_Separator +gc ; Zs ; Space_Separator # Grapheme_Base (Gr_Base) -Gr_Base; N ; No ; F ; False -Gr_Base; Y ; Yes ; T ; True +Gr_Base; N ; No ; F ; False +Gr_Base; Y ; Yes ; T ; True # Grapheme_Cluster_Break (GCB) -GCB; CN ; Control -GCB; CR ; CR -GCB; EX ; Extend -GCB; L ; L -GCB; LF ; LF -GCB; LV ; LV -GCB; LVT ; LVT -GCB; PP ; Prepend -GCB; SM ; SpacingMark -GCB; T ; T -GCB; V ; V -GCB; XX ; Other +GCB; CN ; Control +GCB; CR ; CR +GCB; EX ; Extend +GCB; L ; L +GCB; LF ; LF +GCB; LV ; LV +GCB; LVT ; LVT +GCB; PP ; Prepend +GCB; SM ; SpacingMark +GCB; T ; T +GCB; V ; V +GCB; XX ; Other # Grapheme_Extend (Gr_Ext) -Gr_Ext; N ; No ; F ; False -Gr_Ext; Y ; Yes ; T ; True +Gr_Ext; N ; No ; F ; False +Gr_Ext; Y ; Yes ; T ; True # Grapheme_Link (Gr_Link) -Gr_Link; N ; No ; F ; False -Gr_Link; Y ; Yes ; T ; True +Gr_Link; N ; No ; F ; False +Gr_Link; Y ; Yes ; T ; True # Hangul_Syllable_Type (hst) -hst; L ; Leading_Jamo -hst; LV ; LV_Syllable -hst; LVT ; LVT_Syllable -hst; NA ; Not_Applicable -hst; T ; Trailing_Jamo -hst; V ; Vowel_Jamo +hst; L ; Leading_Jamo +hst; LV ; LV_Syllable +hst; LVT ; LVT_Syllable +hst; NA ; Not_Applicable +hst; T ; Trailing_Jamo 
+hst; V ; Vowel_Jamo # Hex_Digit (Hex) -Hex; N ; No ; F ; False -Hex; Y ; Yes ; T ; True +Hex; N ; No ; F ; False +Hex; Y ; Yes ; T ; True # Hyphen (Hyphen) -Hyphen; N ; No ; F ; False -Hyphen; Y ; Yes ; T ; True +Hyphen; N ; No ; F ; False +Hyphen; Y ; Yes ; T ; True # IDS_Binary_Operator (IDSB) -IDSB; N ; No ; F ; False -IDSB; Y ; Yes ; T ; True +IDSB; N ; No ; F ; False +IDSB; Y ; Yes ; T ; True # IDS_Trinary_Operator (IDST) -IDST; N ; No ; F ; False -IDST; Y ; Yes ; T ; True +IDST; N ; No ; F ; False +IDST; Y ; Yes ; T ; True # ID_Continue (IDC) -IDC; N ; No ; F ; False -IDC; Y ; Yes ; T ; True +IDC; N ; No ; F ; False +IDC; Y ; Yes ; T ; True # ID_Start (IDS) -IDS; N ; No ; F ; False -IDS; Y ; Yes ; T ; True +IDS; N ; No ; F ; False +IDS; Y ; Yes ; T ; True # ISO_Comment (isc) @@ -606,188 +652,233 @@ IDS; Y ; Yes ; T # Ideographic (Ideo) -Ideo; N ; No ; F ; False -Ideo; Y ; Yes ; T ; True +Ideo; N ; No ; F ; False +Ideo; Y ; Yes ; T ; True + +# Indic_Matra_Category (InMC) + +InMC; Bottom ; Bottom +InMC; Bottom_And_Right ; Bottom_And_Right +InMC; Invisible ; Invisible +InMC; Left ; Left +InMC; Left_And_Right ; Left_And_Right +InMC; NA ; NA +InMC; Overstruck ; Overstruck +InMC; Right ; Right +InMC; Top ; Top +InMC; Top_And_Bottom ; Top_And_Bottom +InMC; Top_And_Bottom_And_Right ; Top_And_Bottom_And_Right +InMC; Top_And_Left ; Top_And_Left +InMC; Top_And_Left_And_Right ; Top_And_Left_And_Right +InMC; Top_And_Right ; Top_And_Right +InMC; Visual_Order_Left ; Visual_Order_Left + +# Indic_Syllabic_Category (InSC) + +InSC; Avagraha ; Avagraha +InSC; Bindu ; Bindu +InSC; Consonant ; Consonant +InSC; Consonant_Dead ; Consonant_Dead +InSC; Consonant_Final ; Consonant_Final +InSC; Consonant_Head_Letter ; Consonant_Head_Letter +InSC; Consonant_Medial ; Consonant_Medial +InSC; Consonant_Placeholder ; Consonant_Placeholder +InSC; Consonant_Repha ; Consonant_Repha +InSC; Consonant_Subjoined ; Consonant_Subjoined +InSC; Modifying_Letter ; Modifying_Letter +InSC; Nukta ; Nukta 
+InSC; Other ; Other +InSC; Register_Shifter ; Register_Shifter +InSC; Tone_Letter ; Tone_Letter +InSC; Tone_Mark ; Tone_Mark +InSC; Virama ; Virama +InSC; Visarga ; Visarga +InSC; Vowel ; Vowel +InSC; Vowel_Dependent ; Vowel_Dependent +InSC; Vowel_Independent ; Vowel_Independent # Jamo_Short_Name (JSN) # @missing: 0000..10FFFF; Jamo_Short_Name; <none> -JSN; A ; A -JSN; AE ; AE -JSN; B ; B -JSN; BB ; BB -JSN; BS ; BS -JSN; C ; C -JSN; D ; D -JSN; DD ; DD -JSN; E ; E -JSN; EO ; EO -JSN; EU ; EU -JSN; G ; G -JSN; GG ; GG -JSN; GS ; GS -JSN; H ; H -JSN; I ; I -JSN; J ; J -JSN; JJ ; JJ -JSN; K ; K -JSN; L ; L -JSN; LB ; LB -JSN; LG ; LG -JSN; LH ; LH -JSN; LM ; LM -JSN; LP ; LP -JSN; LS ; LS -JSN; LT ; LT -JSN; M ; M -JSN; N ; N -JSN; NG ; NG -JSN; NH ; NH -JSN; NJ ; NJ -JSN; O ; O -JSN; OE ; OE -JSN; P ; P -JSN; R ; R -JSN; S ; S -JSN; SS ; SS -JSN; T ; T -JSN; U ; U -JSN; WA ; WA -JSN; WAE ; WAE -JSN; WE ; WE -JSN; WEO ; WEO -JSN; WI ; WI -JSN; YA ; YA -JSN; YAE ; YAE -JSN; YE ; YE -JSN; YEO ; YEO -JSN; YI ; YI -JSN; YO ; YO -JSN; YU ; YU +JSN; A ; A +JSN; AE ; AE +JSN; B ; B +JSN; BB ; BB +JSN; BS ; BS +JSN; C ; C +JSN; D ; D +JSN; DD ; DD +JSN; E ; E +JSN; EO ; EO +JSN; EU ; EU +JSN; G ; G +JSN; GG ; GG +JSN; GS ; GS +JSN; H ; H +JSN; I ; I +JSN; J ; J +JSN; JJ ; JJ +JSN; K ; K +JSN; L ; L +JSN; LB ; LB +JSN; LG ; LG +JSN; LH ; LH +JSN; LM ; LM +JSN; LP ; LP +JSN; LS ; LS +JSN; LT ; LT +JSN; M ; M +JSN; N ; N +JSN; NG ; NG +JSN; NH ; NH +JSN; NJ ; NJ +JSN; O ; O +JSN; OE ; OE +JSN; P ; P +JSN; R ; R +JSN; S ; S +JSN; SS ; SS +JSN; T ; T +JSN; U ; U +JSN; WA ; WA +JSN; WAE ; WAE +JSN; WE ; WE +JSN; WEO ; WEO +JSN; WI ; WI +JSN; YA ; YA +JSN; YAE ; YAE +JSN; YE ; YE +JSN; YEO ; YEO +JSN; YI ; YI +JSN; YO ; YO +JSN; YU ; YU # Join_Control (Join_C) -Join_C; N ; No ; F ; False -Join_C; Y ; Yes ; T ; True +Join_C; N ; No ; F ; False +Join_C; Y ; Yes ; T ; True # Joining_Group (jg) -jg ; n/a ; Ain -jg ; n/a ; Alaph -jg ; n/a ; Alef -jg ; n/a ; Beh -jg ; n/a ; Beth -jg ; 
n/a ; Burushaski_Yeh_Barree -jg ; n/a ; Dal -jg ; n/a ; Dalath_Rish -jg ; n/a ; E -jg ; n/a ; Farsi_Yeh -jg ; n/a ; Fe -jg ; n/a ; Feh -jg ; n/a ; Final_Semkath -jg ; n/a ; Gaf -jg ; n/a ; Gamal -jg ; n/a ; Hah -jg ; n/a ; He -jg ; n/a ; Heh -jg ; n/a ; Heh_Goal -jg ; n/a ; Heth -jg ; n/a ; Kaf -jg ; n/a ; Kaph -jg ; n/a ; Khaph -jg ; n/a ; Knotted_Heh -jg ; n/a ; Lam -jg ; n/a ; Lamadh -jg ; n/a ; Meem -jg ; n/a ; Mim -jg ; n/a ; No_Joining_Group -jg ; n/a ; Noon -jg ; n/a ; Nun -jg ; n/a ; Nya -jg ; n/a ; Pe -jg ; n/a ; Qaf -jg ; n/a ; Qaph -jg ; n/a ; Reh -jg ; n/a ; Reversed_Pe -jg ; n/a ; Sad -jg ; n/a ; Sadhe -jg ; n/a ; Seen -jg ; n/a ; Semkath -jg ; n/a ; Shin -jg ; n/a ; Swash_Kaf -jg ; n/a ; Syriac_Waw -jg ; n/a ; Tah -jg ; n/a ; Taw -jg ; n/a ; Teh_Marbuta -jg ; n/a ; Teh_Marbuta_Goal ; Hamza_On_Heh_Goal -jg ; n/a ; Teth -jg ; n/a ; Waw -jg ; n/a ; Yeh -jg ; n/a ; Yeh_Barree -jg ; n/a ; Yeh_With_Tail -jg ; n/a ; Yudh -jg ; n/a ; Yudh_He -jg ; n/a ; Zain -jg ; n/a ; Zhain +jg ; Ain ; Ain +jg ; Alaph ; Alaph +jg ; Alef ; Alef +jg ; Beh ; Beh +jg ; Beth ; Beth +jg ; Burushaski_Yeh_Barree ; Burushaski_Yeh_Barree +jg ; Dal ; Dal +jg ; Dalath_Rish ; Dalath_Rish +jg ; E ; E +jg ; Farsi_Yeh ; Farsi_Yeh +jg ; Fe ; Fe +jg ; Feh ; Feh +jg ; Final_Semkath ; Final_Semkath +jg ; Gaf ; Gaf +jg ; Gamal ; Gamal +jg ; Hah ; Hah +jg ; He ; He +jg ; Heh ; Heh +jg ; Heh_Goal ; Heh_Goal +jg ; Heth ; Heth +jg ; Kaf ; Kaf +jg ; Kaph ; Kaph +jg ; Khaph ; Khaph +jg ; Knotted_Heh ; Knotted_Heh +jg ; Lam ; Lam +jg ; Lamadh ; Lamadh +jg ; Meem ; Meem +jg ; Mim ; Mim +jg ; No_Joining_Group ; No_Joining_Group +jg ; Noon ; Noon +jg ; Nun ; Nun +jg ; Nya ; Nya +jg ; Pe ; Pe +jg ; Qaf ; Qaf +jg ; Qaph ; Qaph +jg ; Reh ; Reh +jg ; Reversed_Pe ; Reversed_Pe +jg ; Rohingya_Yeh ; Rohingya_Yeh +jg ; Sad ; Sad +jg ; Sadhe ; Sadhe +jg ; Seen ; Seen +jg ; Semkath ; Semkath +jg ; Shin ; Shin +jg ; Swash_Kaf ; Swash_Kaf +jg ; Syriac_Waw ; Syriac_Waw +jg ; Tah ; Tah +jg ; Taw ; Taw +jg ; 
Teh_Marbuta ; Teh_Marbuta +jg ; Teh_Marbuta_Goal ; Hamza_On_Heh_Goal +jg ; Teth ; Teth +jg ; Waw ; Waw +jg ; Yeh ; Yeh +jg ; Yeh_Barree ; Yeh_Barree +jg ; Yeh_With_Tail ; Yeh_With_Tail +jg ; Yudh ; Yudh +jg ; Yudh_He ; Yudh_He +jg ; Zain ; Zain +jg ; Zhain ; Zhain # Joining_Type (jt) -jt ; C ; Join_Causing -jt ; D ; Dual_Joining -jt ; L ; Left_Joining -jt ; R ; Right_Joining -jt ; T ; Transparent -jt ; U ; Non_Joining +jt ; C ; Join_Causing +jt ; D ; Dual_Joining +jt ; L ; Left_Joining +jt ; R ; Right_Joining +jt ; T ; Transparent +jt ; U ; Non_Joining # Line_Break (lb) -lb ; AI ; Ambiguous -lb ; AL ; Alphabetic -lb ; B2 ; Break_Both -lb ; BA ; Break_After -lb ; BB ; Break_Before -lb ; BK ; Mandatory_Break -lb ; CB ; Contingent_Break -lb ; CL ; Close_Punctuation -lb ; CM ; Combining_Mark -lb ; CP ; Close_Parenthesis -lb ; CR ; Carriage_Return -lb ; EX ; Exclamation -lb ; GL ; Glue -lb ; H2 ; H2 -lb ; H3 ; H3 -lb ; HY ; Hyphen -lb ; ID ; Ideographic -lb ; IN ; Inseparable ; Inseperable -lb ; IS ; Infix_Numeric -lb ; JL ; JL -lb ; JT ; JT -lb ; JV ; JV -lb ; LF ; Line_Feed -lb ; NL ; Next_Line -lb ; NS ; Nonstarter -lb ; NU ; Numeric -lb ; OP ; Open_Punctuation -lb ; PO ; Postfix_Numeric -lb ; PR ; Prefix_Numeric -lb ; QU ; Quotation -lb ; SA ; Complex_Context -lb ; SG ; Surrogate -lb ; SP ; Space -lb ; SY ; Break_Symbols -lb ; WJ ; Word_Joiner -lb ; XX ; Unknown -lb ; ZW ; ZWSpace +lb ; AI ; Ambiguous +lb ; AL ; Alphabetic +lb ; B2 ; Break_Both +lb ; BA ; Break_After +lb ; BB ; Break_Before +lb ; BK ; Mandatory_Break +lb ; CB ; Contingent_Break +lb ; CJ ; Conditional_Japanese_Starter +lb ; CL ; Close_Punctuation +lb ; CM ; Combining_Mark +lb ; CP ; Close_Parenthesis +lb ; CR ; Carriage_Return +lb ; EX ; Exclamation +lb ; GL ; Glue +lb ; H2 ; H2 +lb ; H3 ; H3 +lb ; HL ; Hebrew_Letter +lb ; HY ; Hyphen +lb ; ID ; Ideographic +lb ; IN ; Inseparable ; Inseperable +lb ; IS ; Infix_Numeric +lb ; JL ; JL +lb ; JT ; JT +lb ; JV ; JV +lb ; LF ; Line_Feed +lb ; NL ; Next_Line 
+lb ; NS ; Nonstarter +lb ; NU ; Numeric +lb ; OP ; Open_Punctuation +lb ; PO ; Postfix_Numeric +lb ; PR ; Prefix_Numeric +lb ; QU ; Quotation +lb ; SA ; Complex_Context +lb ; SG ; Surrogate +lb ; SP ; Space +lb ; SY ; Break_Symbols +lb ; WJ ; Word_Joiner +lb ; XX ; Unknown +lb ; ZW ; ZWSpace # Logical_Order_Exception (LOE) -LOE; N ; No ; F ; False -LOE; Y ; Yes ; T ; True +LOE; N ; No ; F ; False +LOE; Y ; Yes ; T ; True # Lowercase (Lower) -Lower; N ; No ; F ; False -Lower; Y ; Yes ; T ; True +Lower; N ; No ; F ; False +Lower; Y ; Yes ; T ; True # Lowercase_Mapping (lc) @@ -795,19 +886,19 @@ Lower; Y ; Yes ; T # Math (Math) -Math; N ; No ; F ; False -Math; Y ; Yes ; T ; True +Math; N ; No ; F ; False +Math; Y ; Yes ; T ; True # NFC_Quick_Check (NFC_QC) -NFC_QC; M ; Maybe -NFC_QC; N ; No -NFC_QC; Y ; Yes +NFC_QC; M ; Maybe +NFC_QC; N ; No +NFC_QC; Y ; Yes # NFD_Quick_Check (NFD_QC) -NFD_QC; N ; No -NFD_QC; Y ; Yes +NFD_QC; N ; No +NFD_QC; Y ; Yes # NFKC_Casefold (NFKC_CF) @@ -815,14 +906,14 @@ NFD_QC; Y ; Yes # NFKC_Quick_Check (NFKC_QC) -NFKC_QC; M ; Maybe -NFKC_QC; N ; No -NFKC_QC; Y ; Yes +NFKC_QC; M ; Maybe +NFKC_QC; N ; No +NFKC_QC; Y ; Yes # NFKD_Quick_Check (NFKD_QC) -NFKD_QC; N ; No -NFKD_QC; Y ; Yes +NFKD_QC; N ; No +NFKD_QC; Y ; Yes # Name (na) @@ -834,15 +925,15 @@ NFKD_QC; Y ; Yes # Noncharacter_Code_Point (NChar) -NChar; N ; No ; F ; False -NChar; Y ; Yes ; T ; True +NChar; N ; No ; F ; False +NChar; Y ; Yes ; T ; True # Numeric_Type (nt) -nt ; De ; Decimal -nt ; Di ; Digit -nt ; None ; None -nt ; Nu ; Numeric +nt ; De ; Decimal +nt ; Di ; Digit +nt ; None ; None +nt ; Nu ; Numeric # Numeric_Value (nv) @@ -850,186 +941,197 @@ nt ; Nu ; Numeric # Other_Alphabetic (OAlpha) -OAlpha; N ; No ; F ; False -OAlpha; Y ; Yes ; T ; True +OAlpha; N ; No ; F ; False +OAlpha; Y ; Yes ; T ; True # Other_Default_Ignorable_Code_Point (ODI) -ODI; N ; No ; F ; False -ODI; Y ; Yes ; T ; True +ODI; N ; No ; F ; False +ODI; Y ; Yes ; T ; True # Other_Grapheme_Extend 
(OGr_Ext) -OGr_Ext; N ; No ; F ; False -OGr_Ext; Y ; Yes ; T ; True +OGr_Ext; N ; No ; F ; False +OGr_Ext; Y ; Yes ; T ; True # Other_ID_Continue (OIDC) -OIDC; N ; No ; F ; False -OIDC; Y ; Yes ; T ; True +OIDC; N ; No ; F ; False +OIDC; Y ; Yes ; T ; True # Other_ID_Start (OIDS) -OIDS; N ; No ; F ; False -OIDS; Y ; Yes ; T ; True +OIDS; N ; No ; F ; False +OIDS; Y ; Yes ; T ; True # Other_Lowercase (OLower) -OLower; N ; No ; F ; False -OLower; Y ; Yes ; T ; True +OLower; N ; No ; F ; False +OLower; Y ; Yes ; T ; True # Other_Math (OMath) -OMath; N ; No ; F ; False -OMath; Y ; Yes ; T ; True +OMath; N ; No ; F ; False +OMath; Y ; Yes ; T ; True # Other_Uppercase (OUpper) -OUpper; N ; No ; F ; False -OUpper; Y ; Yes ; T ; True +OUpper; N ; No ; F ; False +OUpper; Y ; Yes ; T ; True # Pattern_Syntax (Pat_Syn) -Pat_Syn; N ; No ; F ; False -Pat_Syn; Y ; Yes ; T ; True +Pat_Syn; N ; No ; F ; False +Pat_Syn; Y ; Yes ; T ; True # Pattern_White_Space (Pat_WS) -Pat_WS; N ; No ; F ; False -Pat_WS; Y ; Yes ; T ; True +Pat_WS; N ; No ; F ; False +Pat_WS; Y ; Yes ; T ; True # Quotation_Mark (QMark) -QMark; N ; No ; F ; False -QMark; Y ; Yes ; T ; True +QMark; N ; No ; F ; False +QMark; Y ; Yes ; T ; True # Radical (Radical) -Radical; N ; No ; F ; False -Radical; Y ; Yes ; T ; True +Radical; N ; No ; F ; False +Radical; Y ; Yes ; T ; True # STerm (STerm) -STerm; N ; No ; F ; False -STerm; Y ; Yes ; T ; True +STerm; N ; No ; F ; False +STerm; Y ; Yes ; T ; True # Script (sc) -sc ; Arab ; Arabic -sc ; Armi ; Imperial_Aramaic -sc ; Armn ; Armenian -sc ; Avst ; Avestan -sc ; Bali ; Balinese -sc ; Bamu ; Bamum -sc ; Batk ; Batak -sc ; Beng ; Bengali -sc ; Bopo ; Bopomofo -sc ; Brah ; Brahmi -sc ; Brai ; Braille -sc ; Bugi ; Buginese -sc ; Buhd ; Buhid -sc ; Cans ; Canadian_Aboriginal -sc ; Cari ; Carian -sc ; Cham ; Cham -sc ; Cher ; Cherokee -sc ; Copt ; Coptic ; Qaac -sc ; Cprt ; Cypriot -sc ; Cyrl ; Cyrillic -sc ; Deva ; Devanagari -sc ; Dsrt ; Deseret -sc ; Egyp ; 
Egyptian_Hieroglyphs -sc ; Ethi ; Ethiopic -sc ; Geor ; Georgian -sc ; Glag ; Glagolitic -sc ; Goth ; Gothic -sc ; Grek ; Greek -sc ; Gujr ; Gujarati -sc ; Guru ; Gurmukhi -sc ; Hang ; Hangul -sc ; Hani ; Han -sc ; Hano ; Hanunoo -sc ; Hebr ; Hebrew -sc ; Hira ; Hiragana -sc ; Hrkt ; Katakana_Or_Hiragana -sc ; Ital ; Old_Italic -sc ; Java ; Javanese -sc ; Kali ; Kayah_Li -sc ; Kana ; Katakana -sc ; Khar ; Kharoshthi -sc ; Khmr ; Khmer -sc ; Knda ; Kannada -sc ; Kthi ; Kaithi -sc ; Lana ; Tai_Tham -sc ; Laoo ; Lao -sc ; Latn ; Latin -sc ; Lepc ; Lepcha -sc ; Limb ; Limbu -sc ; Linb ; Linear_B -sc ; Lisu ; Lisu -sc ; Lyci ; Lycian -sc ; Lydi ; Lydian -sc ; Mand ; Mandaic -sc ; Mlym ; Malayalam -sc ; Mong ; Mongolian -sc ; Mtei ; Meetei_Mayek -sc ; Mymr ; Myanmar -sc ; Nkoo ; Nko -sc ; Ogam ; Ogham -sc ; Olck ; Ol_Chiki -sc ; Orkh ; Old_Turkic -sc ; Orya ; Oriya -sc ; Osma ; Osmanya -sc ; Phag ; Phags_Pa -sc ; Phli ; Inscriptional_Pahlavi -sc ; Phnx ; Phoenician -sc ; Prti ; Inscriptional_Parthian -sc ; Rjng ; Rejang -sc ; Runr ; Runic -sc ; Samr ; Samaritan -sc ; Sarb ; Old_South_Arabian -sc ; Saur ; Saurashtra -sc ; Shaw ; Shavian -sc ; Sinh ; Sinhala -sc ; Sund ; Sundanese -sc ; Sylo ; Syloti_Nagri -sc ; Syrc ; Syriac -sc ; Tagb ; Tagbanwa -sc ; Tale ; Tai_Le -sc ; Talu ; New_Tai_Lue -sc ; Taml ; Tamil -sc ; Tavt ; Tai_Viet -sc ; Telu ; Telugu -sc ; Tfng ; Tifinagh -sc ; Tglg ; Tagalog -sc ; Thaa ; Thaana -sc ; Thai ; Thai -sc ; Tibt ; Tibetan -sc ; Ugar ; Ugaritic -sc ; Vaii ; Vai -sc ; Xpeo ; Old_Persian -sc ; Xsux ; Cuneiform -sc ; Yiii ; Yi -sc ; Zinh ; Inherited ; Qaai -sc ; Zyyy ; Common -sc ; Zzzz ; Unknown +sc ; Arab ; Arabic +sc ; Armi ; Imperial_Aramaic +sc ; Armn ; Armenian +sc ; Avst ; Avestan +sc ; Bali ; Balinese +sc ; Bamu ; Bamum +sc ; Batk ; Batak +sc ; Beng ; Bengali +sc ; Bopo ; Bopomofo +sc ; Brah ; Brahmi +sc ; Brai ; Braille +sc ; Bugi ; Buginese +sc ; Buhd ; Buhid +sc ; Cakm ; Chakma +sc ; Cans ; Canadian_Aboriginal +sc ; Cari ; Carian +sc ; 
Cham ; Cham +sc ; Cher ; Cherokee +sc ; Copt ; Coptic ; Qaac +sc ; Cprt ; Cypriot +sc ; Cyrl ; Cyrillic +sc ; Deva ; Devanagari +sc ; Dsrt ; Deseret +sc ; Egyp ; Egyptian_Hieroglyphs +sc ; Ethi ; Ethiopic +sc ; Geor ; Georgian +sc ; Glag ; Glagolitic +sc ; Goth ; Gothic +sc ; Grek ; Greek +sc ; Gujr ; Gujarati +sc ; Guru ; Gurmukhi +sc ; Hang ; Hangul +sc ; Hani ; Han +sc ; Hano ; Hanunoo +sc ; Hebr ; Hebrew +sc ; Hira ; Hiragana +sc ; Hrkt ; Katakana_Or_Hiragana +sc ; Ital ; Old_Italic +sc ; Java ; Javanese +sc ; Kali ; Kayah_Li +sc ; Kana ; Katakana +sc ; Khar ; Kharoshthi +sc ; Khmr ; Khmer +sc ; Knda ; Kannada +sc ; Kthi ; Kaithi +sc ; Lana ; Tai_Tham +sc ; Laoo ; Lao +sc ; Latn ; Latin +sc ; Lepc ; Lepcha +sc ; Limb ; Limbu +sc ; Linb ; Linear_B +sc ; Lisu ; Lisu +sc ; Lyci ; Lycian +sc ; Lydi ; Lydian +sc ; Mand ; Mandaic +sc ; Merc ; Meroitic_Cursive +sc ; Mero ; Meroitic_Hieroglyphs +sc ; Mlym ; Malayalam +sc ; Mong ; Mongolian +sc ; Mtei ; Meetei_Mayek +sc ; Mymr ; Myanmar +sc ; Nkoo ; Nko +sc ; Ogam ; Ogham +sc ; Olck ; Ol_Chiki +sc ; Orkh ; Old_Turkic +sc ; Orya ; Oriya +sc ; Osma ; Osmanya +sc ; Phag ; Phags_Pa +sc ; Phli ; Inscriptional_Pahlavi +sc ; Phnx ; Phoenician +sc ; Plrd ; Miao +sc ; Prti ; Inscriptional_Parthian +sc ; Rjng ; Rejang +sc ; Runr ; Runic +sc ; Samr ; Samaritan +sc ; Sarb ; Old_South_Arabian +sc ; Saur ; Saurashtra +sc ; Shaw ; Shavian +sc ; Shrd ; Sharada +sc ; Sinh ; Sinhala +sc ; Sora ; Sora_Sompeng +sc ; Sund ; Sundanese +sc ; Sylo ; Syloti_Nagri +sc ; Syrc ; Syriac +sc ; Tagb ; Tagbanwa +sc ; Takr ; Takri +sc ; Tale ; Tai_Le +sc ; Talu ; New_Tai_Lue +sc ; Taml ; Tamil +sc ; Tavt ; Tai_Viet +sc ; Telu ; Telugu +sc ; Tfng ; Tifinagh +sc ; Tglg ; Tagalog +sc ; Thaa ; Thaana +sc ; Thai ; Thai +sc ; Tibt ; Tibetan +sc ; Ugar ; Ugaritic +sc ; Vaii ; Vai +sc ; Xpeo ; Old_Persian +sc ; Xsux ; Cuneiform +sc ; Yiii ; Yi +sc ; Zinh ; Inherited ; Qaai +sc ; Zyyy ; Common +sc ; Zzzz ; Unknown + +# Script_Extensions (scx) + +# @missing: 
0000..10FFFF; Script_Extensions; <script> # Sentence_Break (SB) -SB ; AT ; ATerm -SB ; CL ; Close -SB ; CR ; CR -SB ; EX ; Extend -SB ; FO ; Format -SB ; LE ; OLetter -SB ; LF ; LF -SB ; LO ; Lower -SB ; NU ; Numeric -SB ; SC ; SContinue -SB ; SE ; Sep -SB ; SP ; Sp -SB ; ST ; STerm -SB ; UP ; Upper -SB ; XX ; Other +SB ; AT ; ATerm +SB ; CL ; Close +SB ; CR ; CR +SB ; EX ; Extend +SB ; FO ; Format +SB ; LE ; OLetter +SB ; LF ; LF +SB ; LO ; Lower +SB ; NU ; Numeric +SB ; SC ; SContinue +SB ; SE ; Sep +SB ; SP ; Sp +SB ; ST ; STerm +SB ; UP ; Upper +SB ; XX ; Other # Simple_Case_Folding (scf) @@ -1049,13 +1151,13 @@ SB ; XX ; Other # Soft_Dotted (SD) -SD ; N ; No ; F ; False -SD ; Y ; Yes ; T ; True +SD ; N ; No ; F ; False +SD ; Y ; Yes ; T ; True # Terminal_Punctuation (Term) -Term; N ; No ; F ; False -Term; Y ; Yes ; T ; True +Term; N ; No ; F ; False +Term; Y ; Yes ; T ; True # Titlecase_Mapping (tc) @@ -1067,13 +1169,13 @@ Term; Y ; Yes ; T # Unified_Ideograph (UIdeo) -UIdeo; N ; No ; F ; False -UIdeo; Y ; Yes ; T ; True +UIdeo; N ; No ; F ; False +UIdeo; Y ; Yes ; T ; True # Uppercase (Upper) -Upper; N ; No ; F ; False -Upper; Y ; Yes ; T ; True +Upper; N ; No ; F ; False +Upper; Y ; Yes ; T ; True # Uppercase_Mapping (uc) @@ -1081,39 +1183,39 @@ Upper; Y ; Yes ; T # Variation_Selector (VS) -VS ; N ; No ; F ; False -VS ; Y ; Yes ; T ; True +VS ; N ; No ; F ; False +VS ; Y ; Yes ; T ; True # White_Space (WSpace) -WSpace; N ; No ; F ; False -WSpace; Y ; Yes ; T ; True +WSpace; N ; No ; F ; False +WSpace; Y ; Yes ; T ; True # Word_Break (WB) -WB ; CR ; CR -WB ; EX ; ExtendNumLet -WB ; Extend ; Extend -WB ; FO ; Format -WB ; KA ; Katakana -WB ; LE ; ALetter -WB ; LF ; LF -WB ; MB ; MidNumLet -WB ; ML ; MidLetter -WB ; MN ; MidNum -WB ; NL ; Newline -WB ; NU ; Numeric -WB ; XX ; Other +WB ; CR ; CR +WB ; EX ; ExtendNumLet +WB ; Extend ; Extend +WB ; FO ; Format +WB ; KA ; Katakana +WB ; LE ; ALetter +WB ; LF ; LF +WB ; MB ; MidNumLet +WB ; ML ; MidLetter +WB ; MN 
; MidNum +WB ; NL ; Newline +WB ; NU ; Numeric +WB ; XX ; Other # XID_Continue (XIDC) -XIDC; N ; No ; F ; False -XIDC; Y ; Yes ; T ; True +XIDC; N ; No ; F ; False +XIDC; Y ; Yes ; T ; True # XID_Start (XIDS) -XIDS; N ; No ; F ; False -XIDS; Y ; Yes ; T ; True +XIDS; N ; No ; F ; False +XIDS; Y ; Yes ; T ; True # cjkAccountingNumeric (cjkAccountingNumeric) diff --git a/lib/unicore/PropertyAliases.txt b/lib/unicore/PropertyAliases.txt index 9a4a8c77b2..f891ff254e 100644 --- a/lib/unicore/PropertyAliases.txt +++ b/lib/unicore/PropertyAliases.txt @@ -1,15 +1,14 @@ -# PropertyAliases-6.0.0.txt -# Date: 2010-05-18, 00:49:38 GMT [MD] +# PropertyAliases-6.1.0.txt +# Date: 2011-12-07, 23:40:57 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ # # This file contains aliases for properties used in the UCD. # These names can be used for XML formats of UCD data, for regular-expression # property tests, and other programmatic textual descriptions of Unicode data. -# For information on which properties are normative, see UCD.html. # # The names may be translated in appropriate environments, and additional # aliases may be useful. @@ -27,7 +26,7 @@ # Loose matching should be applied to all property names and property values, with # the exception of String Property values. With loose matching of property names and # values, the case distinctions, whitespace, and '_' are ignored. For Numeric Property -# values, numeric equivalences are applied: thus "01.00" is equivalent to "1". +# values, numeric equivalencies are applied: thus "01.00" is equivalent to "1". # # NOTE: Property value names are NOT unique across properties. 
For example: # @@ -50,140 +49,143 @@ # ================================================ # Numeric Properties # ================================================ -cjkAccountingNumeric; kAccountingNumeric -cjkOtherNumeric; kOtherNumeric -cjkPrimaryNumeric; kPrimaryNumeric -nv ; Numeric_Value +cjkAccountingNumeric ; kAccountingNumeric +cjkOtherNumeric ; kOtherNumeric +cjkPrimaryNumeric ; kPrimaryNumeric +nv ; Numeric_Value # ================================================ # String Properties # ================================================ -bmg ; Bidi_Mirroring_Glyph -cf ; Case_Folding -cjkCompatibilityVariant; kCompatibilityVariant -dm ; Decomposition_Mapping -FC_NFKC ; FC_NFKC_Closure -lc ; Lowercase_Mapping -NFKC_CF ; NFKC_Casefold -scf ; Simple_Case_Folding ; sfc -slc ; Simple_Lowercase_Mapping -stc ; Simple_Titlecase_Mapping -suc ; Simple_Uppercase_Mapping -tc ; Titlecase_Mapping -uc ; Uppercase_Mapping +bmg ; Bidi_Mirroring_Glyph +cf ; Case_Folding +cjkCompatibilityVariant ; kCompatibilityVariant +dm ; Decomposition_Mapping +FC_NFKC ; FC_NFKC_Closure +lc ; Lowercase_Mapping +NFKC_CF ; NFKC_Casefold +scf ; Simple_Case_Folding ; sfc +slc ; Simple_Lowercase_Mapping +stc ; Simple_Titlecase_Mapping +suc ; Simple_Uppercase_Mapping +tc ; Titlecase_Mapping +uc ; Uppercase_Mapping # ================================================ # Miscellaneous Properties # ================================================ -cjkIICore ; kIICore -cjkIRG_GSource; kIRG_GSource -cjkIRG_HSource; kIRG_HSource -cjkIRG_JSource; kIRG_JSource -cjkIRG_KPSource; kIRG_KPSource -cjkIRG_KSource; kIRG_KSource -cjkIRG_MSource; kIRG_MSource -cjkIRG_TSource; kIRG_TSource -cjkIRG_USource; kIRG_USource -cjkIRG_VSource; kIRG_VSource -cjkRSUnicode; kRSUnicode ; Unicode_Radical_Stroke; URS -isc ; ISO_Comment -JSN ; Jamo_Short_Name -na ; Name -na1 ; Unicode_1_Name -Name_Alias; Name_Alias +cjkIICore ; kIICore +cjkIRG_GSource ; kIRG_GSource +cjkIRG_HSource ; kIRG_HSource +cjkIRG_JSource ; kIRG_JSource 
+cjkIRG_KPSource ; kIRG_KPSource +cjkIRG_KSource ; kIRG_KSource +cjkIRG_MSource ; kIRG_MSource +cjkIRG_TSource ; kIRG_TSource +cjkIRG_USource ; kIRG_USource +cjkIRG_VSource ; kIRG_VSource +cjkRSUnicode ; kRSUnicode ; Unicode_Radical_Stroke; URS +isc ; ISO_Comment +JSN ; Jamo_Short_Name +na ; Name +na1 ; Unicode_1_Name +Name_Alias ; Name_Alias +scx ; Script_Extensions # ================================================ # Catalog Properties # ================================================ -age ; Age -blk ; Block -sc ; Script +age ; Age +blk ; Block +sc ; Script # ================================================ # Enumerated Properties # ================================================ -bc ; Bidi_Class -ccc ; Canonical_Combining_Class -dt ; Decomposition_Type -ea ; East_Asian_Width -gc ; General_Category -GCB ; Grapheme_Cluster_Break -hst ; Hangul_Syllable_Type -jg ; Joining_Group -jt ; Joining_Type -lb ; Line_Break -NFC_QC ; NFC_Quick_Check -NFD_QC ; NFD_Quick_Check -NFKC_QC ; NFKC_Quick_Check -NFKD_QC ; NFKD_Quick_Check -nt ; Numeric_Type -SB ; Sentence_Break -WB ; Word_Break +bc ; Bidi_Class +ccc ; Canonical_Combining_Class +dt ; Decomposition_Type +ea ; East_Asian_Width +gc ; General_Category +GCB ; Grapheme_Cluster_Break +hst ; Hangul_Syllable_Type +InMC ; Indic_Matra_Category +InSC ; Indic_Syllabic_Category +jg ; Joining_Group +jt ; Joining_Type +lb ; Line_Break +NFC_QC ; NFC_Quick_Check +NFD_QC ; NFD_Quick_Check +NFKC_QC ; NFKC_Quick_Check +NFKD_QC ; NFKD_Quick_Check +nt ; Numeric_Type +SB ; Sentence_Break +WB ; Word_Break # ================================================ # Binary Properties # ================================================ -AHex ; ASCII_Hex_Digit -Alpha ; Alphabetic -Bidi_C ; Bidi_Control -Bidi_M ; Bidi_Mirrored -Cased ; Cased -CE ; Composition_Exclusion -CI ; Case_Ignorable -Comp_Ex ; Full_Composition_Exclusion -CWCF ; Changes_When_Casefolded -CWCM ; Changes_When_Casemapped -CWKCF ; Changes_When_NFKC_Casefolded -CWL ; 
Changes_When_Lowercased -CWT ; Changes_When_Titlecased -CWU ; Changes_When_Uppercased -Dash ; Dash -Dep ; Deprecated -DI ; Default_Ignorable_Code_Point -Dia ; Diacritic -Ext ; Extender -Gr_Base ; Grapheme_Base -Gr_Ext ; Grapheme_Extend -Gr_Link ; Grapheme_Link -Hex ; Hex_Digit -Hyphen ; Hyphen -IDC ; ID_Continue -Ideo ; Ideographic -IDS ; ID_Start -IDSB ; IDS_Binary_Operator -IDST ; IDS_Trinary_Operator -Join_C ; Join_Control -LOE ; Logical_Order_Exception -Lower ; Lowercase -Math ; Math -NChar ; Noncharacter_Code_Point -OAlpha ; Other_Alphabetic -ODI ; Other_Default_Ignorable_Code_Point -OGr_Ext ; Other_Grapheme_Extend -OIDC ; Other_ID_Continue -OIDS ; Other_ID_Start -OLower ; Other_Lowercase -OMath ; Other_Math -OUpper ; Other_Uppercase -Pat_Syn ; Pattern_Syntax -Pat_WS ; Pattern_White_Space -QMark ; Quotation_Mark -Radical ; Radical -SD ; Soft_Dotted -STerm ; STerm -Term ; Terminal_Punctuation -UIdeo ; Unified_Ideograph -Upper ; Uppercase -VS ; Variation_Selector -WSpace ; White_Space ; space -XIDC ; XID_Continue -XIDS ; XID_Start -XO_NFC ; Expands_On_NFC -XO_NFD ; Expands_On_NFD -XO_NFKC ; Expands_On_NFKC -XO_NFKD ; Expands_On_NFKD +AHex ; ASCII_Hex_Digit +Alpha ; Alphabetic +Bidi_C ; Bidi_Control +Bidi_M ; Bidi_Mirrored +Cased ; Cased +CE ; Composition_Exclusion +CI ; Case_Ignorable +Comp_Ex ; Full_Composition_Exclusion +CWCF ; Changes_When_Casefolded +CWCM ; Changes_When_Casemapped +CWKCF ; Changes_When_NFKC_Casefolded +CWL ; Changes_When_Lowercased +CWT ; Changes_When_Titlecased +CWU ; Changes_When_Uppercased +Dash ; Dash +Dep ; Deprecated +DI ; Default_Ignorable_Code_Point +Dia ; Diacritic +Ext ; Extender +Gr_Base ; Grapheme_Base +Gr_Ext ; Grapheme_Extend +Gr_Link ; Grapheme_Link +Hex ; Hex_Digit +Hyphen ; Hyphen +IDC ; ID_Continue +Ideo ; Ideographic +IDS ; ID_Start +IDSB ; IDS_Binary_Operator +IDST ; IDS_Trinary_Operator +Join_C ; Join_Control +LOE ; Logical_Order_Exception +Lower ; Lowercase +Math ; Math +NChar ; Noncharacter_Code_Point +OAlpha ; 
Other_Alphabetic +ODI ; Other_Default_Ignorable_Code_Point +OGr_Ext ; Other_Grapheme_Extend +OIDC ; Other_ID_Continue +OIDS ; Other_ID_Start +OLower ; Other_Lowercase +OMath ; Other_Math +OUpper ; Other_Uppercase +Pat_Syn ; Pattern_Syntax +Pat_WS ; Pattern_White_Space +QMark ; Quotation_Mark +Radical ; Radical +SD ; Soft_Dotted +STerm ; STerm +Term ; Terminal_Punctuation +UIdeo ; Unified_Ideograph +Upper ; Uppercase +VS ; Variation_Selector +WSpace ; White_Space ; space +XIDC ; XID_Continue +XIDS ; XID_Start +XO_NFC ; Expands_On_NFC +XO_NFD ; Expands_On_NFD +XO_NFKC ; Expands_On_NFKC +XO_NFKD ; Expands_On_NFKD # ================================================ -# Total: 112 +# Total: 115 # EOF diff --git a/lib/unicore/ReadMe.txt b/lib/unicore/ReadMe.txt index 89d5cb39ef..9fd93d8960 100644 --- a/lib/unicore/ReadMe.txt +++ b/lib/unicore/ReadMe.txt @@ -1,7 +1,7 @@ -# Date: 2010-10-05, 16:26:38 PDT [KW] +# Date: 2012-01-26, 22:03:00 GMT [KW] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2012 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # # For documentation, see NamesList.html, @@ -10,4 +10,4 @@ # This directory contains final data files -for the Unicode Character Database (UCD) for Unicode 6.0.0. +for the Unicode Character Database (UCD) for Unicode 6.1.0. diff --git a/lib/unicore/ScriptExtensions.txt b/lib/unicore/ScriptExtensions.txt index 0ddb8bcb73..301ccc21f4 100644 --- a/lib/unicore/ScriptExtensions.txt +++ b/lib/unicore/ScriptExtensions.txt @@ -1,15 +1,22 @@ -# ScriptExtensions-6.0.0.txt -# Date: 2010-08-30, 01:48:36 GMT [MD] +# ScriptExtensions-6.1.0.txt +# Date: 2011-12-05, 22:51:22 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. 
# For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ +# The Script_Extensions property indicates which characters are commonly used +# with a limited number of scripts, but with more than one. +# The property is provisional: values are expected to change over time as more information becomes available. +# For each code point, there is one or more property values. Each such value is a Script property value. +# For more information, see: +# UAX #24: http://www.unicode.org/reports/tr24/ and +# UAX #44: http://www.unicode.org/reports/tr44/ # -# The Script Extensions contain data about characters that belong to multiple scripts. -# This data is provisional, and expected to change over time, as more information becomes available. -# The script values are space-delimited short values, such as Hang for Hangul. -# For more information, see UAX #24: http://www.unicode.org/reports/tr24/. +# All code points not explicitly listed for Script_Extensions +# have as their value the corresponding Script property value +# +# @missing: 0000..10FFFF; <script> # ================================================ @@ -19,11 +26,10 @@ # Script_Extensions=Arab Syrc -0640 ; Arab Syrc # Lm ARABIC TATWEEL 064B..0655 ; Arab Syrc # Mn [11] ARABIC FATHATAN..ARABIC HAMZA BELOW 0670 ; Arab Syrc # Mn ARABIC LETTER SUPERSCRIPT ALEF -# Total code points: 13 +# Total code points: 12 # ================================================ @@ -53,6 +59,16 @@ FDFD ; Arab Thaa # So ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHE # ================================================ +# Script_Extensions=Cprt Linb + +10100..10102 ; Cprt Linb # Po [3] AEGEAN WORD SEPARATOR LINE..AEGEAN CHECK MARK +10107..10133 ; Cprt Linb # No [45] AEGEAN NUMBER ONE..AEGEAN NUMBER NINETY THOUSAND +10137..1013F ; Cprt Linb # So [9] AEGEAN WEIGHT BASE UNIT..AEGEAN MEASURE THIRD SUBUNIT + +# Total code points: 57 + +# ================================================ + # 
Script_Extensions=Hira Kana 3031..3035 ; Hira Kana # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF @@ -76,6 +92,14 @@ FF9E..FF9F ; Hira Kana # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFW # ================================================ +# Script_Extensions=Arab Mand Syrc + +0640 ; Arab Mand Syrc # Lm ARABIC TATWEEL + +# Total code points: 1 + +# ================================================ + # Script_Extensions=Arab Syrc Thaa 060C ; Arab Syrc Thaa # Po ARABIC COMMA @@ -140,6 +164,17 @@ FE45..FE46 ; Bopo Hang Hani Hira Kana # Po [2] SESAME DOT..WHITE SESAME DOT # ================================================ +# Script_Extensions=Deva Gujr Guru Kthi Takr + +A830..A835 ; Deva Gujr Guru Kthi Takr # No [6] NORTH INDIC FRACTION ONE QUARTER..NORTH INDIC FRACTION THREE SIXTEENTHS +A836..A837 ; Deva Gujr Guru Kthi Takr # So [2] NORTH INDIC QUARTER MARK..NORTH INDIC PLACEHOLDER MARK +A838 ; Deva Gujr Guru Kthi Takr # Sc NORTH INDIC RUPEE MARK +A839 ; Deva Gujr Guru Kthi Takr # So NORTH INDIC QUANTITY MARK + +# Total code points: 10 + +# ================================================ + # Script_Extensions=Bopo Hang Hani Hira Kana Yiii 3001..3002 ; Bopo Hang Hani Hira Kana Yiii # Po [2] IDEOGRAPHIC COMMA..IDEOGRAPHIC FULL STOP diff --git a/lib/unicore/Scripts.txt b/lib/unicore/Scripts.txt index 70a670703a..2516f889d6 100644 --- a/lib/unicore/Scripts.txt +++ b/lib/unicore/Scripts.txt @@ -1,8 +1,8 @@ -# Scripts-6.0.0.txt -# Date: 2010-08-19, 00:48:47 GMT [MD] +# Scripts-6.1.0.txt +# Date: 2011-11-27, 05:10:50 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. 
# For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ @@ -47,7 +47,8 @@ 00A0 ; Common # Zs NO-BREAK SPACE 00A1 ; Common # Po INVERTED EXCLAMATION MARK 00A2..00A5 ; Common # Sc [4] CENT SIGN..YEN SIGN -00A6..00A7 ; Common # So [2] BROKEN BAR..SECTION SIGN +00A6 ; Common # So BROKEN BAR +00A7 ; Common # Po SECTION SIGN 00A8 ; Common # Sk DIAERESIS 00A9 ; Common # So COPYRIGHT SIGN 00AB ; Common # Pi LEFT-POINTING DOUBLE ANGLE QUOTATION MARK @@ -60,8 +61,7 @@ 00B2..00B3 ; Common # No [2] SUPERSCRIPT TWO..SUPERSCRIPT THREE 00B4 ; Common # Sk ACUTE ACCENT 00B5 ; Common # L& MICRO SIGN -00B6 ; Common # So PILCROW SIGN -00B7 ; Common # Po MIDDLE DOT +00B6..00B7 ; Common # Po [2] PILCROW SIGN..MIDDLE DOT 00B8 ; Common # Sk CEDILLA 00B9 ; Common # No SUPERSCRIPT ONE 00BB ; Common # Pf RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK @@ -90,7 +90,6 @@ 0660..0669 ; Common # Nd [10] ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE 06DD ; Common # Cf ARABIC END OF AYAH 0964..0965 ; Common # Po [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA -0970 ; Common # Po DEVANAGARI ABBREVIATION SIGN 0E3F ; Common # Sc THAI CURRENCY SYMBOL BAHT 0FD5..0FD8 ; Common # So [4] RIGHT-FACING SVASTI SIGN..LEFT-FACING SVASTI SIGN WITH DOTS 10FB ; Common # Po GEORGIAN PARAGRAPH SEPARATOR @@ -102,7 +101,8 @@ 1CE1 ; Common # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA 1CE9..1CEC ; Common # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL 1CEE..1CF1 ; Common # Lo [4] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ANUSVARA UBHAYATO MUKHA -1CF2 ; Common # Mc VEDIC SIGN ARDHAVISARGA +1CF2..1CF3 ; Common # Mc [2] VEDIC SIGN ARDHAVISARGA..VEDIC SIGN ROTATED ARDHAVISARGA +1CF5..1CF6 ; Common # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA 2000..200A ; Common # Zs [11] EN QUAD..HAIR SPACE 200B ; Common # Cf ZERO WIDTH SPACE 200E..200F ; Common # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK @@ -247,9 
+247,7 @@ 27C0..27C4 ; Common # Sm [5] THREE DIMENSIONAL ANGLE..OPEN SUPERSET 27C5 ; Common # Ps LEFT S-SHAPED BAG DELIMITER 27C6 ; Common # Pe RIGHT S-SHAPED BAG DELIMITER -27C7..27CA ; Common # Sm [4] OR WITH DOT INSIDE..VERTICAL BAR WITH HORIZONTAL STROKE -27CC ; Common # Sm LONG DIVISION -27CE..27E5 ; Common # Sm [24] SQUARED LOGICAL AND..WHITE SQUARE WITH RIGHTWARDS TICK +27C7..27E5 ; Common # Sm [31] OR WITH DOT INSIDE..WHITE SQUARE WITH RIGHTWARDS TICK 27E6 ; Common # Ps MATHEMATICAL LEFT WHITE SQUARE BRACKET 27E7 ; Common # Pe MATHEMATICAL RIGHT WHITE SQUARE BRACKET 27E8 ; Common # Ps MATHEMATICAL LEFT ANGLE BRACKET @@ -329,7 +327,8 @@ 2E29 ; Common # Pe RIGHT DOUBLE PARENTHESIS 2E2A..2E2E ; Common # Po [5] TWO DOTS OVER ONE DOT PUNCTUATION..REVERSED QUESTION MARK 2E2F ; Common # Lm VERTICAL TILDE -2E30..2E31 ; Common # Po [2] RING POINT..WORD SEPARATOR MIDDLE DOT +2E30..2E39 ; Common # Po [10] RING POINT..TOP HALF SECTION SIGN +2E3A..2E3B ; Common # Pd [2] TWO-EM DASH..THREE-EM DASH 2FF0..2FFB ; Common # So [12] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID 3000 ; Common # Zs IDEOGRAPHIC SPACE 3001..3003 ; Common # Po [3] IDEOGRAPHIC COMMA..DITTO MARK @@ -373,7 +372,9 @@ 3196..319F ; Common # So [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK 31C0..31E3 ; Common # So [36] CJK STROKE T..CJK STROKE Q 3220..3229 ; Common # No [10] PARENTHESIZED IDEOGRAPH ONE..PARENTHESIZED IDEOGRAPH TEN -322A..3250 ; Common # So [39] PARENTHESIZED IDEOGRAPH MOON..PARTNERSHIP SIGN +322A..3247 ; Common # So [30] PARENTHESIZED IDEOGRAPH MOON..CIRCLED IDEOGRAPH KOTO +3248..324F ; Common # No [8] CIRCLED NUMBER TEN ON BLACK SQUARE..CIRCLED NUMBER EIGHTY ON BLACK SQUARE +3250 ; Common # So PARTNERSHIP SIGN 3251..325F ; Common # No [15] CIRCLED NUMBER TWENTY ONE..CIRCLED NUMBER THIRTY FIVE 327F ; Common # So KOREAN STANDARD SYMBOL 3280..3289 ; Common # No [10] CIRCLED IDEOGRAPH ONE..CIRCLED IDEOGRAPH TEN @@ -481,8 
+482,7 @@ FFE9..FFEC ; Common # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS FFED..FFEE ; Common # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE FFF9..FFFB ; Common # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATION TERMINATOR FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER -10100..10101 ; Common # Po [2] AEGEAN WORD SEPARATOR LINE..AEGEAN WORD SEPARATOR DOT -10102 ; Common # So AEGEAN CHECK MARK +10100..10102 ; Common # Po [3] AEGEAN WORD SEPARATOR LINE..AEGEAN CHECK MARK 10107..10133 ; Common # No [45] AEGEAN NUMBER ONE..AEGEAN NUMBER NINETY THOUSAND 10137..1013F ; Common # So [9] AEGEAN WEIGHT BASE UNIT..AEGEAN MEASURE THIRD SUBUNIT 10190..1019B ; Common # So [12] ROMAN SEXTANS SIGN..ROMAN CENTURIAL SIGN @@ -548,7 +548,7 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR 1F0D1..1F0DF ; Common # So [15] PLAYING CARD ACE OF CLUBS..PLAYING CARD WHITE JOKER 1F100..1F10A ; Common # No [11] DIGIT ZERO FULL STOP..DIGIT NINE COMMA 1F110..1F12E ; Common # So [31] PARENTHESIZED LATIN CAPITAL LETTER A..CIRCLED WZ -1F130..1F169 ; Common # So [58] SQUARED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z +1F130..1F16B ; Common # So [60] SQUARED LATIN CAPITAL LETTER A..RAISED MD SIGN 1F170..1F19A ; Common # So [43] NEGATIVE SQUARED LATIN CAPITAL LETTER A..SQUARED VS 1F1E6..1F1FF ; Common # So [26] REGIONAL INDICATOR SYMBOL LETTER A..REGIONAL INDICATOR SYMBOL LETTER Z 1F201..1F202 ; Common # So [2] SQUARED KATAKANA KOKO..SQUARED KATAKANA SA @@ -567,33 +567,23 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR 1F442..1F4F7 ; Common # So [182] EAR..CAMERA 1F4F9..1F4FC ; Common # So [4] VIDEO CAMERA..VIDEOCASSETTE 1F500..1F53D ; Common # So [62] TWISTED RIGHTWARDS ARROWS..DOWN-POINTING SMALL RED TRIANGLE +1F540..1F543 ; Common # So [4] CIRCLED CROSS POMMEE..NOTCHED LEFT SEMICIRCLE WITH THREE DOTS 1F550..1F567 ; Common # So [24] CLOCK FACE ONE 
OCLOCK..CLOCK FACE TWELVE-THIRTY -1F5FB..1F5FF ; Common # So [5] MOUNT FUJI..MOYAI -1F601..1F610 ; Common # So [16] GRINNING FACE WITH SMILING EYES..NEUTRAL FACE -1F612..1F614 ; Common # So [3] UNAMUSED FACE..PENSIVE FACE -1F616 ; Common # So CONFOUNDED FACE -1F618 ; Common # So FACE THROWING A KISS -1F61A ; Common # So KISSING FACE WITH CLOSED EYES -1F61C..1F61E ; Common # So [3] FACE WITH STUCK-OUT TONGUE AND WINKING EYE..DISAPPOINTED FACE -1F620..1F625 ; Common # So [6] ANGRY FACE..DISAPPOINTED BUT RELIEVED FACE -1F628..1F62B ; Common # So [4] FEARFUL FACE..TIRED FACE -1F62D ; Common # So LOUDLY CRYING FACE -1F630..1F633 ; Common # So [4] FACE WITH OPEN MOUTH AND COLD SWEAT..FLUSHED FACE -1F635..1F640 ; Common # So [12] DIZZY FACE..WEARY CAT FACE +1F5FB..1F640 ; Common # So [70] MOUNT FUJI..WEARY CAT FACE 1F645..1F64F ; Common # So [11] FACE WITH NO GOOD GESTURE..PERSON WITH FOLDED HANDS 1F680..1F6C5 ; Common # So [70] ROCKET..LEFT LUGGAGE 1F700..1F773 ; Common # So [116] ALCHEMICAL SYMBOL FOR QUINTESSENCE..ALCHEMICAL SYMBOL FOR HALF OUNCE E0001 ; Common # Cf LANGUAGE TAG E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG -# Total code points: 6379 +# Total code points: 6412 # ================================================ 0041..005A ; Latin # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z 0061..007A ; Latin # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z -00AA ; Latin # L& FEMININE ORDINAL INDICATOR -00BA ; Latin # L& MASCULINE ORDINAL INDICATOR +00AA ; Latin # Lo FEMININE ORDINAL INDICATOR +00BA ; Latin # Lo MASCULINE ORDINAL INDICATOR 00C0..00D6 ; Latin # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS 00D8..00F6 ; Latin # L& [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS 00F8..01BA ; Latin # L& [195] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL @@ -607,7 +597,7 @@ E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG 02E0..02E4 ; Latin # Lm [5] MODIFIER 
LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP 1D00..1D25 ; Latin # L& [38] LATIN LETTER SMALL CAPITAL A..LATIN LETTER AIN 1D2C..1D5C ; Latin # Lm [49] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL AIN -1D62..1D65 ; Latin # L& [4] LATIN SUBSCRIPT SMALL LETTER I..LATIN SUBSCRIPT SMALL LETTER V +1D62..1D65 ; Latin # Lm [4] LATIN SUBSCRIPT SMALL LETTER I..LATIN SUBSCRIPT SMALL LETTER V 1D6B..1D77 ; Latin # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G 1D79..1D9A ; Latin # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK 1D9B..1DBE ; Latin # Lm [36] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL EZH @@ -621,22 +611,23 @@ E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG 2160..2182 ; Latin # Nl [35] ROMAN NUMERAL ONE..ROMAN NUMERAL TEN THOUSAND 2183..2184 ; Latin # L& [2] ROMAN NUMERAL REVERSED ONE HUNDRED..LATIN SMALL LETTER REVERSED C 2185..2188 ; Latin # Nl [4] ROMAN NUMERAL SIX LATE FORM..ROMAN NUMERAL ONE HUNDRED THOUSAND -2C60..2C7C ; Latin # L& [29] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN SUBSCRIPT SMALL LETTER J -2C7D ; Latin # Lm MODIFIER LETTER CAPITAL V +2C60..2C7B ; Latin # L& [28] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN LETTER SMALL CAPITAL TURNED E +2C7C..2C7D ; Latin # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V 2C7E..2C7F ; Latin # L& [2] LATIN CAPITAL LETTER S WITH SWASH TAIL..LATIN CAPITAL LETTER Z WITH SWASH TAIL A722..A76F ; Latin # L& [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN SMALL LETTER CON A770 ; Latin # Lm MODIFIER LETTER US A771..A787 ; Latin # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T A78B..A78E ; Latin # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT -A790..A791 ; Latin # L& [2] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER N WITH DESCENDER -A7A0..A7A9 ; Latin # L& [10] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN SMALL LETTER S WITH OBLIQUE 
STROKE +A790..A793 ; Latin # L& [4] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER C WITH BAR +A7A0..A7AA ; Latin # L& [11] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN CAPITAL LETTER H WITH HOOK +A7F8..A7F9 ; Latin # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE A7FA ; Latin # L& LATIN LETTER SMALL CAPITAL TURNED M A7FB..A7FF ; Latin # Lo [5] LATIN EPIGRAPHIC LETTER REVERSED F..LATIN EPIGRAPHIC LETTER ARCHAIC M FB00..FB06 ; Latin # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST FF21..FF3A ; Latin # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z FF41..FF5A ; Latin # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z -# Total code points: 1267 +# Total code points: 1272 # ================================================ @@ -656,7 +647,7 @@ FF41..FF5A ; Latin # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 03F7..03FF ; Greek # L& [9] GREEK CAPITAL LETTER SHO..GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL 1D26..1D2A ; Greek # L& [5] GREEK LETTER SMALL CAPITAL GAMMA..GREEK LETTER SMALL CAPITAL PSI 1D5D..1D61 ; Greek # Lm [5] MODIFIER LETTER SMALL BETA..MODIFIER LETTER SMALL CHI -1D66..1D6A ; Greek # L& [5] GREEK SUBSCRIPT SMALL LETTER BETA..GREEK SUBSCRIPT SMALL LETTER CHI +1D66..1D6A ; Greek # Lm [5] GREEK SUBSCRIPT SMALL LETTER BETA..GREEK SUBSCRIPT SMALL LETTER CHI 1DBF ; Greek # Lm MODIFIER LETTER SMALL THETA 1F00..1F15 ; Greek # L& [22] GREEK SMALL LETTER ALPHA WITH PSILI..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA 1F18..1F1D ; Greek # L& [6] GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA @@ -710,12 +701,13 @@ A66E ; Cyrillic # Lo CYRILLIC LETTER MULTIOCULAR O A66F ; Cyrillic # Mn COMBINING CYRILLIC VZMET A670..A672 ; Cyrillic # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN A673 ; Cyrillic # Po SLAVONIC ASTERISK -A67C..A67D ; Cyrillic # Mn [2] COMBINING 
CYRILLIC KAVYKA..COMBINING CYRILLIC PAYEROK +A674..A67D ; Cyrillic # Mn [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK A67E ; Cyrillic # Po CYRILLIC KAVYKA A67F ; Cyrillic # Lm CYRILLIC PAYEROK A680..A697 ; Cyrillic # L& [24] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER SHWE +A69F ; Cyrillic # Mn COMBINING CYRILLIC LETTER IOTIFIED E -# Total code points: 408 +# Total code points: 417 # ================================================ @@ -724,9 +716,10 @@ A680..A697 ; Cyrillic # L& [24] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL 055A..055F ; Armenian # Po [6] ARMENIAN APOSTROPHE..ARMENIAN ABBREVIATION MARK 0561..0587 ; Armenian # L& [39] ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LIGATURE ECH YIWN 058A ; Armenian # Pd ARMENIAN HYPHEN +058F ; Armenian # Sc ARMENIAN DRAM SIGN FB13..FB17 ; Armenian # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH -# Total code points: 90 +# Total code points: 91 # ================================================ @@ -757,7 +750,7 @@ FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATU # ================================================ -0600..0603 ; Arabic # Cf [4] ARABIC NUMBER SIGN..ARABIC SIGN SAFHA +0600..0604 ; Arabic # Cf [5] ARABIC NUMBER SIGN..ARABIC SIGN SAMVAT 0606..0608 ; Arabic # Sm [3] ARABIC-INDIC CUBE ROOT..ARABIC RAY 0609..060A ; Arabic # Po [2] ARABIC-INDIC PER MILLE SIGN..ARABIC-INDIC PER TEN THOUSAND SIGN 060B ; Arabic # Sc AFGHANI SIGN @@ -786,6 +779,9 @@ FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATU 06FD..06FE ; Arabic # So [2] ARABIC SIGN SINDHI AMPERSAND..ARABIC SIGN SINDHI POSTPOSITION MEN 06FF ; Arabic # Lo ARABIC LETTER HEH WITH INVERTED V 0750..077F ; Arabic # Lo [48] ARABIC LETTER BEH WITH THREE DOTS HORIZONTALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS ABOVE +08A0 ; Arabic # Lo ARABIC LETTER BEH WITH SMALL V BELOW +08A2..08AC ; Arabic # Lo [11] ARABIC LETTER JEEM WITH TWO DOTS ABOVE..ARABIC LETTER 
ROHINGYA YEH +08E4..08FE ; Arabic # Mn [27] ARABIC CURLY FATHA..ARABIC DAMMA WITH DOT FB50..FBB1 ; Arabic # Lo [98] ARABIC LETTER ALEF WASLA ISOLATED FORM..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM FBB2..FBC1 ; Arabic # Sk [16] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL SMALL TAH BELOW FBD3..FD3D ; Arabic # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM @@ -796,8 +792,42 @@ FDFC ; Arabic # Sc RIAL SIGN FE70..FE74 ; Arabic # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN ISOLATED FORM FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM 10E60..10E7E ; Arabic # No [31] RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS - -# Total code points: 1051 +1EE00..1EE03 ; Arabic # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; Arabic # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; Arabic # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; Arabic # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; Arabic # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; Arabic # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; Arabic # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; Arabic # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; Arabic # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; Arabic # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; Arabic # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; Arabic # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; Arabic # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; Arabic # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; Arabic # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; Arabic # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; Arabic # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; Arabic # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; 
Arabic # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; Arabic # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; Arabic # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; Arabic # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; Arabic # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; Arabic # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; Arabic # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; Arabic # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; Arabic # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; Arabic # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; Arabic # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; Arabic # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; Arabic # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; Arabic # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; Arabic # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN +1EEF0..1EEF1 ; Arabic # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL + +# Total code points: 1234 # ================================================ @@ -838,6 +868,7 @@ FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LA 0958..0961 ; Devanagari # Lo [10] DEVANAGARI LETTER QA..DEVANAGARI LETTER VOCALIC LL 0962..0963 ; Devanagari # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL 0966..096F ; Devanagari # Nd [10] DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE +0970 ; Devanagari # Po DEVANAGARI ABBREVIATION SIGN 0971 ; Devanagari # Lm DEVANAGARI SIGN HIGH 
SPACING DOT 0972..0977 ; Devanagari # Lo [6] DEVANAGARI LETTER CANDRA A..DEVANAGARI LETTER UUE 0979..097F ; Devanagari # Lo [7] DEVANAGARI LETTER ZHA..DEVANAGARI LETTER BBA @@ -846,7 +877,7 @@ A8F2..A8F7 ; Devanagari # Lo [6] DEVANAGARI SIGN SPACING CANDRABINDU..DEVAN A8F8..A8FA ; Devanagari # Po [3] DEVANAGARI SIGN PUSHPIKA..DEVANAGARI CARET A8FB ; Devanagari # Lo DEVANAGARI HEADSTROKE -# Total code points: 150 +# Total code points: 151 # ================================================ @@ -927,9 +958,10 @@ A8FB ; Devanagari # Lo DEVANAGARI HEADSTROKE 0AE0..0AE1 ; Gujarati # Lo [2] GUJARATI LETTER VOCALIC RR..GUJARATI LETTER VOCALIC LL 0AE2..0AE3 ; Gujarati # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL 0AE6..0AEF ; Gujarati # Nd [10] GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE +0AF0 ; Gujarati # Po GUJARATI ABBREVIATION SIGN 0AF1 ; Gujarati # Sc GUJARATI RUPEE SIGN -# Total code points: 83 +# Total code points: 84 # ================================================ @@ -1119,16 +1151,18 @@ A8FB ; Devanagari # Lo DEVANAGARI HEADSTROKE 0EC6 ; Lao # Lm LAO KO LA 0EC8..0ECD ; Lao # Mn [6] LAO TONE MAI EK..LAO NIGGAHITA 0ED0..0ED9 ; Lao # Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE -0EDC..0EDD ; Lao # Lo [2] LAO HO NO..LAO HO MO +0EDC..0EDF ; Lao # Lo [4] LAO HO NO..LAO LETTER KHMU NYO -# Total code points: 65 +# Total code points: 67 # ================================================ 0F00 ; Tibetan # Lo TIBETAN SYLLABLE OM 0F01..0F03 ; Tibetan # So [3] TIBETAN MARK GTER YIG MGO TRUNCATED A..TIBETAN MARK GTER YIG MGO -UM GTER TSHEG MA 0F04..0F12 ; Tibetan # Po [15] TIBETAN MARK INITIAL YIG MGO MDUN MA..TIBETAN MARK RGYA GRAM SHAD -0F13..0F17 ; Tibetan # So [5] TIBETAN MARK CARET -DZUD RTAGS ME LONG CAN..TIBETAN ASTROLOGICAL SIGN SGRA GCAN -CHAR RTAGS +0F13 ; Tibetan # So TIBETAN MARK CARET -DZUD RTAGS ME LONG CAN +0F14 ; Tibetan # Po TIBETAN MARK GTER TSHEG +0F15..0F17 ; Tibetan # So [3] TIBETAN LOGOTYPE SIGN CHAD RTAGS..TIBETAN ASTROLOGICAL SIGN SGRA 
GCAN -CHAR RTAGS 0F18..0F19 ; Tibetan # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS 0F1A..0F1F ; Tibetan # So [6] TIBETAN SIGN RDEL DKAR GCIG..TIBETAN SIGN RDEL DKAR RDEL NAG 0F20..0F29 ; Tibetan # Nd [10] TIBETAN DIGIT ZERO..TIBETAN DIGIT NINE @@ -1212,16 +1246,21 @@ AA7B ; Myanmar # Mc MYANMAR SIGN PAO KAREN TONE # ================================================ 10A0..10C5 ; Georgian # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; Georgian # L& GEORGIAN CAPITAL LETTER YN +10CD ; Georgian # L& GEORGIAN CAPITAL LETTER AEN 10D0..10FA ; Georgian # Lo [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN 10FC ; Georgian # Lm MODIFIER LETTER GEORGIAN NAR +10FD..10FF ; Georgian # Lo [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN 2D00..2D25 ; Georgian # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE +2D27 ; Georgian # L& GEORGIAN SMALL LETTER YN +2D2D ; Georgian # L& GEORGIAN SMALL LETTER AEN -# Total code points: 120 +# Total code points: 127 # ================================================ 1100..11FF ; Hangul # Lo [256] HANGUL CHOSEONG KIYEOK..HANGUL JONGSEONG SSANGNIEUN -302E..302F ; Hangul # Mn [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK +302E..302F ; Hangul # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK 3131..318E ; Hangul # Lo [94] HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE 3200..321E ; Hangul # So [31] PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED KOREAN CHARACTER O HU 3260..327E ; Hangul # So [31] CIRCLED HANGUL KIYEOK..CIRCLED HANGUL IEUNG U @@ -1256,8 +1295,7 @@ FFDA..FFDC ; Hangul # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL L 1312..1315 ; Ethiopic # Lo [4] ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE 1318..135A ; Ethiopic # Lo [67] ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA 135D..135F ; Ethiopic # Mn [3] ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK..ETHIOPIC COMBINING GEMINATION MARK -1360 ; Ethiopic # So ETHIOPIC SECTION MARK 
-1361..1368 ; Ethiopic # Po [8] ETHIOPIC WORDSPACE..ETHIOPIC PARAGRAPH SEPARATOR +1360..1368 ; Ethiopic # Po [9] ETHIOPIC SECTION MARK..ETHIOPIC PARAGRAPH SEPARATOR 1369..137C ; Ethiopic # No [20] ETHIOPIC DIGIT ONE..ETHIOPIC NUMBER TEN THOUSAND 1380..138F ; Ethiopic # Lo [16] ETHIOPIC SYLLABLE SEBATBEIT MWA..ETHIOPIC SYLLABLE PWE 1390..1399 ; Ethiopic # So [10] ETHIOPIC TONAL MARK YIZET..ETHIOPIC TONAL MARK KURT @@ -1313,7 +1351,7 @@ AB28..AB2E ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE BBA..ETHIOPIC SYLLABLE BBO # ================================================ 1780..17B3 ; Khmer # Lo [52] KHMER LETTER KA..KHMER INDEPENDENT VOWEL QAU -17B4..17B5 ; Khmer # Cf [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA +17B4..17B5 ; Khmer # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA 17B6 ; Khmer # Mc KHMER VOWEL SIGN AA 17B7..17BD ; Khmer # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA 17BE..17C5 ; Khmer # Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU @@ -1393,16 +1431,15 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK 3038..303A ; Han # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY 303B ; Han # Lm VERTICAL IDEOGRAPHIC ITERATION MARK 3400..4DB5 ; Han # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5 -4E00..9FCB ; Han # Lo [20940] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCB -F900..FA2D ; Han # Lo [302] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA2D -FA30..FA6D ; Han # Lo [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D +4E00..9FCC ; Han # Lo [20941] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCC +F900..FA6D ; Han # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D FA70..FAD9 ; Han # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 20000..2A6D6 ; Han # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6 2A700..2B734 ; Han # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED 
IDEOGRAPH-2B734 2B740..2B81D ; Han # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D 2F800..2FA1D ; Han # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D -# Total code points: 75960 +# Total code points: 75963 # ================================================ @@ -1447,6 +1484,7 @@ A490..A4C6 ; Yi # So [55] YI RADICAL QOT..YI RADICAL KE 1CD4..1CE0 ; Inherited # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA 1CE2..1CE8 ; Inherited # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL 1CED ; Inherited # Mn VEDIC SIGN TIRYAK +1CF4 ; Inherited # Mn VEDIC TONE CANDRA ABOVE 1DC0..1DE6 ; Inherited # Mn [39] COMBINING DOTTED GRAVE ACCENT..COMBINING LATIN SMALL LETTER Z 1DFC..1DFF ; Inherited # Mn [4] COMBINING DOUBLE INVERTED BREVE BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW 200C..200D ; Inherited # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER @@ -1466,7 +1504,7 @@ FE20..FE26 ; Inherited # Mn [7] COMBINING LIGATURE LEFT HALF..COMBINING CON 1D1AA..1D1AD ; Inherited # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO E0100..E01EF ; Inherited # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 523 +# Total code points: 524 # ================================================ @@ -1587,11 +1625,12 @@ E0100..E01EF ; Inherited # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-2 2CE5..2CEA ; Coptic # So [6] COPTIC SYMBOL MI RO..COPTIC SYMBOL SHIMA SIMA 2CEB..2CEE ; Coptic # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA 2CEF..2CF1 ; Coptic # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS +2CF2..2CF3 ; Coptic # L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI 2CF9..2CFC ; Coptic # Po [4] COPTIC OLD NUBIAN FULL STOP..COPTIC OLD NUBIAN VERSE DIVIDER 2CFD ; Coptic # No COPTIC FRACTION ONE HALF 2CFE..2CFF 
; Coptic # Po [2] COPTIC FULL STOP..COPTIC MORPHOLOGICAL DIVIDER -# Total code points: 135 +# Total code points: 137 # ================================================ @@ -1614,12 +1653,12 @@ E0100..E01EF ; Inherited # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-2 # ================================================ -2D30..2D65 ; Tifinagh # Lo [54] TIFINAGH LETTER YA..TIFINAGH LETTER YAZZ +2D30..2D67 ; Tifinagh # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO 2D6F ; Tifinagh # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK 2D70 ; Tifinagh # Po TIFINAGH SEPARATOR MARK 2D7F ; Tifinagh # Mn TIFINAGH CONSONANT JOINER -# Total code points: 57 +# Total code points: 59 # ================================================ @@ -1729,10 +1768,14 @@ A874..A877 ; Phags_Pa # Po [4] PHAGS-PA SINGLE HEAD MARK..PHAGS-PA MARK DOU 1BA6..1BA7 ; Sundanese # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG 1BA8..1BA9 ; Sundanese # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG 1BAA ; Sundanese # Mc SUNDANESE SIGN PAMAAEH +1BAB ; Sundanese # Mn SUNDANESE SIGN VIRAMA +1BAC..1BAD ; Sundanese # Mc [2] SUNDANESE CONSONANT SIGN PASANGAN MA..SUNDANESE CONSONANT SIGN PASANGAN WA 1BAE..1BAF ; Sundanese # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA 1BB0..1BB9 ; Sundanese # Nd [10] SUNDANESE DIGIT ZERO..SUNDANESE DIGIT NINE +1BBA..1BBF ; Sundanese # Lo [6] SUNDANESE AVAGRAHA..SUNDANESE LETTER FINAL M +1CC0..1CC7 ; Sundanese # Po [8] SUNDANESE PUNCTUATION BINDU SURYA..SUNDANESE PUNCTUATION BINDU BA SATANGA -# Total code points: 55 +# Total code points: 72 # ================================================ @@ -1940,6 +1983,15 @@ A9DE..A9DF ; Javanese # Po [2] JAVANESE PADA TIRTA TUMETES..JAVANESE PADA I # ================================================ +AAE0..AAEA ; Meetei_Mayek # Lo [11] MEETEI MAYEK LETTER E..MEETEI MAYEK LETTER SSA +AAEB ; Meetei_Mayek # Mc MEETEI MAYEK VOWEL SIGN II +AAEC..AAED ; Meetei_Mayek # Mn [2] MEETEI MAYEK VOWEL SIGN 
UU..MEETEI MAYEK VOWEL SIGN AAI +AAEE..AAEF ; Meetei_Mayek # Mc [2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU +AAF0..AAF1 ; Meetei_Mayek # Po [2] MEETEI MAYEK CHEIKHAN..MEETEI MAYEK AHANG KHUDAM +AAF2 ; Meetei_Mayek # Lo MEETEI MAYEK ANJI +AAF3..AAF4 ; Meetei_Mayek # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK +AAF5 ; Meetei_Mayek # Mc MEETEI MAYEK VOWEL SIGN VISARGA +AAF6 ; Meetei_Mayek # Mn MEETEI MAYEK VIRAMA ABC0..ABE2 ; Meetei_Mayek # Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM ABE3..ABE4 ; Meetei_Mayek # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP ABE5 ; Meetei_Mayek # Mn MEETEI MAYEK VOWEL SIGN ANAP @@ -1951,7 +2003,7 @@ ABEC ; Meetei_Mayek # Mc MEETEI MAYEK LUM IYEK ABED ; Meetei_Mayek # Mn MEETEI MAYEK APUN IYEK ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE -# Total code points: 56 +# Total code points: 79 # ================================================ @@ -2040,4 +2092,74 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI # Total code points: 29 +# ================================================ + +11100..11102 ; Chakma # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11103..11126 ; Chakma # Lo [36] CHAKMA LETTER AA..CHAKMA LETTER HAA +11127..1112B ; Chakma # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112C ; Chakma # Mc CHAKMA VOWEL SIGN E +1112D..11134 ; Chakma # Mn [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA +11136..1113F ; Chakma # Nd [10] CHAKMA DIGIT ZERO..CHAKMA DIGIT NINE +11140..11143 ; Chakma # Po [4] CHAKMA SECTION MARK..CHAKMA QUESTION MARK + +# Total code points: 67 + +# ================================================ + +109A0..109B7 ; Meroitic_Cursive # Lo [24] MEROITIC CURSIVE LETTER A..MEROITIC CURSIVE LETTER DA +109BE..109BF ; Meroitic_Cursive # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN + +# Total code points: 26 + +# 
================================================ + +10980..1099F ; Meroitic_Hieroglyphs # Lo [32] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC HIEROGLYPHIC SYMBOL VIDJ-2 + +# Total code points: 32 + +# ================================================ + +16F00..16F44 ; Miao # Lo [69] MIAO LETTER PA..MIAO LETTER HHA +16F50 ; Miao # Lo MIAO LETTER NASALIZATION +16F51..16F7E ; Miao # Mc [46] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN NG +16F8F..16F92 ; Miao # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW +16F93..16F9F ; Miao # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 + +# Total code points: 133 + +# ================================================ + +11180..11181 ; Sharada # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +11182 ; Sharada # Mc SHARADA SIGN VISARGA +11183..111B2 ; Sharada # Lo [48] SHARADA LETTER A..SHARADA LETTER HA +111B3..111B5 ; Sharada # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II +111B6..111BE ; Sharada # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +111BF..111C0 ; Sharada # Mc [2] SHARADA VOWEL SIGN AU..SHARADA SIGN VIRAMA +111C1..111C4 ; Sharada # Lo [4] SHARADA SIGN AVAGRAHA..SHARADA OM +111C5..111C8 ; Sharada # Po [4] SHARADA DANDA..SHARADA SEPARATOR +111D0..111D9 ; Sharada # Nd [10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE + +# Total code points: 83 + +# ================================================ + +110D0..110E8 ; Sora_Sompeng # Lo [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE +110F0..110F9 ; Sora_Sompeng # Nd [10] SORA SOMPENG DIGIT ZERO..SORA SOMPENG DIGIT NINE + +# Total code points: 35 + +# ================================================ + +11680..116AA ; Takri # Lo [43] TAKRI LETTER A..TAKRI LETTER RRA +116AB ; Takri # Mn TAKRI SIGN ANUSVARA +116AC ; Takri # Mc TAKRI SIGN VISARGA +116AD ; Takri # Mn TAKRI VOWEL SIGN AA +116AE..116AF ; Takri # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II +116B0..116B5 ; Takri # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +116B6 ; Takri # Mc TAKRI SIGN VIRAMA +116B7 ; 
Takri # Mn TAKRI SIGN NUKTA +116C0..116C9 ; Takri # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE + +# Total code points: 66 + # EOF diff --git a/lib/unicore/SpecialCasing.txt b/lib/unicore/SpecialCasing.txt index 62a0ec9f3a..d650b6d9dc 100644 --- a/lib/unicore/SpecialCasing.txt +++ b/lib/unicore/SpecialCasing.txt @@ -1,8 +1,8 @@ -# SpecialCasing-6.0.0.txt -# Date: 2010-05-18, 00:49:39 GMT [MD] +# SpecialCasing-6.1.0.txt +# Date: 2011-11-27, 05:10:51 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ # @@ -47,7 +47,9 @@ # * Additional contexts # * Additional fields # ================================================================================ -# @missing 0000..10FFFF; <slc>; <stc>; <suc> + +# @missing: 0000..10FFFF; <slc>; <stc>; <suc>; + # ================================================================================ # Unconditional mappings # ================================================================================ diff --git a/lib/unicore/StandardizedVariants.txt b/lib/unicore/StandardizedVariants.txt index a55af629b4..331b831e32 100644 --- a/lib/unicore/StandardizedVariants.txt +++ b/lib/unicore/StandardizedVariants.txt @@ -1,13 +1,13 @@ -# StandardizedVariants-6.0.0.txt -# Date: 2010-05-19, 11:22:00 PDT [KW] +# StandardizedVariants-6.1.0.txt +# Date: 2011-11-10, 20:28:00 GMT [KW, LI] # -# Specification of the variant sequences that are defined in the +# Specification of the variation sequences that are defined in the # Unicode Standard. # # This file is a normative contributory data file in the # Unicode Character Database. # -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # # Standardized variation sequences are defined in this file. 
@@ -21,9 +21,9 @@ # # For more information on standardized variation sequences, # see Section 16.4, Variation Selectors, -# in The Unicode Standard, Version 6.0. +# in The Unicode Standard, Version 6.1. # -# For more information on the Ideographic Variation Database +# For more information on the Ideographic Variation Database, # see http://www.unicode.org/ivd/ # # Format: @@ -44,7 +44,7 @@ 2273 FE00; following the slant of the lower leg; # GREATER-THAN OR EQUIVALENT TO # The following two entries were originally defined for Unicode 3.2 # but were determined to be in error and were removed from the list -# of standardized variation sequences. The entries are left commented +# of standardized variation sequences. The entries are left commented out # in the file for the historical record of changes made to the data. #2278 FE00; with vertical stroke; # NEITHER LESS-THAN NOR GREATER-THAN #2279 FE00; with vertical stroke; # NEITHER GREATER-THAN NOR LESS-THAN @@ -141,3 +141,225 @@ A868 FE00; phags-pa letter reversed shaping subjoined ya; # PHAGS-PA SUBJOINED L 1887 180D; fourth form; final # MONGOLIAN LETTER ALI GALI A 1888 180B; second form; final # MONGOLIAN LETTER ALI GALI I 188A 180B; second form; initial medial # MONGOLIAN LETTER ALI GALI NGA + +# Emoji variation sequences for use as part of keycap symbols + +0023 FE0E; text style; # NUMBER SIGN +0023 FE0F; emoji style; # NUMBER SIGN +0030 FE0E; text style; # DIGIT ZERO +0030 FE0F; emoji style; # DIGIT ZERO +0031 FE0E; text style; # DIGIT ONE +0031 FE0F; emoji style; # DIGIT ONE +0032 FE0E; text style; # DIGIT TWO +0032 FE0F; emoji style; # DIGIT TWO +0033 FE0E; text style; # DIGIT THREE +0033 FE0F; emoji style; # DIGIT THREE +0034 FE0E; text style; # DIGIT FOUR +0034 FE0F; emoji style; # DIGIT FOUR +0035 FE0E; text style; # DIGIT FIVE +0035 FE0F; emoji style; # DIGIT FIVE +0036 FE0E; text style; # DIGIT SIX +0036 FE0F; emoji style; # DIGIT SIX +0037 FE0E; text style; # DIGIT SEVEN +0037 FE0F; emoji style; # DIGIT 
SEVEN +0038 FE0E; text style; # DIGIT EIGHT +0038 FE0F; emoji style; # DIGIT EIGHT +0039 FE0E; text style; # DIGIT NINE +0039 FE0F; emoji style; # DIGIT NINE + +# Emoji variation sequences + +203C FE0E; text style; # DOUBLE EXCLAMATION MARK +203C FE0F; emoji style; # DOUBLE EXCLAMATION MARK +2049 FE0E; text style; # EXCLAMATION QUESTION MARK +2049 FE0F; emoji style; # EXCLAMATION QUESTION MARK +2139 FE0E; text style; # INFORMATION SOURCE +2139 FE0F; emoji style; # INFORMATION SOURCE +2194 FE0E; text style; # LEFT RIGHT ARROW +2194 FE0F; emoji style; # LEFT RIGHT ARROW +2195 FE0E; text style; # UP DOWN ARROW +2195 FE0F; emoji style; # UP DOWN ARROW +2196 FE0E; text style; # NORTH WEST ARROW +2196 FE0F; emoji style; # NORTH WEST ARROW +2197 FE0E; text style; # NORTH EAST ARROW +2197 FE0F; emoji style; # NORTH EAST ARROW +2198 FE0E; text style; # SOUTH EAST ARROW +2198 FE0F; emoji style; # SOUTH EAST ARROW +2199 FE0E; text style; # SOUTH WEST ARROW +2199 FE0F; emoji style; # SOUTH WEST ARROW +21A9 FE0E; text style; # LEFTWARDS ARROW WITH HOOK +21A9 FE0F; emoji style; # LEFTWARDS ARROW WITH HOOK +21AA FE0E; text style; # RIGHTWARDS ARROW WITH HOOK +21AA FE0F; emoji style; # RIGHTWARDS ARROW WITH HOOK +231A FE0E; text style; # WATCH +231A FE0F; emoji style; # WATCH +231B FE0E; text style; # HOURGLASS +231B FE0F; emoji style; # HOURGLASS +24C2 FE0E; text style; # CIRCLED LATIN CAPITAL LETTER M +24C2 FE0F; emoji style; # CIRCLED LATIN CAPITAL LETTER M +25AA FE0E; text style; # BLACK SMALL SQUARE +25AA FE0F; emoji style; # BLACK SMALL SQUARE +25AB FE0E; text style; # WHITE SMALL SQUARE +25AB FE0F; emoji style; # WHITE SMALL SQUARE +25B6 FE0E; text style; # BLACK RIGHT-POINTING TRIANGLE +25B6 FE0F; emoji style; # BLACK RIGHT-POINTING TRIANGLE +25C0 FE0E; text style; # BLACK LEFT-POINTING TRIANGLE +25C0 FE0F; emoji style; # BLACK LEFT-POINTING TRIANGLE +25FB FE0E; text style; # WHITE MEDIUM SQUARE +25FB FE0F; emoji style; # WHITE MEDIUM SQUARE +25FC FE0E; text style; # BLACK 
MEDIUM SQUARE +25FC FE0F; emoji style; # BLACK MEDIUM SQUARE +25FD FE0E; text style; # WHITE MEDIUM SMALL SQUARE +25FD FE0F; emoji style; # WHITE MEDIUM SMALL SQUARE +25FE FE0E; text style; # BLACK MEDIUM SMALL SQUARE +25FE FE0F; emoji style; # BLACK MEDIUM SMALL SQUARE +2600 FE0E; text style; # BLACK SUN WITH RAYS +2600 FE0F; emoji style; # BLACK SUN WITH RAYS +2601 FE0E; text style; # CLOUD +2601 FE0F; emoji style; # CLOUD +260E FE0E; text style; # BLACK TELEPHONE +260E FE0F; emoji style; # BLACK TELEPHONE +2611 FE0E; text style; # BALLOT BOX WITH CHECK +2611 FE0F; emoji style; # BALLOT BOX WITH CHECK +2614 FE0E; text style; # UMBRELLA WITH RAIN DROPS +2614 FE0F; emoji style; # UMBRELLA WITH RAIN DROPS +2615 FE0E; text style; # HOT BEVERAGE +2615 FE0F; emoji style; # HOT BEVERAGE +261D FE0E; text style; # WHITE UP POINTING INDEX +261D FE0F; emoji style; # WHITE UP POINTING INDEX +263A FE0E; text style; # WHITE SMILING FACE +263A FE0F; emoji style; # WHITE SMILING FACE +2648 FE0E; text style; # ARIES +2648 FE0F; emoji style; # ARIES +2649 FE0E; text style; # TAURUS +2649 FE0F; emoji style; # TAURUS +264A FE0E; text style; # GEMINI +264A FE0F; emoji style; # GEMINI +264B FE0E; text style; # CANCER +264B FE0F; emoji style; # CANCER +264C FE0E; text style; # LEO +264C FE0F; emoji style; # LEO +264D FE0E; text style; # VIRGO +264D FE0F; emoji style; # VIRGO +264E FE0E; text style; # LIBRA +264E FE0F; emoji style; # LIBRA +264F FE0E; text style; # SCORPIUS +264F FE0F; emoji style; # SCORPIUS +2650 FE0E; text style; # SAGITTARIUS +2650 FE0F; emoji style; # SAGITTARIUS +2651 FE0E; text style; # CAPRICORN +2651 FE0F; emoji style; # CAPRICORN +2652 FE0E; text style; # AQUARIUS +2652 FE0F; emoji style; # AQUARIUS +2653 FE0E; text style; # PISCES +2653 FE0F; emoji style; # PISCES +2660 FE0E; text style; # BLACK SPADE SUIT +2660 FE0F; emoji style; # BLACK SPADE SUIT +2663 FE0E; text style; # BLACK CLUB SUIT +2663 FE0F; emoji style; # BLACK CLUB SUIT +2665 FE0E; text style; # 
BLACK HEART SUIT +2665 FE0F; emoji style; # BLACK HEART SUIT +2666 FE0E; text style; # BLACK DIAMOND SUIT +2666 FE0F; emoji style; # BLACK DIAMOND SUIT +2668 FE0E; text style; # HOT SPRINGS +2668 FE0F; emoji style; # HOT SPRINGS +267B FE0E; text style; # BLACK UNIVERSAL RECYCLING SYMBOL +267B FE0F; emoji style; # BLACK UNIVERSAL RECYCLING SYMBOL +267F FE0E; text style; # WHEELCHAIR SYMBOL +267F FE0F; emoji style; # WHEELCHAIR SYMBOL +2693 FE0E; text style; # ANCHOR +2693 FE0F; emoji style; # ANCHOR +26A0 FE0E; text style; # WARNING SIGN +26A0 FE0F; emoji style; # WARNING SIGN +26A1 FE0E; text style; # HIGH VOLTAGE SIGN +26A1 FE0F; emoji style; # HIGH VOLTAGE SIGN +26AA FE0E; text style; # MEDIUM WHITE CIRCLE +26AA FE0F; emoji style; # MEDIUM WHITE CIRCLE +26AB FE0E; text style; # MEDIUM BLACK CIRCLE +26AB FE0F; emoji style; # MEDIUM BLACK CIRCLE +26BD FE0E; text style; # SOCCER BALL +26BD FE0F; emoji style; # SOCCER BALL +26BE FE0E; text style; # BASEBALL +26BE FE0F; emoji style; # BASEBALL +26C4 FE0E; text style; # SNOWMAN WITHOUT SNOW +26C4 FE0F; emoji style; # SNOWMAN WITHOUT SNOW +26C5 FE0E; text style; # SUN BEHIND CLOUD +26C5 FE0F; emoji style; # SUN BEHIND CLOUD +26D4 FE0E; text style; # NO ENTRY +26D4 FE0F; emoji style; # NO ENTRY +26EA FE0E; text style; # CHURCH +26EA FE0F; emoji style; # CHURCH +26F2 FE0E; text style; # FOUNTAIN +26F2 FE0F; emoji style; # FOUNTAIN +26F3 FE0E; text style; # FLAG IN HOLE +26F3 FE0F; emoji style; # FLAG IN HOLE +26F5 FE0E; text style; # SAILBOAT +26F5 FE0F; emoji style; # SAILBOAT +26FA FE0E; text style; # TENT +26FA FE0F; emoji style; # TENT +26FD FE0E; text style; # FUEL PUMP +26FD FE0F; emoji style; # FUEL PUMP +2702 FE0E; text style; # BLACK SCISSORS +2702 FE0F; emoji style; # BLACK SCISSORS +2708 FE0E; text style; # AIRPLANE +2708 FE0F; emoji style; # AIRPLANE +2709 FE0E; text style; # ENVELOPE +2709 FE0F; emoji style; # ENVELOPE +270C FE0E; text style; # VICTORY HAND +270C FE0F; emoji style; # VICTORY HAND +270F FE0E; 
text style; # PENCIL +270F FE0F; emoji style; # PENCIL +2712 FE0E; text style; # BLACK NIB +2712 FE0F; emoji style; # BLACK NIB +2714 FE0E; text style; # HEAVY CHECK MARK +2714 FE0F; emoji style; # HEAVY CHECK MARK +2716 FE0E; text style; # HEAVY MULTIPLICATION X +2716 FE0F; emoji style; # HEAVY MULTIPLICATION X +2733 FE0E; text style; # EIGHT SPOKED ASTERISK +2733 FE0F; emoji style; # EIGHT SPOKED ASTERISK +2734 FE0E; text style; # EIGHT POINTED BLACK STAR +2734 FE0F; emoji style; # EIGHT POINTED BLACK STAR +2744 FE0E; text style; # SNOWFLAKE +2744 FE0F; emoji style; # SNOWFLAKE +2747 FE0E; text style; # SPARKLE +2747 FE0F; emoji style; # SPARKLE +2757 FE0E; text style; # HEAVY EXCLAMATION MARK SYMBOL +2757 FE0F; emoji style; # HEAVY EXCLAMATION MARK SYMBOL +2764 FE0E; text style; # HEAVY BLACK HEART +2764 FE0F; emoji style; # HEAVY BLACK HEART +27A1 FE0E; text style; # BLACK RIGHTWARDS ARROW +27A1 FE0F; emoji style; # BLACK RIGHTWARDS ARROW +2934 FE0E; text style; # ARROW POINTING RIGHTWARDS THEN CURVING UPWARDS +2934 FE0F; emoji style; # ARROW POINTING RIGHTWARDS THEN CURVING UPWARDS +2935 FE0E; text style; # ARROW POINTING RIGHTWARDS THEN CURVING DOWNWARDS +2935 FE0F; emoji style; # ARROW POINTING RIGHTWARDS THEN CURVING DOWNWARDS +2B05 FE0E; text style; # LEFTWARDS BLACK ARROW +2B05 FE0F; emoji style; # LEFTWARDS BLACK ARROW +2B06 FE0E; text style; # UPWARDS BLACK ARROW +2B06 FE0F; emoji style; # UPWARDS BLACK ARROW +2B07 FE0E; text style; # DOWNWARDS BLACK ARROW +2B07 FE0F; emoji style; # DOWNWARDS BLACK ARROW +2B1B FE0E; text style; # BLACK LARGE SQUARE +2B1B FE0F; emoji style; # BLACK LARGE SQUARE +2B1C FE0E; text style; # WHITE LARGE SQUARE +2B1C FE0F; emoji style; # WHITE LARGE SQUARE +2B50 FE0E; text style; # WHITE MEDIUM STAR +2B50 FE0F; emoji style; # WHITE MEDIUM STAR +2B55 FE0E; text style; # HEAVY LARGE CIRCLE +2B55 FE0F; emoji style; # HEAVY LARGE CIRCLE +303D FE0E; text style; # PART ALTERNATION MARK +303D FE0F; emoji style; # PART ALTERNATION 
MARK +3297 FE0E; text style; # CIRCLED IDEOGRAPH CONGRATULATION +3297 FE0F; emoji style; # CIRCLED IDEOGRAPH CONGRATULATION +3299 FE0E; text style; # CIRCLED IDEOGRAPH SECRET +3299 FE0F; emoji style; # CIRCLED IDEOGRAPH SECRET +1F004 FE0E; text style; # MAHJONG TILE RED DRAGON +1F004 FE0F; emoji style; # MAHJONG TILE RED DRAGON +1F17F FE0E; text style; # NEGATIVE SQUARED LATIN CAPITAL LETTER P +1F17F FE0F; emoji style; # NEGATIVE SQUARED LATIN CAPITAL LETTER P +1F21A FE0E; text style; # SQUARED CJK UNIFIED IDEOGRAPH-7121 +1F21A FE0F; emoji style; # SQUARED CJK UNIFIED IDEOGRAPH-7121 +1F22F FE0E; text style; # SQUARED CJK UNIFIED IDEOGRAPH-6307 +1F22F FE0F; emoji style; # SQUARED CJK UNIFIED IDEOGRAPH-6307 + +# EOF diff --git a/lib/unicore/UnicodeData.txt b/lib/unicore/UnicodeData.txt index 8d7222b137..9f204050c6 100644 --- a/lib/unicore/UnicodeData.txt +++ b/lib/unicore/UnicodeData.txt @@ -165,10 +165,10 @@ 00A4;CURRENCY SIGN;Sc;0;ET;;;;;N;;;;; 00A5;YEN SIGN;Sc;0;ET;;;;;N;;;;; 00A6;BROKEN BAR;So;0;ON;;;;;N;BROKEN VERTICAL BAR;;;; -00A7;SECTION SIGN;So;0;ON;;;;;N;;;;; +00A7;SECTION SIGN;Po;0;ON;;;;;N;;;;; 00A8;DIAERESIS;Sk;0;ON;<compat> 0020 0308;;;;N;SPACING DIAERESIS;;;; 00A9;COPYRIGHT SIGN;So;0;ON;;;;;N;;;;; -00AA;FEMININE ORDINAL INDICATOR;Ll;0;L;<super> 0061;;;;N;;;;; +00AA;FEMININE ORDINAL INDICATOR;Lo;0;L;<super> 0061;;;;N;;;;; 00AB;LEFT-POINTING DOUBLE ANGLE QUOTATION MARK;Pi;0;ON;;;;;Y;LEFT POINTING GUILLEMET;;;; 00AC;NOT SIGN;Sm;0;ON;;;;;N;;;;; 00AD;SOFT HYPHEN;Cf;0;BN;;;;;N;;;;; @@ -180,11 +180,11 @@ 00B3;SUPERSCRIPT THREE;No;0;EN;<super> 0033;;3;3;N;SUPERSCRIPT DIGIT THREE;;;; 00B4;ACUTE ACCENT;Sk;0;ON;<compat> 0020 0301;;;;N;SPACING ACUTE;;;; 00B5;MICRO SIGN;Ll;0;L;<compat> 03BC;;;;N;;;039C;;039C -00B6;PILCROW SIGN;So;0;ON;;;;;N;PARAGRAPH SIGN;;;; +00B6;PILCROW SIGN;Po;0;ON;;;;;N;PARAGRAPH SIGN;;;; 00B7;MIDDLE DOT;Po;0;ON;;;;;N;;;;; 00B8;CEDILLA;Sk;0;ON;<compat> 0020 0327;;;;N;SPACING CEDILLA;;;; 00B9;SUPERSCRIPT ONE;No;0;EN;<super> 
0031;;1;1;N;SUPERSCRIPT DIGIT ONE;;;; -00BA;MASCULINE ORDINAL INDICATOR;Ll;0;L;<super> 006F;;;;N;;;;; +00BA;MASCULINE ORDINAL INDICATOR;Lo;0;L;<super> 006F;;;;N;;;;; 00BB;RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK;Pf;0;ON;;;;;Y;RIGHT POINTING GUILLEMET;;;; 00BC;VULGAR FRACTION ONE QUARTER;No;0;ON;<fraction> 0031 2044 0034;;;1/4;N;FRACTION ONE QUARTER;;;; 00BD;VULGAR FRACTION ONE HALF;No;0;ON;<fraction> 0031 2044 0032;;;1/2;N;FRACTION ONE HALF;;;; @@ -612,7 +612,7 @@ 0263;LATIN SMALL LETTER GAMMA;Ll;0;L;;;;;N;;;0194;;0194 0264;LATIN SMALL LETTER RAMS HORN;Ll;0;L;;;;;N;LATIN SMALL LETTER BABY GAMMA;;;; 0265;LATIN SMALL LETTER TURNED H;Ll;0;L;;;;;N;;;A78D;;A78D -0266;LATIN SMALL LETTER H WITH HOOK;Ll;0;L;;;;;N;LATIN SMALL LETTER H HOOK;;;; +0266;LATIN SMALL LETTER H WITH HOOK;Ll;0;L;;;;;N;LATIN SMALL LETTER H HOOK;;A7AA;;A7AA 0267;LATIN SMALL LETTER HENG WITH HOOK;Ll;0;L;;;;;N;LATIN SMALL LETTER HENG HOOK;;;; 0268;LATIN SMALL LETTER I WITH STROKE;Ll;0;L;;;;;N;LATIN SMALL LETTER BARRED I;;0197;;0197 0269;LATIN SMALL LETTER IOTA;Ll;0;L;;;;;N;;;0196;;0196 @@ -1394,6 +1394,7 @@ 0587;ARMENIAN SMALL LIGATURE ECH YIWN;Ll;0;L;<compat> 0565 0582;;;;N;;;;; 0589;ARMENIAN FULL STOP;Po;0;L;;;;;N;ARMENIAN PERIOD;;;; 058A;ARMENIAN HYPHEN;Pd;0;ON;;;;;N;;;;; +058F;ARMENIAN DRAM SIGN;Sc;0;ET;;;;;N;;;;; 0591;HEBREW ACCENT ETNAHTA;Mn;220;NSM;;;;;N;;;;; 0592;HEBREW ACCENT SEGOL;Mn;230;NSM;;;;;N;;;;; 0593;HEBREW ACCENT SHALSHELET;Mn;230;NSM;;;;;N;;;;; @@ -1485,6 +1486,7 @@ 0601;ARABIC SIGN SANAH;Cf;0;AN;;;;;N;;;;; 0602;ARABIC FOOTNOTE MARKER;Cf;0;AN;;;;;N;;;;; 0603;ARABIC SIGN SAFHA;Cf;0;AN;;;;;N;;;;; +0604;ARABIC SIGN SAMVAT;Cf;0;AN;;;;;N;;;;; 0606;ARABIC-INDIC CUBE ROOT;Sm;0;ON;;;;;N;;;;; 0607;ARABIC-INDIC FOURTH ROOT;Sm;0;ON;;;;;N;;;;; 0608;ARABIC RAY;Sm;0;AL;;;;;N;;;;; @@ -1747,7 +1749,7 @@ 070B;SYRIAC HARKLEAN OBELUS;Po;0;AL;;;;;N;;;;; 070C;SYRIAC HARKLEAN METOBELUS;Po;0;AL;;;;;N;;;;; 070D;SYRIAC HARKLEAN ASTERISCUS;Po;0;AL;;;;;N;;;;; -070F;SYRIAC ABBREVIATION MARK;Cf;0;AN;;;;;N;;;;; 
+070F;SYRIAC ABBREVIATION MARK;Cf;0;AL;;;;;N;;;;; 0710;SYRIAC LETTER ALAPH;Lo;0;AL;;;;;N;;;;; 0711;SYRIAC LETTER SUPERSCRIPT ALAPH;Mn;36;NSM;;;;;N;;;;; 0712;SYRIAC LETTER BETH;Lo;0;AL;;;;;N;;;;; @@ -2057,6 +2059,45 @@ 085A;MANDAIC VOCALIZATION MARK;Mn;220;NSM;;;;;N;;;;; 085B;MANDAIC GEMINATION MARK;Mn;220;NSM;;;;;N;;;;; 085E;MANDAIC PUNCTUATION;Po;0;R;;;;;N;;;;; +08A0;ARABIC LETTER BEH WITH SMALL V BELOW;Lo;0;AL;;;;;N;;;;; +08A2;ARABIC LETTER JEEM WITH TWO DOTS ABOVE;Lo;0;AL;;;;;N;;;;; +08A3;ARABIC LETTER TAH WITH TWO DOTS ABOVE;Lo;0;AL;;;;;N;;;;; +08A4;ARABIC LETTER FEH WITH DOT BELOW AND THREE DOTS ABOVE;Lo;0;AL;;;;;N;;;;; +08A5;ARABIC LETTER QAF WITH DOT BELOW;Lo;0;AL;;;;;N;;;;; +08A6;ARABIC LETTER LAM WITH DOUBLE BAR;Lo;0;AL;;;;;N;;;;; +08A7;ARABIC LETTER MEEM WITH THREE DOTS ABOVE;Lo;0;AL;;;;;N;;;;; +08A8;ARABIC LETTER YEH WITH TWO DOTS BELOW AND HAMZA ABOVE;Lo;0;AL;;;;;N;;;;; +08A9;ARABIC LETTER YEH WITH TWO DOTS BELOW AND DOT ABOVE;Lo;0;AL;;;;;N;;;;; +08AA;ARABIC LETTER REH WITH LOOP;Lo;0;AL;;;;;N;;;;; +08AB;ARABIC LETTER WAW WITH DOT WITHIN;Lo;0;AL;;;;;N;;;;; +08AC;ARABIC LETTER ROHINGYA YEH;Lo;0;AL;;;;;N;;;;; +08E4;ARABIC CURLY FATHA;Mn;230;NSM;;;;;N;;;;; +08E5;ARABIC CURLY DAMMA;Mn;230;NSM;;;;;N;;;;; +08E6;ARABIC CURLY KASRA;Mn;220;NSM;;;;;N;;;;; +08E7;ARABIC CURLY FATHATAN;Mn;230;NSM;;;;;N;;;;; +08E8;ARABIC CURLY DAMMATAN;Mn;230;NSM;;;;;N;;;;; +08E9;ARABIC CURLY KASRATAN;Mn;220;NSM;;;;;N;;;;; +08EA;ARABIC TONE ONE DOT ABOVE;Mn;230;NSM;;;;;N;;;;; +08EB;ARABIC TONE TWO DOTS ABOVE;Mn;230;NSM;;;;;N;;;;; +08EC;ARABIC TONE LOOP ABOVE;Mn;230;NSM;;;;;N;;;;; +08ED;ARABIC TONE ONE DOT BELOW;Mn;220;NSM;;;;;N;;;;; +08EE;ARABIC TONE TWO DOTS BELOW;Mn;220;NSM;;;;;N;;;;; +08EF;ARABIC TONE LOOP BELOW;Mn;220;NSM;;;;;N;;;;; +08F0;ARABIC OPEN FATHATAN;Mn;27;NSM;;;;;N;;;;; +08F1;ARABIC OPEN DAMMATAN;Mn;28;NSM;;;;;N;;;;; +08F2;ARABIC OPEN KASRATAN;Mn;29;NSM;;;;;N;;;;; +08F3;ARABIC SMALL HIGH WAW;Mn;230;NSM;;;;;N;;;;; +08F4;ARABIC FATHA WITH RING;Mn;230;NSM;;;;;N;;;;; 
+08F5;ARABIC FATHA WITH DOT ABOVE;Mn;230;NSM;;;;;N;;;;; +08F6;ARABIC KASRA WITH DOT BELOW;Mn;220;NSM;;;;;N;;;;; +08F7;ARABIC LEFT ARROWHEAD ABOVE;Mn;230;NSM;;;;;N;;;;; +08F8;ARABIC RIGHT ARROWHEAD ABOVE;Mn;230;NSM;;;;;N;;;;; +08F9;ARABIC LEFT ARROWHEAD BELOW;Mn;220;NSM;;;;;N;;;;; +08FA;ARABIC RIGHT ARROWHEAD BELOW;Mn;220;NSM;;;;;N;;;;; +08FB;ARABIC DOUBLE RIGHT ARROWHEAD ABOVE;Mn;230;NSM;;;;;N;;;;; +08FC;ARABIC DOUBLE RIGHT ARROWHEAD ABOVE WITH DOT;Mn;230;NSM;;;;;N;;;;; +08FD;ARABIC RIGHT ARROWHEAD ABOVE WITH DOT;Mn;230;NSM;;;;;N;;;;; +08FE;ARABIC DAMMA WITH DOT;Mn;230;NSM;;;;;N;;;;; 0900;DEVANAGARI SIGN INVERTED CANDRABINDU;Mn;0;NSM;;;;;N;;;;; 0901;DEVANAGARI SIGN CANDRABINDU;Mn;0;NSM;;;;;N;;;;; 0902;DEVANAGARI SIGN ANUSVARA;Mn;0;NSM;;;;;N;;;;; @@ -2437,6 +2478,7 @@ 0AED;GUJARATI DIGIT SEVEN;Nd;0;L;;7;7;7;N;;;;; 0AEE;GUJARATI DIGIT EIGHT;Nd;0;L;;8;8;8;N;;;;; 0AEF;GUJARATI DIGIT NINE;Nd;0;L;;9;9;9;N;;;;; +0AF0;GUJARATI ABBREVIATION SIGN;Po;0;L;;;;;N;;;;; 0AF1;GUJARATI RUPEE SIGN;Sc;0;ET;;;;;N;;;;; 0B01;ORIYA SIGN CANDRABINDU;Mn;0;NSM;;;;;N;;;;; 0B02;ORIYA SIGN ANUSVARA;Mc;0;L;;;;;N;;;;; @@ -3109,6 +3151,8 @@ 0ED9;LAO DIGIT NINE;Nd;0;L;;9;9;9;N;;;;; 0EDC;LAO HO NO;Lo;0;L;<compat> 0EAB 0E99;;;;N;;;;; 0EDD;LAO HO MO;Lo;0;L;<compat> 0EAB 0EA1;;;;N;;;;; +0EDE;LAO LETTER KHMU GO;Lo;0;L;;;;;N;;;;; +0EDF;LAO LETTER KHMU NYO;Lo;0;L;;;;;N;;;;; 0F00;TIBETAN SYLLABLE OM;Lo;0;L;;;;;N;;;;; 0F01;TIBETAN MARK GTER YIG MGO TRUNCATED A;So;0;L;;;;;N;;;;; 0F02;TIBETAN MARK GTER YIG MGO -UM RNAM BCAD MA;So;0;L;;;;;N;;;;; @@ -3129,7 +3173,7 @@ 0F11;TIBETAN MARK RIN CHEN SPUNGS SHAD;Po;0;L;;;;;N;TIBETAN RINCHANPHUNGSHAD;;;; 0F12;TIBETAN MARK RGYA GRAM SHAD;Po;0;L;;;;;N;;;;; 0F13;TIBETAN MARK CARET -DZUD RTAGS ME LONG CAN;So;0;L;;;;;N;;;;; -0F14;TIBETAN MARK GTER TSHEG;So;0;L;;;;;N;TIBETAN COMMA;;;; +0F14;TIBETAN MARK GTER TSHEG;Po;0;L;;;;;N;TIBETAN COMMA;;;; 0F15;TIBETAN LOGOTYPE SIGN CHAD RTAGS;So;0;L;;;;;N;;;;; 0F16;TIBETAN LOGOTYPE SIGN LHAG RTAGS;So;0;L;;;;;N;;;;; 0F17;TIBETAN 
ASTROLOGICAL SIGN SGRA GCAN -CHAR RTAGS;So;0;L;;;;;N;;;;; @@ -3518,6 +3562,8 @@ 10C3;GEORGIAN CAPITAL LETTER WE;Lu;0;L;;;;;N;;;;2D23; 10C4;GEORGIAN CAPITAL LETTER HAR;Lu;0;L;;;;;N;;;;2D24; 10C5;GEORGIAN CAPITAL LETTER HOE;Lu;0;L;;;;;N;;;;2D25; +10C7;GEORGIAN CAPITAL LETTER YN;Lu;0;L;;;;;N;;;;2D27; +10CD;GEORGIAN CAPITAL LETTER AEN;Lu;0;L;;;;;N;;;;2D2D; 10D0;GEORGIAN LETTER AN;Lo;0;L;;;;;N;GEORGIAN SMALL LETTER AN;;;; 10D1;GEORGIAN LETTER BAN;Lo;0;L;;;;;N;GEORGIAN SMALL LETTER BAN;;;; 10D2;GEORGIAN LETTER GAN;Lo;0;L;;;;;N;GEORGIAN SMALL LETTER GAN;;;; @@ -3563,6 +3609,9 @@ 10FA;GEORGIAN LETTER AIN;Lo;0;L;;;;;N;;;;; 10FB;GEORGIAN PARAGRAPH SEPARATOR;Po;0;L;;;;;N;;;;; 10FC;MODIFIER LETTER GEORGIAN NAR;Lm;0;L;<super> 10DC;;;;N;;;;; +10FD;GEORGIAN LETTER AEN;Lo;0;L;;;;;N;;;;; +10FE;GEORGIAN LETTER HARD SIGN;Lo;0;L;;;;;N;;;;; +10FF;GEORGIAN LETTER LABIAL SIGN;Lo;0;L;;;;;N;;;;; 1100;HANGUL CHOSEONG KIYEOK;Lo;0;L;;;;;N;;;;; 1101;HANGUL CHOSEONG SSANGKIYEOK;Lo;0;L;;;;;N;;;;; 1102;HANGUL CHOSEONG NIEUN;Lo;0;L;;;;;N;;;;; @@ -4148,7 +4197,7 @@ 135D;ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK;Mn;230;NSM;;;;;N;;;;; 135E;ETHIOPIC COMBINING VOWEL LENGTH MARK;Mn;230;NSM;;;;;N;;;;; 135F;ETHIOPIC COMBINING GEMINATION MARK;Mn;230;NSM;;;;;N;;;;; -1360;ETHIOPIC SECTION MARK;So;0;L;;;;;N;;;;; +1360;ETHIOPIC SECTION MARK;Po;0;L;;;;;N;;;;; 1361;ETHIOPIC WORDSPACE;Po;0;L;;;;;N;;;;; 1362;ETHIOPIC FULL STOP;Po;0;L;;;;;N;;;;; 1363;ETHIOPIC COMMA;Po;0;L;;;;;N;;;;; @@ -5171,8 +5220,8 @@ 17B1;KHMER INDEPENDENT VOWEL QOO TYPE ONE;Lo;0;L;;;;;N;;;;; 17B2;KHMER INDEPENDENT VOWEL QOO TYPE TWO;Lo;0;L;;;;;N;;;;; 17B3;KHMER INDEPENDENT VOWEL QAU;Lo;0;L;;;;;N;;;;; -17B4;KHMER VOWEL INHERENT AQ;Cf;0;L;;;;;N;;;;; -17B5;KHMER VOWEL INHERENT AA;Cf;0;L;;;;;N;;;;; +17B4;KHMER VOWEL INHERENT AQ;Mn;0;NSM;;;;;N;;;;; +17B5;KHMER VOWEL INHERENT AA;Mn;0;NSM;;;;;N;;;;; 17B6;KHMER VOWEL SIGN AA;Mc;0;L;;;;;N;;;;; 17B7;KHMER VOWEL SIGN I;Mn;0;NSM;;;;;N;;;;; 17B8;KHMER VOWEL SIGN II;Mn;0;NSM;;;;;N;;;;; @@ -5996,6 
+6045,9 @@ 1BA8;SUNDANESE VOWEL SIGN PAMEPET;Mn;0;NSM;;;;;N;;;;; 1BA9;SUNDANESE VOWEL SIGN PANEULEUNG;Mn;0;NSM;;;;;N;;;;; 1BAA;SUNDANESE SIGN PAMAAEH;Mc;9;L;;;;;N;;;;; +1BAB;SUNDANESE SIGN VIRAMA;Mn;9;NSM;;;;;N;;;;; +1BAC;SUNDANESE CONSONANT SIGN PASANGAN MA;Mc;0;L;;;;;N;;;;; +1BAD;SUNDANESE CONSONANT SIGN PASANGAN WA;Mc;0;L;;;;;N;;;;; 1BAE;SUNDANESE LETTER KHA;Lo;0;L;;;;;N;;;;; 1BAF;SUNDANESE LETTER SYA;Lo;0;L;;;;;N;;;;; 1BB0;SUNDANESE DIGIT ZERO;Nd;0;L;;0;0;0;N;;;;; @@ -6008,6 +6060,12 @@ 1BB7;SUNDANESE DIGIT SEVEN;Nd;0;L;;7;7;7;N;;;;; 1BB8;SUNDANESE DIGIT EIGHT;Nd;0;L;;8;8;8;N;;;;; 1BB9;SUNDANESE DIGIT NINE;Nd;0;L;;9;9;9;N;;;;; +1BBA;SUNDANESE AVAGRAHA;Lo;0;L;;;;;N;;;;; +1BBB;SUNDANESE LETTER REU;Lo;0;L;;;;;N;;;;; +1BBC;SUNDANESE LETTER LEU;Lo;0;L;;;;;N;;;;; +1BBD;SUNDANESE LETTER BHA;Lo;0;L;;;;;N;;;;; +1BBE;SUNDANESE LETTER FINAL K;Lo;0;L;;;;;N;;;;; +1BBF;SUNDANESE LETTER FINAL M;Lo;0;L;;;;;N;;;;; 1BC0;BATAK LETTER A;Lo;0;L;;;;;N;;;;; 1BC1;BATAK LETTER SIMALUNGUN A;Lo;0;L;;;;;N;;;;; 1BC2;BATAK LETTER HA;Lo;0;L;;;;;N;;;;; @@ -6186,6 +6244,14 @@ 1C7D;OL CHIKI AHAD;Lm;0;L;;;;;N;;;;; 1C7E;OL CHIKI PUNCTUATION MUCAAD;Po;0;L;;;;;N;;;;; 1C7F;OL CHIKI PUNCTUATION DOUBLE MUCAAD;Po;0;L;;;;;N;;;;; +1CC0;SUNDANESE PUNCTUATION BINDU SURYA;Po;0;L;;;;;N;;;;; +1CC1;SUNDANESE PUNCTUATION BINDU PANGLONG;Po;0;L;;;;;N;;;;; +1CC2;SUNDANESE PUNCTUATION BINDU PURNAMA;Po;0;L;;;;;N;;;;; +1CC3;SUNDANESE PUNCTUATION BINDU CAKRA;Po;0;L;;;;;N;;;;; +1CC4;SUNDANESE PUNCTUATION BINDU LEU SATANGA;Po;0;L;;;;;N;;;;; +1CC5;SUNDANESE PUNCTUATION BINDU KA SATANGA;Po;0;L;;;;;N;;;;; +1CC6;SUNDANESE PUNCTUATION BINDU DA SATANGA;Po;0;L;;;;;N;;;;; +1CC7;SUNDANESE PUNCTUATION BINDU BA SATANGA;Po;0;L;;;;;N;;;;; 1CD0;VEDIC TONE KARSHANA;Mn;230;NSM;;;;;N;;;;; 1CD1;VEDIC TONE SHARA;Mn;230;NSM;;;;;N;;;;; 1CD2;VEDIC TONE PRENKHA;Mn;230;NSM;;;;;N;;;;; @@ -6221,6 +6287,10 @@ 1CF0;VEDIC SIGN RTHANG LONG ANUSVARA;Lo;0;L;;;;;N;;;;; 1CF1;VEDIC SIGN ANUSVARA UBHAYATO MUKHA;Lo;0;L;;;;;N;;;;; 1CF2;VEDIC SIGN 
ARDHAVISARGA;Mc;0;L;;;;;N;;;;; +1CF3;VEDIC SIGN ROTATED ARDHAVISARGA;Mc;0;L;;;;;N;;;;; +1CF4;VEDIC TONE CANDRA ABOVE;Mn;230;NSM;;;;;N;;;;; +1CF5;VEDIC SIGN JIHVAMULIYA;Lo;0;L;;;;;N;;;;; +1CF6;VEDIC SIGN UPADHMANIYA;Lo;0;L;;;;;N;;;;; 1D00;LATIN LETTER SMALL CAPITAL A;Ll;0;L;;;;;N;;;;; 1D01;LATIN LETTER SMALL CAPITAL AE;Ll;0;L;;;;;N;;;;; 1D02;LATIN SMALL LETTER TURNED AE;Ll;0;L;;;;;N;;;;; @@ -6319,15 +6389,15 @@ 1D5F;MODIFIER LETTER SMALL DELTA;Lm;0;L;<super> 03B4;;;;N;;;;; 1D60;MODIFIER LETTER SMALL GREEK PHI;Lm;0;L;<super> 03C6;;;;N;;;;; 1D61;MODIFIER LETTER SMALL CHI;Lm;0;L;<super> 03C7;;;;N;;;;; -1D62;LATIN SUBSCRIPT SMALL LETTER I;Ll;0;L;<sub> 0069;;;;N;;;;; -1D63;LATIN SUBSCRIPT SMALL LETTER R;Ll;0;L;<sub> 0072;;;;N;;;;; -1D64;LATIN SUBSCRIPT SMALL LETTER U;Ll;0;L;<sub> 0075;;;;N;;;;; -1D65;LATIN SUBSCRIPT SMALL LETTER V;Ll;0;L;<sub> 0076;;;;N;;;;; -1D66;GREEK SUBSCRIPT SMALL LETTER BETA;Ll;0;L;<sub> 03B2;;;;N;;;;; -1D67;GREEK SUBSCRIPT SMALL LETTER GAMMA;Ll;0;L;<sub> 03B3;;;;N;;;;; -1D68;GREEK SUBSCRIPT SMALL LETTER RHO;Ll;0;L;<sub> 03C1;;;;N;;;;; -1D69;GREEK SUBSCRIPT SMALL LETTER PHI;Ll;0;L;<sub> 03C6;;;;N;;;;; -1D6A;GREEK SUBSCRIPT SMALL LETTER CHI;Ll;0;L;<sub> 03C7;;;;N;;;;; +1D62;LATIN SUBSCRIPT SMALL LETTER I;Lm;0;L;<sub> 0069;;;;N;;;;; +1D63;LATIN SUBSCRIPT SMALL LETTER R;Lm;0;L;<sub> 0072;;;;N;;;;; +1D64;LATIN SUBSCRIPT SMALL LETTER U;Lm;0;L;<sub> 0075;;;;N;;;;; +1D65;LATIN SUBSCRIPT SMALL LETTER V;Lm;0;L;<sub> 0076;;;;N;;;;; +1D66;GREEK SUBSCRIPT SMALL LETTER BETA;Lm;0;L;<sub> 03B2;;;;N;;;;; +1D67;GREEK SUBSCRIPT SMALL LETTER GAMMA;Lm;0;L;<sub> 03B3;;;;N;;;;; +1D68;GREEK SUBSCRIPT SMALL LETTER RHO;Lm;0;L;<sub> 03C1;;;;N;;;;; +1D69;GREEK SUBSCRIPT SMALL LETTER PHI;Lm;0;L;<sub> 03C6;;;;N;;;;; +1D6A;GREEK SUBSCRIPT SMALL LETTER CHI;Lm;0;L;<sub> 03C7;;;;N;;;;; 1D6B;LATIN SMALL LETTER UE;Ll;0;L;;;;;N;;;;; 1D6C;LATIN SMALL LETTER B WITH MIDDLE TILDE;Ll;0;L;;;;;N;;;;; 1D6D;LATIN SMALL LETTER D WITH MIDDLE TILDE;Ll;0;L;;;;;N;;;;; @@ -8827,7 +8897,9 @@ 
27C8;REVERSE SOLIDUS PRECEDING SUBSET;Sm;0;ON;;;;;Y;;;;; 27C9;SUPERSET PRECEDING SOLIDUS;Sm;0;ON;;;;;Y;;;;; 27CA;VERTICAL BAR WITH HORIZONTAL STROKE;Sm;0;ON;;;;;N;;;;; +27CB;MATHEMATICAL RISING DIAGONAL;Sm;0;ON;;;;;Y;;;;; 27CC;LONG DIVISION;Sm;0;ON;;;;;Y;;;;; +27CD;MATHEMATICAL FALLING DIAGONAL;Sm;0;ON;;;;;Y;;;;; 27CE;SQUARED LOGICAL AND;Sm;0;ON;;;;;N;;;;; 27CF;SQUARED LOGICAL OR;Sm;0;ON;;;;;N;;;;; 27D0;WHITE DIAMOND WITH CENTRED DOT;Sm;0;ON;;;;;N;;;;; @@ -9855,7 +9927,7 @@ 2C79;LATIN SMALL LETTER TURNED R WITH TAIL;Ll;0;L;;;;;N;;;;; 2C7A;LATIN SMALL LETTER O WITH LOW RING INSIDE;Ll;0;L;;;;;N;;;;; 2C7B;LATIN LETTER SMALL CAPITAL TURNED E;Ll;0;L;;;;;N;;;;; -2C7C;LATIN SUBSCRIPT SMALL LETTER J;Ll;0;L;<sub> 006A;;;;N;;;;; +2C7C;LATIN SUBSCRIPT SMALL LETTER J;Lm;0;L;<sub> 006A;;;;N;;;;; 2C7D;MODIFIER LETTER CAPITAL V;Lm;0;L;<super> 0056;;;;N;;;;; 2C7E;LATIN CAPITAL LETTER S WITH SWASH TAIL;Lu;0;L;;;;;N;;;;023F; 2C7F;LATIN CAPITAL LETTER Z WITH SWASH TAIL;Lu;0;L;;;;;N;;;;0240; @@ -9973,6 +10045,8 @@ 2CEF;COPTIC COMBINING NI ABOVE;Mn;230;NSM;;;;;N;;;;; 2CF0;COPTIC COMBINING SPIRITUS ASPER;Mn;230;NSM;;;;;N;;;;; 2CF1;COPTIC COMBINING SPIRITUS LENIS;Mn;230;NSM;;;;;N;;;;; +2CF2;COPTIC CAPITAL LETTER BOHAIRIC KHEI;Lu;0;L;;;;;N;;;;2CF3; +2CF3;COPTIC SMALL LETTER BOHAIRIC KHEI;Ll;0;L;;;;;N;;;2CF2;;2CF2 2CF9;COPTIC OLD NUBIAN FULL STOP;Po;0;ON;;;;;N;;;;; 2CFA;COPTIC OLD NUBIAN DIRECT QUESTION MARK;Po;0;ON;;;;;N;;;;; 2CFB;COPTIC OLD NUBIAN INDIRECT QUESTION MARK;Po;0;ON;;;;;N;;;;; @@ -10018,6 +10092,8 @@ 2D23;GEORGIAN SMALL LETTER WE;Ll;0;L;;;;;N;;;10C3;;10C3 2D24;GEORGIAN SMALL LETTER HAR;Ll;0;L;;;;;N;;;10C4;;10C4 2D25;GEORGIAN SMALL LETTER HOE;Ll;0;L;;;;;N;;;10C5;;10C5 +2D27;GEORGIAN SMALL LETTER YN;Ll;0;L;;;;;N;;;10C7;;10C7 +2D2D;GEORGIAN SMALL LETTER AEN;Ll;0;L;;;;;N;;;10CD;;10CD 2D30;TIFINAGH LETTER YA;Lo;0;L;;;;;N;;;;; 2D31;TIFINAGH LETTER YAB;Lo;0;L;;;;;N;;;;; 2D32;TIFINAGH LETTER YABH;Lo;0;L;;;;;N;;;;; @@ -10072,6 +10148,8 @@ 2D63;TIFINAGH LETTER YAZ;Lo;0;L;;;;;N;;;;; 
2D64;TIFINAGH LETTER TAWELLEMET YAZ;Lo;0;L;;;;;N;;;;; 2D65;TIFINAGH LETTER YAZZ;Lo;0;L;;;;;N;;;;; +2D66;TIFINAGH LETTER YE;Lo;0;L;;;;;N;;;;; +2D67;TIFINAGH LETTER YO;Lo;0;L;;;;;N;;;;; 2D6F;TIFINAGH MODIFIER LETTER LABIALIZATION MARK;Lm;0;L;<super> 2D61;;;;N;;;;; 2D70;TIFINAGH SEPARATOR MARK;Po;0;L;;;;;N;;;;; 2D7F;TIFINAGH CONSONANT JOINER;Mn;9;NSM;;;;;N;;;;; @@ -10236,6 +10314,16 @@ 2E2F;VERTICAL TILDE;Lm;0;ON;;;;;N;;;;; 2E30;RING POINT;Po;0;ON;;;;;N;;;;; 2E31;WORD SEPARATOR MIDDLE DOT;Po;0;ON;;;;;N;;;;; +2E32;TURNED COMMA;Po;0;ON;;;;;N;;;;; +2E33;RAISED DOT;Po;0;ON;;;;;N;;;;; +2E34;RAISED COMMA;Po;0;ON;;;;;N;;;;; +2E35;TURNED SEMICOLON;Po;0;ON;;;;;N;;;;; +2E36;DAGGER WITH LEFT GUARD;Po;0;ON;;;;;N;;;;; +2E37;DAGGER WITH RIGHT GUARD;Po;0;ON;;;;;N;;;;; +2E38;TURNED DAGGER;Po;0;ON;;;;;N;;;;; +2E39;TOP HALF SECTION SIGN;Po;0;ON;;;;;N;;;;; +2E3A;TWO-EM DASH;Pd;0;ON;;;;;N;;;;; +2E3B;THREE-EM DASH;Pd;0;ON;;;;;N;;;;; 2E80;CJK RADICAL REPEAT;So;0;ON;;;;;N;;;;; 2E81;CJK RADICAL CLIFF;So;0;ON;;;;;N;;;;; 2E82;CJK RADICAL SECOND ONE;So;0;ON;;;;;N;;;;; @@ -10623,8 +10711,8 @@ 302B;IDEOGRAPHIC RISING TONE MARK;Mn;228;NSM;;;;;N;;;;; 302C;IDEOGRAPHIC DEPARTING TONE MARK;Mn;232;NSM;;;;;N;;;;; 302D;IDEOGRAPHIC ENTERING TONE MARK;Mn;222;NSM;;;;;N;;;;; -302E;HANGUL SINGLE DOT TONE MARK;Mn;224;NSM;;;;;N;;;;; -302F;HANGUL DOUBLE DOT TONE MARK;Mn;224;NSM;;;;;N;;;;; +302E;HANGUL SINGLE DOT TONE MARK;Mc;224;L;;;;;N;;;;; +302F;HANGUL DOUBLE DOT TONE MARK;Mc;224;L;;;;;N;;;;; 3030;WAVY DASH;Pd;0;ON;;;;;N;;;;; 3031;VERTICAL KANA REPEAT MARK;Lm;0;L;;;;;N;;;;; 3032;VERTICAL KANA REPEAT WITH VOICED SOUND MARK;Lm;0;L;;;;;N;;;;; @@ -11131,14 +11219,14 @@ 3245;CIRCLED IDEOGRAPH KINDERGARTEN;So;0;L;<circle> 5E7C;;;;N;;;;; 3246;CIRCLED IDEOGRAPH SCHOOL;So;0;L;<circle> 6587;;;;N;;;;; 3247;CIRCLED IDEOGRAPH KOTO;So;0;L;<circle> 7B8F;;;;N;;;;; -3248;CIRCLED NUMBER TEN ON BLACK SQUARE;So;0;L;;;;;N;;;;; -3249;CIRCLED NUMBER TWENTY ON BLACK SQUARE;So;0;L;;;;;N;;;;; -324A;CIRCLED NUMBER THIRTY ON BLACK 
SQUARE;So;0;L;;;;;N;;;;; -324B;CIRCLED NUMBER FORTY ON BLACK SQUARE;So;0;L;;;;;N;;;;; -324C;CIRCLED NUMBER FIFTY ON BLACK SQUARE;So;0;L;;;;;N;;;;; -324D;CIRCLED NUMBER SIXTY ON BLACK SQUARE;So;0;L;;;;;N;;;;; -324E;CIRCLED NUMBER SEVENTY ON BLACK SQUARE;So;0;L;;;;;N;;;;; -324F;CIRCLED NUMBER EIGHTY ON BLACK SQUARE;So;0;L;;;;;N;;;;; +3248;CIRCLED NUMBER TEN ON BLACK SQUARE;No;0;L;;;;10;N;;;;; +3249;CIRCLED NUMBER TWENTY ON BLACK SQUARE;No;0;L;;;;20;N;;;;; +324A;CIRCLED NUMBER THIRTY ON BLACK SQUARE;No;0;L;;;;30;N;;;;; +324B;CIRCLED NUMBER FORTY ON BLACK SQUARE;No;0;L;;;;40;N;;;;; +324C;CIRCLED NUMBER FIFTY ON BLACK SQUARE;No;0;L;;;;50;N;;;;; +324D;CIRCLED NUMBER SIXTY ON BLACK SQUARE;No;0;L;;;;60;N;;;;; +324E;CIRCLED NUMBER SEVENTY ON BLACK SQUARE;No;0;L;;;;70;N;;;;; +324F;CIRCLED NUMBER EIGHTY ON BLACK SQUARE;No;0;L;;;;80;N;;;;; 3250;PARTNERSHIP SIGN;So;0;ON;<square> 0050 0054 0045;;;;N;;;;; 3251;CIRCLED NUMBER TWENTY ONE;No;0;ON;<circle> 0032 0031;;;21;N;;;;; 3252;CIRCLED NUMBER TWENTY TWO;No;0;ON;<circle> 0032 0032;;;22;N;;;;; @@ -11637,7 +11725,7 @@ 4DFE;HEXAGRAM FOR AFTER COMPLETION;So;0;ON;;;;;N;;;;; 4DFF;HEXAGRAM FOR BEFORE COMPLETION;So;0;ON;;;;;N;;;;; 4E00;<CJK Ideograph, First>;Lo;0;L;;;;;N;;;;; -9FCB;<CJK Ideograph, Last>;Lo;0;L;;;;;N;;;;; +9FCC;<CJK Ideograph, Last>;Lo;0;L;;;;;N;;;;; A000;YI SYLLABLE IT;Lo;0;L;;;;;N;;;;; A001;YI SYLLABLE IX;Lo;0;L;;;;;N;;;;; A002;YI SYLLABLE I;Lo;0;L;;;;;N;;;;; @@ -13258,6 +13346,14 @@ A670;COMBINING CYRILLIC TEN MILLIONS SIGN;Me;0;NSM;;;;;N;;;;; A671;COMBINING CYRILLIC HUNDRED MILLIONS SIGN;Me;0;NSM;;;;;N;;;;; A672;COMBINING CYRILLIC THOUSAND MILLIONS SIGN;Me;0;NSM;;;;;N;;;;; A673;SLAVONIC ASTERISK;Po;0;ON;;;;;N;;;;; +A674;COMBINING CYRILLIC LETTER UKRAINIAN IE;Mn;230;NSM;;;;;N;;;;; +A675;COMBINING CYRILLIC LETTER I;Mn;230;NSM;;;;;N;;;;; +A676;COMBINING CYRILLIC LETTER YI;Mn;230;NSM;;;;;N;;;;; +A677;COMBINING CYRILLIC LETTER U;Mn;230;NSM;;;;;N;;;;; +A678;COMBINING CYRILLIC LETTER HARD SIGN;Mn;230;NSM;;;;;N;;;;; 
+A679;COMBINING CYRILLIC LETTER YERU;Mn;230;NSM;;;;;N;;;;; +A67A;COMBINING CYRILLIC LETTER SOFT SIGN;Mn;230;NSM;;;;;N;;;;; +A67B;COMBINING CYRILLIC LETTER OMEGA;Mn;230;NSM;;;;;N;;;;; A67C;COMBINING CYRILLIC KAVYKA;Mn;230;NSM;;;;;N;;;;; A67D;COMBINING CYRILLIC PAYEROK;Mn;230;NSM;;;;;N;;;;; A67E;CYRILLIC KAVYKA;Po;0;ON;;;;;N;;;;; @@ -13286,6 +13382,7 @@ A694;CYRILLIC CAPITAL LETTER HWE;Lu;0;L;;;;;N;;;;A695; A695;CYRILLIC SMALL LETTER HWE;Ll;0;L;;;;;N;;;A694;;A694 A696;CYRILLIC CAPITAL LETTER SHWE;Lu;0;L;;;;;N;;;;A697; A697;CYRILLIC SMALL LETTER SHWE;Ll;0;L;;;;;N;;;A696;;A696 +A69F;COMBINING CYRILLIC LETTER IOTIFIED E;Mn;230;NSM;;;;;N;;;;; A6A0;BAMUM LETTER A;Lo;0;L;;;;;N;;;;; A6A1;BAMUM LETTER KA;Lo;0;L;;;;;N;;;;; A6A2;BAMUM LETTER U;Lo;0;L;;;;;N;;;;; @@ -13519,6 +13616,8 @@ A78D;LATIN CAPITAL LETTER TURNED H;Lu;0;L;;;;;N;;;;0265; A78E;LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT;Ll;0;L;;;;;N;;;;; A790;LATIN CAPITAL LETTER N WITH DESCENDER;Lu;0;L;;;;;N;;;;A791; A791;LATIN SMALL LETTER N WITH DESCENDER;Ll;0;L;;;;;N;;;A790;;A790 +A792;LATIN CAPITAL LETTER C WITH BAR;Lu;0;L;;;;;N;;;;A793; +A793;LATIN SMALL LETTER C WITH BAR;Ll;0;L;;;;;N;;;A792;;A792 A7A0;LATIN CAPITAL LETTER G WITH OBLIQUE STROKE;Lu;0;L;;;;;N;;;;A7A1; A7A1;LATIN SMALL LETTER G WITH OBLIQUE STROKE;Ll;0;L;;;;;N;;;A7A0;;A7A0 A7A2;LATIN CAPITAL LETTER K WITH OBLIQUE STROKE;Lu;0;L;;;;;N;;;;A7A3; @@ -13529,6 +13628,9 @@ A7A6;LATIN CAPITAL LETTER R WITH OBLIQUE STROKE;Lu;0;L;;;;;N;;;;A7A7; A7A7;LATIN SMALL LETTER R WITH OBLIQUE STROKE;Ll;0;L;;;;;N;;;A7A6;;A7A6 A7A8;LATIN CAPITAL LETTER S WITH OBLIQUE STROKE;Lu;0;L;;;;;N;;;;A7A9; A7A9;LATIN SMALL LETTER S WITH OBLIQUE STROKE;Ll;0;L;;;;;N;;;A7A8;;A7A8 +A7AA;LATIN CAPITAL LETTER H WITH HOOK;Lu;0;L;;;;;N;;;;0266; +A7F8;MODIFIER LETTER CAPITAL H WITH STROKE;Lm;0;L;<super> 0126;;;;N;;;;; +A7F9;MODIFIER LETTER SMALL LIGATURE OE;Lm;0;L;<super> 0153;;;;N;;;;; A7FA;LATIN LETTER SMALL CAPITAL TURNED M;Ll;0;L;;;;;N;;;;; A7FB;LATIN EPIGRAPHIC LETTER REVERSED 
F;Lo;0;L;;;;;N;;;;; A7FC;LATIN EPIGRAPHIC LETTER REVERSED P;Lo;0;L;;;;;N;;;;; @@ -14142,6 +14244,29 @@ AADC;TAI VIET SYMBOL NUENG;Lo;0;L;;;;;N;;;;; AADD;TAI VIET SYMBOL SAM;Lm;0;L;;;;;N;;;;; AADE;TAI VIET SYMBOL HO HOI;Po;0;L;;;;;N;;;;; AADF;TAI VIET SYMBOL KOI KOI;Po;0;L;;;;;N;;;;; +AAE0;MEETEI MAYEK LETTER E;Lo;0;L;;;;;N;;;;; +AAE1;MEETEI MAYEK LETTER O;Lo;0;L;;;;;N;;;;; +AAE2;MEETEI MAYEK LETTER CHA;Lo;0;L;;;;;N;;;;; +AAE3;MEETEI MAYEK LETTER NYA;Lo;0;L;;;;;N;;;;; +AAE4;MEETEI MAYEK LETTER TTA;Lo;0;L;;;;;N;;;;; +AAE5;MEETEI MAYEK LETTER TTHA;Lo;0;L;;;;;N;;;;; +AAE6;MEETEI MAYEK LETTER DDA;Lo;0;L;;;;;N;;;;; +AAE7;MEETEI MAYEK LETTER DDHA;Lo;0;L;;;;;N;;;;; +AAE8;MEETEI MAYEK LETTER NNA;Lo;0;L;;;;;N;;;;; +AAE9;MEETEI MAYEK LETTER SHA;Lo;0;L;;;;;N;;;;; +AAEA;MEETEI MAYEK LETTER SSA;Lo;0;L;;;;;N;;;;; +AAEB;MEETEI MAYEK VOWEL SIGN II;Mc;0;L;;;;;N;;;;; +AAEC;MEETEI MAYEK VOWEL SIGN UU;Mn;0;NSM;;;;;N;;;;; +AAED;MEETEI MAYEK VOWEL SIGN AAI;Mn;0;NSM;;;;;N;;;;; +AAEE;MEETEI MAYEK VOWEL SIGN AU;Mc;0;L;;;;;N;;;;; +AAEF;MEETEI MAYEK VOWEL SIGN AAU;Mc;0;L;;;;;N;;;;; +AAF0;MEETEI MAYEK CHEIKHAN;Po;0;L;;;;;N;;;;; +AAF1;MEETEI MAYEK AHANG KHUDAM;Po;0;L;;;;;N;;;;; +AAF2;MEETEI MAYEK ANJI;Lo;0;L;;;;;N;;;;; +AAF3;MEETEI MAYEK SYLLABLE REPETITION MARK;Lm;0;L;;;;;N;;;;; +AAF4;MEETEI MAYEK WORD REPETITION MARK;Lm;0;L;;;;;N;;;;; +AAF5;MEETEI MAYEK VOWEL SIGN VISARGA;Mc;0;L;;;;;N;;;;; +AAF6;MEETEI MAYEK VIRAMA;Mn;9;NSM;;;;;N;;;;; AB01;ETHIOPIC SYLLABLE TTHU;Lo;0;L;;;;;N;;;;; AB02;ETHIOPIC SYLLABLE TTHI;Lo;0;L;;;;;N;;;;; AB03;ETHIOPIC SYLLABLE TTHAA;Lo;0;L;;;;;N;;;;; @@ -14614,6 +14739,8 @@ FA2A;CJK COMPATIBILITY IDEOGRAPH-FA2A;Lo;0;L;98EF;;;;N;;;;; FA2B;CJK COMPATIBILITY IDEOGRAPH-FA2B;Lo;0;L;98FC;;;;N;;;;; FA2C;CJK COMPATIBILITY IDEOGRAPH-FA2C;Lo;0;L;9928;;;;N;;;;; FA2D;CJK COMPATIBILITY IDEOGRAPH-FA2D;Lo;0;L;9DB4;;;;N;;;;; +FA2E;CJK COMPATIBILITY IDEOGRAPH-FA2E;Lo;0;L;90DE;;;;N;;;;; +FA2F;CJK COMPATIBILITY IDEOGRAPH-FA2F;Lo;0;L;96B7;;;;N;;;;; FA30;CJK COMPATIBILITY 
IDEOGRAPH-FA30;Lo;0;L;4FAE;;;;N;;;;; FA31;CJK COMPATIBILITY IDEOGRAPH-FA31;Lo;0;L;50E7;;;;N;;;;; FA32;CJK COMPATIBILITY IDEOGRAPH-FA32;Lo;0;L;514D;;;;N;;;;; @@ -16126,7 +16253,7 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 100FA;LINEAR B IDEOGRAM VESSEL B305;Lo;0;L;;;;;N;;;;; 10100;AEGEAN WORD SEPARATOR LINE;Po;0;L;;;;;N;;;;; 10101;AEGEAN WORD SEPARATOR DOT;Po;0;ON;;;;;N;;;;; -10102;AEGEAN CHECK MARK;So;0;L;;;;;N;;;;; +10102;AEGEAN CHECK MARK;Po;0;L;;;;;N;;;;; 10107;AEGEAN NUMBER ONE;No;0;L;;;;1;N;;;;; 10108;AEGEAN NUMBER TWO;No;0;L;;;;2;N;;;;; 10109;AEGEAN NUMBER THREE;No;0;L;;;;3;N;;;;; @@ -16845,6 +16972,64 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 10938;LYDIAN LETTER NN;Lo;0;R;;;;;N;;;;; 10939;LYDIAN LETTER C;Lo;0;R;;;;;N;;;;; 1093F;LYDIAN TRIANGULAR MARK;Po;0;R;;;;;N;;;;; +10980;MEROITIC HIEROGLYPHIC LETTER A;Lo;0;R;;;;;N;;;;; +10981;MEROITIC HIEROGLYPHIC LETTER E;Lo;0;R;;;;;N;;;;; +10982;MEROITIC HIEROGLYPHIC LETTER I;Lo;0;R;;;;;N;;;;; +10983;MEROITIC HIEROGLYPHIC LETTER O;Lo;0;R;;;;;N;;;;; +10984;MEROITIC HIEROGLYPHIC LETTER YA;Lo;0;R;;;;;N;;;;; +10985;MEROITIC HIEROGLYPHIC LETTER WA;Lo;0;R;;;;;N;;;;; +10986;MEROITIC HIEROGLYPHIC LETTER BA;Lo;0;R;;;;;N;;;;; +10987;MEROITIC HIEROGLYPHIC LETTER BA-2;Lo;0;R;;;;;N;;;;; +10988;MEROITIC HIEROGLYPHIC LETTER PA;Lo;0;R;;;;;N;;;;; +10989;MEROITIC HIEROGLYPHIC LETTER MA;Lo;0;R;;;;;N;;;;; +1098A;MEROITIC HIEROGLYPHIC LETTER NA;Lo;0;R;;;;;N;;;;; +1098B;MEROITIC HIEROGLYPHIC LETTER NA-2;Lo;0;R;;;;;N;;;;; +1098C;MEROITIC HIEROGLYPHIC LETTER NE;Lo;0;R;;;;;N;;;;; +1098D;MEROITIC HIEROGLYPHIC LETTER NE-2;Lo;0;R;;;;;N;;;;; +1098E;MEROITIC HIEROGLYPHIC LETTER RA;Lo;0;R;;;;;N;;;;; +1098F;MEROITIC HIEROGLYPHIC LETTER RA-2;Lo;0;R;;;;;N;;;;; +10990;MEROITIC HIEROGLYPHIC LETTER LA;Lo;0;R;;;;;N;;;;; +10991;MEROITIC HIEROGLYPHIC LETTER KHA;Lo;0;R;;;;;N;;;;; +10992;MEROITIC HIEROGLYPHIC LETTER HHA;Lo;0;R;;;;;N;;;;; +10993;MEROITIC HIEROGLYPHIC LETTER SA;Lo;0;R;;;;;N;;;;; +10994;MEROITIC HIEROGLYPHIC LETTER 
SA-2;Lo;0;R;;;;;N;;;;; +10995;MEROITIC HIEROGLYPHIC LETTER SE;Lo;0;R;;;;;N;;;;; +10996;MEROITIC HIEROGLYPHIC LETTER KA;Lo;0;R;;;;;N;;;;; +10997;MEROITIC HIEROGLYPHIC LETTER QA;Lo;0;R;;;;;N;;;;; +10998;MEROITIC HIEROGLYPHIC LETTER TA;Lo;0;R;;;;;N;;;;; +10999;MEROITIC HIEROGLYPHIC LETTER TA-2;Lo;0;R;;;;;N;;;;; +1099A;MEROITIC HIEROGLYPHIC LETTER TE;Lo;0;R;;;;;N;;;;; +1099B;MEROITIC HIEROGLYPHIC LETTER TE-2;Lo;0;R;;;;;N;;;;; +1099C;MEROITIC HIEROGLYPHIC LETTER TO;Lo;0;R;;;;;N;;;;; +1099D;MEROITIC HIEROGLYPHIC LETTER DA;Lo;0;R;;;;;N;;;;; +1099E;MEROITIC HIEROGLYPHIC SYMBOL VIDJ;Lo;0;R;;;;;N;;;;; +1099F;MEROITIC HIEROGLYPHIC SYMBOL VIDJ-2;Lo;0;R;;;;;N;;;;; +109A0;MEROITIC CURSIVE LETTER A;Lo;0;R;;;;;N;;;;; +109A1;MEROITIC CURSIVE LETTER E;Lo;0;R;;;;;N;;;;; +109A2;MEROITIC CURSIVE LETTER I;Lo;0;R;;;;;N;;;;; +109A3;MEROITIC CURSIVE LETTER O;Lo;0;R;;;;;N;;;;; +109A4;MEROITIC CURSIVE LETTER YA;Lo;0;R;;;;;N;;;;; +109A5;MEROITIC CURSIVE LETTER WA;Lo;0;R;;;;;N;;;;; +109A6;MEROITIC CURSIVE LETTER BA;Lo;0;R;;;;;N;;;;; +109A7;MEROITIC CURSIVE LETTER PA;Lo;0;R;;;;;N;;;;; +109A8;MEROITIC CURSIVE LETTER MA;Lo;0;R;;;;;N;;;;; +109A9;MEROITIC CURSIVE LETTER NA;Lo;0;R;;;;;N;;;;; +109AA;MEROITIC CURSIVE LETTER NE;Lo;0;R;;;;;N;;;;; +109AB;MEROITIC CURSIVE LETTER RA;Lo;0;R;;;;;N;;;;; +109AC;MEROITIC CURSIVE LETTER LA;Lo;0;R;;;;;N;;;;; +109AD;MEROITIC CURSIVE LETTER KHA;Lo;0;R;;;;;N;;;;; +109AE;MEROITIC CURSIVE LETTER HHA;Lo;0;R;;;;;N;;;;; +109AF;MEROITIC CURSIVE LETTER SA;Lo;0;R;;;;;N;;;;; +109B0;MEROITIC CURSIVE LETTER ARCHAIC SA;Lo;0;R;;;;;N;;;;; +109B1;MEROITIC CURSIVE LETTER SE;Lo;0;R;;;;;N;;;;; +109B2;MEROITIC CURSIVE LETTER KA;Lo;0;R;;;;;N;;;;; +109B3;MEROITIC CURSIVE LETTER QA;Lo;0;R;;;;;N;;;;; +109B4;MEROITIC CURSIVE LETTER TA;Lo;0;R;;;;;N;;;;; +109B5;MEROITIC CURSIVE LETTER TE;Lo;0;R;;;;;N;;;;; +109B6;MEROITIC CURSIVE LETTER TO;Lo;0;R;;;;;N;;;;; +109B7;MEROITIC CURSIVE LETTER DA;Lo;0;R;;;;;N;;;;; +109BE;MEROITIC CURSIVE LOGOGRAM RMT;Lo;0;R;;;;;N;;;;; +109BF;MEROITIC CURSIVE 
LOGOGRAM IMN;Lo;0;R;;;;;N;;;;; 10A00;KHAROSHTHI LETTER A;Lo;0;R;;;;;N;;;;; 10A01;KHAROSHTHI VOWEL SIGN I;Mn;0;NSM;;;;;N;;;;; 10A02;KHAROSHTHI VOWEL SIGN U;Mn;0;NSM;;;;;N;;;;; @@ -17338,6 +17523,257 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 110BF;KAITHI DOUBLE SECTION MARK;Po;0;L;;;;;N;;;;; 110C0;KAITHI DANDA;Po;0;L;;;;;N;;;;; 110C1;KAITHI DOUBLE DANDA;Po;0;L;;;;;N;;;;; +110D0;SORA SOMPENG LETTER SAH;Lo;0;L;;;;;N;;;;; +110D1;SORA SOMPENG LETTER TAH;Lo;0;L;;;;;N;;;;; +110D2;SORA SOMPENG LETTER BAH;Lo;0;L;;;;;N;;;;; +110D3;SORA SOMPENG LETTER CAH;Lo;0;L;;;;;N;;;;; +110D4;SORA SOMPENG LETTER DAH;Lo;0;L;;;;;N;;;;; +110D5;SORA SOMPENG LETTER GAH;Lo;0;L;;;;;N;;;;; +110D6;SORA SOMPENG LETTER MAH;Lo;0;L;;;;;N;;;;; +110D7;SORA SOMPENG LETTER NGAH;Lo;0;L;;;;;N;;;;; +110D8;SORA SOMPENG LETTER LAH;Lo;0;L;;;;;N;;;;; +110D9;SORA SOMPENG LETTER NAH;Lo;0;L;;;;;N;;;;; +110DA;SORA SOMPENG LETTER VAH;Lo;0;L;;;;;N;;;;; +110DB;SORA SOMPENG LETTER PAH;Lo;0;L;;;;;N;;;;; +110DC;SORA SOMPENG LETTER YAH;Lo;0;L;;;;;N;;;;; +110DD;SORA SOMPENG LETTER RAH;Lo;0;L;;;;;N;;;;; +110DE;SORA SOMPENG LETTER HAH;Lo;0;L;;;;;N;;;;; +110DF;SORA SOMPENG LETTER KAH;Lo;0;L;;;;;N;;;;; +110E0;SORA SOMPENG LETTER JAH;Lo;0;L;;;;;N;;;;; +110E1;SORA SOMPENG LETTER NYAH;Lo;0;L;;;;;N;;;;; +110E2;SORA SOMPENG LETTER AH;Lo;0;L;;;;;N;;;;; +110E3;SORA SOMPENG LETTER EEH;Lo;0;L;;;;;N;;;;; +110E4;SORA SOMPENG LETTER IH;Lo;0;L;;;;;N;;;;; +110E5;SORA SOMPENG LETTER UH;Lo;0;L;;;;;N;;;;; +110E6;SORA SOMPENG LETTER OH;Lo;0;L;;;;;N;;;;; +110E7;SORA SOMPENG LETTER EH;Lo;0;L;;;;;N;;;;; +110E8;SORA SOMPENG LETTER MAE;Lo;0;L;;;;;N;;;;; +110F0;SORA SOMPENG DIGIT ZERO;Nd;0;L;;0;0;0;N;;;;; +110F1;SORA SOMPENG DIGIT ONE;Nd;0;L;;1;1;1;N;;;;; +110F2;SORA SOMPENG DIGIT TWO;Nd;0;L;;2;2;2;N;;;;; +110F3;SORA SOMPENG DIGIT THREE;Nd;0;L;;3;3;3;N;;;;; +110F4;SORA SOMPENG DIGIT FOUR;Nd;0;L;;4;4;4;N;;;;; +110F5;SORA SOMPENG DIGIT FIVE;Nd;0;L;;5;5;5;N;;;;; +110F6;SORA SOMPENG DIGIT SIX;Nd;0;L;;6;6;6;N;;;;; +110F7;SORA SOMPENG DIGIT 
SEVEN;Nd;0;L;;7;7;7;N;;;;; +110F8;SORA SOMPENG DIGIT EIGHT;Nd;0;L;;8;8;8;N;;;;; +110F9;SORA SOMPENG DIGIT NINE;Nd;0;L;;9;9;9;N;;;;; +11100;CHAKMA SIGN CANDRABINDU;Mn;230;NSM;;;;;N;;;;; +11101;CHAKMA SIGN ANUSVARA;Mn;230;NSM;;;;;N;;;;; +11102;CHAKMA SIGN VISARGA;Mn;230;NSM;;;;;N;;;;; +11103;CHAKMA LETTER AA;Lo;0;L;;;;;N;;;;; +11104;CHAKMA LETTER I;Lo;0;L;;;;;N;;;;; +11105;CHAKMA LETTER U;Lo;0;L;;;;;N;;;;; +11106;CHAKMA LETTER E;Lo;0;L;;;;;N;;;;; +11107;CHAKMA LETTER KAA;Lo;0;L;;;;;N;;;;; +11108;CHAKMA LETTER KHAA;Lo;0;L;;;;;N;;;;; +11109;CHAKMA LETTER GAA;Lo;0;L;;;;;N;;;;; +1110A;CHAKMA LETTER GHAA;Lo;0;L;;;;;N;;;;; +1110B;CHAKMA LETTER NGAA;Lo;0;L;;;;;N;;;;; +1110C;CHAKMA LETTER CAA;Lo;0;L;;;;;N;;;;; +1110D;CHAKMA LETTER CHAA;Lo;0;L;;;;;N;;;;; +1110E;CHAKMA LETTER JAA;Lo;0;L;;;;;N;;;;; +1110F;CHAKMA LETTER JHAA;Lo;0;L;;;;;N;;;;; +11110;CHAKMA LETTER NYAA;Lo;0;L;;;;;N;;;;; +11111;CHAKMA LETTER TTAA;Lo;0;L;;;;;N;;;;; +11112;CHAKMA LETTER TTHAA;Lo;0;L;;;;;N;;;;; +11113;CHAKMA LETTER DDAA;Lo;0;L;;;;;N;;;;; +11114;CHAKMA LETTER DDHAA;Lo;0;L;;;;;N;;;;; +11115;CHAKMA LETTER NNAA;Lo;0;L;;;;;N;;;;; +11116;CHAKMA LETTER TAA;Lo;0;L;;;;;N;;;;; +11117;CHAKMA LETTER THAA;Lo;0;L;;;;;N;;;;; +11118;CHAKMA LETTER DAA;Lo;0;L;;;;;N;;;;; +11119;CHAKMA LETTER DHAA;Lo;0;L;;;;;N;;;;; +1111A;CHAKMA LETTER NAA;Lo;0;L;;;;;N;;;;; +1111B;CHAKMA LETTER PAA;Lo;0;L;;;;;N;;;;; +1111C;CHAKMA LETTER PHAA;Lo;0;L;;;;;N;;;;; +1111D;CHAKMA LETTER BAA;Lo;0;L;;;;;N;;;;; +1111E;CHAKMA LETTER BHAA;Lo;0;L;;;;;N;;;;; +1111F;CHAKMA LETTER MAA;Lo;0;L;;;;;N;;;;; +11120;CHAKMA LETTER YYAA;Lo;0;L;;;;;N;;;;; +11121;CHAKMA LETTER YAA;Lo;0;L;;;;;N;;;;; +11122;CHAKMA LETTER RAA;Lo;0;L;;;;;N;;;;; +11123;CHAKMA LETTER LAA;Lo;0;L;;;;;N;;;;; +11124;CHAKMA LETTER WAA;Lo;0;L;;;;;N;;;;; +11125;CHAKMA LETTER SAA;Lo;0;L;;;;;N;;;;; +11126;CHAKMA LETTER HAA;Lo;0;L;;;;;N;;;;; +11127;CHAKMA VOWEL SIGN A;Mn;0;NSM;;;;;N;;;;; +11128;CHAKMA VOWEL SIGN I;Mn;0;NSM;;;;;N;;;;; +11129;CHAKMA VOWEL SIGN II;Mn;0;NSM;;;;;N;;;;; +1112A;CHAKMA 
VOWEL SIGN U;Mn;0;NSM;;;;;N;;;;; +1112B;CHAKMA VOWEL SIGN UU;Mn;0;NSM;;;;;N;;;;; +1112C;CHAKMA VOWEL SIGN E;Mc;0;L;;;;;N;;;;; +1112D;CHAKMA VOWEL SIGN AI;Mn;0;NSM;;;;;N;;;;; +1112E;CHAKMA VOWEL SIGN O;Mn;0;NSM;11131 11127;;;;N;;;;; +1112F;CHAKMA VOWEL SIGN AU;Mn;0;NSM;11132 11127;;;;N;;;;; +11130;CHAKMA VOWEL SIGN OI;Mn;0;NSM;;;;;N;;;;; +11131;CHAKMA O MARK;Mn;0;NSM;;;;;N;;;;; +11132;CHAKMA AU MARK;Mn;0;NSM;;;;;N;;;;; +11133;CHAKMA VIRAMA;Mn;9;NSM;;;;;N;;;;; +11134;CHAKMA MAAYYAA;Mn;9;NSM;;;;;N;;;;; +11136;CHAKMA DIGIT ZERO;Nd;0;L;;0;0;0;N;;;;; +11137;CHAKMA DIGIT ONE;Nd;0;L;;1;1;1;N;;;;; +11138;CHAKMA DIGIT TWO;Nd;0;L;;2;2;2;N;;;;; +11139;CHAKMA DIGIT THREE;Nd;0;L;;3;3;3;N;;;;; +1113A;CHAKMA DIGIT FOUR;Nd;0;L;;4;4;4;N;;;;; +1113B;CHAKMA DIGIT FIVE;Nd;0;L;;5;5;5;N;;;;; +1113C;CHAKMA DIGIT SIX;Nd;0;L;;6;6;6;N;;;;; +1113D;CHAKMA DIGIT SEVEN;Nd;0;L;;7;7;7;N;;;;; +1113E;CHAKMA DIGIT EIGHT;Nd;0;L;;8;8;8;N;;;;; +1113F;CHAKMA DIGIT NINE;Nd;0;L;;9;9;9;N;;;;; +11140;CHAKMA SECTION MARK;Po;0;L;;;;;N;;;;; +11141;CHAKMA DANDA;Po;0;L;;;;;N;;;;; +11142;CHAKMA DOUBLE DANDA;Po;0;L;;;;;N;;;;; +11143;CHAKMA QUESTION MARK;Po;0;L;;;;;N;;;;; +11180;SHARADA SIGN CANDRABINDU;Mn;0;NSM;;;;;N;;;;; +11181;SHARADA SIGN ANUSVARA;Mn;0;NSM;;;;;N;;;;; +11182;SHARADA SIGN VISARGA;Mc;0;L;;;;;N;;;;; +11183;SHARADA LETTER A;Lo;0;L;;;;;N;;;;; +11184;SHARADA LETTER AA;Lo;0;L;;;;;N;;;;; +11185;SHARADA LETTER I;Lo;0;L;;;;;N;;;;; +11186;SHARADA LETTER II;Lo;0;L;;;;;N;;;;; +11187;SHARADA LETTER U;Lo;0;L;;;;;N;;;;; +11188;SHARADA LETTER UU;Lo;0;L;;;;;N;;;;; +11189;SHARADA LETTER VOCALIC R;Lo;0;L;;;;;N;;;;; +1118A;SHARADA LETTER VOCALIC RR;Lo;0;L;;;;;N;;;;; +1118B;SHARADA LETTER VOCALIC L;Lo;0;L;;;;;N;;;;; +1118C;SHARADA LETTER VOCALIC LL;Lo;0;L;;;;;N;;;;; +1118D;SHARADA LETTER E;Lo;0;L;;;;;N;;;;; +1118E;SHARADA LETTER AI;Lo;0;L;;;;;N;;;;; +1118F;SHARADA LETTER O;Lo;0;L;;;;;N;;;;; +11190;SHARADA LETTER AU;Lo;0;L;;;;;N;;;;; +11191;SHARADA LETTER KA;Lo;0;L;;;;;N;;;;; +11192;SHARADA LETTER KHA;Lo;0;L;;;;;N;;;;; 
+11193;SHARADA LETTER GA;Lo;0;L;;;;;N;;;;; +11194;SHARADA LETTER GHA;Lo;0;L;;;;;N;;;;; +11195;SHARADA LETTER NGA;Lo;0;L;;;;;N;;;;; +11196;SHARADA LETTER CA;Lo;0;L;;;;;N;;;;; +11197;SHARADA LETTER CHA;Lo;0;L;;;;;N;;;;; +11198;SHARADA LETTER JA;Lo;0;L;;;;;N;;;;; +11199;SHARADA LETTER JHA;Lo;0;L;;;;;N;;;;; +1119A;SHARADA LETTER NYA;Lo;0;L;;;;;N;;;;; +1119B;SHARADA LETTER TTA;Lo;0;L;;;;;N;;;;; +1119C;SHARADA LETTER TTHA;Lo;0;L;;;;;N;;;;; +1119D;SHARADA LETTER DDA;Lo;0;L;;;;;N;;;;; +1119E;SHARADA LETTER DDHA;Lo;0;L;;;;;N;;;;; +1119F;SHARADA LETTER NNA;Lo;0;L;;;;;N;;;;; +111A0;SHARADA LETTER TA;Lo;0;L;;;;;N;;;;; +111A1;SHARADA LETTER THA;Lo;0;L;;;;;N;;;;; +111A2;SHARADA LETTER DA;Lo;0;L;;;;;N;;;;; +111A3;SHARADA LETTER DHA;Lo;0;L;;;;;N;;;;; +111A4;SHARADA LETTER NA;Lo;0;L;;;;;N;;;;; +111A5;SHARADA LETTER PA;Lo;0;L;;;;;N;;;;; +111A6;SHARADA LETTER PHA;Lo;0;L;;;;;N;;;;; +111A7;SHARADA LETTER BA;Lo;0;L;;;;;N;;;;; +111A8;SHARADA LETTER BHA;Lo;0;L;;;;;N;;;;; +111A9;SHARADA LETTER MA;Lo;0;L;;;;;N;;;;; +111AA;SHARADA LETTER YA;Lo;0;L;;;;;N;;;;; +111AB;SHARADA LETTER RA;Lo;0;L;;;;;N;;;;; +111AC;SHARADA LETTER LA;Lo;0;L;;;;;N;;;;; +111AD;SHARADA LETTER LLA;Lo;0;L;;;;;N;;;;; +111AE;SHARADA LETTER VA;Lo;0;L;;;;;N;;;;; +111AF;SHARADA LETTER SHA;Lo;0;L;;;;;N;;;;; +111B0;SHARADA LETTER SSA;Lo;0;L;;;;;N;;;;; +111B1;SHARADA LETTER SA;Lo;0;L;;;;;N;;;;; +111B2;SHARADA LETTER HA;Lo;0;L;;;;;N;;;;; +111B3;SHARADA VOWEL SIGN AA;Mc;0;L;;;;;N;;;;; +111B4;SHARADA VOWEL SIGN I;Mc;0;L;;;;;N;;;;; +111B5;SHARADA VOWEL SIGN II;Mc;0;L;;;;;N;;;;; +111B6;SHARADA VOWEL SIGN U;Mn;0;NSM;;;;;N;;;;; +111B7;SHARADA VOWEL SIGN UU;Mn;0;NSM;;;;;N;;;;; +111B8;SHARADA VOWEL SIGN VOCALIC R;Mn;0;NSM;;;;;N;;;;; +111B9;SHARADA VOWEL SIGN VOCALIC RR;Mn;0;NSM;;;;;N;;;;; +111BA;SHARADA VOWEL SIGN VOCALIC L;Mn;0;NSM;;;;;N;;;;; +111BB;SHARADA VOWEL SIGN VOCALIC LL;Mn;0;NSM;;;;;N;;;;; +111BC;SHARADA VOWEL SIGN E;Mn;0;NSM;;;;;N;;;;; +111BD;SHARADA VOWEL SIGN AI;Mn;0;NSM;;;;;N;;;;; +111BE;SHARADA VOWEL SIGN 
O;Mn;0;NSM;;;;;N;;;;; +111BF;SHARADA VOWEL SIGN AU;Mc;0;L;;;;;N;;;;; +111C0;SHARADA SIGN VIRAMA;Mc;9;L;;;;;N;;;;; +111C1;SHARADA SIGN AVAGRAHA;Lo;0;L;;;;;N;;;;; +111C2;SHARADA SIGN JIHVAMULIYA;Lo;0;L;;;;;N;;;;; +111C3;SHARADA SIGN UPADHMANIYA;Lo;0;L;;;;;N;;;;; +111C4;SHARADA OM;Lo;0;L;;;;;N;;;;; +111C5;SHARADA DANDA;Po;0;L;;;;;N;;;;; +111C6;SHARADA DOUBLE DANDA;Po;0;L;;;;;N;;;;; +111C7;SHARADA ABBREVIATION SIGN;Po;0;L;;;;;N;;;;; +111C8;SHARADA SEPARATOR;Po;0;L;;;;;N;;;;; +111D0;SHARADA DIGIT ZERO;Nd;0;L;;0;0;0;N;;;;; +111D1;SHARADA DIGIT ONE;Nd;0;L;;1;1;1;N;;;;; +111D2;SHARADA DIGIT TWO;Nd;0;L;;2;2;2;N;;;;; +111D3;SHARADA DIGIT THREE;Nd;0;L;;3;3;3;N;;;;; +111D4;SHARADA DIGIT FOUR;Nd;0;L;;4;4;4;N;;;;; +111D5;SHARADA DIGIT FIVE;Nd;0;L;;5;5;5;N;;;;; +111D6;SHARADA DIGIT SIX;Nd;0;L;;6;6;6;N;;;;; +111D7;SHARADA DIGIT SEVEN;Nd;0;L;;7;7;7;N;;;;; +111D8;SHARADA DIGIT EIGHT;Nd;0;L;;8;8;8;N;;;;; +111D9;SHARADA DIGIT NINE;Nd;0;L;;9;9;9;N;;;;; +11680;TAKRI LETTER A;Lo;0;L;;;;;N;;;;; +11681;TAKRI LETTER AA;Lo;0;L;;;;;N;;;;; +11682;TAKRI LETTER I;Lo;0;L;;;;;N;;;;; +11683;TAKRI LETTER II;Lo;0;L;;;;;N;;;;; +11684;TAKRI LETTER U;Lo;0;L;;;;;N;;;;; +11685;TAKRI LETTER UU;Lo;0;L;;;;;N;;;;; +11686;TAKRI LETTER E;Lo;0;L;;;;;N;;;;; +11687;TAKRI LETTER AI;Lo;0;L;;;;;N;;;;; +11688;TAKRI LETTER O;Lo;0;L;;;;;N;;;;; +11689;TAKRI LETTER AU;Lo;0;L;;;;;N;;;;; +1168A;TAKRI LETTER KA;Lo;0;L;;;;;N;;;;; +1168B;TAKRI LETTER KHA;Lo;0;L;;;;;N;;;;; +1168C;TAKRI LETTER GA;Lo;0;L;;;;;N;;;;; +1168D;TAKRI LETTER GHA;Lo;0;L;;;;;N;;;;; +1168E;TAKRI LETTER NGA;Lo;0;L;;;;;N;;;;; +1168F;TAKRI LETTER CA;Lo;0;L;;;;;N;;;;; +11690;TAKRI LETTER CHA;Lo;0;L;;;;;N;;;;; +11691;TAKRI LETTER JA;Lo;0;L;;;;;N;;;;; +11692;TAKRI LETTER JHA;Lo;0;L;;;;;N;;;;; +11693;TAKRI LETTER NYA;Lo;0;L;;;;;N;;;;; +11694;TAKRI LETTER TTA;Lo;0;L;;;;;N;;;;; +11695;TAKRI LETTER TTHA;Lo;0;L;;;;;N;;;;; +11696;TAKRI LETTER DDA;Lo;0;L;;;;;N;;;;; +11697;TAKRI LETTER DDHA;Lo;0;L;;;;;N;;;;; +11698;TAKRI LETTER NNA;Lo;0;L;;;;;N;;;;; +11699;TAKRI LETTER 
TA;Lo;0;L;;;;;N;;;;; +1169A;TAKRI LETTER THA;Lo;0;L;;;;;N;;;;; +1169B;TAKRI LETTER DA;Lo;0;L;;;;;N;;;;; +1169C;TAKRI LETTER DHA;Lo;0;L;;;;;N;;;;; +1169D;TAKRI LETTER NA;Lo;0;L;;;;;N;;;;; +1169E;TAKRI LETTER PA;Lo;0;L;;;;;N;;;;; +1169F;TAKRI LETTER PHA;Lo;0;L;;;;;N;;;;; +116A0;TAKRI LETTER BA;Lo;0;L;;;;;N;;;;; +116A1;TAKRI LETTER BHA;Lo;0;L;;;;;N;;;;; +116A2;TAKRI LETTER MA;Lo;0;L;;;;;N;;;;; +116A3;TAKRI LETTER YA;Lo;0;L;;;;;N;;;;; +116A4;TAKRI LETTER RA;Lo;0;L;;;;;N;;;;; +116A5;TAKRI LETTER LA;Lo;0;L;;;;;N;;;;; +116A6;TAKRI LETTER VA;Lo;0;L;;;;;N;;;;; +116A7;TAKRI LETTER SHA;Lo;0;L;;;;;N;;;;; +116A8;TAKRI LETTER SA;Lo;0;L;;;;;N;;;;; +116A9;TAKRI LETTER HA;Lo;0;L;;;;;N;;;;; +116AA;TAKRI LETTER RRA;Lo;0;L;;;;;N;;;;; +116AB;TAKRI SIGN ANUSVARA;Mn;0;NSM;;;;;N;;;;; +116AC;TAKRI SIGN VISARGA;Mc;0;L;;;;;N;;;;; +116AD;TAKRI VOWEL SIGN AA;Mn;0;NSM;;;;;N;;;;; +116AE;TAKRI VOWEL SIGN I;Mc;0;L;;;;;N;;;;; +116AF;TAKRI VOWEL SIGN II;Mc;0;L;;;;;N;;;;; +116B0;TAKRI VOWEL SIGN U;Mn;0;NSM;;;;;N;;;;; +116B1;TAKRI VOWEL SIGN UU;Mn;0;NSM;;;;;N;;;;; +116B2;TAKRI VOWEL SIGN E;Mn;0;NSM;;;;;N;;;;; +116B3;TAKRI VOWEL SIGN AI;Mn;0;NSM;;;;;N;;;;; +116B4;TAKRI VOWEL SIGN O;Mn;0;NSM;;;;;N;;;;; +116B5;TAKRI VOWEL SIGN AU;Mn;0;NSM;;;;;N;;;;; +116B6;TAKRI SIGN VIRAMA;Mc;9;L;;;;;N;;;;; +116B7;TAKRI SIGN NUKTA;Mn;7;NSM;;;;;N;;;;; +116C0;TAKRI DIGIT ZERO;Nd;0;L;;0;0;0;N;;;;; +116C1;TAKRI DIGIT ONE;Nd;0;L;;1;1;1;N;;;;; +116C2;TAKRI DIGIT TWO;Nd;0;L;;2;2;2;N;;;;; +116C3;TAKRI DIGIT THREE;Nd;0;L;;3;3;3;N;;;;; +116C4;TAKRI DIGIT FOUR;Nd;0;L;;4;4;4;N;;;;; +116C5;TAKRI DIGIT FIVE;Nd;0;L;;5;5;5;N;;;;; +116C6;TAKRI DIGIT SIX;Nd;0;L;;6;6;6;N;;;;; +116C7;TAKRI DIGIT SEVEN;Nd;0;L;;7;7;7;N;;;;; +116C8;TAKRI DIGIT EIGHT;Nd;0;L;;8;8;8;N;;;;; +116C9;TAKRI DIGIT NINE;Nd;0;L;;9;9;9;N;;;;; 12000;CUNEIFORM SIGN A;Lo;0;L;;;;;N;;;;; 12001;CUNEIFORM SIGN A TIMES A;Lo;0;L;;;;;N;;;;; 12002;CUNEIFORM SIGN A TIMES BAD;Lo;0;L;;;;;N;;;;; @@ -19960,6 +20396,139 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 16A36;BAMUM LETTER 
PHASE-F KPA;Lo;0;L;;;;;N;;;;; 16A37;BAMUM LETTER PHASE-F SAMBA;Lo;0;L;;;;;N;;;;; 16A38;BAMUM LETTER PHASE-F VUEQ;Lo;0;L;;;;;N;;;;; +16F00;MIAO LETTER PA;Lo;0;L;;;;;N;;;;; +16F01;MIAO LETTER BA;Lo;0;L;;;;;N;;;;; +16F02;MIAO LETTER YI PA;Lo;0;L;;;;;N;;;;; +16F03;MIAO LETTER PLA;Lo;0;L;;;;;N;;;;; +16F04;MIAO LETTER MA;Lo;0;L;;;;;N;;;;; +16F05;MIAO LETTER MHA;Lo;0;L;;;;;N;;;;; +16F06;MIAO LETTER ARCHAIC MA;Lo;0;L;;;;;N;;;;; +16F07;MIAO LETTER FA;Lo;0;L;;;;;N;;;;; +16F08;MIAO LETTER VA;Lo;0;L;;;;;N;;;;; +16F09;MIAO LETTER VFA;Lo;0;L;;;;;N;;;;; +16F0A;MIAO LETTER TA;Lo;0;L;;;;;N;;;;; +16F0B;MIAO LETTER DA;Lo;0;L;;;;;N;;;;; +16F0C;MIAO LETTER YI TTA;Lo;0;L;;;;;N;;;;; +16F0D;MIAO LETTER YI TA;Lo;0;L;;;;;N;;;;; +16F0E;MIAO LETTER TTA;Lo;0;L;;;;;N;;;;; +16F0F;MIAO LETTER DDA;Lo;0;L;;;;;N;;;;; +16F10;MIAO LETTER NA;Lo;0;L;;;;;N;;;;; +16F11;MIAO LETTER NHA;Lo;0;L;;;;;N;;;;; +16F12;MIAO LETTER YI NNA;Lo;0;L;;;;;N;;;;; +16F13;MIAO LETTER ARCHAIC NA;Lo;0;L;;;;;N;;;;; +16F14;MIAO LETTER NNA;Lo;0;L;;;;;N;;;;; +16F15;MIAO LETTER NNHA;Lo;0;L;;;;;N;;;;; +16F16;MIAO LETTER LA;Lo;0;L;;;;;N;;;;; +16F17;MIAO LETTER LYA;Lo;0;L;;;;;N;;;;; +16F18;MIAO LETTER LHA;Lo;0;L;;;;;N;;;;; +16F19;MIAO LETTER LHYA;Lo;0;L;;;;;N;;;;; +16F1A;MIAO LETTER TLHA;Lo;0;L;;;;;N;;;;; +16F1B;MIAO LETTER DLHA;Lo;0;L;;;;;N;;;;; +16F1C;MIAO LETTER TLHYA;Lo;0;L;;;;;N;;;;; +16F1D;MIAO LETTER DLHYA;Lo;0;L;;;;;N;;;;; +16F1E;MIAO LETTER KA;Lo;0;L;;;;;N;;;;; +16F1F;MIAO LETTER GA;Lo;0;L;;;;;N;;;;; +16F20;MIAO LETTER YI KA;Lo;0;L;;;;;N;;;;; +16F21;MIAO LETTER QA;Lo;0;L;;;;;N;;;;; +16F22;MIAO LETTER QGA;Lo;0;L;;;;;N;;;;; +16F23;MIAO LETTER NGA;Lo;0;L;;;;;N;;;;; +16F24;MIAO LETTER NGHA;Lo;0;L;;;;;N;;;;; +16F25;MIAO LETTER ARCHAIC NGA;Lo;0;L;;;;;N;;;;; +16F26;MIAO LETTER HA;Lo;0;L;;;;;N;;;;; +16F27;MIAO LETTER XA;Lo;0;L;;;;;N;;;;; +16F28;MIAO LETTER GHA;Lo;0;L;;;;;N;;;;; +16F29;MIAO LETTER GHHA;Lo;0;L;;;;;N;;;;; +16F2A;MIAO LETTER TSSA;Lo;0;L;;;;;N;;;;; +16F2B;MIAO LETTER DZZA;Lo;0;L;;;;;N;;;;; +16F2C;MIAO LETTER 
NYA;Lo;0;L;;;;;N;;;;; +16F2D;MIAO LETTER NYHA;Lo;0;L;;;;;N;;;;; +16F2E;MIAO LETTER TSHA;Lo;0;L;;;;;N;;;;; +16F2F;MIAO LETTER DZHA;Lo;0;L;;;;;N;;;;; +16F30;MIAO LETTER YI TSHA;Lo;0;L;;;;;N;;;;; +16F31;MIAO LETTER YI DZHA;Lo;0;L;;;;;N;;;;; +16F32;MIAO LETTER REFORMED TSHA;Lo;0;L;;;;;N;;;;; +16F33;MIAO LETTER SHA;Lo;0;L;;;;;N;;;;; +16F34;MIAO LETTER SSA;Lo;0;L;;;;;N;;;;; +16F35;MIAO LETTER ZHA;Lo;0;L;;;;;N;;;;; +16F36;MIAO LETTER ZSHA;Lo;0;L;;;;;N;;;;; +16F37;MIAO LETTER TSA;Lo;0;L;;;;;N;;;;; +16F38;MIAO LETTER DZA;Lo;0;L;;;;;N;;;;; +16F39;MIAO LETTER YI TSA;Lo;0;L;;;;;N;;;;; +16F3A;MIAO LETTER SA;Lo;0;L;;;;;N;;;;; +16F3B;MIAO LETTER ZA;Lo;0;L;;;;;N;;;;; +16F3C;MIAO LETTER ZSA;Lo;0;L;;;;;N;;;;; +16F3D;MIAO LETTER ZZA;Lo;0;L;;;;;N;;;;; +16F3E;MIAO LETTER ZZSA;Lo;0;L;;;;;N;;;;; +16F3F;MIAO LETTER ARCHAIC ZZA;Lo;0;L;;;;;N;;;;; +16F40;MIAO LETTER ZZYA;Lo;0;L;;;;;N;;;;; +16F41;MIAO LETTER ZZSYA;Lo;0;L;;;;;N;;;;; +16F42;MIAO LETTER WA;Lo;0;L;;;;;N;;;;; +16F43;MIAO LETTER AH;Lo;0;L;;;;;N;;;;; +16F44;MIAO LETTER HHA;Lo;0;L;;;;;N;;;;; +16F50;MIAO LETTER NASALIZATION;Lo;0;L;;;;;N;;;;; +16F51;MIAO SIGN ASPIRATION;Mc;0;L;;;;;N;;;;; +16F52;MIAO SIGN REFORMED VOICING;Mc;0;L;;;;;N;;;;; +16F53;MIAO SIGN REFORMED ASPIRATION;Mc;0;L;;;;;N;;;;; +16F54;MIAO VOWEL SIGN A;Mc;0;L;;;;;N;;;;; +16F55;MIAO VOWEL SIGN AA;Mc;0;L;;;;;N;;;;; +16F56;MIAO VOWEL SIGN AHH;Mc;0;L;;;;;N;;;;; +16F57;MIAO VOWEL SIGN AN;Mc;0;L;;;;;N;;;;; +16F58;MIAO VOWEL SIGN ANG;Mc;0;L;;;;;N;;;;; +16F59;MIAO VOWEL SIGN O;Mc;0;L;;;;;N;;;;; +16F5A;MIAO VOWEL SIGN OO;Mc;0;L;;;;;N;;;;; +16F5B;MIAO VOWEL SIGN WO;Mc;0;L;;;;;N;;;;; +16F5C;MIAO VOWEL SIGN W;Mc;0;L;;;;;N;;;;; +16F5D;MIAO VOWEL SIGN E;Mc;0;L;;;;;N;;;;; +16F5E;MIAO VOWEL SIGN EN;Mc;0;L;;;;;N;;;;; +16F5F;MIAO VOWEL SIGN ENG;Mc;0;L;;;;;N;;;;; +16F60;MIAO VOWEL SIGN OEY;Mc;0;L;;;;;N;;;;; +16F61;MIAO VOWEL SIGN I;Mc;0;L;;;;;N;;;;; +16F62;MIAO VOWEL SIGN IA;Mc;0;L;;;;;N;;;;; +16F63;MIAO VOWEL SIGN IAN;Mc;0;L;;;;;N;;;;; +16F64;MIAO VOWEL SIGN IANG;Mc;0;L;;;;;N;;;;; 
+16F65;MIAO VOWEL SIGN IO;Mc;0;L;;;;;N;;;;; +16F66;MIAO VOWEL SIGN IE;Mc;0;L;;;;;N;;;;; +16F67;MIAO VOWEL SIGN II;Mc;0;L;;;;;N;;;;; +16F68;MIAO VOWEL SIGN IU;Mc;0;L;;;;;N;;;;; +16F69;MIAO VOWEL SIGN ING;Mc;0;L;;;;;N;;;;; +16F6A;MIAO VOWEL SIGN U;Mc;0;L;;;;;N;;;;; +16F6B;MIAO VOWEL SIGN UA;Mc;0;L;;;;;N;;;;; +16F6C;MIAO VOWEL SIGN UAN;Mc;0;L;;;;;N;;;;; +16F6D;MIAO VOWEL SIGN UANG;Mc;0;L;;;;;N;;;;; +16F6E;MIAO VOWEL SIGN UU;Mc;0;L;;;;;N;;;;; +16F6F;MIAO VOWEL SIGN UEI;Mc;0;L;;;;;N;;;;; +16F70;MIAO VOWEL SIGN UNG;Mc;0;L;;;;;N;;;;; +16F71;MIAO VOWEL SIGN Y;Mc;0;L;;;;;N;;;;; +16F72;MIAO VOWEL SIGN YI;Mc;0;L;;;;;N;;;;; +16F73;MIAO VOWEL SIGN AE;Mc;0;L;;;;;N;;;;; +16F74;MIAO VOWEL SIGN AEE;Mc;0;L;;;;;N;;;;; +16F75;MIAO VOWEL SIGN ERR;Mc;0;L;;;;;N;;;;; +16F76;MIAO VOWEL SIGN ROUNDED ERR;Mc;0;L;;;;;N;;;;; +16F77;MIAO VOWEL SIGN ER;Mc;0;L;;;;;N;;;;; +16F78;MIAO VOWEL SIGN ROUNDED ER;Mc;0;L;;;;;N;;;;; +16F79;MIAO VOWEL SIGN AI;Mc;0;L;;;;;N;;;;; +16F7A;MIAO VOWEL SIGN EI;Mc;0;L;;;;;N;;;;; +16F7B;MIAO VOWEL SIGN AU;Mc;0;L;;;;;N;;;;; +16F7C;MIAO VOWEL SIGN OU;Mc;0;L;;;;;N;;;;; +16F7D;MIAO VOWEL SIGN N;Mc;0;L;;;;;N;;;;; +16F7E;MIAO VOWEL SIGN NG;Mc;0;L;;;;;N;;;;; +16F8F;MIAO TONE RIGHT;Mn;0;NSM;;;;;N;;;;; +16F90;MIAO TONE TOP RIGHT;Mn;0;NSM;;;;;N;;;;; +16F91;MIAO TONE ABOVE;Mn;0;NSM;;;;;N;;;;; +16F92;MIAO TONE BELOW;Mn;0;NSM;;;;;N;;;;; +16F93;MIAO LETTER TONE-2;Lm;0;L;;;;;N;;;;; +16F94;MIAO LETTER TONE-3;Lm;0;L;;;;;N;;;;; +16F95;MIAO LETTER TONE-4;Lm;0;L;;;;;N;;;;; +16F96;MIAO LETTER TONE-5;Lm;0;L;;;;;N;;;;; +16F97;MIAO LETTER TONE-6;Lm;0;L;;;;;N;;;;; +16F98;MIAO LETTER TONE-7;Lm;0;L;;;;;N;;;;; +16F99;MIAO LETTER TONE-8;Lm;0;L;;;;;N;;;;; +16F9A;MIAO LETTER REFORMED TONE-1;Lm;0;L;;;;;N;;;;; +16F9B;MIAO LETTER REFORMED TONE-2;Lm;0;L;;;;;N;;;;; +16F9C;MIAO LETTER REFORMED TONE-4;Lm;0;L;;;;;N;;;;; +16F9D;MIAO LETTER REFORMED TONE-5;Lm;0;L;;;;;N;;;;; +16F9E;MIAO LETTER REFORMED TONE-6;Lm;0;L;;;;;N;;;;; +16F9F;MIAO LETTER REFORMED TONE-8;Lm;0;L;;;;;N;;;;; 1B000;KATAKANA LETTER ARCHAIC 
E;Lo;0;L;;;;;N;;;;; 1B001;HIRAGANA LETTER ARCHAIC YE;Lo;0;L;;;;;N;;;;; 1D000;BYZANTINE MUSICAL SYMBOL PSILI;So;0;L;;;;;N;;;;; @@ -21599,6 +22168,149 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 1D7FD;MATHEMATICAL MONOSPACE DIGIT SEVEN;Nd;0;EN;<font> 0037;7;7;7;N;;;;; 1D7FE;MATHEMATICAL MONOSPACE DIGIT EIGHT;Nd;0;EN;<font> 0038;8;8;8;N;;;;; 1D7FF;MATHEMATICAL MONOSPACE DIGIT NINE;Nd;0;EN;<font> 0039;9;9;9;N;;;;; +1EE00;ARABIC MATHEMATICAL ALEF;Lo;0;AL;<font> 0627;;;;N;;;;; +1EE01;ARABIC MATHEMATICAL BEH;Lo;0;AL;<font> 0628;;;;N;;;;; +1EE02;ARABIC MATHEMATICAL JEEM;Lo;0;AL;<font> 062C;;;;N;;;;; +1EE03;ARABIC MATHEMATICAL DAL;Lo;0;AL;<font> 062F;;;;N;;;;; +1EE05;ARABIC MATHEMATICAL WAW;Lo;0;AL;<font> 0648;;;;N;;;;; +1EE06;ARABIC MATHEMATICAL ZAIN;Lo;0;AL;<font> 0632;;;;N;;;;; +1EE07;ARABIC MATHEMATICAL HAH;Lo;0;AL;<font> 062D;;;;N;;;;; +1EE08;ARABIC MATHEMATICAL TAH;Lo;0;AL;<font> 0637;;;;N;;;;; +1EE09;ARABIC MATHEMATICAL YEH;Lo;0;AL;<font> 064A;;;;N;;;;; +1EE0A;ARABIC MATHEMATICAL KAF;Lo;0;AL;<font> 0643;;;;N;;;;; +1EE0B;ARABIC MATHEMATICAL LAM;Lo;0;AL;<font> 0644;;;;N;;;;; +1EE0C;ARABIC MATHEMATICAL MEEM;Lo;0;AL;<font> 0645;;;;N;;;;; +1EE0D;ARABIC MATHEMATICAL NOON;Lo;0;AL;<font> 0646;;;;N;;;;; +1EE0E;ARABIC MATHEMATICAL SEEN;Lo;0;AL;<font> 0633;;;;N;;;;; +1EE0F;ARABIC MATHEMATICAL AIN;Lo;0;AL;<font> 0639;;;;N;;;;; +1EE10;ARABIC MATHEMATICAL FEH;Lo;0;AL;<font> 0641;;;;N;;;;; +1EE11;ARABIC MATHEMATICAL SAD;Lo;0;AL;<font> 0635;;;;N;;;;; +1EE12;ARABIC MATHEMATICAL QAF;Lo;0;AL;<font> 0642;;;;N;;;;; +1EE13;ARABIC MATHEMATICAL REH;Lo;0;AL;<font> 0631;;;;N;;;;; +1EE14;ARABIC MATHEMATICAL SHEEN;Lo;0;AL;<font> 0634;;;;N;;;;; +1EE15;ARABIC MATHEMATICAL TEH;Lo;0;AL;<font> 062A;;;;N;;;;; +1EE16;ARABIC MATHEMATICAL THEH;Lo;0;AL;<font> 062B;;;;N;;;;; +1EE17;ARABIC MATHEMATICAL KHAH;Lo;0;AL;<font> 062E;;;;N;;;;; +1EE18;ARABIC MATHEMATICAL THAL;Lo;0;AL;<font> 0630;;;;N;;;;; +1EE19;ARABIC MATHEMATICAL DAD;Lo;0;AL;<font> 0636;;;;N;;;;; +1EE1A;ARABIC MATHEMATICAL 
ZAH;Lo;0;AL;<font> 0638;;;;N;;;;; +1EE1B;ARABIC MATHEMATICAL GHAIN;Lo;0;AL;<font> 063A;;;;N;;;;; +1EE1C;ARABIC MATHEMATICAL DOTLESS BEH;Lo;0;AL;<font> 066E;;;;N;;;;; +1EE1D;ARABIC MATHEMATICAL DOTLESS NOON;Lo;0;AL;<font> 06BA;;;;N;;;;; +1EE1E;ARABIC MATHEMATICAL DOTLESS FEH;Lo;0;AL;<font> 06A1;;;;N;;;;; +1EE1F;ARABIC MATHEMATICAL DOTLESS QAF;Lo;0;AL;<font> 066F;;;;N;;;;; +1EE21;ARABIC MATHEMATICAL INITIAL BEH;Lo;0;AL;<font> 0628;;;;N;;;;; +1EE22;ARABIC MATHEMATICAL INITIAL JEEM;Lo;0;AL;<font> 062C;;;;N;;;;; +1EE24;ARABIC MATHEMATICAL INITIAL HEH;Lo;0;AL;<font> 0647;;;;N;;;;; +1EE27;ARABIC MATHEMATICAL INITIAL HAH;Lo;0;AL;<font> 062D;;;;N;;;;; +1EE29;ARABIC MATHEMATICAL INITIAL YEH;Lo;0;AL;<font> 064A;;;;N;;;;; +1EE2A;ARABIC MATHEMATICAL INITIAL KAF;Lo;0;AL;<font> 0643;;;;N;;;;; +1EE2B;ARABIC MATHEMATICAL INITIAL LAM;Lo;0;AL;<font> 0644;;;;N;;;;; +1EE2C;ARABIC MATHEMATICAL INITIAL MEEM;Lo;0;AL;<font> 0645;;;;N;;;;; +1EE2D;ARABIC MATHEMATICAL INITIAL NOON;Lo;0;AL;<font> 0646;;;;N;;;;; +1EE2E;ARABIC MATHEMATICAL INITIAL SEEN;Lo;0;AL;<font> 0633;;;;N;;;;; +1EE2F;ARABIC MATHEMATICAL INITIAL AIN;Lo;0;AL;<font> 0639;;;;N;;;;; +1EE30;ARABIC MATHEMATICAL INITIAL FEH;Lo;0;AL;<font> 0641;;;;N;;;;; +1EE31;ARABIC MATHEMATICAL INITIAL SAD;Lo;0;AL;<font> 0635;;;;N;;;;; +1EE32;ARABIC MATHEMATICAL INITIAL QAF;Lo;0;AL;<font> 0642;;;;N;;;;; +1EE34;ARABIC MATHEMATICAL INITIAL SHEEN;Lo;0;AL;<font> 0634;;;;N;;;;; +1EE35;ARABIC MATHEMATICAL INITIAL TEH;Lo;0;AL;<font> 062A;;;;N;;;;; +1EE36;ARABIC MATHEMATICAL INITIAL THEH;Lo;0;AL;<font> 062B;;;;N;;;;; +1EE37;ARABIC MATHEMATICAL INITIAL KHAH;Lo;0;AL;<font> 062E;;;;N;;;;; +1EE39;ARABIC MATHEMATICAL INITIAL DAD;Lo;0;AL;<font> 0636;;;;N;;;;; +1EE3B;ARABIC MATHEMATICAL INITIAL GHAIN;Lo;0;AL;<font> 063A;;;;N;;;;; +1EE42;ARABIC MATHEMATICAL TAILED JEEM;Lo;0;AL;<font> 062C;;;;N;;;;; +1EE47;ARABIC MATHEMATICAL TAILED HAH;Lo;0;AL;<font> 062D;;;;N;;;;; +1EE49;ARABIC MATHEMATICAL TAILED YEH;Lo;0;AL;<font> 064A;;;;N;;;;; +1EE4B;ARABIC MATHEMATICAL 
TAILED LAM;Lo;0;AL;<font> 0644;;;;N;;;;; +1EE4D;ARABIC MATHEMATICAL TAILED NOON;Lo;0;AL;<font> 0646;;;;N;;;;; +1EE4E;ARABIC MATHEMATICAL TAILED SEEN;Lo;0;AL;<font> 0633;;;;N;;;;; +1EE4F;ARABIC MATHEMATICAL TAILED AIN;Lo;0;AL;<font> 0639;;;;N;;;;; +1EE51;ARABIC MATHEMATICAL TAILED SAD;Lo;0;AL;<font> 0635;;;;N;;;;; +1EE52;ARABIC MATHEMATICAL TAILED QAF;Lo;0;AL;<font> 0642;;;;N;;;;; +1EE54;ARABIC MATHEMATICAL TAILED SHEEN;Lo;0;AL;<font> 0634;;;;N;;;;; +1EE57;ARABIC MATHEMATICAL TAILED KHAH;Lo;0;AL;<font> 062E;;;;N;;;;; +1EE59;ARABIC MATHEMATICAL TAILED DAD;Lo;0;AL;<font> 0636;;;;N;;;;; +1EE5B;ARABIC MATHEMATICAL TAILED GHAIN;Lo;0;AL;<font> 063A;;;;N;;;;; +1EE5D;ARABIC MATHEMATICAL TAILED DOTLESS NOON;Lo;0;AL;<font> 06BA;;;;N;;;;; +1EE5F;ARABIC MATHEMATICAL TAILED DOTLESS QAF;Lo;0;AL;<font> 066F;;;;N;;;;; +1EE61;ARABIC MATHEMATICAL STRETCHED BEH;Lo;0;AL;<font> 0628;;;;N;;;;; +1EE62;ARABIC MATHEMATICAL STRETCHED JEEM;Lo;0;AL;<font> 062C;;;;N;;;;; +1EE64;ARABIC MATHEMATICAL STRETCHED HEH;Lo;0;AL;<font> 0647;;;;N;;;;; +1EE67;ARABIC MATHEMATICAL STRETCHED HAH;Lo;0;AL;<font> 062D;;;;N;;;;; +1EE68;ARABIC MATHEMATICAL STRETCHED TAH;Lo;0;AL;<font> 0637;;;;N;;;;; +1EE69;ARABIC MATHEMATICAL STRETCHED YEH;Lo;0;AL;<font> 064A;;;;N;;;;; +1EE6A;ARABIC MATHEMATICAL STRETCHED KAF;Lo;0;AL;<font> 0643;;;;N;;;;; +1EE6C;ARABIC MATHEMATICAL STRETCHED MEEM;Lo;0;AL;<font> 0645;;;;N;;;;; +1EE6D;ARABIC MATHEMATICAL STRETCHED NOON;Lo;0;AL;<font> 0646;;;;N;;;;; +1EE6E;ARABIC MATHEMATICAL STRETCHED SEEN;Lo;0;AL;<font> 0633;;;;N;;;;; +1EE6F;ARABIC MATHEMATICAL STRETCHED AIN;Lo;0;AL;<font> 0639;;;;N;;;;; +1EE70;ARABIC MATHEMATICAL STRETCHED FEH;Lo;0;AL;<font> 0641;;;;N;;;;; +1EE71;ARABIC MATHEMATICAL STRETCHED SAD;Lo;0;AL;<font> 0635;;;;N;;;;; +1EE72;ARABIC MATHEMATICAL STRETCHED QAF;Lo;0;AL;<font> 0642;;;;N;;;;; +1EE74;ARABIC MATHEMATICAL STRETCHED SHEEN;Lo;0;AL;<font> 0634;;;;N;;;;; +1EE75;ARABIC MATHEMATICAL STRETCHED TEH;Lo;0;AL;<font> 062A;;;;N;;;;; +1EE76;ARABIC MATHEMATICAL STRETCHED 
THEH;Lo;0;AL;<font> 062B;;;;N;;;;; +1EE77;ARABIC MATHEMATICAL STRETCHED KHAH;Lo;0;AL;<font> 062E;;;;N;;;;; +1EE79;ARABIC MATHEMATICAL STRETCHED DAD;Lo;0;AL;<font> 0636;;;;N;;;;; +1EE7A;ARABIC MATHEMATICAL STRETCHED ZAH;Lo;0;AL;<font> 0638;;;;N;;;;; +1EE7B;ARABIC MATHEMATICAL STRETCHED GHAIN;Lo;0;AL;<font> 063A;;;;N;;;;; +1EE7C;ARABIC MATHEMATICAL STRETCHED DOTLESS BEH;Lo;0;AL;<font> 066E;;;;N;;;;; +1EE7E;ARABIC MATHEMATICAL STRETCHED DOTLESS FEH;Lo;0;AL;<font> 06A1;;;;N;;;;; +1EE80;ARABIC MATHEMATICAL LOOPED ALEF;Lo;0;AL;<font> 0627;;;;N;;;;; +1EE81;ARABIC MATHEMATICAL LOOPED BEH;Lo;0;AL;<font> 0628;;;;N;;;;; +1EE82;ARABIC MATHEMATICAL LOOPED JEEM;Lo;0;AL;<font> 062C;;;;N;;;;; +1EE83;ARABIC MATHEMATICAL LOOPED DAL;Lo;0;AL;<font> 062F;;;;N;;;;; +1EE84;ARABIC MATHEMATICAL LOOPED HEH;Lo;0;AL;<font> 0647;;;;N;;;;; +1EE85;ARABIC MATHEMATICAL LOOPED WAW;Lo;0;AL;<font> 0648;;;;N;;;;; +1EE86;ARABIC MATHEMATICAL LOOPED ZAIN;Lo;0;AL;<font> 0632;;;;N;;;;; +1EE87;ARABIC MATHEMATICAL LOOPED HAH;Lo;0;AL;<font> 062D;;;;N;;;;; +1EE88;ARABIC MATHEMATICAL LOOPED TAH;Lo;0;AL;<font> 0637;;;;N;;;;; +1EE89;ARABIC MATHEMATICAL LOOPED YEH;Lo;0;AL;<font> 064A;;;;N;;;;; +1EE8B;ARABIC MATHEMATICAL LOOPED LAM;Lo;0;AL;<font> 0644;;;;N;;;;; +1EE8C;ARABIC MATHEMATICAL LOOPED MEEM;Lo;0;AL;<font> 0645;;;;N;;;;; +1EE8D;ARABIC MATHEMATICAL LOOPED NOON;Lo;0;AL;<font> 0646;;;;N;;;;; +1EE8E;ARABIC MATHEMATICAL LOOPED SEEN;Lo;0;AL;<font> 0633;;;;N;;;;; +1EE8F;ARABIC MATHEMATICAL LOOPED AIN;Lo;0;AL;<font> 0639;;;;N;;;;; +1EE90;ARABIC MATHEMATICAL LOOPED FEH;Lo;0;AL;<font> 0641;;;;N;;;;; +1EE91;ARABIC MATHEMATICAL LOOPED SAD;Lo;0;AL;<font> 0635;;;;N;;;;; +1EE92;ARABIC MATHEMATICAL LOOPED QAF;Lo;0;AL;<font> 0642;;;;N;;;;; +1EE93;ARABIC MATHEMATICAL LOOPED REH;Lo;0;AL;<font> 0631;;;;N;;;;; +1EE94;ARABIC MATHEMATICAL LOOPED SHEEN;Lo;0;AL;<font> 0634;;;;N;;;;; +1EE95;ARABIC MATHEMATICAL LOOPED TEH;Lo;0;AL;<font> 062A;;;;N;;;;; +1EE96;ARABIC MATHEMATICAL LOOPED THEH;Lo;0;AL;<font> 062B;;;;N;;;;; +1EE97;ARABIC 
MATHEMATICAL LOOPED KHAH;Lo;0;AL;<font> 062E;;;;N;;;;; +1EE98;ARABIC MATHEMATICAL LOOPED THAL;Lo;0;AL;<font> 0630;;;;N;;;;; +1EE99;ARABIC MATHEMATICAL LOOPED DAD;Lo;0;AL;<font> 0636;;;;N;;;;; +1EE9A;ARABIC MATHEMATICAL LOOPED ZAH;Lo;0;AL;<font> 0638;;;;N;;;;; +1EE9B;ARABIC MATHEMATICAL LOOPED GHAIN;Lo;0;AL;<font> 063A;;;;N;;;;; +1EEA1;ARABIC MATHEMATICAL DOUBLE-STRUCK BEH;Lo;0;AL;<font> 0628;;;;N;;;;; +1EEA2;ARABIC MATHEMATICAL DOUBLE-STRUCK JEEM;Lo;0;AL;<font> 062C;;;;N;;;;; +1EEA3;ARABIC MATHEMATICAL DOUBLE-STRUCK DAL;Lo;0;AL;<font> 062F;;;;N;;;;; +1EEA5;ARABIC MATHEMATICAL DOUBLE-STRUCK WAW;Lo;0;AL;<font> 0648;;;;N;;;;; +1EEA6;ARABIC MATHEMATICAL DOUBLE-STRUCK ZAIN;Lo;0;AL;<font> 0632;;;;N;;;;; +1EEA7;ARABIC MATHEMATICAL DOUBLE-STRUCK HAH;Lo;0;AL;<font> 062D;;;;N;;;;; +1EEA8;ARABIC MATHEMATICAL DOUBLE-STRUCK TAH;Lo;0;AL;<font> 0637;;;;N;;;;; +1EEA9;ARABIC MATHEMATICAL DOUBLE-STRUCK YEH;Lo;0;AL;<font> 064A;;;;N;;;;; +1EEAB;ARABIC MATHEMATICAL DOUBLE-STRUCK LAM;Lo;0;AL;<font> 0644;;;;N;;;;; +1EEAC;ARABIC MATHEMATICAL DOUBLE-STRUCK MEEM;Lo;0;AL;<font> 0645;;;;N;;;;; +1EEAD;ARABIC MATHEMATICAL DOUBLE-STRUCK NOON;Lo;0;AL;<font> 0646;;;;N;;;;; +1EEAE;ARABIC MATHEMATICAL DOUBLE-STRUCK SEEN;Lo;0;AL;<font> 0633;;;;N;;;;; +1EEAF;ARABIC MATHEMATICAL DOUBLE-STRUCK AIN;Lo;0;AL;<font> 0639;;;;N;;;;; +1EEB0;ARABIC MATHEMATICAL DOUBLE-STRUCK FEH;Lo;0;AL;<font> 0641;;;;N;;;;; +1EEB1;ARABIC MATHEMATICAL DOUBLE-STRUCK SAD;Lo;0;AL;<font> 0635;;;;N;;;;; +1EEB2;ARABIC MATHEMATICAL DOUBLE-STRUCK QAF;Lo;0;AL;<font> 0642;;;;N;;;;; +1EEB3;ARABIC MATHEMATICAL DOUBLE-STRUCK REH;Lo;0;AL;<font> 0631;;;;N;;;;; +1EEB4;ARABIC MATHEMATICAL DOUBLE-STRUCK SHEEN;Lo;0;AL;<font> 0634;;;;N;;;;; +1EEB5;ARABIC MATHEMATICAL DOUBLE-STRUCK TEH;Lo;0;AL;<font> 062A;;;;N;;;;; +1EEB6;ARABIC MATHEMATICAL DOUBLE-STRUCK THEH;Lo;0;AL;<font> 062B;;;;N;;;;; +1EEB7;ARABIC MATHEMATICAL DOUBLE-STRUCK KHAH;Lo;0;AL;<font> 062E;;;;N;;;;; +1EEB8;ARABIC MATHEMATICAL DOUBLE-STRUCK THAL;Lo;0;AL;<font> 0630;;;;N;;;;; 
+1EEB9;ARABIC MATHEMATICAL DOUBLE-STRUCK DAD;Lo;0;AL;<font> 0636;;;;N;;;;; +1EEBA;ARABIC MATHEMATICAL DOUBLE-STRUCK ZAH;Lo;0;AL;<font> 0638;;;;N;;;;; +1EEBB;ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN;Lo;0;AL;<font> 063A;;;;N;;;;; +1EEF0;ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL;Sm;0;ON;;;;;N;;;;; +1EEF1;ARABIC MATHEMATICAL OPERATOR HAH WITH DAL;Sm;0;ON;;;;;N;;;;; 1F000;MAHJONG TILE EAST WIND;So;0;ON;;;;;N;;;;; 1F001;MAHJONG TILE SOUTH WIND;So;0;ON;;;;;N;;;;; 1F002;MAHJONG TILE WEST WIND;So;0;ON;;;;;N;;;;; @@ -21902,6 +22614,8 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 1F167;NEGATIVE CIRCLED LATIN CAPITAL LETTER X;So;0;L;;;;;N;;;;; 1F168;NEGATIVE CIRCLED LATIN CAPITAL LETTER Y;So;0;L;;;;;N;;;;; 1F169;NEGATIVE CIRCLED LATIN CAPITAL LETTER Z;So;0;L;;;;;N;;;;; +1F16A;RAISED MC SIGN;So;0;ON;<super> 004D 0043;;;;N;;;;; +1F16B;RAISED MD SIGN;So;0;ON;<super> 004D 0044;;;;N;;;;; 1F170;NEGATIVE SQUARED LATIN CAPITAL LETTER A;So;0;L;;;;;N;;;;; 1F171;NEGATIVE SQUARED LATIN CAPITAL LETTER B;So;0;L;;;;;N;;;;; 1F172;NEGATIVE SQUARED LATIN CAPITAL LETTER C;So;0;L;;;;;N;;;;; @@ -22354,7 +23068,7 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 1F489;SYRINGE;So;0;ON;;;;;N;;;;; 1F48A;PILL;So;0;ON;;;;;N;;;;; 1F48B;KISS MARK;So;0;ON;;;;;N;;;;; -1F48C;LOVE LETTER;So;0;L;;;;;N;;;;; +1F48C;LOVE LETTER;So;0;ON;;;;;N;;;;; 1F48D;RING;So;0;ON;;;;;N;;;;; 1F48E;GEM STONE;So;0;ON;;;;;N;;;;; 1F48F;KISS;So;0;ON;;;;;N;;;;; @@ -22502,7 +23216,7 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 1F521;INPUT SYMBOL FOR LATIN SMALL LETTERS;So;0;ON;;;;;N;;;;; 1F522;INPUT SYMBOL FOR NUMBERS;So;0;ON;;;;;N;;;;; 1F523;INPUT SYMBOL FOR SYMBOLS;So;0;ON;;;;;N;;;;; -1F524;INPUT SYMBOL FOR LATIN LETTERS;So;0;L;;;;;N;;;;; +1F524;INPUT SYMBOL FOR LATIN LETTERS;So;0;ON;;;;;N;;;;; 1F525;FIRE;So;0;ON;;;;;N;;;;; 1F526;ELECTRIC TORCH;So;0;ON;;;;;N;;;;; 1F527;WRENCH;So;0;ON;;;;;N;;;;; @@ -22528,6 +23242,10 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 1F53B;DOWN-POINTING RED TRIANGLE;So;0;ON;;;;;N;;;;; 
1F53C;UP-POINTING SMALL RED TRIANGLE;So;0;ON;;;;;N;;;;; 1F53D;DOWN-POINTING SMALL RED TRIANGLE;So;0;ON;;;;;N;;;;; +1F540;CIRCLED CROSS POMMEE;So;0;ON;;;;;N;;;;; +1F541;CROSS POMMEE WITH HALF-CIRCLE BELOW;So;0;ON;;;;;N;;;;; +1F542;CROSS POMMEE;So;0;ON;;;;;N;;;;; +1F543;NOTCHED LEFT SEMICIRCLE WITH THREE DOTS;So;0;ON;;;;;N;;;;; 1F550;CLOCK FACE ONE OCLOCK;So;0;ON;;;;;N;;;;; 1F551;CLOCK FACE TWO OCLOCK;So;0;ON;;;;;N;;;;; 1F552;CLOCK FACE THREE OCLOCK;So;0;ON;;;;;N;;;;; @@ -22557,6 +23275,7 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 1F5FD;STATUE OF LIBERTY;So;0;ON;;;;;N;;;;; 1F5FE;SILHOUETTE OF JAPAN;So;0;ON;;;;;N;;;;; 1F5FF;MOYAI;So;0;ON;;;;;N;;;;; +1F600;GRINNING FACE;So;0;ON;;;;;N;;;;; 1F601;GRINNING FACE WITH SMILING EYES;So;0;ON;;;;;N;;;;; 1F602;FACE WITH TEARS OF JOY;So;0;ON;;;;;N;;;;; 1F603;SMILING FACE WITH OPEN MOUTH;So;0;ON;;;;;N;;;;; @@ -22573,30 +23292,42 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 1F60E;SMILING FACE WITH SUNGLASSES;So;0;ON;;;;;N;;;;; 1F60F;SMIRKING FACE;So;0;ON;;;;;N;;;;; 1F610;NEUTRAL FACE;So;0;ON;;;;;N;;;;; +1F611;EXPRESSIONLESS FACE;So;0;ON;;;;;N;;;;; 1F612;UNAMUSED FACE;So;0;ON;;;;;N;;;;; 1F613;FACE WITH COLD SWEAT;So;0;ON;;;;;N;;;;; 1F614;PENSIVE FACE;So;0;ON;;;;;N;;;;; +1F615;CONFUSED FACE;So;0;ON;;;;;N;;;;; 1F616;CONFOUNDED FACE;So;0;ON;;;;;N;;;;; +1F617;KISSING FACE;So;0;ON;;;;;N;;;;; 1F618;FACE THROWING A KISS;So;0;ON;;;;;N;;;;; +1F619;KISSING FACE WITH SMILING EYES;So;0;ON;;;;;N;;;;; 1F61A;KISSING FACE WITH CLOSED EYES;So;0;ON;;;;;N;;;;; +1F61B;FACE WITH STUCK-OUT TONGUE;So;0;ON;;;;;N;;;;; 1F61C;FACE WITH STUCK-OUT TONGUE AND WINKING EYE;So;0;ON;;;;;N;;;;; 1F61D;FACE WITH STUCK-OUT TONGUE AND TIGHTLY-CLOSED EYES;So;0;ON;;;;;N;;;;; 1F61E;DISAPPOINTED FACE;So;0;ON;;;;;N;;;;; +1F61F;WORRIED FACE;So;0;ON;;;;;N;;;;; 1F620;ANGRY FACE;So;0;ON;;;;;N;;;;; 1F621;POUTING FACE;So;0;ON;;;;;N;;;;; 1F622;CRYING FACE;So;0;ON;;;;;N;;;;; 1F623;PERSEVERING FACE;So;0;ON;;;;;N;;;;; 1F624;FACE WITH LOOK OF TRIUMPH;So;0;ON;;;;;N;;;;; 
1F625;DISAPPOINTED BUT RELIEVED FACE;So;0;ON;;;;;N;;;;; +1F626;FROWNING FACE WITH OPEN MOUTH;So;0;ON;;;;;N;;;;; +1F627;ANGUISHED FACE;So;0;ON;;;;;N;;;;; 1F628;FEARFUL FACE;So;0;ON;;;;;N;;;;; 1F629;WEARY FACE;So;0;ON;;;;;N;;;;; 1F62A;SLEEPY FACE;So;0;ON;;;;;N;;;;; 1F62B;TIRED FACE;So;0;ON;;;;;N;;;;; +1F62C;GRIMACING FACE;So;0;ON;;;;;N;;;;; 1F62D;LOUDLY CRYING FACE;So;0;ON;;;;;N;;;;; +1F62E;FACE WITH OPEN MOUTH;So;0;ON;;;;;N;;;;; +1F62F;HUSHED FACE;So;0;ON;;;;;N;;;;; 1F630;FACE WITH OPEN MOUTH AND COLD SWEAT;So;0;ON;;;;;N;;;;; 1F631;FACE SCREAMING IN FEAR;So;0;ON;;;;;N;;;;; 1F632;ASTONISHED FACE;So;0;ON;;;;;N;;;;; 1F633;FLUSHED FACE;So;0;ON;;;;;N;;;;; +1F634;SLEEPING FACE;So;0;ON;;;;;N;;;;; 1F635;DIZZY FACE;So;0;ON;;;;;N;;;;; 1F636;FACE WITHOUT MOUTH;So;0;ON;;;;;N;;;;; 1F637;FACE WITH MEDICAL MASK;So;0;ON;;;;;N;;;;; diff --git a/lib/unicore/auxiliary/GCBTest.txt b/lib/unicore/auxiliary/GCBTest.txt index 8f45e56cf6..33b859cbff 100644 --- a/lib/unicore/auxiliary/GCBTest.txt +++ b/lib/unicore/auxiliary/GCBTest.txt @@ -1,8 +1,8 @@ -# GraphemeBreakTest-6.0.0.txt -# Date: 2010-05-18, 00:49:27 GMT [MD] +# GraphemeBreakTest-6.1.0.txt +# Date: 2011-12-07, 17:54:39 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. 
# For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ # @@ -30,8 +30,6 @@ ÷ 0020 × 0308 ÷ 0001 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] ÷ 0020 × 0300 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] ÷ 0020 × 0308 × 0300 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] -÷ 0020 ÷ 0E40 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] -÷ 0020 × 0308 ÷ 0E40 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] ÷ 0020 × 0903 ÷ # ÷ [0.2] SPACE (Other) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] ÷ 0020 × 0308 × 0903 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] ÷ 0020 ÷ 1100 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] @@ -44,6 +42,10 @@ ÷ 0020 × 0308 ÷ AC00 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] ÷ 0020 ÷ AC01 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] ÷ 0020 × 0308 ÷ AC01 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0020 ÷ 0378 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ 0020 × 0308 ÷ 0378 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ 0020 ÷ D800 ÷ # ÷ [0.2] SPACE (Other) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3] +÷ 0020 × 0308 ÷ D800 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3] ÷ 000D ÷ 0020 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] SPACE (Other) ÷ [0.3] ÷ 000D ÷ 0308 ÷ 0020 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) 
÷ [999.0] SPACE (Other) ÷ [0.3] ÷ 000D ÷ 000D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] @@ -54,8 +56,6 @@ ÷ 000D ÷ 0308 ÷ 0001 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] ÷ 000D ÷ 0300 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] ÷ 000D ÷ 0308 × 0300 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] -÷ 000D ÷ 0E40 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] -÷ 000D ÷ 0308 ÷ 0E40 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] ÷ 000D ÷ 0903 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] ÷ 000D ÷ 0308 × 0903 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] ÷ 000D ÷ 1100 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] @@ -68,6 +68,10 @@ ÷ 000D ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] ÷ 000D ÷ AC01 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] ÷ 000D ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 000D ÷ 0378 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <reserved-0378> (Other) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 0378 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ 000D ÷ D800 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <surrogate-D800> (Control) ÷ [0.3] +÷ 000D ÷ 0308 ÷ D800 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] 
<surrogate-D800> (Control) ÷ [0.3] ÷ 000A ÷ 0020 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] SPACE (Other) ÷ [0.3] ÷ 000A ÷ 0308 ÷ 0020 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3] ÷ 000A ÷ 000D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] @@ -78,8 +82,6 @@ ÷ 000A ÷ 0308 ÷ 0001 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] ÷ 000A ÷ 0300 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] ÷ 000A ÷ 0308 × 0300 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] -÷ 000A ÷ 0E40 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] -÷ 000A ÷ 0308 ÷ 0E40 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] ÷ 000A ÷ 0903 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] ÷ 000A ÷ 0308 × 0903 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] ÷ 000A ÷ 1100 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] @@ -92,6 +94,10 @@ ÷ 000A ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] ÷ 000A ÷ AC01 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] ÷ 000A ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 000A ÷ 0378 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <reserved-0378> (Other) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 0378 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ 000A ÷ D800 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <surrogate-D800> (Control) ÷ [0.3] +÷ 
000A ÷ 0308 ÷ D800 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3] ÷ 0001 ÷ 0020 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] SPACE (Other) ÷ [0.3] ÷ 0001 ÷ 0308 ÷ 0020 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3] ÷ 0001 ÷ 000D ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] @@ -102,8 +108,6 @@ ÷ 0001 ÷ 0308 ÷ 0001 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] ÷ 0001 ÷ 0300 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] ÷ 0001 ÷ 0308 × 0300 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] -÷ 0001 ÷ 0E40 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] -÷ 0001 ÷ 0308 ÷ 0E40 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] ÷ 0001 ÷ 0903 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] ÷ 0001 ÷ 0308 × 0903 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] ÷ 0001 ÷ 1100 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] @@ -116,6 +120,10 @@ ÷ 0001 ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] ÷ 0001 ÷ AC01 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] ÷ 0001 ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0001 ÷ 0378 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] <reserved-0378> (Other) ÷ [0.3] +÷ 0001 ÷ 0308 ÷ 0378 ÷ # 
÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ 0001 ÷ D800 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] <surrogate-D800> (Control) ÷ [0.3] +÷ 0001 ÷ 0308 ÷ D800 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3] ÷ 0300 ÷ 0020 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3] ÷ 0300 × 0308 ÷ 0020 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3] ÷ 0300 ÷ 000D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] @@ -126,8 +134,6 @@ ÷ 0300 × 0308 ÷ 0001 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] ÷ 0300 × 0300 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] ÷ 0300 × 0308 × 0300 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] -÷ 0300 ÷ 0E40 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] -÷ 0300 × 0308 ÷ 0E40 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] ÷ 0300 × 0903 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] ÷ 0300 × 0308 × 0903 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] ÷ 0300 ÷ 1100 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] @@ -140,30 +146,10 @@ ÷ 0300 × 0308 ÷ AC00 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] ÷ 0300 ÷ AC01 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) 
÷ [0.3] ÷ 0300 × 0308 ÷ AC01 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] -÷ 0E40 × 0020 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.2] SPACE (Other) ÷ [0.3] -÷ 0E40 × 0308 ÷ 0020 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3] -÷ 0E40 ÷ 000D ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] -÷ 0E40 × 0308 ÷ 000D ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] -÷ 0E40 ÷ 000A ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] -÷ 0E40 × 0308 ÷ 000A ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] -÷ 0E40 ÷ 0001 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] -÷ 0E40 × 0308 ÷ 0001 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] -÷ 0E40 × 0300 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] -÷ 0E40 × 0308 × 0300 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] -÷ 0E40 × 0E40 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.2] THAI CHARACTER SARA E (Prepend) ÷ [0.3] -÷ 0E40 × 0308 ÷ 0E40 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] -÷ 0E40 × 0903 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] -÷ 0E40 × 0308 × 0903 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] -÷ 0E40 × 1100 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.2] HANGUL CHOSEONG KIYEOK (L) 
÷ [0.3] -÷ 0E40 × 0308 ÷ 1100 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] -÷ 0E40 × 1160 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.2] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] -÷ 0E40 × 0308 ÷ 1160 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] -÷ 0E40 × 11A8 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.2] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] -÷ 0E40 × 0308 ÷ 11A8 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] -÷ 0E40 × AC00 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.2] HANGUL SYLLABLE GA (LV) ÷ [0.3] -÷ 0E40 × 0308 ÷ AC00 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] -÷ 0E40 × AC01 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.2] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] -÷ 0E40 × 0308 ÷ AC01 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0300 ÷ 0378 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ 0300 × 0308 ÷ 0378 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ 0300 ÷ D800 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3] +÷ 0300 × 0308 ÷ D800 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3] ÷ 0903 ÷ 0020 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] SPACE (Other) ÷ [0.3] ÷ 0903 × 0308 ÷ 0020 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3] ÷ 0903 ÷ 000D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] @@ -174,8 
+160,6 @@ ÷ 0903 × 0308 ÷ 0001 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] ÷ 0903 × 0300 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] ÷ 0903 × 0308 × 0300 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] -÷ 0903 ÷ 0E40 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] -÷ 0903 × 0308 ÷ 0E40 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] ÷ 0903 × 0903 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] ÷ 0903 × 0308 × 0903 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] ÷ 0903 ÷ 1100 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] @@ -188,6 +172,10 @@ ÷ 0903 × 0308 ÷ AC00 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] ÷ 0903 ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] ÷ 0903 × 0308 ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0903 ÷ 0378 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ 0903 × 0308 ÷ 0378 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ 0903 ÷ D800 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3] +÷ 0903 × 0308 ÷ D800 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) 
÷ [5.0] <surrogate-D800> (Control) ÷ [0.3] ÷ 1100 ÷ 0020 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] SPACE (Other) ÷ [0.3] ÷ 1100 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3] ÷ 1100 ÷ 000D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] @@ -198,8 +186,6 @@ ÷ 1100 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] ÷ 1100 × 0300 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] ÷ 1100 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] -÷ 1100 ÷ 0E40 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] -÷ 1100 × 0308 ÷ 0E40 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] ÷ 1100 × 0903 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] ÷ 1100 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] ÷ 1100 × 1100 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] @@ -212,6 +198,10 @@ ÷ 1100 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] ÷ 1100 × AC01 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] ÷ 1100 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 1100 ÷ 0378 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ 1100 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] 
+÷ 1100 ÷ D800 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3] +÷ 1100 × 0308 ÷ D800 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3] ÷ 1160 ÷ 0020 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] SPACE (Other) ÷ [0.3] ÷ 1160 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3] ÷ 1160 ÷ 000D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] @@ -222,8 +212,6 @@ ÷ 1160 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] ÷ 1160 × 0300 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] ÷ 1160 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] -÷ 1160 ÷ 0E40 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] -÷ 1160 × 0308 ÷ 0E40 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] ÷ 1160 × 0903 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] ÷ 1160 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] ÷ 1160 ÷ 1100 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] @@ -236,6 +224,10 @@ ÷ 1160 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] ÷ 1160 ÷ AC01 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] ÷ 1160 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 1160 ÷ 0378 ÷ # ÷ 
[0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ 1160 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ 1160 ÷ D800 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3] +÷ 1160 × 0308 ÷ D800 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3] ÷ 11A8 ÷ 0020 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] SPACE (Other) ÷ [0.3] ÷ 11A8 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3] ÷ 11A8 ÷ 000D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] @@ -246,8 +238,6 @@ ÷ 11A8 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] ÷ 11A8 × 0300 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] ÷ 11A8 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] -÷ 11A8 ÷ 0E40 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] -÷ 11A8 × 0308 ÷ 0E40 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] ÷ 11A8 × 0903 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] ÷ 11A8 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] ÷ 11A8 ÷ 1100 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] @@ -260,6 +250,10 @@ ÷ 11A8 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] ÷ 11A8 ÷ AC01 ÷ # ÷ [0.2] HANGUL JONGSEONG 
KIYEOK (T) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] ÷ 11A8 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 11A8 ÷ 0378 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ 11A8 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ 11A8 ÷ D800 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3] +÷ 11A8 × 0308 ÷ D800 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3] ÷ AC00 ÷ 0020 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] SPACE (Other) ÷ [0.3] ÷ AC00 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3] ÷ AC00 ÷ 000D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] @@ -270,8 +264,6 @@ ÷ AC00 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] ÷ AC00 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] ÷ AC00 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] -÷ AC00 ÷ 0E40 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] -÷ AC00 × 0308 ÷ 0E40 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] ÷ AC00 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] ÷ AC00 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] ÷ AC00 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] @@ -284,6 +276,10 @@ ÷ AC00 × 
0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] ÷ AC00 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] ÷ AC00 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ AC00 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ AC00 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ AC00 ÷ D800 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3] +÷ AC00 × 0308 ÷ D800 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3] ÷ AC01 ÷ 0020 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] SPACE (Other) ÷ [0.3] ÷ AC01 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3] ÷ AC01 ÷ 000D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] @@ -294,8 +290,6 @@ ÷ AC01 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] ÷ AC01 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] ÷ AC01 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] -÷ AC01 ÷ 0E40 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] -÷ AC01 × 0308 ÷ 0E40 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] ÷ AC01 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] ÷ AC01 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] 
DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] ÷ AC01 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] @@ -308,4 +302,60 @@ ÷ AC01 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] ÷ AC01 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] ÷ AC01 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] -# Lines: 288 +÷ AC01 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ AC01 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ AC01 ÷ D800 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3] +÷ AC01 × 0308 ÷ D800 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3] +÷ 0378 ÷ 0020 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 0378 × 0308 ÷ 0020 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 0378 ÷ 000D ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0378 × 0308 ÷ 000D ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0378 ÷ 000A ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0378 × 0308 ÷ 000A ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0378 ÷ 0001 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] +÷ 0378 × 0308 ÷ 0001 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] +÷ 0378 × 0300 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING GRAVE 
ACCENT (Extend) ÷ [0.3] +÷ 0378 × 0308 × 0300 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] +÷ 0378 × 0903 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 0378 × 0308 × 0903 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 0378 ÷ 1100 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 0378 × 0308 ÷ 1100 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 0378 ÷ 1160 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 0378 × 0308 ÷ 1160 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 0378 ÷ 11A8 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 0378 × 0308 ÷ 11A8 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 0378 ÷ AC00 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 0378 × 0308 ÷ AC00 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 0378 ÷ AC01 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0378 × 0308 ÷ AC01 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0378 ÷ 0378 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ 0378 × 0308 ÷ 0378 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ 0378 ÷ D800 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3] +÷ 0378 × 0308 ÷ D800 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING 
DIAERESIS (Extend) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3] +÷ D800 ÷ 0020 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] SPACE (Other) ÷ [0.3] +÷ D800 ÷ 0308 ÷ 0020 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ D800 ÷ 000D ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ D800 ÷ 0308 ÷ 000D ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ D800 ÷ 000A ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ D800 ÷ 0308 ÷ 000A ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ D800 ÷ 0001 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] <START OF HEADING> (Control) ÷ [0.3] +÷ D800 ÷ 0308 ÷ 0001 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] +÷ D800 ÷ 0300 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] +÷ D800 ÷ 0308 × 0300 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] +÷ D800 ÷ 0903 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ D800 ÷ 0308 × 0903 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ D800 ÷ 1100 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ D800 ÷ 0308 ÷ 1100 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ D800 ÷ 1160 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ D800 ÷ 0308 ÷ 1160 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ D800 ÷ 11A8 ÷ # ÷ 
[0.2] <surrogate-D800> (Control) ÷ [4.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ D800 ÷ 0308 ÷ 11A8 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ D800 ÷ AC00 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ D800 ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ D800 ÷ AC01 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ D800 ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ D800 ÷ 0378 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] <reserved-0378> (Other) ÷ [0.3] +÷ D800 ÷ 0308 ÷ 0378 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ D800 ÷ D800 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] <surrogate-D800> (Control) ÷ [0.3] +÷ D800 ÷ 0308 ÷ D800 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3] +# Lines: 338 diff --git a/lib/unicore/auxiliary/GraphemeBreakProperty.txt b/lib/unicore/auxiliary/GraphemeBreakProperty.txt index 0432515a00..d3f480da59 100644 --- a/lib/unicore/auxiliary/GraphemeBreakProperty.txt +++ b/lib/unicore/auxiliary/GraphemeBreakProperty.txt @@ -1,8 +1,8 @@ -# GraphemeBreakProperty-6.0.0.txt -# Date: 2010-09-01, 18:48:17 GMT [MD] +# GraphemeBreakProperty-6.1.0.txt +# Date: 2011-12-05, 16:44:15 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. 
# For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ @@ -34,25 +34,31 @@ 000E..001F ; Control # Cc [18] <control-000E>..<control-001F> 007F..009F ; Control # Cc [33] <control-007F>..<control-009F> 00AD ; Control # Cf SOFT HYPHEN -0600..0603 ; Control # Cf [4] ARABIC NUMBER SIGN..ARABIC SIGN SAFHA +0600..0604 ; Control # Cf [5] ARABIC NUMBER SIGN..ARABIC SIGN SAMVAT 06DD ; Control # Cf ARABIC END OF AYAH 070F ; Control # Cf SYRIAC ABBREVIATION MARK -17B4..17B5 ; Control # Cf [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA 200B ; Control # Cf ZERO WIDTH SPACE 200E..200F ; Control # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK 2028 ; Control # Zl LINE SEPARATOR 2029 ; Control # Zp PARAGRAPH SEPARATOR 202A..202E ; Control # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE 2060..2064 ; Control # Cf [5] WORD JOINER..INVISIBLE PLUS +2065..2069 ; Control # Cn [5] <reserved-2065>..<reserved-2069> 206A..206F ; Control # Cf [6] INHIBIT SYMMETRIC SWAPPING..NOMINAL DIGIT SHAPES +D800..DFFF ; Control # Cs [2048] <surrogate-D800>..<surrogate-DFFF> FEFF ; Control # Cf ZERO WIDTH NO-BREAK SPACE +FFF0..FFF8 ; Control # Cn [9] <reserved-FFF0>..<reserved-FFF8> FFF9..FFFB ; Control # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATION TERMINATOR 110BD ; Control # Cf KAITHI NUMBER SIGN 1D173..1D17A ; Control # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE +E0000 ; Control # Cn <reserved-E0000> E0001 ; Control # Cf LANGUAGE TAG +E0002..E001F ; Control # Cn [30] <reserved-E0002>..<reserved-E001F> E0020..E007F ; Control # Cf [96] TAG SPACE..CANCEL TAG +E0080..E00FF ; Control # Cn [128] <reserved-E0080>..<reserved-E00FF> +E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF> -# Total code points: 203 +# Total code points: 6023 # ================================================ @@ -80,6 +86,7 @@ E0020..E007F ; Control # Cf [96] TAG SPACE..CANCEL TAG 0825..0827 ; Extend 
# Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U 0829..082D ; Extend # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA 0859..085B ; Extend # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK +08E4..08FE ; Extend # Mn [27] ARABIC CURLY FATHA..ARABIC DAMMA WITH DOT 0900..0902 ; Extend # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA 093A ; Extend # Mn DEVANAGARI VOWEL SIGN OE 093C ; Extend # Mn DEVANAGARI SIGN NUKTA @@ -177,6 +184,7 @@ E0020..E007F ; Control # Cf [96] TAG SPACE..CANCEL TAG 1732..1734 ; Extend # Mn [3] HANUNOO VOWEL SIGN I..HANUNOO SIGN PAMUDPOD 1752..1753 ; Extend # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U 1772..1773 ; Extend # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U +17B4..17B5 ; Extend # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA 17B7..17BD ; Extend # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA 17C6 ; Extend # Mn KHMER SIGN NIKAHIT 17C9..17D3 ; Extend # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT @@ -204,6 +212,7 @@ E0020..E007F ; Control # Cf [96] TAG SPACE..CANCEL TAG 1B80..1B81 ; Extend # Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR 1BA2..1BA5 ; Extend # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU 1BA8..1BA9 ; Extend # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG +1BAB ; Extend # Mn SUNDANESE SIGN VIRAMA 1BE6 ; Extend # Mn BATAK SIGN TOMPI 1BE8..1BE9 ; Extend # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE 1BED ; Extend # Mn BATAK VOWEL SIGN KARO O @@ -214,6 +223,7 @@ E0020..E007F ; Control # Cf [96] TAG SPACE..CANCEL TAG 1CD4..1CE0 ; Extend # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA 1CE2..1CE8 ; Extend # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL 1CED ; Extend # Mn VEDIC SIGN TIRYAK +1CF4 ; Extend # Mn VEDIC TONE CANDRA ABOVE 1DC0..1DE6 ; Extend # Mn [39] COMBINING DOTTED GRAVE ACCENT..COMBINING 
LATIN SMALL LETTER Z 1DFC..1DFF ; Extend # Mn [4] COMBINING DOUBLE INVERTED BREVE BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW 200C..200D ; Extend # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER @@ -225,11 +235,13 @@ E0020..E007F ; Control # Cf [96] TAG SPACE..CANCEL TAG 2CEF..2CF1 ; Extend # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS 2D7F ; Extend # Mn TIFINAGH CONSONANT JOINER 2DE0..2DFF ; Extend # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS -302A..302F ; Extend # Mn [6] IDEOGRAPHIC LEVEL TONE MARK..HANGUL DOUBLE DOT TONE MARK +302A..302D ; Extend # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK +302E..302F ; Extend # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK 3099..309A ; Extend # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK A66F ; Extend # Mn COMBINING CYRILLIC VZMET A670..A672 ; Extend # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN -A67C..A67D ; Extend # Mn [2] COMBINING CYRILLIC KAVYKA..COMBINING CYRILLIC PAYEROK +A674..A67D ; Extend # Mn [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK +A69F ; Extend # Mn COMBINING CYRILLIC LETTER IOTIFIED E A6F0..A6F1 ; Extend # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS A802 ; Extend # Mn SYLOTI NAGRI SIGN DVISVARA A806 ; Extend # Mn SYLOTI NAGRI SIGN HASANTA @@ -253,6 +265,8 @@ AAB2..AAB4 ; Extend # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U AAB7..AAB8 ; Extend # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA AABE..AABF ; Extend # Mn [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK AAC1 ; Extend # Mn TAI VIET TONE MAI THO +AAEC..AAED ; Extend # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI +AAF6 ; Extend # Mn MEETEI MAYEK VIRAMA ABE5 ; Extend # Mn MEETEI MAYEK VOWEL SIGN ANAP ABE8 ; Extend # Mn MEETEI MAYEK VOWEL SIGN UNAP ABED ; Extend # Mn MEETEI 
MAYEK APUN IYEK @@ -271,6 +285,16 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 11080..11081 ; Extend # Mn [2] KAITHI SIGN CANDRABINDU..KAITHI SIGN ANUSVARA 110B3..110B6 ; Extend # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI 110B9..110BA ; Extend # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA +11100..11102 ; Extend # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11127..1112B ; Extend # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112D..11134 ; Extend # Mn [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA +11180..11181 ; Extend # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +111B6..111BE ; Extend # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +116AB ; Extend # Mn TAKRI SIGN ANUSVARA +116AD ; Extend # Mn TAKRI VOWEL SIGN AA +116B0..116B5 ; Extend # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +116B7 ; Extend # Mn TAKRI SIGN NUKTA +16F8F..16F92 ; Extend # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW 1D165 ; Extend # Mc MUSICAL SYMBOL COMBINING STEM 1D167..1D169 ; Extend # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 1D16E..1D172 ; Extend # Mc [5] MUSICAL SYMBOL COMBINING FLAG-1..MUSICAL SYMBOL COMBINING FLAG-5 @@ -280,17 +304,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 1D242..1D244 ; Extend # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 1234 - -# ================================================ - -0E40..0E44 ; Prepend # Lo [5] THAI CHARACTER SARA E..THAI CHARACTER SARA AI MAIMALAI -0EC0..0EC4 ; Prepend # Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI -AAB5..AAB6 ; Prepend # Lo [2] TAI VIET VOWEL E..TAI VIET VOWEL O -AAB9 ; Prepend # Lo TAI VIET VOWEL UEA -AABB..AABC ; Prepend # Lo [2] TAI VIET VOWEL AUE..TAI VIET VOWEL AY - -# Total code points: 15 +# Total code points: 1317 # 
================================================ @@ -333,24 +347,14 @@ AABB..AABC ; Prepend # Lo [2] TAI VIET VOWEL AUE..TAI VIET VOWEL AY 0DD0..0DD1 ; SpacingMark # Mc [2] SINHALA VOWEL SIGN KETTI AEDA-PILLA..SINHALA VOWEL SIGN DIGA AEDA-PILLA 0DD8..0DDE ; SpacingMark # Mc [7] SINHALA VOWEL SIGN GAETTA-PILLA..SINHALA VOWEL SIGN KOMBUVA HAA GAYANUKITTA 0DF2..0DF3 ; SpacingMark # Mc [2] SINHALA VOWEL SIGN DIGA GAETTA-PILLA..SINHALA VOWEL SIGN DIGA GAYANUKITTA -0E30 ; SpacingMark # Lo THAI CHARACTER SARA A -0E32..0E33 ; SpacingMark # Lo [2] THAI CHARACTER SARA AA..THAI CHARACTER SARA AM -0E45 ; SpacingMark # Lo THAI CHARACTER LAKKHANGYAO -0EB0 ; SpacingMark # Lo LAO VOWEL SIGN A -0EB2..0EB3 ; SpacingMark # Lo [2] LAO VOWEL SIGN AA..LAO VOWEL SIGN AM +0E33 ; SpacingMark # Lo THAI CHARACTER SARA AM +0EB3 ; SpacingMark # Lo LAO VOWEL SIGN AM 0F3E..0F3F ; SpacingMark # Mc [2] TIBETAN SIGN YAR TSHES..TIBETAN SIGN MAR TSHES 0F7F ; SpacingMark # Mc TIBETAN SIGN RNAM BCAD -102B..102C ; SpacingMark # Mc [2] MYANMAR VOWEL SIGN TALL AA..MYANMAR VOWEL SIGN AA 1031 ; SpacingMark # Mc MYANMAR VOWEL SIGN E -1038 ; SpacingMark # Mc MYANMAR SIGN VISARGA 103B..103C ; SpacingMark # Mc [2] MYANMAR CONSONANT SIGN MEDIAL YA..MYANMAR CONSONANT SIGN MEDIAL RA 1056..1057 ; SpacingMark # Mc [2] MYANMAR VOWEL SIGN VOCALIC R..MYANMAR VOWEL SIGN VOCALIC RR -1062..1064 ; SpacingMark # Mc [3] MYANMAR VOWEL SIGN SGAW KAREN EU..MYANMAR TONE MARK SGAW KAREN KE PHO -1067..106D ; SpacingMark # Mc [7] MYANMAR VOWEL SIGN WESTERN PWO KAREN EU..MYANMAR SIGN WESTERN PWO KAREN TONE-5 -1083..1084 ; SpacingMark # Mc [2] MYANMAR VOWEL SIGN SHAN AA..MYANMAR VOWEL SIGN SHAN E -1087..108C ; SpacingMark # Mc [6] MYANMAR SIGN SHAN TONE-2..MYANMAR SIGN SHAN COUNCIL TONE-3 -108F ; SpacingMark # Mc MYANMAR SIGN RUMAI PALAUNG TONE-5 -109A..109C ; SpacingMark # Mc [3] MYANMAR SIGN KHAMTI TONE-1..MYANMAR VOWEL SIGN AITON A +1084 ; SpacingMark # Mc MYANMAR VOWEL SIGN SHAN E 17B6 ; SpacingMark # Mc KHMER VOWEL SIGN AA 
17BE..17C5 ; SpacingMark # Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU 17C7..17C8 ; SpacingMark # Mc [2] KHMER SIGN REAHMUK..KHMER SIGN YUUKALEAPINTU @@ -358,13 +362,11 @@ AABB..AABC ; Prepend # Lo [2] TAI VIET VOWEL AUE..TAI VIET VOWEL AY 1929..192B ; SpacingMark # Mc [3] LIMBU SUBJOINED LETTER YA..LIMBU SUBJOINED LETTER WA 1930..1931 ; SpacingMark # Mc [2] LIMBU SMALL LETTER KA..LIMBU SMALL LETTER NGA 1933..1938 ; SpacingMark # Mc [6] LIMBU SMALL LETTER TA..LIMBU SMALL LETTER LA -19B0..19C0 ; SpacingMark # Mc [17] NEW TAI LUE VOWEL SIGN VOWEL SHORTENER..NEW TAI LUE VOWEL SIGN IY -19C8..19C9 ; SpacingMark # Mc [2] NEW TAI LUE TONE MARK-1..NEW TAI LUE TONE MARK-2 +19B5..19B7 ; SpacingMark # Mc [3] NEW TAI LUE VOWEL SIGN E..NEW TAI LUE VOWEL SIGN O +19BA ; SpacingMark # Mc NEW TAI LUE VOWEL SIGN AY 1A19..1A1B ; SpacingMark # Mc [3] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN AE 1A55 ; SpacingMark # Mc TAI THAM CONSONANT SIGN MEDIAL RA 1A57 ; SpacingMark # Mc TAI THAM CONSONANT SIGN LA TANG LAI -1A61 ; SpacingMark # Mc TAI THAM VOWEL SIGN A -1A63..1A64 ; SpacingMark # Mc [2] TAI THAM VOWEL SIGN AA..TAI THAM VOWEL SIGN TALL AA 1A6D..1A72 ; SpacingMark # Mc [6] TAI THAM VOWEL SIGN OY..TAI THAM VOWEL SIGN THAM AI 1B04 ; SpacingMark # Mc BALINESE SIGN BISAH 1B35 ; SpacingMark # Mc BALINESE VOWEL SIGN TEDUNG @@ -375,6 +377,7 @@ AABB..AABC ; Prepend # Lo [2] TAI VIET VOWEL AUE..TAI VIET VOWEL AY 1BA1 ; SpacingMark # Mc SUNDANESE CONSONANT SIGN PAMINGKAL 1BA6..1BA7 ; SpacingMark # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG 1BAA ; SpacingMark # Mc SUNDANESE SIGN PAMAAEH +1BAC..1BAD ; SpacingMark # Mc [2] SUNDANESE CONSONANT SIGN PASANGAN MA..SUNDANESE CONSONANT SIGN PASANGAN WA 1BE7 ; SpacingMark # Mc BATAK VOWEL SIGN E 1BEA..1BEC ; SpacingMark # Mc [3] BATAK VOWEL SIGN I..BATAK VOWEL SIGN O 1BEE ; SpacingMark # Mc BATAK VOWEL SIGN U @@ -382,7 +385,7 @@ AABB..AABC ; Prepend # Lo [2] TAI VIET VOWEL AUE..TAI VIET VOWEL AY 1C24..1C2B ; SpacingMark 
# Mc [8] LEPCHA SUBJOINED LETTER YA..LEPCHA VOWEL SIGN UU 1C34..1C35 ; SpacingMark # Mc [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG 1CE1 ; SpacingMark # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA -1CF2 ; SpacingMark # Mc VEDIC SIGN ARDHAVISARGA +1CF2..1CF3 ; SpacingMark # Mc [2] VEDIC SIGN ARDHAVISARGA..VEDIC SIGN ROTATED ARDHAVISARGA A823..A824 ; SpacingMark # Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I A827 ; SpacingMark # Mc SYLOTI NAGRI VOWEL SIGN OO A880..A881 ; SpacingMark # Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA @@ -395,7 +398,9 @@ A9BD..A9C0 ; SpacingMark # Mc [4] JAVANESE CONSONANT SIGN KERET..JAVANESE P AA2F..AA30 ; SpacingMark # Mc [2] CHAM VOWEL SIGN O..CHAM VOWEL SIGN AI AA33..AA34 ; SpacingMark # Mc [2] CHAM CONSONANT SIGN YA..CHAM CONSONANT SIGN RA AA4D ; SpacingMark # Mc CHAM CONSONANT SIGN FINAL H -AA7B ; SpacingMark # Mc MYANMAR SIGN PAO KAREN TONE +AAEB ; SpacingMark # Mc MEETEI MAYEK VOWEL SIGN II +AAEE..AAEF ; SpacingMark # Mc [2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU +AAF5 ; SpacingMark # Mc MEETEI MAYEK VOWEL SIGN VISARGA ABE3..ABE4 ; SpacingMark # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP ABE6..ABE7 ; SpacingMark # Mc [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP ABE9..ABEA ; SpacingMark # Mc [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEETEI MAYEK VOWEL SIGN NUNG @@ -405,10 +410,18 @@ ABEC ; SpacingMark # Mc MEETEI MAYEK LUM IYEK 11082 ; SpacingMark # Mc KAITHI SIGN VISARGA 110B0..110B2 ; SpacingMark # Mc [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II 110B7..110B8 ; SpacingMark # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU +1112C ; SpacingMark # Mc CHAKMA VOWEL SIGN E +11182 ; SpacingMark # Mc SHARADA SIGN VISARGA +111B3..111B5 ; SpacingMark # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II +111BF..111C0 ; SpacingMark # Mc [2] SHARADA VOWEL SIGN AU..SHARADA SIGN VIRAMA +116AC ; SpacingMark # Mc TAKRI SIGN VISARGA 
+116AE..116AF ; SpacingMark # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II +116B6 ; SpacingMark # Mc TAKRI SIGN VIRAMA +16F51..16F7E ; SpacingMark # Mc [46] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN NG 1D166 ; SpacingMark # Mc MUSICAL SYMBOL COMBINING SPRECHGESANG STEM 1D16D ; SpacingMark # Mc MUSICAL SYMBOL COMBINING AUGMENTATION DOT -# Total code points: 275 +# Total code points: 291 # ================================================ diff --git a/lib/unicore/auxiliary/SentenceBreakProperty.txt b/lib/unicore/auxiliary/SentenceBreakProperty.txt index 87cf2a6762..a5eb0b71c0 100644 --- a/lib/unicore/auxiliary/SentenceBreakProperty.txt +++ b/lib/unicore/auxiliary/SentenceBreakProperty.txt @@ -1,8 +1,8 @@ -# SentenceBreakProperty-6.0.0.txt -# Date: 2010-08-19, 00:48:47 GMT [MD] +# SentenceBreakProperty-6.1.0.txt +# Date: 2011-11-27, 05:10:50 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ @@ -53,6 +53,7 @@ 0825..0827 ; Extend # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U 0829..082D ; Extend # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA 0859..085B ; Extend # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK +08E4..08FE ; Extend # Mn [27] ARABIC CURLY FATHA..ARABIC DAMMA WITH DOT 0900..0902 ; Extend # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA 0903 ; Extend # Mc DEVANAGARI SIGN VISARGA 093A ; Extend # Mn DEVANAGARI VOWEL SIGN OE @@ -195,6 +196,7 @@ 1732..1734 ; Extend # Mn [3] HANUNOO VOWEL SIGN I..HANUNOO SIGN PAMUDPOD 1752..1753 ; Extend # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U 1772..1773 ; Extend # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U +17B4..17B5 ; Extend # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA 17B6 ; Extend # Mc KHMER VOWEL SIGN AA 17B7..17BD ; Extend # Mn [7] KHMER 
VOWEL SIGN I..KHMER VOWEL SIGN UA 17BE..17C5 ; Extend # Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU @@ -246,6 +248,8 @@ 1BA6..1BA7 ; Extend # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG 1BA8..1BA9 ; Extend # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG 1BAA ; Extend # Mc SUNDANESE SIGN PAMAAEH +1BAB ; Extend # Mn SUNDANESE SIGN VIRAMA +1BAC..1BAD ; Extend # Mc [2] SUNDANESE CONSONANT SIGN PASANGAN MA..SUNDANESE CONSONANT SIGN PASANGAN WA 1BE6 ; Extend # Mn BATAK SIGN TOMPI 1BE7 ; Extend # Mc BATAK VOWEL SIGN E 1BE8..1BE9 ; Extend # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE @@ -263,7 +267,8 @@ 1CE1 ; Extend # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA 1CE2..1CE8 ; Extend # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL 1CED ; Extend # Mn VEDIC SIGN TIRYAK -1CF2 ; Extend # Mc VEDIC SIGN ARDHAVISARGA +1CF2..1CF3 ; Extend # Mc [2] VEDIC SIGN ARDHAVISARGA..VEDIC SIGN ROTATED ARDHAVISARGA +1CF4 ; Extend # Mn VEDIC TONE CANDRA ABOVE 1DC0..1DE6 ; Extend # Mn [39] COMBINING DOTTED GRAVE ACCENT..COMBINING LATIN SMALL LETTER Z 1DFC..1DFF ; Extend # Mn [4] COMBINING DOUBLE INVERTED BREVE BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW 200C..200D ; Extend # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER @@ -275,11 +280,13 @@ 2CEF..2CF1 ; Extend # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS 2D7F ; Extend # Mn TIFINAGH CONSONANT JOINER 2DE0..2DFF ; Extend # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS -302A..302F ; Extend # Mn [6] IDEOGRAPHIC LEVEL TONE MARK..HANGUL DOUBLE DOT TONE MARK +302A..302D ; Extend # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK +302E..302F ; Extend # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK 3099..309A ; Extend # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK A66F ; Extend # Mn COMBINING 
CYRILLIC VZMET A670..A672 ; Extend # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN -A67C..A67D ; Extend # Mn [2] COMBINING CYRILLIC KAVYKA..COMBINING CYRILLIC PAYEROK +A674..A67D ; Extend # Mn [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK +A69F ; Extend # Mn COMBINING CYRILLIC LETTER IOTIFIED E A6F0..A6F1 ; Extend # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS A802 ; Extend # Mn SYLOTI NAGRI SIGN DVISVARA A806 ; Extend # Mn SYLOTI NAGRI SIGN HASANTA @@ -316,6 +323,11 @@ AAB2..AAB4 ; Extend # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U AAB7..AAB8 ; Extend # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA AABE..AABF ; Extend # Mn [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK AAC1 ; Extend # Mn TAI VIET TONE MAI THO +AAEB ; Extend # Mc MEETEI MAYEK VOWEL SIGN II +AAEC..AAED ; Extend # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI +AAEE..AAEF ; Extend # Mc [2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU +AAF5 ; Extend # Mc MEETEI MAYEK VOWEL SIGN VISARGA +AAF6 ; Extend # Mn MEETEI MAYEK VIRAMA ABE3..ABE4 ; Extend # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP ABE5 ; Extend # Mn MEETEI MAYEK VOWEL SIGN ANAP ABE6..ABE7 ; Extend # Mc [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP @@ -343,6 +355,24 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 110B3..110B6 ; Extend # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI 110B7..110B8 ; Extend # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU 110B9..110BA ; Extend # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA +11100..11102 ; Extend # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11127..1112B ; Extend # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112C ; Extend # Mc CHAKMA VOWEL SIGN E +1112D..11134 ; Extend # Mn [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA +11180..11181 ; Extend # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA 
+11182 ; Extend # Mc SHARADA SIGN VISARGA +111B3..111B5 ; Extend # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II +111B6..111BE ; Extend # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +111BF..111C0 ; Extend # Mc [2] SHARADA VOWEL SIGN AU..SHARADA SIGN VIRAMA +116AB ; Extend # Mn TAKRI SIGN ANUSVARA +116AC ; Extend # Mc TAKRI SIGN VISARGA +116AD ; Extend # Mn TAKRI VOWEL SIGN AA +116AE..116AF ; Extend # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II +116B0..116B5 ; Extend # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +116B6 ; Extend # Mc TAKRI SIGN VIRAMA +116B7 ; Extend # Mn TAKRI SIGN NUKTA +16F51..16F7E ; Extend # Mc [46] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN NG +16F8F..16F92 ; Extend # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW 1D165..1D166 ; Extend # Mc [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM 1D167..1D169 ; Extend # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 1D16D..1D172 ; Extend # Mc [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5 @@ -352,7 +382,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 1D242..1D244 ; Extend # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 1502 +# Total code points: 1649 # ================================================ @@ -365,10 +395,9 @@ E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 # ================================================ 00AD ; Format # Cf SOFT HYPHEN -0600..0603 ; Format # Cf [4] ARABIC NUMBER SIGN..ARABIC SIGN SAFHA +0600..0604 ; Format # Cf [5] ARABIC NUMBER SIGN..ARABIC SIGN SAMVAT 06DD ; Format # Cf ARABIC END OF AYAH 070F ; Format # Cf SYRIAC ABBREVIATION MARK -17B4..17B5 ; Format # Cf [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA 200B ; Format # Cf ZERO WIDTH SPACE 200E..200F ; Format # Cf [2] 
LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK 202A..202E ; Format # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE @@ -381,7 +410,7 @@ FFF9..FFFB ; Format # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANN E0001 ; Format # Cf LANGUAGE TAG E0020..E007F ; Format # Cf [96] TAG SPACE..CANCEL TAG -# Total code points: 138 +# Total code points: 137 # ================================================ @@ -401,9 +430,9 @@ E0020..E007F ; Format # Cf [96] TAG SPACE..CANCEL TAG # ================================================ 0061..007A ; Lower # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z -00AA ; Lower # L& FEMININE ORDINAL INDICATOR +00AA ; Lower # Lo FEMININE ORDINAL INDICATOR 00B5 ; Lower # L& MICRO SIGN -00BA ; Lower # L& MASCULINE ORDINAL INDICATOR +00BA ; Lower # Lo MASCULINE ORDINAL INDICATOR 00DF..00F6 ; Lower # L& [24] LATIN SMALL LETTER SHARP S..LATIN SMALL LETTER O WITH DIAERESIS 00F8..00FF ; Lower # L& [8] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER Y WITH DIAERESIS 0101 ; Lower # L& LATIN SMALL LETTER A WITH MACRON @@ -673,8 +702,8 @@ E0020..E007F ; Format # Cf [96] TAG SPACE..CANCEL TAG 0527 ; Lower # L& CYRILLIC SMALL LETTER SHHA WITH DESCENDER 0561..0587 ; Lower # L& [39] ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LIGATURE ECH YIWN 1D00..1D2B ; Lower # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL -1D2C..1D61 ; Lower # Lm [54] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI -1D62..1D77 ; Lower # L& [22] LATIN SUBSCRIPT SMALL LETTER I..LATIN SMALL LETTER TURNED G +1D2C..1D6A ; Lower # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1D6B..1D77 ; Lower # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G 1D78 ; Lower # Lm MODIFIER LETTER CYRILLIC EN 1D79..1D9A ; Lower # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK 1D9B..1DBF ; Lower # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA @@ -822,7 +851,9 @@ E0020..E007F ; Format # 
Cf [96] TAG SPACE..CANCEL TAG 1FE0..1FE7 ; Lower # L& [8] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI 1FF2..1FF4 ; Lower # L& [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI 1FF6..1FF7 ; Lower # L& [2] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI -2090..2094 ; Lower # Lm [5] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER SCHWA +2071 ; Lower # Lm SUPERSCRIPT LATIN SMALL LETTER I +207F ; Lower # Lm SUPERSCRIPT LATIN SMALL LETTER N +2090..209C ; Lower # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T 210A ; Lower # L& SCRIPT SMALL G 210E..210F ; Lower # L& [2] PLANCK CONSTANT..PLANCK CONSTANT OVER TWO PI 2113 ; Lower # L& SCRIPT SMALL L @@ -843,8 +874,8 @@ E0020..E007F ; Format # Cf [96] TAG SPACE..CANCEL TAG 2C6C ; Lower # L& LATIN SMALL LETTER Z WITH DESCENDER 2C71 ; Lower # L& LATIN SMALL LETTER V WITH RIGHT HOOK 2C73..2C74 ; Lower # L& [2] LATIN SMALL LETTER W WITH HOOK..LATIN SMALL LETTER V WITH CURL -2C76..2C7C ; Lower # L& [7] LATIN SMALL LETTER HALF H..LATIN SUBSCRIPT SMALL LETTER J -2C7D ; Lower # Lm MODIFIER LETTER CAPITAL V +2C76..2C7B ; Lower # L& [6] LATIN SMALL LETTER HALF H..LATIN LETTER SMALL CAPITAL TURNED E +2C7C..2C7D ; Lower # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V 2C81 ; Lower # L& COPTIC SMALL LETTER ALFA 2C83 ; Lower # L& COPTIC SMALL LETTER VIDA 2C85 ; Lower # L& COPTIC SMALL LETTER GAMMA @@ -897,7 +928,10 @@ E0020..E007F ; Format # Cf [96] TAG SPACE..CANCEL TAG 2CE3..2CE4 ; Lower # L& [2] COPTIC SMALL LETTER OLD NUBIAN WAU..COPTIC SYMBOL KAI 2CEC ; Lower # L& COPTIC SMALL LETTER CRYPTOGRAMMIC SHEI 2CEE ; Lower # L& COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CF3 ; Lower # L& COPTIC SMALL LETTER BOHAIRIC KHEI 2D00..2D25 ; Lower # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE +2D27 ; Lower # L& 
GEORGIAN SMALL LETTER YN +2D2D ; Lower # L& GEORGIAN SMALL LETTER AEN A641 ; Lower # L& CYRILLIC SMALL LETTER ZEMLYA A643 ; Lower # L& CYRILLIC SMALL LETTER DZELO A645 ; Lower # L& CYRILLIC SMALL LETTER REVERSED DZE @@ -983,11 +1017,13 @@ A787 ; Lower # L& LATIN SMALL LETTER INSULAR T A78C ; Lower # L& LATIN SMALL LETTER SALTILLO A78E ; Lower # L& LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A791 ; Lower # L& LATIN SMALL LETTER N WITH DESCENDER +A793 ; Lower # L& LATIN SMALL LETTER C WITH BAR A7A1 ; Lower # L& LATIN SMALL LETTER G WITH OBLIQUE STROKE A7A3 ; Lower # L& LATIN SMALL LETTER K WITH OBLIQUE STROKE A7A5 ; Lower # L& LATIN SMALL LETTER N WITH OBLIQUE STROKE A7A7 ; Lower # L& LATIN SMALL LETTER R WITH OBLIQUE STROKE A7A9 ; Lower # L& LATIN SMALL LETTER S WITH OBLIQUE STROKE +A7F8..A7F9 ; Lower # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE A7FA ; Lower # L& LATIN LETTER SMALL CAPITAL TURNED M FB00..FB06 ; Lower # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST FB13..FB17 ; Lower # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH @@ -1022,7 +1058,7 @@ FF41..FF5A ; Lower # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1D7C4..1D7C9 ; Lower # L& [6] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC PI SYMBOL 1D7CB ; Lower # L& MATHEMATICAL BOLD SMALL DIGAMMA -# Total code points: 1917 +# Total code points: 1933 # ================================================ @@ -1294,6 +1330,8 @@ FF41..FF5A ; Lower # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 0526 ; Upper # L& CYRILLIC CAPITAL LETTER SHHA WITH DESCENDER 0531..0556 ; Upper # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH 10A0..10C5 ; Upper # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; Upper # L& GEORGIAN CAPITAL LETTER YN +10CD ; Upper # L& GEORGIAN CAPITAL LETTER AEN 1E00 ; Upper # L& LATIN CAPITAL LETTER A WITH RING BELOW 1E02 ; 
Upper # L& LATIN CAPITAL LETTER B WITH DOT ABOVE 1E04 ; Upper # L& LATIN CAPITAL LETTER B WITH DOT BELOW @@ -1513,6 +1551,7 @@ FF41..FF5A ; Lower # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 2CE2 ; Upper # L& COPTIC CAPITAL LETTER OLD NUBIAN WAU 2CEB ; Upper # L& COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI 2CED ; Upper # L& COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA +2CF2 ; Upper # L& COPTIC CAPITAL LETTER BOHAIRIC KHEI A640 ; Upper # L& CYRILLIC CAPITAL LETTER ZEMLYA A642 ; Upper # L& CYRILLIC CAPITAL LETTER DZELO A644 ; Upper # L& CYRILLIC CAPITAL LETTER REVERSED DZE @@ -1596,11 +1635,13 @@ A786 ; Upper # L& LATIN CAPITAL LETTER INSULAR T A78B ; Upper # L& LATIN CAPITAL LETTER SALTILLO A78D ; Upper # L& LATIN CAPITAL LETTER TURNED H A790 ; Upper # L& LATIN CAPITAL LETTER N WITH DESCENDER +A792 ; Upper # L& LATIN CAPITAL LETTER C WITH BAR A7A0 ; Upper # L& LATIN CAPITAL LETTER G WITH OBLIQUE STROKE A7A2 ; Upper # L& LATIN CAPITAL LETTER K WITH OBLIQUE STROKE A7A4 ; Upper # L& LATIN CAPITAL LETTER N WITH OBLIQUE STROKE A7A6 ; Upper # L& LATIN CAPITAL LETTER R WITH OBLIQUE STROKE A7A8 ; Upper # L& LATIN CAPITAL LETTER S WITH OBLIQUE STROKE +A7AA ; Upper # L& LATIN CAPITAL LETTER H WITH HOOK FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z 10400..10427 ; Upper # L& [40] DESERET CAPITAL LETTER LONG I..DESERET CAPITAL LETTER EW 1D400..1D419 ; Upper # L& [26] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL BOLD CAPITAL Z @@ -1635,7 +1676,7 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT 1D790..1D7A8 ; Upper # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA 1D7CA ; Upper # L& MATHEMATICAL BOLD CAPITAL DIGAMMA -# Total code points: 1509 +# Total code points: 1514 # ================================================ @@ -1673,6 +1714,8 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT 0824 ; OLetter # Lm 
SAMARITAN MODIFIER LETTER SHORT A 0828 ; OLetter # Lm SAMARITAN MODIFIER LETTER I 0840..0858 ; OLetter # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN +08A0 ; OLetter # Lo ARABIC LETTER BEH WITH SMALL V BELOW +08A2..08AC ; OLetter # Lo [11] ARABIC LETTER JEEM WITH TWO DOTS ABOVE..ARABIC LETTER ROHINGYA YEH 0904..0939 ; OLetter # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA 093D ; OLetter # Lo DEVANAGARI SIGN AVAGRAHA 0950 ; OLetter # Lo DEVANAGARI OM @@ -1780,7 +1823,7 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT 0EBD ; OLetter # Lo LAO SEMIVOWEL SIGN NYO 0EC0..0EC4 ; OLetter # Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI 0EC6 ; OLetter # Lm LAO KO LA -0EDC..0EDD ; OLetter # Lo [2] LAO HO NO..LAO HO MO +0EDC..0EDF ; OLetter # Lo [4] LAO HO NO..LAO LETTER KHMU NYO 0F00 ; OLetter # Lo TIBETAN SYLLABLE OM 0F40..0F47 ; OLetter # Lo [8] TIBETAN LETTER KA..TIBETAN LETTER JA 0F49..0F6C ; OLetter # Lo [36] TIBETAN LETTER NYA..TIBETAN LETTER RRA @@ -1796,7 +1839,7 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT 108E ; OLetter # Lo MYANMAR LETTER RUMAI PALAUNG FA 10D0..10FA ; OLetter # Lo [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN 10FC ; OLetter # Lm MODIFIER LETTER GEORGIAN NAR -1100..1248 ; OLetter # Lo [329] HANGUL CHOSEONG KIYEOK..ETHIOPIC SYLLABLE QWA +10FD..1248 ; OLetter # Lo [332] GEORGIAN LETTER AEN..ETHIOPIC SYLLABLE QWA 124A..124D ; OLetter # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE 1250..1256 ; OLetter # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO 1258 ; OLetter # Lo ETHIOPIC SYLLABLE QHWA @@ -1846,20 +1889,18 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT 1B45..1B4B ; OLetter # Lo [7] BALINESE LETTER KAF SASAK..BALINESE LETTER ASYURA SASAK 1B83..1BA0 ; OLetter # Lo [30] SUNDANESE LETTER A..SUNDANESE LETTER HA 1BAE..1BAF ; OLetter # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA -1BC0..1BE5 ; OLetter # Lo [38] BATAK LETTER 
A..BATAK LETTER U +1BBA..1BE5 ; OLetter # Lo [44] SUNDANESE AVAGRAHA..BATAK LETTER U 1C00..1C23 ; OLetter # Lo [36] LEPCHA LETTER KA..LEPCHA LETTER A 1C4D..1C4F ; OLetter # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA 1C5A..1C77 ; OLetter # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH 1C78..1C7D ; OLetter # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD 1CE9..1CEC ; OLetter # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL 1CEE..1CF1 ; OLetter # Lo [4] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ANUSVARA UBHAYATO MUKHA -2071 ; OLetter # Lm SUPERSCRIPT LATIN SMALL LETTER I -207F ; OLetter # Lm SUPERSCRIPT LATIN SMALL LETTER N -2095..209C ; OLetter # Lm [8] LATIN SUBSCRIPT SMALL LETTER H..LATIN SUBSCRIPT SMALL LETTER T +1CF5..1CF6 ; OLetter # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA 2135..2138 ; OLetter # Lo [4] ALEF SYMBOL..DALET SYMBOL 2180..2182 ; OLetter # Nl [3] ROMAN NUMERAL ONE THOUSAND C D..ROMAN NUMERAL TEN THOUSAND 2185..2188 ; OLetter # Nl [4] ROMAN NUMERAL SIX LATE FORM..ROMAN NUMERAL ONE HUNDRED THOUSAND -2D30..2D65 ; OLetter # Lo [54] TIFINAGH LETTER YA..TIFINAGH LETTER YAZZ +2D30..2D67 ; OLetter # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO 2D6F ; OLetter # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK 2D80..2D96 ; OLetter # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE 2DA0..2DA6 ; OLetter # Lo [7] ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO @@ -1890,7 +1931,7 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT 31A0..31BA ; OLetter # Lo [27] BOPOMOFO LETTER BU..BOPOMOFO LETTER ZY 31F0..31FF ; OLetter # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO 3400..4DB5 ; OLetter # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5 -4E00..9FCB ; OLetter # Lo [20940] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCB +4E00..9FCC ; OLetter # Lo [20941] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCC A000..A014 ; OLetter # Lo [21] YI SYLLABLE 
IT..YI SYLLABLE E A015 ; OLetter # Lm YI SYLLABLE WU A016..A48C ; OLetter # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR @@ -1934,6 +1975,9 @@ AAC0 ; OLetter # Lo TAI VIET TONE MAI NUENG AAC2 ; OLetter # Lo TAI VIET TONE MAI SONG AADB..AADC ; OLetter # Lo [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG AADD ; OLetter # Lm TAI VIET SYMBOL SAM +AAE0..AAEA ; OLetter # Lo [11] MEETEI MAYEK LETTER E..MEETEI MAYEK LETTER SSA +AAF2 ; OLetter # Lo MEETEI MAYEK ANJI +AAF3..AAF4 ; OLetter # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK AB01..AB06 ; OLetter # Lo [6] ETHIOPIC SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO AB09..AB0E ; OLetter # Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO AB11..AB16 ; OLetter # Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO @@ -1943,8 +1987,7 @@ ABC0..ABE2 ; OLetter # Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER AC00..D7A3 ; OLetter # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH D7B0..D7C6 ; OLetter # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E D7CB..D7FB ; OLetter # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH -F900..FA2D ; OLetter # Lo [302] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA2D -FA30..FA6D ; OLetter # Lo [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D +F900..FA6D ; OLetter # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D FA70..FAD9 ; OLetter # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 FB1D ; OLetter # Lo HEBREW LETTER YOD WITH HIRIQ FB1F..FB28 ; OLetter # Lo [10] HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBREW LETTER WIDE TAV @@ -1996,6 +2039,8 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1083F..10855 ; OLetter # Lo [23] CYPRIOT SYLLABLE ZO..IMPERIAL ARAMAIC LETTER TAW 10900..10915 ; OLetter # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU 10920..10939 ; OLetter # Lo [26] LYDIAN LETTER A..LYDIAN LETTER 
C +10980..109B7 ; OLetter # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA +109BE..109BF ; OLetter # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN 10A00 ; OLetter # Lo KHAROSHTHI LETTER A 10A10..10A13 ; OLetter # Lo [4] KHAROSHTHI LETTER KA..KHAROSHTHI LETTER GHA 10A15..10A17 ; OLetter # Lo [3] KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA @@ -2007,17 +2052,58 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 10C00..10C48 ; OLetter # Lo [73] OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH 11003..11037 ; OLetter # Lo [53] BRAHMI SIGN JIHVAMULIYA..BRAHMI LETTER OLD TAMIL NNNA 11083..110AF ; OLetter # Lo [45] KAITHI LETTER A..KAITHI LETTER HA +110D0..110E8 ; OLetter # Lo [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE +11103..11126 ; OLetter # Lo [36] CHAKMA LETTER AA..CHAKMA LETTER HAA +11183..111B2 ; OLetter # Lo [48] SHARADA LETTER A..SHARADA LETTER HA +111C1..111C4 ; OLetter # Lo [4] SHARADA SIGN AVAGRAHA..SHARADA OM +11680..116AA ; OLetter # Lo [43] TAKRI LETTER A..TAKRI LETTER RRA 12000..1236E ; OLetter # Lo [879] CUNEIFORM SIGN A..CUNEIFORM SIGN ZUM 12400..12462 ; OLetter # Nl [99] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN OLD ASSYRIAN ONE QUARTER 13000..1342E ; OLetter # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032 16800..16A38 ; OLetter # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ +16F00..16F44 ; OLetter # Lo [69] MIAO LETTER PA..MIAO LETTER HHA +16F50 ; OLetter # Lo MIAO LETTER NASALIZATION +16F93..16F9F ; OLetter # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 1B000..1B001 ; OLetter # Lo [2] KATAKANA LETTER ARCHAIC E..HIRAGANA LETTER ARCHAIC YE +1EE00..1EE03 ; OLetter # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; OLetter # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; OLetter # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM 
+1EE24 ; OLetter # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; OLetter # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; OLetter # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; OLetter # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; OLetter # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; OLetter # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; OLetter # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; OLetter # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; OLetter # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; OLetter # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; OLetter # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; OLetter # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; OLetter # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; OLetter # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; OLetter # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; OLetter # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; OLetter # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; OLetter # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; OLetter # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; OLetter # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; OLetter # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; OLetter # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; OLetter # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; OLetter # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; OLetter # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; OLetter # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; OLetter # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED 
GHAIN +1EEA1..1EEA3 ; OLetter # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; OLetter # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; OLetter # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN 20000..2A6D6 ; OLetter # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6 2A700..2B734 ; OLetter # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734 2B740..2B81D ; OLetter # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D 2F800..2FA1D ; OLetter # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D -# Total code points: 97369 +# Total code points: 97841 # ================================================ @@ -2058,9 +2144,13 @@ AA50..AA59 ; Numeric # Nd [10] CHAM DIGIT ZERO..CHAM DIGIT NINE ABF0..ABF9 ; Numeric # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE 104A0..104A9 ; Numeric # Nd [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE 11066..1106F ; Numeric # Nd [10] BRAHMI DIGIT ZERO..BRAHMI DIGIT NINE +110F0..110F9 ; Numeric # Nd [10] SORA SOMPENG DIGIT ZERO..SORA SOMPENG DIGIT NINE +11136..1113F ; Numeric # Nd [10] CHAKMA DIGIT ZERO..CHAKMA DIGIT NINE +111D0..111D9 ; Numeric # Nd [10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE +116C0..116C9 ; Numeric # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE 1D7CE..1D7FF ; Numeric # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE -# Total code points: 412 +# Total code points: 452 # ================================================ @@ -2109,6 +2199,7 @@ A8CE..A8CF ; STerm # Po [2] SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA A92F ; STerm # Po KAYAH LI SIGN SHYA A9C8..A9C9 ; STerm # Po [2] JAVANESE PADA LINGSA..JAVANESE PADA LUNGSI AA5D..AA5F ; STerm # Po [3] CHAM PUNCTUATION DANDA..CHAM PUNCTUATION TRIPLE DANDA +AAF0..AAF1 ; STerm # Po [2] MEETEI MAYEK CHEIKHAN..MEETEI MAYEK AHANG KHUDAM ABEB ; 
STerm # Po MEETEI MAYEK CHEIKHEI FE56..FE57 ; STerm # Po [2] SMALL QUESTION MARK..SMALL EXCLAMATION MARK FF01 ; STerm # Po FULLWIDTH EXCLAMATION MARK @@ -2117,8 +2208,10 @@ FF61 ; STerm # Po HALFWIDTH IDEOGRAPHIC FULL STOP 10A56..10A57 ; STerm # Po [2] KHAROSHTHI PUNCTUATION DANDA..KHAROSHTHI PUNCTUATION DOUBLE DANDA 11047..11048 ; STerm # Po [2] BRAHMI DANDA..BRAHMI DOUBLE DANDA 110BE..110C1 ; STerm # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA +11141..11143 ; STerm # Po [3] CHAKMA DANDA..CHAKMA QUESTION MARK +111C5..111C6 ; STerm # Po [2] SHARADA DANDA..SHARADA DOUBLE DANDA -# Total code points: 73 +# Total code points: 80 # ================================================ diff --git a/lib/unicore/auxiliary/WordBreakProperty.txt b/lib/unicore/auxiliary/WordBreakProperty.txt index 4a3b6e4ab3..7f3225c6a8 100644 --- a/lib/unicore/auxiliary/WordBreakProperty.txt +++ b/lib/unicore/auxiliary/WordBreakProperty.txt @@ -1,8 +1,8 @@ -# WordBreakProperty-6.0.0.txt -# Date: 2010-08-19, 00:48:48 GMT [MD] +# WordBreakProperty-6.1.0.txt +# Date: 2011-11-27, 05:10:51 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. 
# For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ @@ -62,6 +62,7 @@ 0825..0827 ; Extend # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U 0829..082D ; Extend # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA 0859..085B ; Extend # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK +08E4..08FE ; Extend # Mn [27] ARABIC CURLY FATHA..ARABIC DAMMA WITH DOT 0900..0902 ; Extend # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA 0903 ; Extend # Mc DEVANAGARI SIGN VISARGA 093A ; Extend # Mn DEVANAGARI VOWEL SIGN OE @@ -204,6 +205,7 @@ 1732..1734 ; Extend # Mn [3] HANUNOO VOWEL SIGN I..HANUNOO SIGN PAMUDPOD 1752..1753 ; Extend # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U 1772..1773 ; Extend # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U +17B4..17B5 ; Extend # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA 17B6 ; Extend # Mc KHMER VOWEL SIGN AA 17B7..17BD ; Extend # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA 17BE..17C5 ; Extend # Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU @@ -255,6 +257,8 @@ 1BA6..1BA7 ; Extend # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG 1BA8..1BA9 ; Extend # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG 1BAA ; Extend # Mc SUNDANESE SIGN PAMAAEH +1BAB ; Extend # Mn SUNDANESE SIGN VIRAMA +1BAC..1BAD ; Extend # Mc [2] SUNDANESE CONSONANT SIGN PASANGAN MA..SUNDANESE CONSONANT SIGN PASANGAN WA 1BE6 ; Extend # Mn BATAK SIGN TOMPI 1BE7 ; Extend # Mc BATAK VOWEL SIGN E 1BE8..1BE9 ; Extend # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE @@ -272,7 +276,8 @@ 1CE1 ; Extend # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA 1CE2..1CE8 ; Extend # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL 1CED ; Extend # Mn VEDIC SIGN TIRYAK -1CF2 ; Extend # Mc VEDIC SIGN ARDHAVISARGA +1CF2..1CF3 ; Extend # Mc [2] VEDIC SIGN ARDHAVISARGA..VEDIC SIGN 
ROTATED ARDHAVISARGA +1CF4 ; Extend # Mn VEDIC TONE CANDRA ABOVE 1DC0..1DE6 ; Extend # Mn [39] COMBINING DOTTED GRAVE ACCENT..COMBINING LATIN SMALL LETTER Z 1DFC..1DFF ; Extend # Mn [4] COMBINING DOUBLE INVERTED BREVE BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW 200C..200D ; Extend # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER @@ -284,11 +289,13 @@ 2CEF..2CF1 ; Extend # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS 2D7F ; Extend # Mn TIFINAGH CONSONANT JOINER 2DE0..2DFF ; Extend # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS -302A..302F ; Extend # Mn [6] IDEOGRAPHIC LEVEL TONE MARK..HANGUL DOUBLE DOT TONE MARK +302A..302D ; Extend # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK +302E..302F ; Extend # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK 3099..309A ; Extend # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK A66F ; Extend # Mn COMBINING CYRILLIC VZMET A670..A672 ; Extend # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN -A67C..A67D ; Extend # Mn [2] COMBINING CYRILLIC KAVYKA..COMBINING CYRILLIC PAYEROK +A674..A67D ; Extend # Mn [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK +A69F ; Extend # Mn COMBINING CYRILLIC LETTER IOTIFIED E A6F0..A6F1 ; Extend # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS A802 ; Extend # Mn SYLOTI NAGRI SIGN DVISVARA A806 ; Extend # Mn SYLOTI NAGRI SIGN HASANTA @@ -325,6 +332,11 @@ AAB2..AAB4 ; Extend # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U AAB7..AAB8 ; Extend # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA AABE..AABF ; Extend # Mn [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK AAC1 ; Extend # Mn TAI VIET TONE MAI THO +AAEB ; Extend # Mc MEETEI MAYEK VOWEL SIGN II +AAEC..AAED ; Extend # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI +AAEE..AAEF ; Extend # Mc [2] MEETEI 
MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU +AAF5 ; Extend # Mc MEETEI MAYEK VOWEL SIGN VISARGA +AAF6 ; Extend # Mn MEETEI MAYEK VIRAMA ABE3..ABE4 ; Extend # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP ABE5 ; Extend # Mn MEETEI MAYEK VOWEL SIGN ANAP ABE6..ABE7 ; Extend # Mc [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP @@ -352,6 +364,24 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 110B3..110B6 ; Extend # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI 110B7..110B8 ; Extend # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU 110B9..110BA ; Extend # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA +11100..11102 ; Extend # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11127..1112B ; Extend # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112C ; Extend # Mc CHAKMA VOWEL SIGN E +1112D..11134 ; Extend # Mn [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA +11180..11181 ; Extend # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +11182 ; Extend # Mc SHARADA SIGN VISARGA +111B3..111B5 ; Extend # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II +111B6..111BE ; Extend # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +111BF..111C0 ; Extend # Mc [2] SHARADA VOWEL SIGN AU..SHARADA SIGN VIRAMA +116AB ; Extend # Mn TAKRI SIGN ANUSVARA +116AC ; Extend # Mc TAKRI SIGN VISARGA +116AD ; Extend # Mn TAKRI VOWEL SIGN AA +116AE..116AF ; Extend # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II +116B0..116B5 ; Extend # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +116B6 ; Extend # Mc TAKRI SIGN VIRAMA +116B7 ; Extend # Mn TAKRI SIGN NUKTA +16F51..16F7E ; Extend # Mc [46] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN NG +16F8F..16F92 ; Extend # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW 1D165..1D166 ; Extend # Mc [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM 1D167..1D169 ; Extend # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 1D16D..1D172 ; 
Extend # Mc [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5 @@ -361,15 +391,14 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 1D242..1D244 ; Extend # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 1502 +# Total code points: 1649 # ================================================ 00AD ; Format # Cf SOFT HYPHEN -0600..0603 ; Format # Cf [4] ARABIC NUMBER SIGN..ARABIC SIGN SAFHA +0600..0604 ; Format # Cf [5] ARABIC NUMBER SIGN..ARABIC SIGN SAMVAT 06DD ; Format # Cf ARABIC END OF AYAH 070F ; Format # Cf SYRIAC ABBREVIATION MARK -17B4..17B5 ; Format # Cf [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA 200E..200F ; Format # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK 202A..202E ; Format # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE 2060..2064 ; Format # Cf [5] WORD JOINER..INVISIBLE PLUS @@ -381,7 +410,7 @@ FFF9..FFFB ; Format # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANN E0001 ; Format # Cf LANGUAGE TAG E0020..E007F ; Format # Cf [96] TAG SPACE..CANCEL TAG -# Total code points: 137 +# Total code points: 136 # ================================================ @@ -405,9 +434,9 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK 0041..005A ; ALetter # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z 0061..007A ; ALetter # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z -00AA ; ALetter # L& FEMININE ORDINAL INDICATOR +00AA ; ALetter # Lo FEMININE ORDINAL INDICATOR 00B5 ; ALetter # L& MICRO SIGN -00BA ; ALetter # L& MASCULINE ORDINAL INDICATOR +00BA ; ALetter # Lo MASCULINE ORDINAL INDICATOR 00C0..00D6 ; ALetter # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS 00D8..00F6 ; ALetter # L& [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS 00F8..01BA ; ALetter # L& 
[195] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL @@ -462,6 +491,8 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK 0824 ; ALetter # Lm SAMARITAN MODIFIER LETTER SHORT A 0828 ; ALetter # Lm SAMARITAN MODIFIER LETTER I 0840..0858 ; ALetter # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN +08A0 ; ALetter # Lo ARABIC LETTER BEH WITH SMALL V BELOW +08A2..08AC ; ALetter # Lo [11] ARABIC LETTER JEEM WITH TWO DOTS ABOVE..ARABIC LETTER ROHINGYA YEH 0904..0939 ; ALetter # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA 093D ; ALetter # Lo DEVANAGARI SIGN AVAGRAHA 0950 ; ALetter # Lo DEVANAGARI OM @@ -554,9 +585,11 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK 0F49..0F6C ; ALetter # Lo [36] TIBETAN LETTER NYA..TIBETAN LETTER RRA 0F88..0F8C ; ALetter # Lo [5] TIBETAN SIGN LCE TSA CAN..TIBETAN SIGN INVERTED MCHU CAN 10A0..10C5 ; ALetter # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; ALetter # L& GEORGIAN CAPITAL LETTER YN +10CD ; ALetter # L& GEORGIAN CAPITAL LETTER AEN 10D0..10FA ; ALetter # Lo [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN 10FC ; ALetter # Lm MODIFIER LETTER GEORGIAN NAR -1100..1248 ; ALetter # Lo [329] HANGUL CHOSEONG KIYEOK..ETHIOPIC SYLLABLE QWA +10FD..1248 ; ALetter # Lo [332] GEORGIAN LETTER AEN..ETHIOPIC SYLLABLE QWA 124A..124D ; ALetter # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE 1250..1256 ; ALetter # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO 1258 ; ALetter # Lo ETHIOPIC SYLLABLE QHWA @@ -597,16 +630,17 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK 1B45..1B4B ; ALetter # Lo [7] BALINESE LETTER KAF SASAK..BALINESE LETTER ASYURA SASAK 1B83..1BA0 ; ALetter # Lo [30] SUNDANESE LETTER A..SUNDANESE LETTER HA 1BAE..1BAF ; ALetter # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA -1BC0..1BE5 ; ALetter # Lo [38] BATAK LETTER A..BATAK LETTER U +1BBA..1BE5 ; ALetter # Lo [44] SUNDANESE 
AVAGRAHA..BATAK LETTER U 1C00..1C23 ; ALetter # Lo [36] LEPCHA LETTER KA..LEPCHA LETTER A 1C4D..1C4F ; ALetter # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA 1C5A..1C77 ; ALetter # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH 1C78..1C7D ; ALetter # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD 1CE9..1CEC ; ALetter # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL 1CEE..1CF1 ; ALetter # Lo [4] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ANUSVARA UBHAYATO MUKHA +1CF5..1CF6 ; ALetter # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA 1D00..1D2B ; ALetter # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL -1D2C..1D61 ; ALetter # Lm [54] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI -1D62..1D77 ; ALetter # L& [22] LATIN SUBSCRIPT SMALL LETTER I..LATIN SMALL LETTER TURNED G +1D2C..1D6A ; ALetter # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1D6B..1D77 ; ALetter # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G 1D78 ; ALetter # Lm MODIFIER LETTER CYRILLIC EN 1D79..1D9A ; ALetter # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK 1D9B..1DBF ; ALetter # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA @@ -653,12 +687,15 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK 24B6..24E9 ; ALetter # So [52] CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN SMALL LETTER Z 2C00..2C2E ; ALetter # L& [47] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE 2C30..2C5E ; ALetter # L& [47] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER LATINATE MYSLITE -2C60..2C7C ; ALetter # L& [29] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN SUBSCRIPT SMALL LETTER J -2C7D ; ALetter # Lm MODIFIER LETTER CAPITAL V +2C60..2C7B ; ALetter # L& [28] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN LETTER SMALL CAPITAL TURNED E +2C7C..2C7D ; ALetter # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER 
LETTER CAPITAL V 2C7E..2CE4 ; ALetter # L& [103] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SYMBOL KAI 2CEB..2CEE ; ALetter # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CF2..2CF3 ; ALetter # L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI 2D00..2D25 ; ALetter # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE -2D30..2D65 ; ALetter # Lo [54] TIFINAGH LETTER YA..TIFINAGH LETTER YAZZ +2D27 ; ALetter # L& GEORGIAN SMALL LETTER YN +2D2D ; ALetter # L& GEORGIAN SMALL LETTER AEN +2D30..2D67 ; ALetter # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO 2D6F ; ALetter # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK 2D80..2D96 ; ALetter # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE 2DA0..2DA6 ; ALetter # Lo [7] ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO @@ -697,8 +734,9 @@ A770 ; ALetter # Lm MODIFIER LETTER US A771..A787 ; ALetter # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T A788 ; ALetter # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A78B..A78E ; ALetter # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT -A790..A791 ; ALetter # L& [2] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER N WITH DESCENDER -A7A0..A7A9 ; ALetter # L& [10] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN SMALL LETTER S WITH OBLIQUE STROKE +A790..A793 ; ALetter # L& [4] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER C WITH BAR +A7A0..A7AA ; ALetter # L& [11] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN CAPITAL LETTER H WITH HOOK +A7F8..A7F9 ; ALetter # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE A7FA ; ALetter # L& LATIN LETTER SMALL CAPITAL TURNED M A7FB..A801 ; ALetter # Lo [7] LATIN EPIGRAPHIC LETTER REVERSED F..SYLOTI NAGRI LETTER I A803..A805 ; ALetter # Lo [3] SYLOTI NAGRI LETTER U..SYLOTI NAGRI LETTER O @@ -716,6 +754,9 @@ A9CF ; ALetter # Lm JAVANESE PANGRANGKEP AA00..AA28 ; 
ALetter # Lo [41] CHAM LETTER A..CHAM LETTER HA AA40..AA42 ; ALetter # Lo [3] CHAM LETTER FINAL K..CHAM LETTER FINAL NG AA44..AA4B ; ALetter # Lo [8] CHAM LETTER FINAL CH..CHAM LETTER FINAL SS +AAE0..AAEA ; ALetter # Lo [11] MEETEI MAYEK LETTER E..MEETEI MAYEK LETTER SSA +AAF2 ; ALetter # Lo MEETEI MAYEK ANJI +AAF3..AAF4 ; ALetter # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK AB01..AB06 ; ALetter # Lo [6] ETHIOPIC SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO AB09..AB0E ; ALetter # Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO AB11..AB16 ; ALetter # Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO @@ -777,6 +818,8 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1083F..10855 ; ALetter # Lo [23] CYPRIOT SYLLABLE ZO..IMPERIAL ARAMAIC LETTER TAW 10900..10915 ; ALetter # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU 10920..10939 ; ALetter # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C +10980..109B7 ; ALetter # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA +109BE..109BF ; ALetter # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN 10A00 ; ALetter # Lo KHAROSHTHI LETTER A 10A10..10A13 ; ALetter # Lo [4] KHAROSHTHI LETTER KA..KHAROSHTHI LETTER GHA 10A15..10A17 ; ALetter # Lo [3] KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA @@ -788,10 +831,18 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 10C00..10C48 ; ALetter # Lo [73] OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH 11003..11037 ; ALetter # Lo [53] BRAHMI SIGN JIHVAMULIYA..BRAHMI LETTER OLD TAMIL NNNA 11083..110AF ; ALetter # Lo [45] KAITHI LETTER A..KAITHI LETTER HA +110D0..110E8 ; ALetter # Lo [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE +11103..11126 ; ALetter # Lo [36] CHAKMA LETTER AA..CHAKMA LETTER HAA +11183..111B2 ; ALetter # Lo [48] SHARADA LETTER A..SHARADA LETTER HA +111C1..111C4 ; ALetter # Lo [4] SHARADA SIGN AVAGRAHA..SHARADA OM +11680..116AA ; ALetter 
# Lo [43] TAKRI LETTER A..TAKRI LETTER RRA 12000..1236E ; ALetter # Lo [879] CUNEIFORM SIGN A..CUNEIFORM SIGN ZUM 12400..12462 ; ALetter # Nl [99] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN OLD ASSYRIAN ONE QUARTER 13000..1342E ; ALetter # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032 16800..16A38 ; ALetter # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ +16F00..16F44 ; ALetter # Lo [69] MIAO LETTER PA..MIAO LETTER HHA +16F50 ; ALetter # Lo MIAO LETTER NASALIZATION +16F93..16F9F ; ALetter # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 1D400..1D454 ; ALetter # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G 1D456..1D49C ; ALetter # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A 1D49E..1D49F ; ALetter # L& [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D @@ -822,8 +873,41 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1D78A..1D7A8 ; ALetter # L& [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA 1D7AA..1D7C2 ; ALetter # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA 1D7C4..1D7CB ; ALetter # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA - -# Total code points: 24453 +1EE00..1EE03 ; ALetter # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; ALetter # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; ALetter # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; ALetter # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; ALetter # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; ALetter # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; ALetter # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; ALetter # Lo ARABIC MATHEMATICAL 
INITIAL DAD +1EE3B ; ALetter # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; ALetter # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; ALetter # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; ALetter # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; ALetter # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; ALetter # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; ALetter # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; ALetter # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; ALetter # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; ALetter # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; ALetter # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; ALetter # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; ALetter # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; ALetter # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; ALetter # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; ALetter # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; ALetter # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; ALetter # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; ALetter # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; ALetter # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; ALetter # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; ALetter # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; ALetter # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; ALetter # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; ALetter # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN + +# Total 
code points: 24941 # ================================================ @@ -909,9 +993,13 @@ AA50..AA59 ; Numeric # Nd [10] CHAM DIGIT ZERO..CHAM DIGIT NINE ABF0..ABF9 ; Numeric # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE 104A0..104A9 ; Numeric # Nd [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE 11066..1106F ; Numeric # Nd [10] BRAHMI DIGIT ZERO..BRAHMI DIGIT NINE +110F0..110F9 ; Numeric # Nd [10] SORA SOMPENG DIGIT ZERO..SORA SOMPENG DIGIT NINE +11136..1113F ; Numeric # Nd [10] CHAKMA DIGIT ZERO..CHAKMA DIGIT NINE +111D0..111D9 ; Numeric # Nd [10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE +116C0..116C9 ; Numeric # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE 1D7CE..1D7FF ; Numeric # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE -# Total code points: 411 +# Total code points: 451 # ================================================ diff --git a/lib/unicore/extracted/DBidiClass.txt b/lib/unicore/extracted/DBidiClass.txt index 8720776158..270a87e847 100644 --- a/lib/unicore/extracted/DBidiClass.txt +++ b/lib/unicore/extracted/DBidiClass.txt @@ -1,8 +1,8 @@ -# DerivedBidiClass-6.0.0.txt -# Date: 2010-08-19, 00:48:03 GMT [MD] +# DerivedBidiClass-6.1.0.txt +# Date: 2011-12-11, 18:26:53 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ @@ -13,32 +13,39 @@ # reserved for right-to-left scripts are given either types R or AL. 
# # The unassigned code points that default to AL are in the ranges: -# [\u0600-\u07BF \uFB50-\uFDFF \uFE70-\uFEFF] +# [\u0600-\u07BF \u08A0-\u08FF \uFB50-\uFDCF \uFDF0-\uFDFF \uFE70-\uFEFF \U0001EE00-\U0001EEFF] # # Arabic: U+0600 - U+06FF # Syriac: U+0700 - U+074F # Arabic_Supplement: U+0750 - U+077F # Thaana: U+0780 - U+07BF +# Arabic Extended-A: U+08A0 - U+08FF # Arabic_Presentation_Forms_A: -# U+FB50 - U+FDFF +# U+FB50 - U+FDCF +# U+FDF0 - U+FDFF # Arabic_Presentation_Forms_B: # U+FE70 - U+FEFF -# minus noncharacter code points. +# Arabic Mathematical Alphabetic Symbols: +# U+1EE00 - U+1EEFF # # The unassigned code points that default to R are in the ranges: -# [\u0590-\u05FF \u07C0-\u08FF \uFB1D-\uFB4F \U00010800-\U00010FFF \U0001E800-\U0001EFFF] +# [\u0590-\u05FF \u07C0-\u089F \uFB1D-\uFB4F \U00010800-\U00010FFF \U0001E800-\U0001EDFF \U0001EF00-\U0001EFFF] # # Hebrew: U+0590 - U+05FF # NKo: U+07C0 - U+07FF # Cypriot_Syllabary: U+10800 - U+1083F # Phoenician: U+10900 - U+1091F # Lydian: U+10920 - U+1093F +# Meroitic Hieroglyphs: +# U+10980 - U+1099F +# Meroitic Cursive: U+109A0 - U+109FF # Kharoshthi: U+10A00 - U+10A5F # and any others in the ranges: -# U+0800 - U+08FF, +# U+0800 - U+089F, # U+FB1D - U+FB4F, # U+10840 - U+10FFF, -# U+1E800 - U+1EFFF +# U+1E800 - U+1EDFF, +# U+1EF00 - U+1EFFF # # For all other cases: @@ -53,9 +60,9 @@ 0041..005A ; L # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z 0061..007A ; L # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z -00AA ; L # L& FEMININE ORDINAL INDICATOR +00AA ; L # Lo FEMININE ORDINAL INDICATOR 00B5 ; L # L& MICRO SIGN -00BA ; L # L& MASCULINE ORDINAL INDICATOR +00BA ; L # Lo MASCULINE ORDINAL INDICATOR 00C0..00D6 ; L # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS 00D8..00F6 ; L # L& [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS 00F8..01BA ; L # L& [195] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL @@ -148,6 
+155,7 @@ 0AD0 ; L # Lo GUJARATI OM 0AE0..0AE1 ; L # Lo [2] GUJARATI LETTER VOCALIC RR..GUJARATI LETTER VOCALIC LL 0AE6..0AEF ; L # Nd [10] GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE +0AF0 ; L # Po GUJARATI ABBREVIATION SIGN 0B02..0B03 ; L # Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA 0B05..0B0C ; L # Lo [8] ORIYA LETTER A..ORIYA LETTER VOCALIC L 0B0F..0B10 ; L # Lo [2] ORIYA LETTER E..ORIYA LETTER AI @@ -264,11 +272,13 @@ 0EC0..0EC4 ; L # Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI 0EC6 ; L # Lm LAO KO LA 0ED0..0ED9 ; L # Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE -0EDC..0EDD ; L # Lo [2] LAO HO NO..LAO HO MO +0EDC..0EDF ; L # Lo [4] LAO HO NO..LAO LETTER KHMU NYO 0F00 ; L # Lo TIBETAN SYLLABLE OM 0F01..0F03 ; L # So [3] TIBETAN MARK GTER YIG MGO TRUNCATED A..TIBETAN MARK GTER YIG MGO -UM GTER TSHEG MA 0F04..0F12 ; L # Po [15] TIBETAN MARK INITIAL YIG MGO MDUN MA..TIBETAN MARK RGYA GRAM SHAD -0F13..0F17 ; L # So [5] TIBETAN MARK CARET -DZUD RTAGS ME LONG CAN..TIBETAN ASTROLOGICAL SIGN SGRA GCAN -CHAR RTAGS +0F13 ; L # So TIBETAN MARK CARET -DZUD RTAGS ME LONG CAN +0F14 ; L # Po TIBETAN MARK GTER TSHEG +0F15..0F17 ; L # So [3] TIBETAN LOGOTYPE SIGN CHAD RTAGS..TIBETAN ASTROLOGICAL SIGN SGRA GCAN -CHAR RTAGS 0F1A..0F1F ; L # So [6] TIBETAN SIGN RDEL DKAR GCIG..TIBETAN SIGN RDEL DKAR RDEL NAG 0F20..0F29 ; L # Nd [10] TIBETAN DIGIT ZERO..TIBETAN DIGIT NINE 0F2A..0F33 ; L # No [10] TIBETAN DIGIT HALF ONE..TIBETAN DIGIT HALF ZERO @@ -312,10 +322,12 @@ 109A..109C ; L # Mc [3] MYANMAR SIGN KHAMTI TONE-1..MYANMAR VOWEL SIGN AITON A 109E..109F ; L # So [2] MYANMAR SYMBOL SHAN ONE..MYANMAR SYMBOL SHAN EXCLAMATION 10A0..10C5 ; L # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; L # L& GEORGIAN CAPITAL LETTER YN +10CD ; L # L& GEORGIAN CAPITAL LETTER AEN 10D0..10FA ; L # Lo [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN 10FB ; L # Po GEORGIAN PARAGRAPH SEPARATOR 10FC ; L # Lm MODIFIER LETTER GEORGIAN NAR -1100..1248 ; L # Lo [329] HANGUL CHOSEONG 
KIYEOK..ETHIOPIC SYLLABLE QWA +10FD..1248 ; L # Lo [332] GEORGIAN LETTER AEN..ETHIOPIC SYLLABLE QWA 124A..124D ; L # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE 1250..1256 ; L # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO 1258 ; L # Lo ETHIOPIC SYLLABLE QHWA @@ -331,8 +343,7 @@ 12D8..1310 ; L # Lo [57] ETHIOPIC SYLLABLE ZA..ETHIOPIC SYLLABLE GWA 1312..1315 ; L # Lo [4] ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE 1318..135A ; L # Lo [67] ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA -1360 ; L # So ETHIOPIC SECTION MARK -1361..1368 ; L # Po [8] ETHIOPIC WORDSPACE..ETHIOPIC PARAGRAPH SEPARATOR +1360..1368 ; L # Po [9] ETHIOPIC SECTION MARK..ETHIOPIC PARAGRAPH SEPARATOR 1369..137C ; L # No [20] ETHIOPIC DIGIT ONE..ETHIOPIC NUMBER TEN THOUSAND 1380..138F ; L # Lo [16] ETHIOPIC SYLLABLE SEBATBEIT MWA..ETHIOPIC SYLLABLE PWE 13A0..13F4 ; L # Lo [85] CHEROKEE LETTER A..CHEROKEE LETTER YV @@ -351,7 +362,6 @@ 1760..176C ; L # Lo [13] TAGBANWA LETTER A..TAGBANWA LETTER YA 176E..1770 ; L # Lo [3] TAGBANWA LETTER LA..TAGBANWA LETTER SA 1780..17B3 ; L # Lo [52] KHMER LETTER KA..KHMER INDEPENDENT VOWEL QAU -17B4..17B5 ; L # Cf [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA 17B6 ; L # Mc KHMER VOWEL SIGN AA 17BE..17C5 ; L # Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU 17C7..17C8 ; L # Mc [2] KHMER SIGN REAHMUK..KHMER SIGN YUUKALEAPINTU @@ -411,9 +421,10 @@ 1BA1 ; L # Mc SUNDANESE CONSONANT SIGN PAMINGKAL 1BA6..1BA7 ; L # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG 1BAA ; L # Mc SUNDANESE SIGN PAMAAEH +1BAC..1BAD ; L # Mc [2] SUNDANESE CONSONANT SIGN PASANGAN MA..SUNDANESE CONSONANT SIGN PASANGAN WA 1BAE..1BAF ; L # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA 1BB0..1BB9 ; L # Nd [10] SUNDANESE DIGIT ZERO..SUNDANESE DIGIT NINE -1BC0..1BE5 ; L # Lo [38] BATAK LETTER A..BATAK LETTER U +1BBA..1BE5 ; L # Lo [44] SUNDANESE AVAGRAHA..BATAK LETTER U 1BE7 ; L # Mc BATAK VOWEL SIGN E 1BEA..1BEC ; L # Mc [3] BATAK VOWEL SIGN I..BATAK 
VOWEL SIGN O 1BEE ; L # Mc BATAK VOWEL SIGN U @@ -429,14 +440,16 @@ 1C5A..1C77 ; L # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH 1C78..1C7D ; L # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD 1C7E..1C7F ; L # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD +1CC0..1CC7 ; L # Po [8] SUNDANESE PUNCTUATION BINDU SURYA..SUNDANESE PUNCTUATION BINDU BA SATANGA 1CD3 ; L # Po VEDIC SIGN NIHSHVASA 1CE1 ; L # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA 1CE9..1CEC ; L # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL 1CEE..1CF1 ; L # Lo [4] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ANUSVARA UBHAYATO MUKHA -1CF2 ; L # Mc VEDIC SIGN ARDHAVISARGA +1CF2..1CF3 ; L # Mc [2] VEDIC SIGN ARDHAVISARGA..VEDIC SIGN ROTATED ARDHAVISARGA +1CF5..1CF6 ; L # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA 1D00..1D2B ; L # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL -1D2C..1D61 ; L # Lm [54] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI -1D62..1D77 ; L # L& [22] LATIN SUBSCRIPT SMALL LETTER I..LATIN SMALL LETTER TURNED G +1D2C..1D6A ; L # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1D6B..1D77 ; L # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G 1D78 ; L # Lm MODIFIER LETTER CYRILLIC EN 1D79..1D9A ; L # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK 1D9B..1DBF ; L # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA @@ -489,12 +502,15 @@ 2800..28FF ; L # So [256] BRAILLE PATTERN BLANK..BRAILLE PATTERN DOTS-12345678 2C00..2C2E ; L # L& [47] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE 2C30..2C5E ; L # L& [47] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER LATINATE MYSLITE -2C60..2C7C ; L # L& [29] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN SUBSCRIPT SMALL LETTER J -2C7D ; L # Lm MODIFIER LETTER CAPITAL V +2C60..2C7B ; L # L& [28] LATIN CAPITAL LETTER L WITH DOUBLE 
BAR..LATIN LETTER SMALL CAPITAL TURNED E +2C7C..2C7D ; L # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V 2C7E..2CE4 ; L # L& [103] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SYMBOL KAI 2CEB..2CEE ; L # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CF2..2CF3 ; L # L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI 2D00..2D25 ; L # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE -2D30..2D65 ; L # Lo [54] TIFINAGH LETTER YA..TIFINAGH LETTER YAZZ +2D27 ; L # L& GEORGIAN SMALL LETTER YN +2D2D ; L # L& GEORGIAN SMALL LETTER AEN +2D30..2D67 ; L # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO 2D6F ; L # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK 2D70 ; L # Po TIFINAGH SEPARATOR MARK 2D80..2D96 ; L # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE @@ -510,6 +526,7 @@ 3006 ; L # Lo IDEOGRAPHIC CLOSING MARK 3007 ; L # Nl IDEOGRAPHIC NUMBER ZERO 3021..3029 ; L # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE +302E..302F ; L # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK 3031..3035 ; L # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF 3038..303A ; L # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY 303B ; L # Lm VERTICAL IDEOGRAPHIC ITERATION MARK @@ -529,7 +546,8 @@ 31F0..31FF ; L # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO 3200..321C ; L # So [29] PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED HANGUL CIEUC U 3220..3229 ; L # No [10] PARENTHESIZED IDEOGRAPH ONE..PARENTHESIZED IDEOGRAPH TEN -322A..324F ; L # So [38] PARENTHESIZED IDEOGRAPH MOON..CIRCLED NUMBER EIGHTY ON BLACK SQUARE +322A..3247 ; L # So [30] PARENTHESIZED IDEOGRAPH MOON..CIRCLED IDEOGRAPH KOTO +3248..324F ; L # No [8] CIRCLED NUMBER TEN ON BLACK SQUARE..CIRCLED NUMBER EIGHTY ON BLACK SQUARE 3260..327B ; L # So [28] CIRCLED HANGUL KIYEOK..CIRCLED HANGUL HIEUH A 327F ; L # So KOREAN STANDARD SYMBOL 3280..3289 ; L # No [10] CIRCLED 
IDEOGRAPH ONE..CIRCLED IDEOGRAPH TEN @@ -540,7 +558,7 @@ 337B..33DD ; L # So [99] SQUARE ERA NAME HEISEI..SQUARE WB 33E0..33FE ; L # So [31] IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY ONE..IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY THIRTY-ONE 3400..4DB5 ; L # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5 -4E00..9FCB ; L # Lo [20940] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCB +4E00..9FCC ; L # Lo [20941] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCC A000..A014 ; L # Lo [21] YI SYLLABLE IT..YI SYLLABLE E A015 ; L # Lm YI SYLLABLE WU A016..A48C ; L # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR @@ -563,8 +581,9 @@ A770 ; L # Lm MODIFIER LETTER US A771..A787 ; L # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T A789..A78A ; L # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN A78B..A78E ; L # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT -A790..A791 ; L # L& [2] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER N WITH DESCENDER -A7A0..A7A9 ; L # L& [10] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN SMALL LETTER S WITH OBLIQUE STROKE +A790..A793 ; L # L& [4] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER C WITH BAR +A7A0..A7AA ; L # L& [11] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN CAPITAL LETTER H WITH HOOK +A7F8..A7F9 ; L # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE A7FA ; L # L& LATIN LETTER SMALL CAPITAL TURNED M A7FB..A801 ; L # Lo [7] LATIN EPIGRAPHIC LETTER REVERSED F..SYLOTI NAGRI LETTER I A803..A805 ; L # Lo [3] SYLOTI NAGRI LETTER U..SYLOTI NAGRI LETTER O @@ -622,6 +641,13 @@ AAC2 ; L # Lo TAI VIET TONE MAI SONG AADB..AADC ; L # Lo [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG AADD ; L # Lm TAI VIET SYMBOL SAM AADE..AADF ; L # Po [2] TAI VIET SYMBOL HO HOI..TAI VIET SYMBOL KOI KOI +AAE0..AAEA ; L # Lo [11] MEETEI MAYEK LETTER E..MEETEI MAYEK LETTER SSA +AAEB ; L # Mc MEETEI MAYEK VOWEL SIGN II 
+AAEE..AAEF ; L # Mc [2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU +AAF0..AAF1 ; L # Po [2] MEETEI MAYEK CHEIKHAN..MEETEI MAYEK AHANG KHUDAM +AAF2 ; L # Lo MEETEI MAYEK ANJI +AAF3..AAF4 ; L # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK +AAF5 ; L # Mc MEETEI MAYEK VOWEL SIGN VISARGA AB01..AB06 ; L # Lo [6] ETHIOPIC SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO AB09..AB0E ; L # Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO AB11..AB16 ; L # Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO @@ -638,8 +664,7 @@ AC00..D7A3 ; L # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH D7B0..D7C6 ; L # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E D7CB..D7FB ; L # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH E000..F8FF ; L # Co [6400] <private-use-E000>..<private-use-F8FF> -F900..FA2D ; L # Lo [302] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA2D -FA30..FA6D ; L # Lo [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D +F900..FA6D ; L # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D FA70..FAD9 ; L # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 FB00..FB06 ; L # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST FB13..FB17 ; L # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH @@ -662,7 +687,7 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 10050..1005D ; L # Lo [14] LINEAR B SYMBOL B018..LINEAR B SYMBOL B089 10080..100FA ; L # Lo [123] LINEAR B IDEOGRAM B100 MAN..LINEAR B IDEOGRAM VESSEL B305 10100 ; L # Po AEGEAN WORD SEPARATOR LINE -10102 ; L # So AEGEAN CHECK MARK +10102 ; L # Po AEGEAN CHECK MARK 10107..10133 ; L # No [45] AEGEAN NUMBER ONE..AEGEAN NUMBER NINETY THOUSAND 10137..1013F ; L # So [9] AEGEAN WEIGHT BASE UNIT..AEGEAN MEASURE THIRD SUBUNIT 101D0..101FC ; L # So [45] PHAISTOS DISC SIGN PEDESTRIAN..PHAISTOS DISC SIGN WAVY 
BAND @@ -695,11 +720,33 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 110BB..110BC ; L # Po [2] KAITHI ABBREVIATION SIGN..KAITHI ENUMERATION SIGN 110BD ; L # Cf KAITHI NUMBER SIGN 110BE..110C1 ; L # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA +110D0..110E8 ; L # Lo [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE +110F0..110F9 ; L # Nd [10] SORA SOMPENG DIGIT ZERO..SORA SOMPENG DIGIT NINE +11103..11126 ; L # Lo [36] CHAKMA LETTER AA..CHAKMA LETTER HAA +1112C ; L # Mc CHAKMA VOWEL SIGN E +11136..1113F ; L # Nd [10] CHAKMA DIGIT ZERO..CHAKMA DIGIT NINE +11140..11143 ; L # Po [4] CHAKMA SECTION MARK..CHAKMA QUESTION MARK +11182 ; L # Mc SHARADA SIGN VISARGA +11183..111B2 ; L # Lo [48] SHARADA LETTER A..SHARADA LETTER HA +111B3..111B5 ; L # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II +111BF..111C0 ; L # Mc [2] SHARADA VOWEL SIGN AU..SHARADA SIGN VIRAMA +111C1..111C4 ; L # Lo [4] SHARADA SIGN AVAGRAHA..SHARADA OM +111C5..111C8 ; L # Po [4] SHARADA DANDA..SHARADA SEPARATOR +111D0..111D9 ; L # Nd [10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE +11680..116AA ; L # Lo [43] TAKRI LETTER A..TAKRI LETTER RRA +116AC ; L # Mc TAKRI SIGN VISARGA +116AE..116AF ; L # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II +116B6 ; L # Mc TAKRI SIGN VIRAMA +116C0..116C9 ; L # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE 12000..1236E ; L # Lo [879] CUNEIFORM SIGN A..CUNEIFORM SIGN ZUM 12400..12462 ; L # Nl [99] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN OLD ASSYRIAN ONE QUARTER 12470..12473 ; L # Po [4] CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER..CUNEIFORM PUNCTUATION SIGN DIAGONAL TRICOLON 13000..1342E ; L # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032 16800..16A38 ; L # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ +16F00..16F44 ; L # Lo [69] MIAO LETTER PA..MIAO LETTER HHA +16F50 ; L # Lo MIAO LETTER NASALIZATION +16F51..16F7E ; L # Mc [46] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN NG 
+16F93..16F9F ; L # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 1B000..1B001 ; L # Lo [2] KATAKANA LETTER ARCHAIC E..HIRAGANA LETTER ARCHAIC YE 1D000..1D0F5 ; L # So [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO 1D100..1D126 ; L # So [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2 @@ -753,8 +800,6 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 1F210..1F23A ; L # So [43] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-55B6 1F240..1F248 ; L # So [9] TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557 1F250..1F251 ; L # So [2] CIRCLED IDEOGRAPH ADVANTAGE..CIRCLED IDEOGRAPH ACCEPT -1F48C ; L # So LOVE LETTER -1F524 ; L # So INPUT SYMBOL FOR LATIN LETTERS 20000..2A6D6 ; L # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6 2A700..2B734 ; L # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734 2B740..2B81D ; L # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D @@ -762,8 +807,8 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER F0000..FFFFD ; L # Co [65534] <private-use-F0000>..<private-use-FFFFD> 100000..10FFFD; L # Co [65534] <private-use-100000>..<private-use-10FFFD> -# The above property value applies to 859451 code points not listed here. -# Total code points: 1098619 +# The above property value applies to 858960 code points not listed here. 
+# Total code points: 1098531 # ================================================ @@ -795,7 +840,7 @@ F0000..FFFFD ; L # Co [65534] <private-use-F0000>..<private-use-FFFFD> 0840..0858 ; R # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN 085C..085D ; R # Cn [2] <reserved-085C>..<reserved-085D> 085E ; R # Po MANDAIC PUNCTUATION -085F..08FF ; R # Cn [161] <reserved-085F>..<reserved-08FF> +085F..089F ; R # Cn [65] <reserved-085F>..<reserved-089F> 200F ; R # Cf RIGHT-TO-LEFT MARK FB1D ; R # Lo HEBREW LETTER YOD WITH HIRIQ FB1F..FB28 ; R # Lo [10] HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBREW LETTER WIDE TAV @@ -831,7 +876,11 @@ FB46..FB4F ; R # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATURE AL 10920..10939 ; R # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C 1093A..1093E ; R # Cn [5] <reserved-1093A>..<reserved-1093E> 1093F ; R # Po LYDIAN TRIANGULAR MARK -10940..109FF ; R # Cn [192] <reserved-10940>..<reserved-109FF> +10940..1097F ; R # Cn [64] <reserved-10940>..<reserved-1097F> +10980..109B7 ; R # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA +109B8..109BD ; R # Cn [6] <reserved-109B8>..<reserved-109BD> +109BE..109BF ; R # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN +109C0..109FF ; R # Cn [64] <reserved-109C0>..<reserved-109FF> 10A00 ; R # Lo KHAROSHTHI LETTER A 10A04 ; R # Cn <reserved-10A04> 10A07..10A0B ; R # Cn [5] <reserved-10A07>..<reserved-10A0B> @@ -862,9 +911,10 @@ FB46..FB4F ; R # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATURE AL 10C00..10C48 ; R # Lo [73] OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH 10C49..10E5F ; R # Cn [535] <reserved-10C49>..<reserved-10E5F> 10E7F..10FFF ; R # Cn [385] <reserved-10E7F>..<reserved-10FFF> -1E800..1EFFF ; R # Cn [2048] <reserved-1E800>..<reserved-1EFFF> +1E800..1EDFF ; R # Cn [1536] <reserved-1E800>..<reserved-1EDFF> +1EF00..1EFFF ; R # Cn [256] <reserved-1EF00>..<reserved-1EFFF> -# Total code points: 4438 +# Total code points: 4086 # 
================================================ @@ -911,6 +961,7 @@ FF0D ; ES # Pd FULLWIDTH HYPHEN-MINUS 00A2..00A5 ; ET # Sc [4] CENT SIGN..YEN SIGN 00B0 ; ET # So DEGREE SIGN 00B1 ; ET # Sm PLUS-MINUS SIGN +058F ; ET # Sc ARMENIAN DRAM SIGN 0609..060A ; ET # Po [2] ARABIC-INDIC PER MILLE SIGN..ARABIC-INDIC PER TEN THOUSAND SIGN 066A ; ET # Po ARABIC PERCENT SIGN 09F2..09F3 ; ET # Sc [2] BENGALI RUPEE MARK..BENGALI RUPEE SIGN @@ -934,17 +985,16 @@ FF05 ; ET # Po FULLWIDTH PERCENT SIGN FFE0..FFE1 ; ET # Sc [2] FULLWIDTH CENT SIGN..FULLWIDTH POUND SIGN FFE5..FFE6 ; ET # Sc [2] FULLWIDTH YEN SIGN..FULLWIDTH WON SIGN -# Total code points: 64 +# Total code points: 65 # ================================================ # Bidi_Class=Arabic_Number -0600..0603 ; AN # Cf [4] ARABIC NUMBER SIGN..ARABIC SIGN SAFHA +0600..0604 ; AN # Cf [5] ARABIC NUMBER SIGN..ARABIC SIGN SAMVAT 0660..0669 ; AN # Nd [10] ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE 066B..066C ; AN # Po [2] ARABIC DECIMAL SEPARATOR..ARABIC THOUSANDS SEPARATOR 06DD ; AN # Cf ARABIC END OF AYAH -070F ; AN # Cf SYRIAC ABBREVIATION MARK 10E60..10E7E ; AN # No [31] RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS # Total code points: 49 @@ -1029,7 +1079,8 @@ FF1A ; CS # Po FULLWIDTH COLON 007D ; ON # Pe RIGHT CURLY BRACKET 007E ; ON # Sm TILDE 00A1 ; ON # Po INVERTED EXCLAMATION MARK -00A6..00A7 ; ON # So [2] BROKEN BAR..SECTION SIGN +00A6 ; ON # So BROKEN BAR +00A7 ; ON # Po SECTION SIGN 00A8 ; ON # Sk DIAERESIS 00A9 ; ON # So COPYRIGHT SIGN 00AB ; ON # Pi LEFT-POINTING DOUBLE ANGLE QUOTATION MARK @@ -1037,8 +1088,7 @@ FF1A ; CS # Po FULLWIDTH COLON 00AE ; ON # So REGISTERED SIGN 00AF ; ON # Sk MACRON 00B4 ; ON # Sk ACUTE ACCENT -00B6 ; ON # So PILCROW SIGN -00B7 ; ON # Po MIDDLE DOT +00B6..00B7 ; ON # Po [2] PILCROW SIGN..MIDDLE DOT 00B8 ; ON # Sk CEDILLA 00BB ; ON # Pf RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK 00BC..00BE ; ON # No [3] VULGAR FRACTION ONE QUARTER..VULGAR FRACTION THREE QUARTERS @@ -1206,9 +1256,7 @@ 
FF1A ; CS # Po FULLWIDTH COLON 27C0..27C4 ; ON # Sm [5] THREE DIMENSIONAL ANGLE..OPEN SUPERSET 27C5 ; ON # Ps LEFT S-SHAPED BAG DELIMITER 27C6 ; ON # Pe RIGHT S-SHAPED BAG DELIMITER -27C7..27CA ; ON # Sm [4] OR WITH DOT INSIDE..VERTICAL BAR WITH HORIZONTAL STROKE -27CC ; ON # Sm LONG DIVISION -27CE..27E5 ; ON # Sm [24] SQUARED LOGICAL AND..WHITE SQUARE WITH RIGHTWARDS TICK +27C7..27E5 ; ON # Sm [31] OR WITH DOT INSIDE..WHITE SQUARE WITH RIGHTWARDS TICK 27E6 ; ON # Ps MATHEMATICAL LEFT WHITE SQUARE BRACKET 27E7 ; ON # Pe MATHEMATICAL RIGHT WHITE SQUARE BRACKET 27E8 ; ON # Ps MATHEMATICAL LEFT ANGLE BRACKET @@ -1292,7 +1340,8 @@ FF1A ; CS # Po FULLWIDTH COLON 2E29 ; ON # Pe RIGHT DOUBLE PARENTHESIS 2E2A..2E2E ; ON # Po [5] TWO DOTS OVER ONE DOT PUNCTUATION..REVERSED QUESTION MARK 2E2F ; ON # Lm VERTICAL TILDE -2E30..2E31 ; ON # Po [2] RING POINT..WORD SEPARATOR MIDDLE DOT +2E30..2E39 ; ON # Po [10] RING POINT..TOP HALF SECTION SIGN +2E3A..2E3B ; ON # Pd [2] TWO-EM DASH..THREE-EM DASH 2E80..2E99 ; ON # So [26] CJK RADICAL REPEAT..CJK RADICAL RAP 2E9B..2EF3 ; ON # So [89] CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED TURTLE 2F00..2FD5 ; ON # So [214] KANGXI RADICAL ONE..KANGXI RADICAL FLUTE @@ -1445,12 +1494,14 @@ FFFC..FFFD ; ON # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTE 1D74F ; ON # Sm MATHEMATICAL BOLD ITALIC PARTIAL DIFFERENTIAL 1D789 ; ON # Sm MATHEMATICAL SANS-SERIF BOLD PARTIAL DIFFERENTIAL 1D7C3 ; ON # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL +1EEF0..1EEF1 ; ON # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL 1F000..1F02B ; ON # So [44] MAHJONG TILE EAST WIND..MAHJONG TILE BACK 1F030..1F093 ; ON # So [100] DOMINO TILE HORIZONTAL BACK..DOMINO TILE VERTICAL-06-06 1F0A0..1F0AE ; ON # So [15] PLAYING CARD BACK..PLAYING CARD KING OF SPADES 1F0B1..1F0BE ; ON # So [14] PLAYING CARD ACE OF HEARTS..PLAYING CARD KING OF HEARTS 1F0C1..1F0CF ; ON # So [15] PLAYING CARD ACE OF 
DIAMONDS..PLAYING CARD BLACK JOKER 1F0D1..1F0DF ; ON # So [15] PLAYING CARD ACE OF CLUBS..PLAYING CARD WHITE JOKER +1F16A..1F16B ; ON # So [2] RAISED MC SIGN..RAISED MD SIGN 1F300..1F320 ; ON # So [33] CYCLONE..SHOOTING STAR 1F330..1F335 ; ON # So [6] CHESTNUT..CACTUS 1F337..1F37C ; ON # So [70] TULIP..BABY BOTTLE @@ -1460,29 +1511,17 @@ FFFC..FFFD ; ON # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTE 1F3E0..1F3F0 ; ON # So [17] HOUSE BUILDING..EUROPEAN CASTLE 1F400..1F43E ; ON # So [63] RAT..PAW PRINTS 1F440 ; ON # So EYES -1F442..1F48B ; ON # So [74] EAR..KISS MARK -1F48D..1F4F7 ; ON # So [107] RING..CAMERA +1F442..1F4F7 ; ON # So [182] EAR..CAMERA 1F4F9..1F4FC ; ON # So [4] VIDEO CAMERA..VIDEOCASSETTE -1F500..1F523 ; ON # So [36] TWISTED RIGHTWARDS ARROWS..INPUT SYMBOL FOR SYMBOLS -1F525..1F53D ; ON # So [25] FIRE..DOWN-POINTING SMALL RED TRIANGLE +1F500..1F53D ; ON # So [62] TWISTED RIGHTWARDS ARROWS..DOWN-POINTING SMALL RED TRIANGLE +1F540..1F543 ; ON # So [4] CIRCLED CROSS POMMEE..NOTCHED LEFT SEMICIRCLE WITH THREE DOTS 1F550..1F567 ; ON # So [24] CLOCK FACE ONE OCLOCK..CLOCK FACE TWELVE-THIRTY -1F5FB..1F5FF ; ON # So [5] MOUNT FUJI..MOYAI -1F601..1F610 ; ON # So [16] GRINNING FACE WITH SMILING EYES..NEUTRAL FACE -1F612..1F614 ; ON # So [3] UNAMUSED FACE..PENSIVE FACE -1F616 ; ON # So CONFOUNDED FACE -1F618 ; ON # So FACE THROWING A KISS -1F61A ; ON # So KISSING FACE WITH CLOSED EYES -1F61C..1F61E ; ON # So [3] FACE WITH STUCK-OUT TONGUE AND WINKING EYE..DISAPPOINTED FACE -1F620..1F625 ; ON # So [6] ANGRY FACE..DISAPPOINTED BUT RELIEVED FACE -1F628..1F62B ; ON # So [4] FEARFUL FACE..TIRED FACE -1F62D ; ON # So LOUDLY CRYING FACE -1F630..1F633 ; ON # So [4] FACE WITH OPEN MOUTH AND COLD SWEAT..FLUSHED FACE -1F635..1F640 ; ON # So [12] DIZZY FACE..WEARY CAT FACE +1F5FB..1F640 ; ON # So [70] MOUNT FUJI..WEARY CAT FACE 1F645..1F64F ; ON # So [11] FACE WITH NO GOOD GESTURE..PERSON WITH FOLDED HANDS 1F680..1F6C5 ; ON # So [70] ROCKET..LEFT LUGGAGE 
1F700..1F773 ; ON # So [116] ALCHEMICAL SYMBOL FOR QUINTESSENCE..ALCHEMICAL SYMBOL FOR HALF OUNCE -# Total code points: 4412 +# Total code points: 4447 # ================================================ @@ -1554,6 +1593,7 @@ FFFFE..FFFFF ; BN # Cn [2] <noncharacter-FFFFE>..<noncharacter-FFFFF> 0825..0827 ; NSM # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U 0829..082D ; NSM # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA 0859..085B ; NSM # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK +08E4..08FE ; NSM # Mn [27] ARABIC CURLY FATHA..ARABIC DAMMA WITH DOT 0900..0902 ; NSM # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA 093A ; NSM # Mn DEVANAGARI VOWEL SIGN OE 093C ; NSM # Mn DEVANAGARI SIGN NUKTA @@ -1637,6 +1677,7 @@ FFFFE..FFFFF ; BN # Cn [2] <noncharacter-FFFFE>..<noncharacter-FFFFF> 1732..1734 ; NSM # Mn [3] HANUNOO VOWEL SIGN I..HANUNOO SIGN PAMUDPOD 1752..1753 ; NSM # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U 1772..1773 ; NSM # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U +17B4..17B5 ; NSM # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA 17B7..17BD ; NSM # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA 17C6 ; NSM # Mn KHMER SIGN NIKAHIT 17C9..17D3 ; NSM # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT @@ -1664,6 +1705,7 @@ FFFFE..FFFFF ; BN # Cn [2] <noncharacter-FFFFE>..<noncharacter-FFFFF> 1B80..1B81 ; NSM # Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR 1BA2..1BA5 ; NSM # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU 1BA8..1BA9 ; NSM # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG +1BAB ; NSM # Mn SUNDANESE SIGN VIRAMA 1BE6 ; NSM # Mn BATAK SIGN TOMPI 1BE8..1BE9 ; NSM # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE 1BED ; NSM # Mn BATAK VOWEL SIGN KARO O @@ -1674,6 +1716,7 @@ FFFFE..FFFFF ; BN # Cn [2] <noncharacter-FFFFE>..<noncharacter-FFFFF> 1CD4..1CE0 ; NSM # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE 
SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA 1CE2..1CE8 ; NSM # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL 1CED ; NSM # Mn VEDIC SIGN TIRYAK +1CF4 ; NSM # Mn VEDIC TONE CANDRA ABOVE 1DC0..1DE6 ; NSM # Mn [39] COMBINING DOTTED GRAVE ACCENT..COMBINING LATIN SMALL LETTER Z 1DFC..1DFF ; NSM # Mn [4] COMBINING DOUBLE INVERTED BREVE BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW 20D0..20DC ; NSM # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE @@ -1684,11 +1727,12 @@ FFFFE..FFFFF ; BN # Cn [2] <noncharacter-FFFFE>..<noncharacter-FFFFF> 2CEF..2CF1 ; NSM # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS 2D7F ; NSM # Mn TIFINAGH CONSONANT JOINER 2DE0..2DFF ; NSM # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS -302A..302F ; NSM # Mn [6] IDEOGRAPHIC LEVEL TONE MARK..HANGUL DOUBLE DOT TONE MARK +302A..302D ; NSM # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK 3099..309A ; NSM # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK A66F ; NSM # Mn COMBINING CYRILLIC VZMET A670..A672 ; NSM # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN -A67C..A67D ; NSM # Mn [2] COMBINING CYRILLIC KAVYKA..COMBINING CYRILLIC PAYEROK +A674..A67D ; NSM # Mn [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK +A69F ; NSM # Mn COMBINING CYRILLIC LETTER IOTIFIED E A6F0..A6F1 ; NSM # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS A802 ; NSM # Mn SYLOTI NAGRI SIGN DVISVARA A806 ; NSM # Mn SYLOTI NAGRI SIGN HASANTA @@ -1712,6 +1756,8 @@ AAB2..AAB4 ; NSM # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U AAB7..AAB8 ; NSM # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA AABE..AABF ; NSM # Mn [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK AAC1 ; NSM # Mn TAI VIET TONE MAI THO +AAEC..AAED ; NSM # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK 
VOWEL SIGN AAI +AAF6 ; NSM # Mn MEETEI MAYEK VIRAMA ABE5 ; NSM # Mn MEETEI MAYEK VOWEL SIGN ANAP ABE8 ; NSM # Mn MEETEI MAYEK VOWEL SIGN UNAP ABED ; NSM # Mn MEETEI MAYEK APUN IYEK @@ -1729,6 +1775,16 @@ FE20..FE26 ; NSM # Mn [7] COMBINING LIGATURE LEFT HALF..COMBINING CONJOININ 11080..11081 ; NSM # Mn [2] KAITHI SIGN CANDRABINDU..KAITHI SIGN ANUSVARA 110B3..110B6 ; NSM # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI 110B9..110BA ; NSM # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA +11100..11102 ; NSM # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11127..1112B ; NSM # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112D..11134 ; NSM # Mn [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA +11180..11181 ; NSM # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +111B6..111BE ; NSM # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +116AB ; NSM # Mn TAKRI SIGN ANUSVARA +116AD ; NSM # Mn TAKRI VOWEL SIGN AA +116B0..116B5 ; NSM # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +116B7 ; NSM # Mn TAKRI SIGN NUKTA +16F8F..16F92 ; NSM # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW 1D167..1D169 ; NSM # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 1D17B..1D182 ; NSM # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE 1D185..1D18B ; NSM # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE @@ -1736,13 +1792,13 @@ FE20..FE26 ; NSM # Mn [7] COMBINING LIGATURE LEFT HALF..COMBINING CONJOININ 1D242..1D244 ; NSM # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME E0100..E01EF ; NSM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 1209 +# Total code points: 1290 # ================================================ # Bidi_Class=Arabic_Letter -0604..0605 ; AL # Cn [2] <reserved-0604>..<reserved-0605> +0605 ; AL # Cn <reserved-0605> 0608 ; AL # Sm ARABIC RAY 060B ; AL # Sc AFGHANI SIGN 060D ; AL # Po ARABIC DATE SEPARATOR @@ -1764,12 +1820,18 @@ 
E0100..E01EF ; NSM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 06FF ; AL # Lo ARABIC LETTER HEH WITH INVERTED V 0700..070D ; AL # Po [14] SYRIAC END OF PARAGRAPH..SYRIAC HARKLEAN ASTERISCUS 070E ; AL # Cn <reserved-070E> +070F ; AL # Cf SYRIAC ABBREVIATION MARK 0710 ; AL # Lo SYRIAC LETTER ALAPH 0712..072F ; AL # Lo [30] SYRIAC LETTER BETH..SYRIAC LETTER PERSIAN DHALATH 074B..074C ; AL # Cn [2] <reserved-074B>..<reserved-074C> 074D..07A5 ; AL # Lo [89] SYRIAC LETTER SOGDIAN ZHAIN..THAANA LETTER WAAVU 07B1 ; AL # Lo THAANA LETTER NAA 07B2..07BF ; AL # Cn [14] <reserved-07B2>..<reserved-07BF> +08A0 ; AL # Lo ARABIC LETTER BEH WITH SMALL V BELOW +08A1 ; AL # Cn <reserved-08A1> +08A2..08AC ; AL # Lo [11] ARABIC LETTER JEEM WITH TWO DOTS ABOVE..ARABIC LETTER ROHINGYA YEH +08AD..08E3 ; AL # Cn [55] <reserved-08AD>..<reserved-08E3> +08FF ; AL # Cn <reserved-08FF> FB50..FBB1 ; AL # Lo [98] ARABIC LETTER ALEF WASLA ISOLATED FORM..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM FBB2..FBC1 ; AL # Sk [16] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL SMALL TAH BELOW FBC2..FBD2 ; AL # Cn [17] <reserved-FBC2>..<reserved-FBD2> @@ -1786,8 +1848,75 @@ FE70..FE74 ; AL # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN ISO FE75 ; AL # Cn <reserved-FE75> FE76..FEFC ; AL # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM FEFD..FEFE ; AL # Cn [2] <reserved-FEFD>..<reserved-FEFE> - -# Total code points: 1115 +1EE00..1EE03 ; AL # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE04 ; AL # Cn <reserved-1EE04> +1EE05..1EE1F ; AL # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE20 ; AL # Cn <reserved-1EE20> +1EE21..1EE22 ; AL # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE23 ; AL # Cn <reserved-1EE23> +1EE24 ; AL # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE25..1EE26 ; AL # Cn [2] <reserved-1EE25>..<reserved-1EE26> +1EE27 ; AL # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE28 ; AL # Cn 
<reserved-1EE28> +1EE29..1EE32 ; AL # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE33 ; AL # Cn <reserved-1EE33> +1EE34..1EE37 ; AL # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE38 ; AL # Cn <reserved-1EE38> +1EE39 ; AL # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3A ; AL # Cn <reserved-1EE3A> +1EE3B ; AL # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE3C..1EE41 ; AL # Cn [6] <reserved-1EE3C>..<reserved-1EE41> +1EE42 ; AL # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE43..1EE46 ; AL # Cn [4] <reserved-1EE43>..<reserved-1EE46> +1EE47 ; AL # Lo ARABIC MATHEMATICAL TAILED HAH +1EE48 ; AL # Cn <reserved-1EE48> +1EE49 ; AL # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4A ; AL # Cn <reserved-1EE4A> +1EE4B ; AL # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4C ; AL # Cn <reserved-1EE4C> +1EE4D..1EE4F ; AL # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE50 ; AL # Cn <reserved-1EE50> +1EE51..1EE52 ; AL # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE53 ; AL # Cn <reserved-1EE53> +1EE54 ; AL # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE55..1EE56 ; AL # Cn [2] <reserved-1EE55>..<reserved-1EE56> +1EE57 ; AL # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE58 ; AL # Cn <reserved-1EE58> +1EE59 ; AL # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5A ; AL # Cn <reserved-1EE5A> +1EE5B ; AL # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5C ; AL # Cn <reserved-1EE5C> +1EE5D ; AL # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5E ; AL # Cn <reserved-1EE5E> +1EE5F ; AL # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE60 ; AL # Cn <reserved-1EE60> +1EE61..1EE62 ; AL # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE63 ; AL # Cn <reserved-1EE63> +1EE64 ; AL # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE65..1EE66 ; AL # Cn [2] <reserved-1EE65>..<reserved-1EE66> +1EE67..1EE6A ; AL # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6B ; AL # Cn 
<reserved-1EE6B> +1EE6C..1EE72 ; AL # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE73 ; AL # Cn <reserved-1EE73> +1EE74..1EE77 ; AL # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE78 ; AL # Cn <reserved-1EE78> +1EE79..1EE7C ; AL # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7D ; AL # Cn <reserved-1EE7D> +1EE7E ; AL # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE7F ; AL # Cn <reserved-1EE7F> +1EE80..1EE89 ; AL # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8A ; AL # Cn <reserved-1EE8A> +1EE8B..1EE9B ; AL # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EE9C..1EEA0 ; AL # Cn [5] <reserved-1EE9C>..<reserved-1EEA0> +1EEA1..1EEA3 ; AL # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA4 ; AL # Cn <reserved-1EEA4> +1EEA5..1EEA9 ; AL # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAA ; AL # Cn <reserved-1EEAA> +1EEAB..1EEBB ; AL # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN +1EEBC..1EEEF ; AL # Cn [52] <reserved-1EEBC>..<reserved-1EEEF> +1EEF2..1EEFF ; AL # Cn [14] <reserved-1EEF2>..<reserved-1EEFF> + +# Total code points: 1438 # ================================================ diff --git a/lib/unicore/extracted/DBinaryProperties.txt b/lib/unicore/extracted/DBinaryProperties.txt index c5a10dcf8f..6d23c068a9 100644 --- a/lib/unicore/extracted/DBinaryProperties.txt +++ b/lib/unicore/extracted/DBinaryProperties.txt @@ -1,8 +1,8 @@ -# DerivedBinaryProperties-6.0.0.txt -# Date: 2010-05-18, 00:49:04 GMT [MD] +# DerivedBinaryProperties-6.1.0.txt +# Date: 2011-07-25, 00:54:10 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. 
# For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ @@ -83,7 +83,7 @@ 27C5 ; Bidi_Mirrored # Ps LEFT S-SHAPED BAG DELIMITER 27C6 ; Bidi_Mirrored # Pe RIGHT S-SHAPED BAG DELIMITER 27C8..27C9 ; Bidi_Mirrored # Sm [2] REVERSE SOLIDUS PRECEDING SUBSET..SUPERSET PRECEDING SOLIDUS -27CC ; Bidi_Mirrored # Sm LONG DIVISION +27CB..27CD ; Bidi_Mirrored # Sm [3] MATHEMATICAL RISING DIAGONAL..MATHEMATICAL FALLING DIAGONAL 27D3..27D6 ; Bidi_Mirrored # Sm [4] LOWER RIGHT CORNER WITH DOT..RIGHT OUTER JOIN 27DC..27DE ; Bidi_Mirrored # Sm [3] LEFT MULTIMAP..LONG LEFT TACK 27E2..27E5 ; Bidi_Mirrored # Sm [4] WHITE CONCAVE-SIDED DIAMOND WITH LEFTWARDS TICK..WHITE SQUARE WITH RIGHTWARDS TICK @@ -222,6 +222,6 @@ FF63 ; Bidi_Mirrored # Pe HALFWIDTH RIGHT CORNER BRACKET 1D789 ; Bidi_Mirrored # Sm MATHEMATICAL SANS-SERIF BOLD PARTIAL DIFFERENTIAL 1D7C3 ; Bidi_Mirrored # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL -# Total code points: 543 +# Total code points: 545 # EOF diff --git a/lib/unicore/extracted/DCombiningClass.txt b/lib/unicore/extracted/DCombiningClass.txt index f2695cf61e..33495d2c50 100644 --- a/lib/unicore/extracted/DCombiningClass.txt +++ b/lib/unicore/extracted/DCombiningClass.txt @@ -1,8 +1,8 @@ -# DerivedCombiningClass-6.0.0.txt -# Date: 2010-08-19, 00:48:04 GMT [MD] +# DerivedCombiningClass-6.1.0.txt +# Date: 2011-12-05, 16:44:07 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. 
# For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ @@ -51,10 +51,11 @@ 00A0 ; 0 # Zs NO-BREAK SPACE 00A1 ; 0 # Po INVERTED EXCLAMATION MARK 00A2..00A5 ; 0 # Sc [4] CENT SIGN..YEN SIGN -00A6..00A7 ; 0 # So [2] BROKEN BAR..SECTION SIGN +00A6 ; 0 # So BROKEN BAR +00A7 ; 0 # Po SECTION SIGN 00A8 ; 0 # Sk DIAERESIS 00A9 ; 0 # So COPYRIGHT SIGN -00AA ; 0 # L& FEMININE ORDINAL INDICATOR +00AA ; 0 # Lo FEMININE ORDINAL INDICATOR 00AB ; 0 # Pi LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 00AC ; 0 # Sm NOT SIGN 00AD ; 0 # Cf SOFT HYPHEN @@ -65,11 +66,10 @@ 00B2..00B3 ; 0 # No [2] SUPERSCRIPT TWO..SUPERSCRIPT THREE 00B4 ; 0 # Sk ACUTE ACCENT 00B5 ; 0 # L& MICRO SIGN -00B6 ; 0 # So PILCROW SIGN -00B7 ; 0 # Po MIDDLE DOT +00B6..00B7 ; 0 # Po [2] PILCROW SIGN..MIDDLE DOT 00B8 ; 0 # Sk CEDILLA 00B9 ; 0 # No SUPERSCRIPT ONE -00BA ; 0 # L& MASCULINE ORDINAL INDICATOR +00BA ; 0 # Lo MASCULINE ORDINAL INDICATOR 00BB ; 0 # Pf RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK 00BC..00BE ; 0 # No [3] VULGAR FRACTION ONE QUARTER..VULGAR FRACTION THREE QUARTERS 00BF ; 0 # Po INVERTED QUESTION MARK @@ -120,6 +120,7 @@ 0561..0587 ; 0 # L& [39] ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LIGATURE ECH YIWN 0589 ; 0 # Po ARMENIAN FULL STOP 058A ; 0 # Pd ARMENIAN HYPHEN +058F ; 0 # Sc ARMENIAN DRAM SIGN 05BE ; 0 # Pd HEBREW PUNCTUATION MAQAF 05C0 ; 0 # Po HEBREW PUNCTUATION PASEQ 05C3 ; 0 # Po HEBREW PUNCTUATION SOF PASUQ @@ -127,7 +128,7 @@ 05D0..05EA ; 0 # Lo [27] HEBREW LETTER ALEF..HEBREW LETTER TAV 05F0..05F2 ; 0 # Lo [3] HEBREW LIGATURE YIDDISH DOUBLE VAV..HEBREW LIGATURE YIDDISH DOUBLE YOD 05F3..05F4 ; 0 # Po [2] HEBREW PUNCTUATION GERESH..HEBREW PUNCTUATION GERSHAYIM -0600..0603 ; 0 # Cf [4] ARABIC NUMBER SIGN..ARABIC SIGN SAFHA +0600..0604 ; 0 # Cf [5] ARABIC NUMBER SIGN..ARABIC SIGN SAMVAT 0606..0608 ; 0 # Sm [3] ARABIC-INDIC CUBE ROOT..ARABIC RAY 0609..060A ; 0 # Po [2] ARABIC-INDIC PER MILLE SIGN..ARABIC-INDIC PER TEN 
THOUSAND SIGN 060B ; 0 # Sc AFGHANI SIGN @@ -173,6 +174,8 @@ 0830..083E ; 0 # Po [15] SAMARITAN PUNCTUATION NEQUDAA..SAMARITAN PUNCTUATION ANNAAU 0840..0858 ; 0 # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN 085E ; 0 # Po MANDAIC PUNCTUATION +08A0 ; 0 # Lo ARABIC LETTER BEH WITH SMALL V BELOW +08A2..08AC ; 0 # Lo [11] ARABIC LETTER JEEM WITH TWO DOTS ABOVE..ARABIC LETTER ROHINGYA YEH 0900..0902 ; 0 # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA 0903 ; 0 # Mc DEVANAGARI SIGN VISARGA 0904..0939 ; 0 # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA @@ -255,6 +258,7 @@ 0AE0..0AE1 ; 0 # Lo [2] GUJARATI LETTER VOCALIC RR..GUJARATI LETTER VOCALIC LL 0AE2..0AE3 ; 0 # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL 0AE6..0AEF ; 0 # Nd [10] GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE +0AF0 ; 0 # Po GUJARATI ABBREVIATION SIGN 0AF1 ; 0 # Sc GUJARATI RUPEE SIGN 0B01 ; 0 # Mn ORIYA SIGN CANDRABINDU 0B02..0B03 ; 0 # Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA @@ -402,11 +406,13 @@ 0EC6 ; 0 # Lm LAO KO LA 0ECC..0ECD ; 0 # Mn [2] LAO CANCELLATION MARK..LAO NIGGAHITA 0ED0..0ED9 ; 0 # Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE -0EDC..0EDD ; 0 # Lo [2] LAO HO NO..LAO HO MO +0EDC..0EDF ; 0 # Lo [4] LAO HO NO..LAO LETTER KHMU NYO 0F00 ; 0 # Lo TIBETAN SYLLABLE OM 0F01..0F03 ; 0 # So [3] TIBETAN MARK GTER YIG MGO TRUNCATED A..TIBETAN MARK GTER YIG MGO -UM GTER TSHEG MA 0F04..0F12 ; 0 # Po [15] TIBETAN MARK INITIAL YIG MGO MDUN MA..TIBETAN MARK RGYA GRAM SHAD -0F13..0F17 ; 0 # So [5] TIBETAN MARK CARET -DZUD RTAGS ME LONG CAN..TIBETAN ASTROLOGICAL SIGN SGRA GCAN -CHAR RTAGS +0F13 ; 0 # So TIBETAN MARK CARET -DZUD RTAGS ME LONG CAN +0F14 ; 0 # Po TIBETAN MARK GTER TSHEG +0F15..0F17 ; 0 # So [3] TIBETAN LOGOTYPE SIGN CHAD RTAGS..TIBETAN ASTROLOGICAL SIGN SGRA GCAN -CHAR RTAGS 0F1A..0F1F ; 0 # So [6] TIBETAN SIGN RDEL DKAR GCIG..TIBETAN SIGN RDEL DKAR RDEL NAG 0F20..0F29 ; 0 # Nd [10] TIBETAN DIGIT ZERO..TIBETAN DIGIT NINE 0F2A..0F33 ; 0 
# No [10] TIBETAN DIGIT HALF ONE..TIBETAN DIGIT HALF ZERO @@ -469,10 +475,12 @@ 109D ; 0 # Mn MYANMAR VOWEL SIGN AITON AI 109E..109F ; 0 # So [2] MYANMAR SYMBOL SHAN ONE..MYANMAR SYMBOL SHAN EXCLAMATION 10A0..10C5 ; 0 # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; 0 # L& GEORGIAN CAPITAL LETTER YN +10CD ; 0 # L& GEORGIAN CAPITAL LETTER AEN 10D0..10FA ; 0 # Lo [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN 10FB ; 0 # Po GEORGIAN PARAGRAPH SEPARATOR 10FC ; 0 # Lm MODIFIER LETTER GEORGIAN NAR -1100..1248 ; 0 # Lo [329] HANGUL CHOSEONG KIYEOK..ETHIOPIC SYLLABLE QWA +10FD..1248 ; 0 # Lo [332] GEORGIAN LETTER AEN..ETHIOPIC SYLLABLE QWA 124A..124D ; 0 # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE 1250..1256 ; 0 # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO 1258 ; 0 # Lo ETHIOPIC SYLLABLE QHWA @@ -488,8 +496,7 @@ 12D8..1310 ; 0 # Lo [57] ETHIOPIC SYLLABLE ZA..ETHIOPIC SYLLABLE GWA 1312..1315 ; 0 # Lo [4] ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE 1318..135A ; 0 # Lo [67] ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA -1360 ; 0 # So ETHIOPIC SECTION MARK -1361..1368 ; 0 # Po [8] ETHIOPIC WORDSPACE..ETHIOPIC PARAGRAPH SEPARATOR +1360..1368 ; 0 # Po [9] ETHIOPIC SECTION MARK..ETHIOPIC PARAGRAPH SEPARATOR 1369..137C ; 0 # No [20] ETHIOPIC DIGIT ONE..ETHIOPIC NUMBER TEN THOUSAND 1380..138F ; 0 # Lo [16] ETHIOPIC SYLLABLE SEBATBEIT MWA..ETHIOPIC SYLLABLE PWE 1390..1399 ; 0 # So [10] ETHIOPIC TONAL MARK YIZET..ETHIOPIC TONAL MARK KURT @@ -517,7 +524,7 @@ 176E..1770 ; 0 # Lo [3] TAGBANWA LETTER LA..TAGBANWA LETTER SA 1772..1773 ; 0 # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U 1780..17B3 ; 0 # Lo [52] KHMER LETTER KA..KHMER INDEPENDENT VOWEL QAU -17B4..17B5 ; 0 # Cf [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA +17B4..17B5 ; 0 # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA 17B6 ; 0 # Mc KHMER VOWEL SIGN AA 17B7..17BD ; 0 # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA 17BE..17C5 ; 0 # Mc [8] KHMER VOWEL SIGN 
OE..KHMER VOWEL SIGN AU @@ -605,9 +612,10 @@ 1BA2..1BA5 ; 0 # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU 1BA6..1BA7 ; 0 # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG 1BA8..1BA9 ; 0 # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG +1BAC..1BAD ; 0 # Mc [2] SUNDANESE CONSONANT SIGN PASANGAN MA..SUNDANESE CONSONANT SIGN PASANGAN WA 1BAE..1BAF ; 0 # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA 1BB0..1BB9 ; 0 # Nd [10] SUNDANESE DIGIT ZERO..SUNDANESE DIGIT NINE -1BC0..1BE5 ; 0 # Lo [38] BATAK LETTER A..BATAK LETTER U +1BBA..1BE5 ; 0 # Lo [44] SUNDANESE AVAGRAHA..BATAK LETTER U 1BE7 ; 0 # Mc BATAK VOWEL SIGN E 1BE8..1BE9 ; 0 # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE 1BEA..1BEC ; 0 # Mc [3] BATAK VOWEL SIGN I..BATAK VOWEL SIGN O @@ -627,14 +635,16 @@ 1C5A..1C77 ; 0 # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH 1C78..1C7D ; 0 # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD 1C7E..1C7F ; 0 # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD +1CC0..1CC7 ; 0 # Po [8] SUNDANESE PUNCTUATION BINDU SURYA..SUNDANESE PUNCTUATION BINDU BA SATANGA 1CD3 ; 0 # Po VEDIC SIGN NIHSHVASA 1CE1 ; 0 # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA 1CE9..1CEC ; 0 # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL 1CEE..1CF1 ; 0 # Lo [4] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ANUSVARA UBHAYATO MUKHA -1CF2 ; 0 # Mc VEDIC SIGN ARDHAVISARGA +1CF2..1CF3 ; 0 # Mc [2] VEDIC SIGN ARDHAVISARGA..VEDIC SIGN ROTATED ARDHAVISARGA +1CF5..1CF6 ; 0 # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA 1D00..1D2B ; 0 # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL -1D2C..1D61 ; 0 # Lm [54] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI -1D62..1D77 ; 0 # L& [22] LATIN SUBSCRIPT SMALL LETTER I..LATIN SMALL LETTER TURNED G +1D2C..1D6A ; 0 # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1D6B..1D77 ; 0 # L& 
[13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G 1D78 ; 0 # Lm MODIFIER LETTER CYRILLIC EN 1D79..1D9A ; 0 # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK 1D9B..1DBF ; 0 # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA @@ -815,9 +825,7 @@ 27C0..27C4 ; 0 # Sm [5] THREE DIMENSIONAL ANGLE..OPEN SUPERSET 27C5 ; 0 # Ps LEFT S-SHAPED BAG DELIMITER 27C6 ; 0 # Pe RIGHT S-SHAPED BAG DELIMITER -27C7..27CA ; 0 # Sm [4] OR WITH DOT INSIDE..VERTICAL BAR WITH HORIZONTAL STROKE -27CC ; 0 # Sm LONG DIVISION -27CE..27E5 ; 0 # Sm [24] SQUARED LOGICAL AND..WHITE SQUARE WITH RIGHTWARDS TICK +27C7..27E5 ; 0 # Sm [31] OR WITH DOT INSIDE..WHITE SQUARE WITH RIGHTWARDS TICK 27E6 ; 0 # Ps MATHEMATICAL LEFT WHITE SQUARE BRACKET 27E7 ; 0 # Pe MATHEMATICAL RIGHT WHITE SQUARE BRACKET 27E8 ; 0 # Ps MATHEMATICAL LEFT ANGLE BRACKET @@ -869,16 +877,19 @@ 2B50..2B59 ; 0 # So [10] WHITE MEDIUM STAR..HEAVY CIRCLED SALTIRE 2C00..2C2E ; 0 # L& [47] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE 2C30..2C5E ; 0 # L& [47] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER LATINATE MYSLITE -2C60..2C7C ; 0 # L& [29] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN SUBSCRIPT SMALL LETTER J -2C7D ; 0 # Lm MODIFIER LETTER CAPITAL V +2C60..2C7B ; 0 # L& [28] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN LETTER SMALL CAPITAL TURNED E +2C7C..2C7D ; 0 # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V 2C7E..2CE4 ; 0 # L& [103] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SYMBOL KAI 2CE5..2CEA ; 0 # So [6] COPTIC SYMBOL MI RO..COPTIC SYMBOL SHIMA SIMA 2CEB..2CEE ; 0 # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CF2..2CF3 ; 0 # L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI 2CF9..2CFC ; 0 # Po [4] COPTIC OLD NUBIAN FULL STOP..COPTIC OLD NUBIAN VERSE DIVIDER 2CFD ; 0 # No COPTIC FRACTION ONE HALF 2CFE..2CFF ; 0 # Po [2] COPTIC FULL 
STOP..COPTIC MORPHOLOGICAL DIVIDER 2D00..2D25 ; 0 # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE -2D30..2D65 ; 0 # Lo [54] TIFINAGH LETTER YA..TIFINAGH LETTER YAZZ +2D27 ; 0 # L& GEORGIAN SMALL LETTER YN +2D2D ; 0 # L& GEORGIAN SMALL LETTER AEN +2D30..2D67 ; 0 # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO 2D6F ; 0 # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK 2D70 ; 0 # Po TIFINAGH SEPARATOR MARK 2D80..2D96 ; 0 # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE @@ -921,7 +932,8 @@ 2E29 ; 0 # Pe RIGHT DOUBLE PARENTHESIS 2E2A..2E2E ; 0 # Po [5] TWO DOTS OVER ONE DOT PUNCTUATION..REVERSED QUESTION MARK 2E2F ; 0 # Lm VERTICAL TILDE -2E30..2E31 ; 0 # Po [2] RING POINT..WORD SEPARATOR MIDDLE DOT +2E30..2E39 ; 0 # Po [10] RING POINT..TOP HALF SECTION SIGN +2E3A..2E3B ; 0 # Pd [2] TWO-EM DASH..THREE-EM DASH 2E80..2E99 ; 0 # So [26] CJK RADICAL REPEAT..CJK RADICAL RAP 2E9B..2EF3 ; 0 # So [89] CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED TURTLE 2F00..2FD5 ; 0 # So [214] KANGXI RADICAL ONE..KANGXI RADICAL FLUTE @@ -983,7 +995,9 @@ 31F0..31FF ; 0 # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO 3200..321E ; 0 # So [31] PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED KOREAN CHARACTER O HU 3220..3229 ; 0 # No [10] PARENTHESIZED IDEOGRAPH ONE..PARENTHESIZED IDEOGRAPH TEN -322A..3250 ; 0 # So [39] PARENTHESIZED IDEOGRAPH MOON..PARTNERSHIP SIGN +322A..3247 ; 0 # So [30] PARENTHESIZED IDEOGRAPH MOON..CIRCLED IDEOGRAPH KOTO +3248..324F ; 0 # No [8] CIRCLED NUMBER TEN ON BLACK SQUARE..CIRCLED NUMBER EIGHTY ON BLACK SQUARE +3250 ; 0 # So PARTNERSHIP SIGN 3251..325F ; 0 # No [15] CIRCLED NUMBER TWENTY ONE..CIRCLED NUMBER THIRTY FIVE 3260..327F ; 0 # So [32] CIRCLED HANGUL KIYEOK..KOREAN STANDARD SYMBOL 3280..3289 ; 0 # No [10] CIRCLED IDEOGRAPH ONE..CIRCLED IDEOGRAPH TEN @@ -993,7 +1007,7 @@ 3300..33FF ; 0 # So [256] SQUARE APAATO..SQUARE GAL 3400..4DB5 ; 0 # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5 4DC0..4DFF ; 0 # So [64] 
HEXAGRAM FOR THE CREATIVE HEAVEN..HEXAGRAM FOR BEFORE COMPLETION -4E00..9FCB ; 0 # Lo [20940] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCB +4E00..9FCC ; 0 # Lo [20941] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCC A000..A014 ; 0 # Lo [21] YI SYLLABLE IT..YI SYLLABLE E A015 ; 0 # Lm YI SYLLABLE WU A016..A48C ; 0 # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR @@ -1026,8 +1040,9 @@ A771..A787 ; 0 # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR A788 ; 0 # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A789..A78A ; 0 # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN A78B..A78E ; 0 # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT -A790..A791 ; 0 # L& [2] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER N WITH DESCENDER -A7A0..A7A9 ; 0 # L& [10] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN SMALL LETTER S WITH OBLIQUE STROKE +A790..A793 ; 0 # L& [4] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER C WITH BAR +A7A0..A7AA ; 0 # L& [11] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN CAPITAL LETTER H WITH HOOK +A7F8..A7F9 ; 0 # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE A7FA ; 0 # L& LATIN LETTER SMALL CAPITAL TURNED M A7FB..A801 ; 0 # Lo [7] LATIN EPIGRAPHIC LETTER REVERSED F..SYLOTI NAGRI LETTER I A802 ; 0 # Mn SYLOTI NAGRI SIGN DVISVARA @@ -1102,6 +1117,14 @@ AAC2 ; 0 # Lo TAI VIET TONE MAI SONG AADB..AADC ; 0 # Lo [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG AADD ; 0 # Lm TAI VIET SYMBOL SAM AADE..AADF ; 0 # Po [2] TAI VIET SYMBOL HO HOI..TAI VIET SYMBOL KOI KOI +AAE0..AAEA ; 0 # Lo [11] MEETEI MAYEK LETTER E..MEETEI MAYEK LETTER SSA +AAEB ; 0 # Mc MEETEI MAYEK VOWEL SIGN II +AAEC..AAED ; 0 # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI +AAEE..AAEF ; 0 # Mc [2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU +AAF0..AAF1 ; 0 # Po [2] MEETEI MAYEK CHEIKHAN..MEETEI MAYEK AHANG KHUDAM +AAF2 ; 0 # Lo MEETEI 
MAYEK ANJI +AAF3..AAF4 ; 0 # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK +AAF5 ; 0 # Mc MEETEI MAYEK VOWEL SIGN VISARGA AB01..AB06 ; 0 # Lo [6] ETHIOPIC SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO AB09..AB0E ; 0 # Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO AB11..AB16 ; 0 # Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO @@ -1120,8 +1143,7 @@ AC00..D7A3 ; 0 # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH D7B0..D7C6 ; 0 # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E D7CB..D7FB ; 0 # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH E000..F8FF ; 0 # Co [6400] <private-use-E000>..<private-use-F8FF> -F900..FA2D ; 0 # Lo [302] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA2D -FA30..FA6D ; 0 # Lo [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D +F900..FA6D ; 0 # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D FA70..FAD9 ; 0 # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 FB00..FB06 ; 0 # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST FB13..FB17 ; 0 # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH @@ -1249,8 +1271,7 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 1003F..1004D ; 0 # Lo [15] LINEAR B SYLLABLE B020 ZO..LINEAR B SYLLABLE B091 TWO 10050..1005D ; 0 # Lo [14] LINEAR B SYMBOL B018..LINEAR B SYMBOL B089 10080..100FA ; 0 # Lo [123] LINEAR B IDEOGRAM B100 MAN..LINEAR B IDEOGRAM VESSEL B305 -10100..10101 ; 0 # Po [2] AEGEAN WORD SEPARATOR LINE..AEGEAN WORD SEPARATOR DOT -10102 ; 0 # So AEGEAN CHECK MARK +10100..10102 ; 0 # Po [3] AEGEAN WORD SEPARATOR LINE..AEGEAN CHECK MARK 10107..10133 ; 0 # No [45] AEGEAN NUMBER ONE..AEGEAN NUMBER NINETY THOUSAND 10137..1013F ; 0 # So [9] AEGEAN WEIGHT BASE UNIT..AEGEAN MEASURE THIRD SUBUNIT 10140..10174 ; 0 # Nl [53] GREEK ACROPHONIC ATTIC ONE QUARTER..GREEK ACROPHONIC STRATIAN FIFTY MNAS 
@@ -1289,6 +1310,8 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 1091F ; 0 # Po PHOENICIAN WORD SEPARATOR 10920..10939 ; 0 # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C 1093F ; 0 # Po LYDIAN TRIANGULAR MARK +10980..109B7 ; 0 # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA +109BE..109BF ; 0 # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN 10A00 ; 0 # Lo KHAROSHTHI LETTER A 10A01..10A03 ; 0 # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R 10A05..10A06 ; 0 # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O @@ -1327,11 +1350,40 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 110BB..110BC ; 0 # Po [2] KAITHI ABBREVIATION SIGN..KAITHI ENUMERATION SIGN 110BD ; 0 # Cf KAITHI NUMBER SIGN 110BE..110C1 ; 0 # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA +110D0..110E8 ; 0 # Lo [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE +110F0..110F9 ; 0 # Nd [10] SORA SOMPENG DIGIT ZERO..SORA SOMPENG DIGIT NINE +11103..11126 ; 0 # Lo [36] CHAKMA LETTER AA..CHAKMA LETTER HAA +11127..1112B ; 0 # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112C ; 0 # Mc CHAKMA VOWEL SIGN E +1112D..11132 ; 0 # Mn [6] CHAKMA VOWEL SIGN AI..CHAKMA AU MARK +11136..1113F ; 0 # Nd [10] CHAKMA DIGIT ZERO..CHAKMA DIGIT NINE +11140..11143 ; 0 # Po [4] CHAKMA SECTION MARK..CHAKMA QUESTION MARK +11180..11181 ; 0 # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +11182 ; 0 # Mc SHARADA SIGN VISARGA +11183..111B2 ; 0 # Lo [48] SHARADA LETTER A..SHARADA LETTER HA +111B3..111B5 ; 0 # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II +111B6..111BE ; 0 # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +111BF ; 0 # Mc SHARADA VOWEL SIGN AU +111C1..111C4 ; 0 # Lo [4] SHARADA SIGN AVAGRAHA..SHARADA OM +111C5..111C8 ; 0 # Po [4] SHARADA DANDA..SHARADA SEPARATOR +111D0..111D9 ; 0 # Nd [10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE +11680..116AA ; 0 # Lo [43] TAKRI LETTER A..TAKRI 
LETTER RRA +116AB ; 0 # Mn TAKRI SIGN ANUSVARA +116AC ; 0 # Mc TAKRI SIGN VISARGA +116AD ; 0 # Mn TAKRI VOWEL SIGN AA +116AE..116AF ; 0 # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II +116B0..116B5 ; 0 # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +116C0..116C9 ; 0 # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE 12000..1236E ; 0 # Lo [879] CUNEIFORM SIGN A..CUNEIFORM SIGN ZUM 12400..12462 ; 0 # Nl [99] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN OLD ASSYRIAN ONE QUARTER 12470..12473 ; 0 # Po [4] CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER..CUNEIFORM PUNCTUATION SIGN DIAGONAL TRICOLON 13000..1342E ; 0 # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032 16800..16A38 ; 0 # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ +16F00..16F44 ; 0 # Lo [69] MIAO LETTER PA..MIAO LETTER HHA +16F50 ; 0 # Lo MIAO LETTER NASALIZATION +16F51..16F7E ; 0 # Mc [46] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN NG +16F8F..16F92 ; 0 # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW +16F93..16F9F ; 0 # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 1B000..1B001 ; 0 # Lo [2] KATAKANA LETTER ARCHAIC E..HIRAGANA LETTER ARCHAIC YE 1D000..1D0F5 ; 0 # So [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO 1D100..1D126 ; 0 # So [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2 @@ -1386,6 +1438,40 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 1D7C3 ; 0 # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL 1D7C4..1D7CB ; 0 # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA 1D7CE..1D7FF ; 0 # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE +1EE00..1EE03 ; 0 # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; 0 # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; 0 # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; 0 # Lo 
ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; 0 # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; 0 # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; 0 # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; 0 # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; 0 # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; 0 # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; 0 # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; 0 # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; 0 # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; 0 # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; 0 # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; 0 # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; 0 # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; 0 # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; 0 # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; 0 # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; 0 # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; 0 # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; 0 # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; 0 # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; 0 # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; 0 # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; 0 # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; 0 # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; 0 # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; 0 # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; 0 # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; 0 # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK 
WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; 0 # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN +1EEF0..1EEF1 ; 0 # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL 1F000..1F02B ; 0 # So [44] MAHJONG TILE EAST WIND..MAHJONG TILE BACK 1F030..1F093 ; 0 # So [100] DOMINO TILE HORIZONTAL BACK..DOMINO TILE VERTICAL-06-06 1F0A0..1F0AE ; 0 # So [15] PLAYING CARD BACK..PLAYING CARD KING OF SPADES @@ -1394,7 +1480,7 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 1F0D1..1F0DF ; 0 # So [15] PLAYING CARD ACE OF CLUBS..PLAYING CARD WHITE JOKER 1F100..1F10A ; 0 # No [11] DIGIT ZERO FULL STOP..DIGIT NINE COMMA 1F110..1F12E ; 0 # So [31] PARENTHESIZED LATIN CAPITAL LETTER A..CIRCLED WZ -1F130..1F169 ; 0 # So [58] SQUARED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z +1F130..1F16B ; 0 # So [60] SQUARED LATIN CAPITAL LETTER A..RAISED MD SIGN 1F170..1F19A ; 0 # So [43] NEGATIVE SQUARED LATIN CAPITAL LETTER A..SQUARED VS 1F1E6..1F202 ; 0 # So [29] REGIONAL INDICATOR SYMBOL LETTER A..SQUARED KATAKANA SA 1F210..1F23A ; 0 # So [43] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-55B6 @@ -1412,19 +1498,9 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 1F442..1F4F7 ; 0 # So [182] EAR..CAMERA 1F4F9..1F4FC ; 0 # So [4] VIDEO CAMERA..VIDEOCASSETTE 1F500..1F53D ; 0 # So [62] TWISTED RIGHTWARDS ARROWS..DOWN-POINTING SMALL RED TRIANGLE +1F540..1F543 ; 0 # So [4] CIRCLED CROSS POMMEE..NOTCHED LEFT SEMICIRCLE WITH THREE DOTS 1F550..1F567 ; 0 # So [24] CLOCK FACE ONE OCLOCK..CLOCK FACE TWELVE-THIRTY -1F5FB..1F5FF ; 0 # So [5] MOUNT FUJI..MOYAI -1F601..1F610 ; 0 # So [16] GRINNING FACE WITH SMILING EYES..NEUTRAL FACE -1F612..1F614 ; 0 # So [3] UNAMUSED FACE..PENSIVE FACE -1F616 ; 0 # So CONFOUNDED FACE -1F618 ; 0 # So FACE THROWING A KISS -1F61A ; 0 # So KISSING FACE WITH CLOSED EYES 
-1F61C..1F61E ; 0 # So [3] FACE WITH STUCK-OUT TONGUE AND WINKING EYE..DISAPPOINTED FACE -1F620..1F625 ; 0 # So [6] ANGRY FACE..DISAPPOINTED BUT RELIEVED FACE -1F628..1F62B ; 0 # So [4] FEARFUL FACE..TIRED FACE -1F62D ; 0 # So LOUDLY CRYING FACE -1F630..1F633 ; 0 # So [4] FACE WITH OPEN MOUTH AND COLD SWEAT..FLUSHED FACE -1F635..1F640 ; 0 # So [12] DIZZY FACE..WEARY CAT FACE +1F5FB..1F640 ; 0 # So [70] MOUNT FUJI..WEARY CAT FACE 1F645..1F64F ; 0 # So [11] FACE WITH NO GOOD GESTURE..PERSON WITH FOLDED HANDS 1F680..1F6C5 ; 0 # So [70] ROCKET..LEFT LUGGAGE 1F700..1F773 ; 0 # So [116] ALCHEMICAL SYMBOL FOR QUINTESSENCE..ALCHEMICAL SYMBOL FOR HALF OUNCE @@ -1438,8 +1514,8 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] <private-use-F0000>..<private-use-FFFFD> 100000..10FFFD; 0 # Co [65534] <private-use-100000>..<private-use-10FFFD> -# The above property value applies to 867195 code points not listed here. -# Total code points: 1113506 +# The above property value applies to 866463 code points not listed here. 
+# Total code points: 1113459 # ================================================ @@ -1473,8 +1549,9 @@ F0000..FFFFD ; 0 # Co [65534] <private-use-F0000>..<private-use-FFFFD> 1C37 ; 7 # Mn LEPCHA SIGN NUKTA A9B3 ; 7 # Mn JAVANESE SIGN CECAK TELU 110BA ; 7 # Mn KAITHI SIGN NUKTA +116B7 ; 7 # Mn TAKRI SIGN NUKTA -# Total code points: 12 +# Total code points: 13 # ================================================ @@ -1507,22 +1584,27 @@ A9B3 ; 7 # Mn JAVANESE SIGN CECAK TELU 1A60 ; 9 # Mn TAI THAM SIGN SAKOT 1B44 ; 9 # Mc BALINESE ADEG ADEG 1BAA ; 9 # Mc SUNDANESE SIGN PAMAAEH +1BAB ; 9 # Mn SUNDANESE SIGN VIRAMA 1BF2..1BF3 ; 9 # Mc [2] BATAK PANGOLAT..BATAK PANONGONAN 2D7F ; 9 # Mn TIFINAGH CONSONANT JOINER A806 ; 9 # Mn SYLOTI NAGRI SIGN HASANTA A8C4 ; 9 # Mn SAURASHTRA SIGN VIRAMA A953 ; 9 # Mc REJANG VIRAMA A9C0 ; 9 # Mc JAVANESE PANGKON +AAF6 ; 9 # Mn MEETEI MAYEK VIRAMA ABED ; 9 # Mn MEETEI MAYEK APUN IYEK 10A3F ; 9 # Mn KHAROSHTHI VIRAMA 11046 ; 9 # Mn BRAHMI VIRAMA 110B9 ; 9 # Mn KAITHI SIGN VIRAMA +11133..11134 ; 9 # Mn [2] CHAKMA VIRAMA..CHAKMA MAAYYAA +111C0 ; 9 # Mc SHARADA SIGN VIRAMA +116B6 ; 9 # Mc TAKRI SIGN VIRAMA -# Total code points: 31 +# Total code points: 37 # ================================================ -# Canonical_Combining_Class=10 +# Canonical_Combining_Class=CCC10 05B0 ; 10 # Mn HEBREW POINT SHEVA @@ -1530,7 +1612,7 @@ ABED ; 9 # Mn MEETEI MAYEK APUN IYEK # ================================================ -# Canonical_Combining_Class=11 +# Canonical_Combining_Class=CCC11 05B1 ; 11 # Mn HEBREW POINT HATAF SEGOL @@ -1538,7 +1620,7 @@ ABED ; 9 # Mn MEETEI MAYEK APUN IYEK # ================================================ -# Canonical_Combining_Class=12 +# Canonical_Combining_Class=CCC12 05B2 ; 12 # Mn HEBREW POINT HATAF PATAH @@ -1546,7 +1628,7 @@ ABED ; 9 # Mn MEETEI MAYEK APUN IYEK # ================================================ -# Canonical_Combining_Class=13 +# Canonical_Combining_Class=CCC13 05B3 ; 13 # Mn HEBREW POINT HATAF QAMATS 
@@ -1554,7 +1636,7 @@ ABED ; 9 # Mn MEETEI MAYEK APUN IYEK # ================================================ -# Canonical_Combining_Class=14 +# Canonical_Combining_Class=CCC14 05B4 ; 14 # Mn HEBREW POINT HIRIQ @@ -1562,7 +1644,7 @@ ABED ; 9 # Mn MEETEI MAYEK APUN IYEK # ================================================ -# Canonical_Combining_Class=15 +# Canonical_Combining_Class=CCC15 05B5 ; 15 # Mn HEBREW POINT TSERE @@ -1570,7 +1652,7 @@ ABED ; 9 # Mn MEETEI MAYEK APUN IYEK # ================================================ -# Canonical_Combining_Class=16 +# Canonical_Combining_Class=CCC16 05B6 ; 16 # Mn HEBREW POINT SEGOL @@ -1578,7 +1660,7 @@ ABED ; 9 # Mn MEETEI MAYEK APUN IYEK # ================================================ -# Canonical_Combining_Class=17 +# Canonical_Combining_Class=CCC17 05B7 ; 17 # Mn HEBREW POINT PATAH @@ -1586,7 +1668,7 @@ ABED ; 9 # Mn MEETEI MAYEK APUN IYEK # ================================================ -# Canonical_Combining_Class=18 +# Canonical_Combining_Class=CCC18 05B8 ; 18 # Mn HEBREW POINT QAMATS 05C7 ; 18 # Mn HEBREW POINT QAMATS QATAN @@ -1595,7 +1677,7 @@ ABED ; 9 # Mn MEETEI MAYEK APUN IYEK # ================================================ -# Canonical_Combining_Class=19 +# Canonical_Combining_Class=CCC19 05B9..05BA ; 19 # Mn [2] HEBREW POINT HOLAM..HEBREW POINT HOLAM HASER FOR VAV @@ -1603,7 +1685,7 @@ ABED ; 9 # Mn MEETEI MAYEK APUN IYEK # ================================================ -# Canonical_Combining_Class=20 +# Canonical_Combining_Class=CCC20 05BB ; 20 # Mn HEBREW POINT QUBUTS @@ -1611,7 +1693,7 @@ ABED ; 9 # Mn MEETEI MAYEK APUN IYEK # ================================================ -# Canonical_Combining_Class=21 +# Canonical_Combining_Class=CCC21 05BC ; 21 # Mn HEBREW POINT DAGESH OR MAPIQ @@ -1619,7 +1701,7 @@ ABED ; 9 # Mn MEETEI MAYEK APUN IYEK # ================================================ -# Canonical_Combining_Class=22 +# Canonical_Combining_Class=CCC22 05BD ; 22 # Mn HEBREW POINT METEG @@ 
-1627,7 +1709,7 @@ ABED ; 9 # Mn MEETEI MAYEK APUN IYEK # ================================================ -# Canonical_Combining_Class=23 +# Canonical_Combining_Class=CCC23 05BF ; 23 # Mn HEBREW POINT RAFE @@ -1635,7 +1717,7 @@ ABED ; 9 # Mn MEETEI MAYEK APUN IYEK # ================================================ -# Canonical_Combining_Class=24 +# Canonical_Combining_Class=CCC24 05C1 ; 24 # Mn HEBREW POINT SHIN DOT @@ -1643,7 +1725,7 @@ ABED ; 9 # Mn MEETEI MAYEK APUN IYEK # ================================================ -# Canonical_Combining_Class=25 +# Canonical_Combining_Class=CCC25 05C2 ; 25 # Mn HEBREW POINT SIN DOT @@ -1651,7 +1733,7 @@ ABED ; 9 # Mn MEETEI MAYEK APUN IYEK # ================================================ -# Canonical_Combining_Class=26 +# Canonical_Combining_Class=CCC26 FB1E ; 26 # Mn HEBREW POINT JUDEO-SPANISH VARIKA @@ -1659,31 +1741,34 @@ FB1E ; 26 # Mn HEBREW POINT JUDEO-SPANISH VARIKA # ================================================ -# Canonical_Combining_Class=27 +# Canonical_Combining_Class=CCC27 064B ; 27 # Mn ARABIC FATHATAN +08F0 ; 27 # Mn ARABIC OPEN FATHATAN -# Total code points: 1 +# Total code points: 2 # ================================================ -# Canonical_Combining_Class=28 +# Canonical_Combining_Class=CCC28 064C ; 28 # Mn ARABIC DAMMATAN +08F1 ; 28 # Mn ARABIC OPEN DAMMATAN -# Total code points: 1 +# Total code points: 2 # ================================================ -# Canonical_Combining_Class=29 +# Canonical_Combining_Class=CCC29 064D ; 29 # Mn ARABIC KASRATAN +08F2 ; 29 # Mn ARABIC OPEN KASRATAN -# Total code points: 1 +# Total code points: 2 # ================================================ -# Canonical_Combining_Class=30 +# Canonical_Combining_Class=CCC30 0618 ; 30 # Mn ARABIC SMALL FATHA 064E ; 30 # Mn ARABIC FATHA @@ -1692,7 +1777,7 @@ FB1E ; 26 # Mn HEBREW POINT JUDEO-SPANISH VARIKA # ================================================ -# Canonical_Combining_Class=31 +# 
Canonical_Combining_Class=CCC31 0619 ; 31 # Mn ARABIC SMALL DAMMA 064F ; 31 # Mn ARABIC DAMMA @@ -1701,7 +1786,7 @@ FB1E ; 26 # Mn HEBREW POINT JUDEO-SPANISH VARIKA # ================================================ -# Canonical_Combining_Class=32 +# Canonical_Combining_Class=CCC32 061A ; 32 # Mn ARABIC SMALL KASRA 0650 ; 32 # Mn ARABIC KASRA @@ -1710,7 +1795,7 @@ FB1E ; 26 # Mn HEBREW POINT JUDEO-SPANISH VARIKA # ================================================ -# Canonical_Combining_Class=33 +# Canonical_Combining_Class=CCC33 0651 ; 33 # Mn ARABIC SHADDA @@ -1718,7 +1803,7 @@ FB1E ; 26 # Mn HEBREW POINT JUDEO-SPANISH VARIKA # ================================================ -# Canonical_Combining_Class=34 +# Canonical_Combining_Class=CCC34 0652 ; 34 # Mn ARABIC SUKUN @@ -1726,7 +1811,7 @@ FB1E ; 26 # Mn HEBREW POINT JUDEO-SPANISH VARIKA # ================================================ -# Canonical_Combining_Class=35 +# Canonical_Combining_Class=CCC35 0670 ; 35 # Mn ARABIC LETTER SUPERSCRIPT ALEF @@ -1734,7 +1819,7 @@ FB1E ; 26 # Mn HEBREW POINT JUDEO-SPANISH VARIKA # ================================================ -# Canonical_Combining_Class=36 +# Canonical_Combining_Class=CCC36 0711 ; 36 # Mn SYRIAC LETTER SUPERSCRIPT ALAPH @@ -1742,7 +1827,7 @@ FB1E ; 26 # Mn HEBREW POINT JUDEO-SPANISH VARIKA # ================================================ -# Canonical_Combining_Class=84 +# Canonical_Combining_Class=CCC84 0C55 ; 84 # Mn TELUGU LENGTH MARK @@ -1750,7 +1835,7 @@ FB1E ; 26 # Mn HEBREW POINT JUDEO-SPANISH VARIKA # ================================================ -# Canonical_Combining_Class=91 +# Canonical_Combining_Class=CCC91 0C56 ; 91 # Mn TELUGU AI LENGTH MARK @@ -1758,7 +1843,7 @@ FB1E ; 26 # Mn HEBREW POINT JUDEO-SPANISH VARIKA # ================================================ -# Canonical_Combining_Class=103 +# Canonical_Combining_Class=CCC103 0E38..0E39 ; 103 # Mn [2] THAI CHARACTER SARA U..THAI CHARACTER SARA UU @@ -1766,7 +1851,7 @@ FB1E ; 26 # Mn 
HEBREW POINT JUDEO-SPANISH VARIKA # ================================================ -# Canonical_Combining_Class=107 +# Canonical_Combining_Class=CCC107 0E48..0E4B ; 107 # Mn [4] THAI CHARACTER MAI EK..THAI CHARACTER MAI CHATTAWA @@ -1774,7 +1859,7 @@ FB1E ; 26 # Mn HEBREW POINT JUDEO-SPANISH VARIKA # ================================================ -# Canonical_Combining_Class=118 +# Canonical_Combining_Class=CCC118 0EB8..0EB9 ; 118 # Mn [2] LAO VOWEL SIGN U..LAO VOWEL SIGN UU @@ -1782,7 +1867,7 @@ FB1E ; 26 # Mn HEBREW POINT JUDEO-SPANISH VARIKA # ================================================ -# Canonical_Combining_Class=122 +# Canonical_Combining_Class=CCC122 0EC8..0ECB ; 122 # Mn [4] LAO TONE MAI EK..LAO TONE MAI CATAWA @@ -1790,7 +1875,7 @@ FB1E ; 26 # Mn HEBREW POINT JUDEO-SPANISH VARIKA # ================================================ -# Canonical_Combining_Class=129 +# Canonical_Combining_Class=CCC129 0F71 ; 129 # Mn TIBETAN VOWEL SIGN AA @@ -1798,7 +1883,7 @@ FB1E ; 26 # Mn HEBREW POINT JUDEO-SPANISH VARIKA # ================================================ -# Canonical_Combining_Class=130 +# Canonical_Combining_Class=CCC130 0F72 ; 130 # Mn TIBETAN VOWEL SIGN I 0F7A..0F7D ; 130 # Mn [4] TIBETAN VOWEL SIGN E..TIBETAN VOWEL SIGN OO @@ -1808,7 +1893,7 @@ FB1E ; 26 # Mn HEBREW POINT JUDEO-SPANISH VARIKA # ================================================ -# Canonical_Combining_Class=132 +# Canonical_Combining_Class=CCC133 0F74 ; 132 # Mn TIBETAN VOWEL SIGN U @@ -1887,6 +1972,11 @@ FB1E ; 26 # Mn HEBREW POINT JUDEO-SPANISH VARIKA 0748 ; 220 # Mn SYRIAC OBLIQUE LINE BELOW 07F2 ; 220 # Mn NKO COMBINING NASALIZATION MARK 0859..085B ; 220 # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK +08E6 ; 220 # Mn ARABIC CURLY KASRA +08E9 ; 220 # Mn ARABIC CURLY KASRATAN +08ED..08EF ; 220 # Mn [3] ARABIC TONE ONE DOT BELOW..ARABIC TONE LOOP BELOW +08F6 ; 220 # Mn ARABIC KASRA WITH DOT BELOW +08F9..08FA ; 220 # Mn [2] ARABIC LEFT ARROWHEAD BELOW..ARABIC RIGHT 
ARROWHEAD BELOW 0952 ; 220 # Mn DEVANAGARI STRESS SIGN ANUDATTA 0F18..0F19 ; 220 # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS 0F35 ; 220 # Mn TIBETAN MARK NGAS BZUNG NYI ZLA @@ -1915,7 +2005,7 @@ AAB4 ; 220 # Mn TAI VIET VOWEL U 1D17B..1D182 ; 220 # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE 1D18A..1D18B ; 220 # Mn [2] MUSICAL SYMBOL COMBINING DOUBLE TONGUE..MUSICAL SYMBOL COMBINING TRIPLE TONGUE -# Total code points: 121 +# Total code points: 129 # ================================================ @@ -1932,7 +2022,7 @@ AAB4 ; 220 # Mn TAI VIET VOWEL U # Canonical_Combining_Class=Left -302E..302F ; 224 # Mn [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK +302E..302F ; 224 # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK # Total code points: 2 @@ -1999,6 +2089,12 @@ AAB4 ; 220 # Mn TAI VIET VOWEL U 081B..0823 ; 230 # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A 0825..0827 ; 230 # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U 0829..082D ; 230 # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA +08E4..08E5 ; 230 # Mn [2] ARABIC CURLY FATHA..ARABIC CURLY DAMMA +08E7..08E8 ; 230 # Mn [2] ARABIC CURLY FATHATAN..ARABIC CURLY DAMMATAN +08EA..08EC ; 230 # Mn [3] ARABIC TONE ONE DOT ABOVE..ARABIC TONE LOOP ABOVE +08F3..08F5 ; 230 # Mn [3] ARABIC SMALL HIGH WAW..ARABIC FATHA WITH DOT ABOVE +08F7..08F8 ; 230 # Mn [2] ARABIC LEFT ARROWHEAD ABOVE..ARABIC RIGHT ARROWHEAD ABOVE +08FB..08FE ; 230 # Mn [4] ARABIC DOUBLE RIGHT ARROWHEAD ABOVE..ARABIC DAMMA WITH DOT 0951 ; 230 # Mn DEVANAGARI STRESS SIGN UDATTA 0953..0954 ; 230 # Mn [2] DEVANAGARI GRAVE ACCENT..DEVANAGARI ACUTE ACCENT 0F82..0F83 ; 230 # Mn [2] TIBETAN SIGN NYI ZLA NAA DA..TIBETAN SIGN SNA LDAN @@ -2013,6 +2109,7 @@ AAB4 ; 220 # Mn TAI VIET VOWEL U 1CD0..1CD2 ; 230 # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA 1CDA..1CDB ; 230 # Mn [2] VEDIC TONE DOUBLE SVARITA..VEDIC TONE TRIPLE 
SVARITA 1CE0 ; 230 # Mn VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA +1CF4 ; 230 # Mn VEDIC TONE CANDRA ABOVE 1DC0..1DC1 ; 230 # Mn [2] COMBINING DOTTED GRAVE ACCENT..COMBINING DOTTED ACUTE ACCENT 1DC3..1DC9 ; 230 # Mn [7] COMBINING SUSPENSION MARK..COMBINING ACUTE-GRAVE-ACUTE 1DCB..1DCC ; 230 # Mn [2] COMBINING BREVE-MACRON..COMBINING MACRON-BREVE @@ -2028,7 +2125,8 @@ AAB4 ; 220 # Mn TAI VIET VOWEL U 2CEF..2CF1 ; 230 # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS 2DE0..2DFF ; 230 # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS A66F ; 230 # Mn COMBINING CYRILLIC VZMET -A67C..A67D ; 230 # Mn [2] COMBINING CYRILLIC KAVYKA..COMBINING CYRILLIC PAYEROK +A674..A67D ; 230 # Mn [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK +A69F ; 230 # Mn COMBINING CYRILLIC LETTER IOTIFIED E A6F0..A6F1 ; 230 # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS A8E0..A8F1 ; 230 # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA AAB0 ; 230 # Mn TAI VIET MAI KANG @@ -2039,11 +2137,12 @@ AAC1 ; 230 # Mn TAI VIET TONE MAI THO FE20..FE26 ; 230 # Mn [7] COMBINING LIGATURE LEFT HALF..COMBINING CONJOINING MACRON 10A0F ; 230 # Mn KHAROSHTHI SIGN VISARGA 10A38 ; 230 # Mn KHAROSHTHI SIGN BAR ABOVE +11100..11102 ; 230 # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA 1D185..1D189 ; 230 # Mn [5] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING BEND 1D1AA..1D1AD ; 230 # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO 1D242..1D244 ; 230 # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME -# Total code points: 320 +# Total code points: 349 # ================================================ diff --git a/lib/unicore/extracted/DDecompositionType.txt b/lib/unicore/extracted/DDecompositionType.txt index ed03af79aa..7a40724f0c 100644 --- a/lib/unicore/extracted/DDecompositionType.txt +++ 
b/lib/unicore/extracted/DDecompositionType.txt @@ -1,8 +1,8 @@ -# DerivedDecompositionType-6.0.0.txt -# Date: 2010-05-18, 00:49:11 GMT [MD] +# DerivedDecompositionType-6.1.0.txt +# Date: 2011-07-25, 00:54:13 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ @@ -238,8 +238,7 @@ FA15..FA1E ; Canonical # Lo [10] CJK COMPATIBILITY IDEOGRAPH-FA15..CJK COMPA FA20 ; Canonical # Lo CJK COMPATIBILITY IDEOGRAPH-FA20 FA22 ; Canonical # Lo CJK COMPATIBILITY IDEOGRAPH-FA22 FA25..FA26 ; Canonical # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA25..CJK COMPATIBILITY IDEOGRAPH-FA26 -FA2A..FA2D ; Canonical # Lo [4] CJK COMPATIBILITY IDEOGRAPH-FA2A..CJK COMPATIBILITY IDEOGRAPH-FA2D -FA30..FA6D ; Canonical # Lo [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D +FA2A..FA6D ; Canonical # Lo [68] CJK COMPATIBILITY IDEOGRAPH-FA2A..CJK COMPATIBILITY IDEOGRAPH-FA6D FA70..FAD9 ; Canonical # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 FB1D ; Canonical # Lo HEBREW LETTER YOD WITH HIRIQ FB1F ; Canonical # Lo HEBREW LIGATURE YIDDISH YOD YOD PATAH @@ -252,11 +251,12 @@ FB46..FB4E ; Canonical # Lo [9] HEBREW LETTER TSADI WITH DAGESH..HEBREW LET 1109A ; Canonical # Lo KAITHI LETTER DDDHA 1109C ; Canonical # Lo KAITHI LETTER RHA 110AB ; Canonical # Lo KAITHI LETTER VA +1112E..1112F ; Canonical # Mn [2] CHAKMA VOWEL SIGN O..CHAKMA VOWEL SIGN AU 1D15E..1D164 ; Canonical # So [7] MUSICAL SYMBOL HALF NOTE..MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE 1D1BB..1D1C0 ; Canonical # So [6] MUSICAL SYMBOL MINIMA..MUSICAL SYMBOL FUSA BLACK 2F800..2FA1D ; Canonical # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D -# Total code points: 13221 +# Total code points: 13225 # ================================================ @@ -400,8 +400,41 @@ 
FB29 ; Font # Sm HEBREW LETTER ALTERNATIVE PLUS SIGN 1D7C3 ; Font # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL 1D7C4..1D7CB ; Font # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA 1D7CE..1D7FF ; Font # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE - -# Total code points: 1043 +1EE00..1EE03 ; Font # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; Font # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; Font # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; Font # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; Font # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; Font # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; Font # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; Font # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; Font # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; Font # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; Font # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; Font # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; Font # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; Font # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; Font # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; Font # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; Font # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; Font # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; Font # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; Font # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; Font # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; Font # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; Font # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; Font # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF 
+1EE6C..1EE72 ; Font # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; Font # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; Font # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; Font # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; Font # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; Font # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; Font # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; Font # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; Font # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN + +# Total code points: 1184 # ================================================ @@ -793,10 +826,10 @@ FEFB ; Isolated # Lo ARABIC LIGATURE LAM WITH ALEF ISOLATED FORM # ================================================ -00AA ; Super # L& FEMININE ORDINAL INDICATOR +00AA ; Super # Lo FEMININE ORDINAL INDICATOR 00B2..00B3 ; Super # No [2] SUPERSCRIPT TWO..SUPERSCRIPT THREE 00B9 ; Super # No SUPERSCRIPT ONE -00BA ; Super # L& MASCULINE ORDINAL INDICATOR +00BA ; Super # Lo MASCULINE ORDINAL INDICATOR 02B0..02B8 ; Super # Lm [9] MODIFIER LETTER SMALL H..MODIFIER LETTER SMALL Y 02E0..02E4 ; Super # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP 10FC ; Super # Lm MODIFIER LETTER GEORGIAN NAR @@ -820,18 +853,20 @@ FEFB ; Isolated # Lo ARABIC LIGATURE LAM WITH ALEF ISOLATED FORM 3192..3195 ; Super # No [4] IDEOGRAPHIC ANNOTATION ONE MARK..IDEOGRAPHIC ANNOTATION FOUR MARK 3196..319F ; Super # So [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK A770 ; Super # Lm MODIFIER LETTER US +A7F8..A7F9 ; Super # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER 
LETTER SMALL LIGATURE OE +1F16A..1F16B ; Super # So [2] RAISED MC SIGN..RAISED MD SIGN -# Total code points: 142 +# Total code points: 146 # ================================================ -1D62..1D6A ; Sub # L& [9] LATIN SUBSCRIPT SMALL LETTER I..GREEK SUBSCRIPT SMALL LETTER CHI +1D62..1D6A ; Sub # Lm [9] LATIN SUBSCRIPT SMALL LETTER I..GREEK SUBSCRIPT SMALL LETTER CHI 2080..2089 ; Sub # No [10] SUBSCRIPT ZERO..SUBSCRIPT NINE 208A..208C ; Sub # Sm [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN 208D ; Sub # Ps SUBSCRIPT LEFT PARENTHESIS 208E ; Sub # Pe SUBSCRIPT RIGHT PARENTHESIS 2090..209C ; Sub # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T -2C7C ; Sub # L& LATIN SUBSCRIPT SMALL LETTER J +2C7C ; Sub # Lm LATIN SUBSCRIPT SMALL LETTER J # Total code points: 38 diff --git a/lib/unicore/extracted/DEastAsianWidth.txt b/lib/unicore/extracted/DEastAsianWidth.txt index 778bde9f66..f55967a245 100644 --- a/lib/unicore/extracted/DEastAsianWidth.txt +++ b/lib/unicore/extracted/DEastAsianWidth.txt @@ -1,8 +1,8 @@ -# DerivedEastAsianWidth-6.0.0.txt -# Date: 2010-08-19, 00:48:08 GMT [MD] +# DerivedEastAsianWidth-6.1.0.txt +# Date: 2011-11-27, 05:10:22 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. 
# For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ @@ -118,6 +118,7 @@ 0561..0587 ; N # L& [39] ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LIGATURE ECH YIWN 0589 ; N # Po ARMENIAN FULL STOP 058A ; N # Pd ARMENIAN HYPHEN +058F ; N # Sc ARMENIAN DRAM SIGN 0591..05BD ; N # Mn [45] HEBREW ACCENT ETNAHTA..HEBREW POINT METEG 05BE ; N # Pd HEBREW PUNCTUATION MAQAF 05BF ; N # Mn HEBREW POINT RAFE @@ -130,7 +131,7 @@ 05D0..05EA ; N # Lo [27] HEBREW LETTER ALEF..HEBREW LETTER TAV 05F0..05F2 ; N # Lo [3] HEBREW LIGATURE YIDDISH DOUBLE VAV..HEBREW LIGATURE YIDDISH DOUBLE YOD 05F3..05F4 ; N # Po [2] HEBREW PUNCTUATION GERESH..HEBREW PUNCTUATION GERSHAYIM -0600..0603 ; N # Cf [4] ARABIC NUMBER SIGN..ARABIC SIGN SAFHA +0600..0604 ; N # Cf [5] ARABIC NUMBER SIGN..ARABIC SIGN SAMVAT 0606..0608 ; N # Sm [3] ARABIC-INDIC CUBE ROOT..ARABIC RAY 0609..060A ; N # Po [2] ARABIC-INDIC PER MILLE SIGN..ARABIC-INDIC PER TEN THOUSAND SIGN 060B ; N # Sc AFGHANI SIGN @@ -191,6 +192,9 @@ 0840..0858 ; N # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN 0859..085B ; N # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK 085E ; N # Po MANDAIC PUNCTUATION +08A0 ; N # Lo ARABIC LETTER BEH WITH SMALL V BELOW +08A2..08AC ; N # Lo [11] ARABIC LETTER JEEM WITH TWO DOTS ABOVE..ARABIC LETTER ROHINGYA YEH +08E4..08FE ; N # Mn [27] ARABIC CURLY FATHA..ARABIC DAMMA WITH DOT 0900..0902 ; N # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA 0903 ; N # Mc DEVANAGARI SIGN VISARGA 0904..0939 ; N # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA @@ -280,6 +284,7 @@ 0AE0..0AE1 ; N # Lo [2] GUJARATI LETTER VOCALIC RR..GUJARATI LETTER VOCALIC LL 0AE2..0AE3 ; N # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL 0AE6..0AEF ; N # Nd [10] GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE +0AF0 ; N # Po GUJARATI ABBREVIATION SIGN 0AF1 ; N # Sc GUJARATI RUPEE SIGN 0B01 ; N # Mn ORIYA SIGN CANDRABINDU 
0B02..0B03 ; N # Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA @@ -433,11 +438,13 @@ 0EC6 ; N # Lm LAO KO LA 0EC8..0ECD ; N # Mn [6] LAO TONE MAI EK..LAO NIGGAHITA 0ED0..0ED9 ; N # Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE -0EDC..0EDD ; N # Lo [2] LAO HO NO..LAO HO MO +0EDC..0EDF ; N # Lo [4] LAO HO NO..LAO LETTER KHMU NYO 0F00 ; N # Lo TIBETAN SYLLABLE OM 0F01..0F03 ; N # So [3] TIBETAN MARK GTER YIG MGO TRUNCATED A..TIBETAN MARK GTER YIG MGO -UM GTER TSHEG MA 0F04..0F12 ; N # Po [15] TIBETAN MARK INITIAL YIG MGO MDUN MA..TIBETAN MARK RGYA GRAM SHAD -0F13..0F17 ; N # So [5] TIBETAN MARK CARET -DZUD RTAGS ME LONG CAN..TIBETAN ASTROLOGICAL SIGN SGRA GCAN -CHAR RTAGS +0F13 ; N # So TIBETAN MARK CARET -DZUD RTAGS ME LONG CAN +0F14 ; N # Po TIBETAN MARK GTER TSHEG +0F15..0F17 ; N # So [3] TIBETAN LOGOTYPE SIGN CHAD RTAGS..TIBETAN ASTROLOGICAL SIGN SGRA GCAN -CHAR RTAGS 0F18..0F19 ; N # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS 0F1A..0F1F ; N # So [6] TIBETAN SIGN RDEL DKAR GCIG..TIBETAN SIGN RDEL DKAR RDEL NAG 0F20..0F29 ; N # Nd [10] TIBETAN DIGIT ZERO..TIBETAN DIGIT NINE @@ -506,9 +513,12 @@ 109D ; N # Mn MYANMAR VOWEL SIGN AITON AI 109E..109F ; N # So [2] MYANMAR SYMBOL SHAN ONE..MYANMAR SYMBOL SHAN EXCLAMATION 10A0..10C5 ; N # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; N # L& GEORGIAN CAPITAL LETTER YN +10CD ; N # L& GEORGIAN CAPITAL LETTER AEN 10D0..10FA ; N # Lo [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN 10FB ; N # Po GEORGIAN PARAGRAPH SEPARATOR 10FC ; N # Lm MODIFIER LETTER GEORGIAN NAR +10FD..10FF ; N # Lo [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN 1160..11A2 ; N # Lo [67] HANGUL JUNGSEONG FILLER..HANGUL JUNGSEONG SSANGARAEA 11A8..11F9 ; N # Lo [82] HANGUL JONGSEONG KIYEOK..HANGUL JONGSEONG YEORINHIEUH 1200..1248 ; N # Lo [73] ETHIOPIC SYLLABLE HA..ETHIOPIC SYLLABLE QWA @@ -528,8 +538,7 @@ 1312..1315 ; N # Lo [4] ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE 1318..135A ; N # Lo [67] 
ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA 135D..135F ; N # Mn [3] ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK..ETHIOPIC COMBINING GEMINATION MARK -1360 ; N # So ETHIOPIC SECTION MARK -1361..1368 ; N # Po [8] ETHIOPIC WORDSPACE..ETHIOPIC PARAGRAPH SEPARATOR +1360..1368 ; N # Po [9] ETHIOPIC SECTION MARK..ETHIOPIC PARAGRAPH SEPARATOR 1369..137C ; N # No [20] ETHIOPIC DIGIT ONE..ETHIOPIC NUMBER TEN THOUSAND 1380..138F ; N # Lo [16] ETHIOPIC SYLLABLE SEBATBEIT MWA..ETHIOPIC SYLLABLE PWE 1390..1399 ; N # So [10] ETHIOPIC TONAL MARK YIZET..ETHIOPIC TONAL MARK KURT @@ -557,7 +566,7 @@ 176E..1770 ; N # Lo [3] TAGBANWA LETTER LA..TAGBANWA LETTER SA 1772..1773 ; N # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U 1780..17B3 ; N # Lo [52] KHMER LETTER KA..KHMER INDEPENDENT VOWEL QAU -17B4..17B5 ; N # Cf [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA +17B4..17B5 ; N # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA 17B6 ; N # Mc KHMER VOWEL SIGN AA 17B7..17BD ; N # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA 17BE..17C5 ; N # Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU @@ -653,9 +662,11 @@ 1BA6..1BA7 ; N # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG 1BA8..1BA9 ; N # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG 1BAA ; N # Mc SUNDANESE SIGN PAMAAEH +1BAB ; N # Mn SUNDANESE SIGN VIRAMA +1BAC..1BAD ; N # Mc [2] SUNDANESE CONSONANT SIGN PASANGAN MA..SUNDANESE CONSONANT SIGN PASANGAN WA 1BAE..1BAF ; N # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA 1BB0..1BB9 ; N # Nd [10] SUNDANESE DIGIT ZERO..SUNDANESE DIGIT NINE -1BC0..1BE5 ; N # Lo [38] BATAK LETTER A..BATAK LETTER U +1BBA..1BE5 ; N # Lo [44] SUNDANESE AVAGRAHA..BATAK LETTER U 1BE6 ; N # Mn BATAK SIGN TOMPI 1BE7 ; N # Mc BATAK VOWEL SIGN E 1BE8..1BE9 ; N # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE @@ -677,6 +688,7 @@ 1C5A..1C77 ; N # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH 1C78..1C7D ; N # Lm [6] OL CHIKI MU TTUDDAG..OL 
CHIKI AHAD 1C7E..1C7F ; N # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD +1CC0..1CC7 ; N # Po [8] SUNDANESE PUNCTUATION BINDU SURYA..SUNDANESE PUNCTUATION BINDU BA SATANGA 1CD0..1CD2 ; N # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA 1CD3 ; N # Po VEDIC SIGN NIHSHVASA 1CD4..1CE0 ; N # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA @@ -685,10 +697,12 @@ 1CE9..1CEC ; N # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL 1CED ; N # Mn VEDIC SIGN TIRYAK 1CEE..1CF1 ; N # Lo [4] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ANUSVARA UBHAYATO MUKHA -1CF2 ; N # Mc VEDIC SIGN ARDHAVISARGA +1CF2..1CF3 ; N # Mc [2] VEDIC SIGN ARDHAVISARGA..VEDIC SIGN ROTATED ARDHAVISARGA +1CF4 ; N # Mn VEDIC TONE CANDRA ABOVE +1CF5..1CF6 ; N # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA 1D00..1D2B ; N # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL -1D2C..1D61 ; N # Lm [54] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI -1D62..1D77 ; N # L& [22] LATIN SUBSCRIPT SMALL LETTER I..LATIN SMALL LETTER TURNED G +1D2C..1D6A ; N # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1D6B..1D77 ; N # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G 1D78 ; N # Lm MODIFIER LETTER CYRILLIC EN 1D79..1D9A ; N # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK 1D9B..1DBF ; N # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA @@ -930,9 +944,7 @@ 27C0..27C4 ; N # Sm [5] THREE DIMENSIONAL ANGLE..OPEN SUPERSET 27C5 ; N # Ps LEFT S-SHAPED BAG DELIMITER 27C6 ; N # Pe RIGHT S-SHAPED BAG DELIMITER -27C7..27CA ; N # Sm [4] OR WITH DOT INSIDE..VERTICAL BAR WITH HORIZONTAL STROKE -27CC ; N # Sm LONG DIVISION -27CE..27E5 ; N # Sm [24] SQUARED LOGICAL AND..WHITE SQUARE WITH RIGHTWARDS TICK +27C7..27E5 ; N # Sm [31] OR WITH DOT INSIDE..WHITE SQUARE WITH RIGHTWARDS TICK 27EE ; N # Ps 
MATHEMATICAL LEFT FLATTENED PARENTHESIS 27EF ; N # Pe MATHEMATICAL RIGHT FLATTENED PARENTHESIS 27F0..27FF ; N # Sm [16] UPWARDS QUADRUPLE ARROW..LONG RIGHTWARDS SQUIGGLE ARROW @@ -974,17 +986,20 @@ 2B50..2B54 ; N # So [5] WHITE MEDIUM STAR..WHITE RIGHT-POINTING PENTAGON 2C00..2C2E ; N # L& [47] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE 2C30..2C5E ; N # L& [47] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER LATINATE MYSLITE -2C60..2C7C ; N # L& [29] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN SUBSCRIPT SMALL LETTER J -2C7D ; N # Lm MODIFIER LETTER CAPITAL V +2C60..2C7B ; N # L& [28] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN LETTER SMALL CAPITAL TURNED E +2C7C..2C7D ; N # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V 2C7E..2CE4 ; N # L& [103] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SYMBOL KAI 2CE5..2CEA ; N # So [6] COPTIC SYMBOL MI RO..COPTIC SYMBOL SHIMA SIMA 2CEB..2CEE ; N # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA 2CEF..2CF1 ; N # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS +2CF2..2CF3 ; N # L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI 2CF9..2CFC ; N # Po [4] COPTIC OLD NUBIAN FULL STOP..COPTIC OLD NUBIAN VERSE DIVIDER 2CFD ; N # No COPTIC FRACTION ONE HALF 2CFE..2CFF ; N # Po [2] COPTIC FULL STOP..COPTIC MORPHOLOGICAL DIVIDER 2D00..2D25 ; N # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE -2D30..2D65 ; N # Lo [54] TIFINAGH LETTER YA..TIFINAGH LETTER YAZZ +2D27 ; N # L& GEORGIAN SMALL LETTER YN +2D2D ; N # L& GEORGIAN SMALL LETTER AEN +2D30..2D67 ; N # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO 2D6F ; N # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK 2D70 ; N # Po TIFINAGH SEPARATOR MARK 2D7F ; N # Mn TIFINAGH CONSONANT JOINER @@ -1029,7 +1044,8 @@ 2E29 ; N # Pe RIGHT DOUBLE PARENTHESIS 2E2A..2E2E ; N # Po [5] TWO DOTS OVER ONE DOT PUNCTUATION..REVERSED QUESTION MARK 2E2F ; 
N # Lm VERTICAL TILDE -2E30..2E31 ; N # Po [2] RING POINT..WORD SEPARATOR MIDDLE DOT +2E30..2E39 ; N # Po [10] RING POINT..TOP HALF SECTION SIGN +2E3A..2E3B ; N # Pd [2] TWO-EM DASH..THREE-EM DASH 303F ; N # So IDEOGRAPHIC HALF FILL SPACE 4DC0..4DFF ; N # So [64] HEXAGRAM FOR THE CREATIVE HEAVEN..HEXAGRAM FOR BEFORE COMPLETION A4D0..A4F7 ; N # Lo [40] LISU LETTER BA..LISU LETTER OE @@ -1046,10 +1062,11 @@ A66E ; N # Lo CYRILLIC LETTER MULTIOCULAR O A66F ; N # Mn COMBINING CYRILLIC VZMET A670..A672 ; N # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN A673 ; N # Po SLAVONIC ASTERISK -A67C..A67D ; N # Mn [2] COMBINING CYRILLIC KAVYKA..COMBINING CYRILLIC PAYEROK +A674..A67D ; N # Mn [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK A67E ; N # Po CYRILLIC KAVYKA A67F ; N # Lm CYRILLIC PAYEROK A680..A697 ; N # L& [24] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER SHWE +A69F ; N # Mn COMBINING CYRILLIC LETTER IOTIFIED E A6A0..A6E5 ; N # Lo [70] BAMUM LETTER A..BAMUM LETTER KI A6E6..A6EF ; N # Nl [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM A6F0..A6F1 ; N # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS @@ -1063,8 +1080,9 @@ A771..A787 ; N # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR A788 ; N # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A789..A78A ; N # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN A78B..A78E ; N # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT -A790..A791 ; N # L& [2] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER N WITH DESCENDER -A7A0..A7A9 ; N # L& [10] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN SMALL LETTER S WITH OBLIQUE STROKE +A790..A793 ; N # L& [4] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER C WITH BAR +A7A0..A7AA ; N # L& [11] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN CAPITAL LETTER H WITH HOOK +A7F8..A7F9 ; N # Lm [2] MODIFIER LETTER CAPITAL H WITH 
STROKE..MODIFIER LETTER SMALL LIGATURE OE A7FA ; N # L& LATIN LETTER SMALL CAPITAL TURNED M A7FB..A801 ; N # Lo [7] LATIN EPIGRAPHIC LETTER REVERSED F..SYLOTI NAGRI LETTER I A802 ; N # Mn SYLOTI NAGRI SIGN DVISVARA @@ -1147,6 +1165,15 @@ AAC2 ; N # Lo TAI VIET TONE MAI SONG AADB..AADC ; N # Lo [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG AADD ; N # Lm TAI VIET SYMBOL SAM AADE..AADF ; N # Po [2] TAI VIET SYMBOL HO HOI..TAI VIET SYMBOL KOI KOI +AAE0..AAEA ; N # Lo [11] MEETEI MAYEK LETTER E..MEETEI MAYEK LETTER SSA +AAEB ; N # Mc MEETEI MAYEK VOWEL SIGN II +AAEC..AAED ; N # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI +AAEE..AAEF ; N # Mc [2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU +AAF0..AAF1 ; N # Po [2] MEETEI MAYEK CHEIKHAN..MEETEI MAYEK AHANG KHUDAM +AAF2 ; N # Lo MEETEI MAYEK ANJI +AAF3..AAF4 ; N # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK +AAF5 ; N # Mc MEETEI MAYEK VOWEL SIGN VISARGA +AAF6 ; N # Mn MEETEI MAYEK VIRAMA AB01..AB06 ; N # Lo [6] ETHIOPIC SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO AB09..AB0E ; N # Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO AB11..AB16 ; N # Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO @@ -1196,8 +1223,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 1003F..1004D ; N # Lo [15] LINEAR B SYLLABLE B020 ZO..LINEAR B SYLLABLE B091 TWO 10050..1005D ; N # Lo [14] LINEAR B SYMBOL B018..LINEAR B SYMBOL B089 10080..100FA ; N # Lo [123] LINEAR B IDEOGRAM B100 MAN..LINEAR B IDEOGRAM VESSEL B305 -10100..10101 ; N # Po [2] AEGEAN WORD SEPARATOR LINE..AEGEAN WORD SEPARATOR DOT -10102 ; N # So AEGEAN CHECK MARK +10100..10102 ; N # Po [3] AEGEAN WORD SEPARATOR LINE..AEGEAN CHECK MARK 10107..10133 ; N # No [45] AEGEAN NUMBER ONE..AEGEAN NUMBER NINETY THOUSAND 10137..1013F ; N # So [9] AEGEAN WEIGHT BASE UNIT..AEGEAN MEASURE THIRD SUBUNIT 10140..10174 ; N # Nl [53] GREEK ACROPHONIC ATTIC ONE QUARTER..GREEK ACROPHONIC STRATIAN FIFTY MNAS @@ -1237,6 +1263,8 @@ FFFC ; N 
# So OBJECT REPLACEMENT CHARACTER 1091F ; N # Po PHOENICIAN WORD SEPARATOR 10920..10939 ; N # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C 1093F ; N # Po LYDIAN TRIANGULAR MARK +10980..109B7 ; N # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA +109BE..109BF ; N # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN 10A00 ; N # Lo KHAROSHTHI LETTER A 10A01..10A03 ; N # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R 10A05..10A06 ; N # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O @@ -1277,11 +1305,43 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 110BB..110BC ; N # Po [2] KAITHI ABBREVIATION SIGN..KAITHI ENUMERATION SIGN 110BD ; N # Cf KAITHI NUMBER SIGN 110BE..110C1 ; N # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA +110D0..110E8 ; N # Lo [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE +110F0..110F9 ; N # Nd [10] SORA SOMPENG DIGIT ZERO..SORA SOMPENG DIGIT NINE +11100..11102 ; N # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11103..11126 ; N # Lo [36] CHAKMA LETTER AA..CHAKMA LETTER HAA +11127..1112B ; N # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112C ; N # Mc CHAKMA VOWEL SIGN E +1112D..11134 ; N # Mn [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA +11136..1113F ; N # Nd [10] CHAKMA DIGIT ZERO..CHAKMA DIGIT NINE +11140..11143 ; N # Po [4] CHAKMA SECTION MARK..CHAKMA QUESTION MARK +11180..11181 ; N # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +11182 ; N # Mc SHARADA SIGN VISARGA +11183..111B2 ; N # Lo [48] SHARADA LETTER A..SHARADA LETTER HA +111B3..111B5 ; N # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II +111B6..111BE ; N # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +111BF..111C0 ; N # Mc [2] SHARADA VOWEL SIGN AU..SHARADA SIGN VIRAMA +111C1..111C4 ; N # Lo [4] SHARADA SIGN AVAGRAHA..SHARADA OM +111C5..111C8 ; N # Po [4] SHARADA DANDA..SHARADA SEPARATOR +111D0..111D9 ; N # Nd [10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE +11680..116AA ; N # Lo [43] TAKRI LETTER 
A..TAKRI LETTER RRA +116AB ; N # Mn TAKRI SIGN ANUSVARA +116AC ; N # Mc TAKRI SIGN VISARGA +116AD ; N # Mn TAKRI VOWEL SIGN AA +116AE..116AF ; N # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II +116B0..116B5 ; N # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +116B6 ; N # Mc TAKRI SIGN VIRAMA +116B7 ; N # Mn TAKRI SIGN NUKTA +116C0..116C9 ; N # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE 12000..1236E ; N # Lo [879] CUNEIFORM SIGN A..CUNEIFORM SIGN ZUM 12400..12462 ; N # Nl [99] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN OLD ASSYRIAN ONE QUARTER 12470..12473 ; N # Po [4] CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER..CUNEIFORM PUNCTUATION SIGN DIAGONAL TRICOLON 13000..1342E ; N # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032 16800..16A38 ; N # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ +16F00..16F44 ; N # Lo [69] MIAO LETTER PA..MIAO LETTER HHA +16F50 ; N # Lo MIAO LETTER NASALIZATION +16F51..16F7E ; N # Mc [46] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN NG +16F8F..16F92 ; N # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW +16F93..16F9F ; N # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 1D000..1D0F5 ; N # So [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO 1D100..1D126 ; N # So [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2 1D129..1D164 ; N # So [60] MUSICAL SYMBOL MULTIPLE MEASURE REST..MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE @@ -1342,6 +1402,40 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 1D7C3 ; N # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL 1D7C4..1D7CB ; N # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA 1D7CE..1D7FF ; N # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE +1EE00..1EE03 ; N # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; N # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; N # Lo [2] ARABIC 
MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; N # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; N # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; N # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; N # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; N # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; N # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; N # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; N # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; N # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; N # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; N # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; N # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; N # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; N # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; N # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; N # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; N # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; N # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; N # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; N # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; N # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; N # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; N # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; N # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; N # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; N # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; N # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; N # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL 
DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; N # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; N # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN +1EEF0..1EEF1 ; N # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL 1F000..1F02B ; N # So [44] MAHJONG TILE EAST WIND..MAHJONG TILE BACK 1F030..1F093 ; N # So [100] DOMINO TILE HORIZONTAL BACK..DOMINO TILE VERTICAL-06-06 1F0A0..1F0AE ; N # So [15] PLAYING CARD BACK..PLAYING CARD KING OF SPADES @@ -1349,6 +1443,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 1F0C1..1F0CF ; N # So [15] PLAYING CARD ACE OF DIAMONDS..PLAYING CARD BLACK JOKER 1F0D1..1F0DF ; N # So [15] PLAYING CARD ACE OF CLUBS..PLAYING CARD WHITE JOKER 1F12E ; N # So CIRCLED WZ +1F16A..1F16B ; N # So [2] RAISED MC SIGN..RAISED MD SIGN 1F1E6..1F1FF ; N # So [26] REGIONAL INDICATOR SYMBOL LETTER A..REGIONAL INDICATOR SYMBOL LETTER Z 1F300..1F320 ; N # So [33] CYCLONE..SHOOTING STAR 1F330..1F335 ; N # So [6] CHESTNUT..CACTUS @@ -1362,26 +1457,16 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 1F442..1F4F7 ; N # So [182] EAR..CAMERA 1F4F9..1F4FC ; N # So [4] VIDEO CAMERA..VIDEOCASSETTE 1F500..1F53D ; N # So [62] TWISTED RIGHTWARDS ARROWS..DOWN-POINTING SMALL RED TRIANGLE +1F540..1F543 ; N # So [4] CIRCLED CROSS POMMEE..NOTCHED LEFT SEMICIRCLE WITH THREE DOTS 1F550..1F567 ; N # So [24] CLOCK FACE ONE OCLOCK..CLOCK FACE TWELVE-THIRTY -1F5FB..1F5FF ; N # So [5] MOUNT FUJI..MOYAI -1F601..1F610 ; N # So [16] GRINNING FACE WITH SMILING EYES..NEUTRAL FACE -1F612..1F614 ; N # So [3] UNAMUSED FACE..PENSIVE FACE -1F616 ; N # So CONFOUNDED FACE -1F618 ; N # So FACE THROWING A KISS -1F61A ; N # So KISSING FACE WITH CLOSED EYES -1F61C..1F61E ; N # So [3] FACE WITH STUCK-OUT TONGUE AND WINKING EYE..DISAPPOINTED FACE -1F620..1F625 ; N # So [6] ANGRY FACE..DISAPPOINTED BUT RELIEVED FACE -1F628..1F62B ; N # So [4] FEARFUL FACE..TIRED FACE 
-1F62D ; N # So LOUDLY CRYING FACE -1F630..1F633 ; N # So [4] FACE WITH OPEN MOUTH AND COLD SWEAT..FLUSHED FACE -1F635..1F640 ; N # So [12] DIZZY FACE..WEARY CAT FACE +1F5FB..1F640 ; N # So [70] MOUNT FUJI..WEARY CAT FACE 1F645..1F64F ; N # So [11] FACE WITH NO GOOD GESTURE..PERSON WITH FOLDED HANDS 1F680..1F6C5 ; N # So [70] ROCKET..LEFT LUGGAGE 1F700..1F773 ; N # So [116] ALCHEMICAL SYMBOL FOR QUINTESSENCE..ALCHEMICAL SYMBOL FOR HALF OUNCE E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 783647 code points not listed here. +# The above property value applies to 782918 code points not listed here. # Total code points: 801811 # ================================================ @@ -1390,20 +1475,19 @@ E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG 00A1 ; A # Po INVERTED EXCLAMATION MARK 00A4 ; A # Sc CURRENCY SIGN -00A7 ; A # So SECTION SIGN +00A7 ; A # Po SECTION SIGN 00A8 ; A # Sk DIAERESIS -00AA ; A # L& FEMININE ORDINAL INDICATOR +00AA ; A # Lo FEMININE ORDINAL INDICATOR 00AD ; A # Cf SOFT HYPHEN 00AE ; A # So REGISTERED SIGN 00B0 ; A # So DEGREE SIGN 00B1 ; A # Sm PLUS-MINUS SIGN 00B2..00B3 ; A # No [2] SUPERSCRIPT TWO..SUPERSCRIPT THREE 00B4 ; A # Sk ACUTE ACCENT -00B6 ; A # So PILCROW SIGN -00B7 ; A # Po MIDDLE DOT +00B6..00B7 ; A # Po [2] PILCROW SIGN..MIDDLE DOT 00B8 ; A # Sk CEDILLA 00B9 ; A # No SUPERSCRIPT ONE -00BA ; A # L& MASCULINE ORDINAL INDICATOR +00BA ; A # Lo MASCULINE ORDINAL INDICATOR 00BC..00BE ; A # No [3] VULGAR FRACTION ONE QUARTER..VULGAR FRACTION THREE QUARTERS 00BF ; A # Po INVERTED QUESTION MARK 00C6 ; A # L& LATIN CAPITAL LETTER AE @@ -1570,7 +1654,7 @@ E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG 2757 ; A # So HEAVY EXCLAMATION MARK SYMBOL 2776..277F ; A # No [10] DINGBAT NEGATIVE CIRCLED DIGIT ONE..DINGBAT NEGATIVE CIRCLED NUMBER TEN 2B55..2B59 ; A # So [5] HEAVY LARGE CIRCLE..HEAVY CIRCLED SALTIRE -3248..324F ; A # So [8] CIRCLED NUMBER TEN ON BLACK 
SQUARE..CIRCLED NUMBER EIGHTY ON BLACK SQUARE +3248..324F ; A # No [8] CIRCLED NUMBER TEN ON BLACK SQUARE..CIRCLED NUMBER EIGHTY ON BLACK SQUARE E000..F8FF ; A # Co [6400] <private-use-E000>..<private-use-F8FF> FE00..FE0F ; A # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16 FFFD ; A # So REPLACEMENT CHARACTER @@ -1650,7 +1734,8 @@ FFED..FFEE ; H # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 301E..301F ; W # Pe [2] DOUBLE PRIME QUOTATION MARK..LOW DOUBLE PRIME QUOTATION MARK 3020 ; W # So POSTAL MARK FACE 3021..3029 ; W # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE -302A..302F ; W # Mn [6] IDEOGRAPHIC LEVEL TONE MARK..HANGUL DOUBLE DOT TONE MARK +302A..302D ; W # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK +302E..302F ; W # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK 3030 ; W # Pd WAVY DASH 3031..3035 ; W # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF 3036..3037 ; W # So [2] CIRCLED POSTAL MARK..IDEOGRAPHIC TELEGRAPH LINE FEED SEPARATOR SYMBOL @@ -1690,8 +1775,8 @@ FFED..FFEE ; H # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 3300..33FF ; W # So [256] SQUARE APAATO..SQUARE GAL 3400..4DB5 ; W # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5 4DB6..4DBF ; W # Cn [10] <reserved-4DB6>..<reserved-4DBF> -4E00..9FCB ; W # Lo [20940] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCB -9FCC..9FFF ; W # Cn [52] <reserved-9FCC>..<reserved-9FFF> +4E00..9FCC ; W # Lo [20941] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCC +9FCD..9FFF ; W # Cn [51] <reserved-9FCD>..<reserved-9FFF> A000..A014 ; W # Lo [21] YI SYLLABLE IT..YI SYLLABLE E A015 ; W # Lm YI SYLLABLE WU A016..A48C ; W # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR @@ -1700,9 +1785,7 @@ A960..A97C ; W # Lo [29] HANGUL CHOSEONG TIKEUT-MIEUM..HANGUL CHOSEONG SSANG AC00..D7A3 ; W # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH D7B0..D7C6 ; W # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG 
ARAEA-E D7CB..D7FB ; W # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH -F900..FA2D ; W # Lo [302] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA2D -FA2E..FA2F ; W # Cn [2] <reserved-FA2E>..<reserved-FA2F> -FA30..FA6D ; W # Lo [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D +F900..FA6D ; W # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D FA6E..FA6F ; W # Cn [2] <reserved-FA6E>..<reserved-FA6F> FA70..FAD9 ; W # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 FADA..FAFF ; W # Cn [38] <reserved-FADA>..<reserved-FAFF> diff --git a/lib/unicore/extracted/DGeneralCategory.txt b/lib/unicore/extracted/DGeneralCategory.txt index ee2bbb7bbd..12a346f753 100644 --- a/lib/unicore/extracted/DGeneralCategory.txt +++ b/lib/unicore/extracted/DGeneralCategory.txt @@ -1,8 +1,8 @@ -# DerivedGeneralCategory-6.0.0.txt -# Date: 2010-08-19, 00:48:09 GMT [MD] +# DerivedGeneralCategory-6.1.0.txt +# Date: 2011-11-27, 05:10:22 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. 
# For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ @@ -23,11 +23,12 @@ 0557..0558 ; Cn # [2] <reserved-0557>..<reserved-0558> 0560 ; Cn # <reserved-0560> 0588 ; Cn # <reserved-0588> -058B..0590 ; Cn # [6] <reserved-058B>..<reserved-0590> +058B..058E ; Cn # [4] <reserved-058B>..<reserved-058E> +0590 ; Cn # <reserved-0590> 05C8..05CF ; Cn # [8] <reserved-05C8>..<reserved-05CF> 05EB..05EF ; Cn # [5] <reserved-05EB>..<reserved-05EF> 05F5..05FF ; Cn # [11] <reserved-05F5>..<reserved-05FF> -0604..0605 ; Cn # [2] <reserved-0604>..<reserved-0605> +0605 ; Cn # <reserved-0605> 061C..061D ; Cn # [2] <reserved-061C>..<reserved-061D> 070E ; Cn # <reserved-070E> 074B..074C ; Cn # [2] <reserved-074B>..<reserved-074C> @@ -36,7 +37,10 @@ 082E..082F ; Cn # [2] <reserved-082E>..<reserved-082F> 083F ; Cn # <reserved-083F> 085C..085D ; Cn # [2] <reserved-085C>..<reserved-085D> -085F..08FF ; Cn # [161] <reserved-085F>..<reserved-08FF> +085F..089F ; Cn # [65] <reserved-085F>..<reserved-089F> +08A1 ; Cn # <reserved-08A1> +08AD..08E3 ; Cn # [55] <reserved-08AD>..<reserved-08E3> +08FF ; Cn # <reserved-08FF> 0978 ; Cn # <reserved-0978> 0980 ; Cn # <reserved-0980> 0984 ; Cn # <reserved-0984> @@ -81,7 +85,6 @@ 0ACE..0ACF ; Cn # [2] <reserved-0ACE>..<reserved-0ACF> 0AD1..0ADF ; Cn # [15] <reserved-0AD1>..<reserved-0ADF> 0AE4..0AE5 ; Cn # [2] <reserved-0AE4>..<reserved-0AE5> -0AF0 ; Cn # <reserved-0AF0> 0AF2..0B00 ; Cn # [15] <reserved-0AF2>..<reserved-0B00> 0B04 ; Cn # <reserved-0B04> 0B0D..0B0E ; Cn # [2] <reserved-0B0D>..<reserved-0B0E> @@ -182,15 +185,16 @@ 0EC7 ; Cn # <reserved-0EC7> 0ECE..0ECF ; Cn # [2] <reserved-0ECE>..<reserved-0ECF> 0EDA..0EDB ; Cn # [2] <reserved-0EDA>..<reserved-0EDB> -0EDE..0EFF ; Cn # [34] <reserved-0EDE>..<reserved-0EFF> +0EE0..0EFF ; Cn # [32] <reserved-0EE0>..<reserved-0EFF> 0F48 ; Cn # <reserved-0F48> 0F6D..0F70 ; Cn # [4] <reserved-0F6D>..<reserved-0F70> 0F98 ; Cn # <reserved-0F98> 
0FBD ; Cn # <reserved-0FBD> 0FCD ; Cn # <reserved-0FCD> 0FDB..0FFF ; Cn # [37] <reserved-0FDB>..<reserved-0FFF> -10C6..10CF ; Cn # [10] <reserved-10C6>..<reserved-10CF> -10FD..10FF ; Cn # [3] <reserved-10FD>..<reserved-10FF> +10C6 ; Cn # <reserved-10C6> +10C8..10CC ; Cn # [5] <reserved-10C8>..<reserved-10CC> +10CE..10CF ; Cn # [2] <reserved-10CE>..<reserved-10CF> 1249 ; Cn # <reserved-1249> 124E..124F ; Cn # [2] <reserved-124E>..<reserved-124F> 1257 ; Cn # <reserved-1257> @@ -244,13 +248,12 @@ 1AAE..1AFF ; Cn # [82] <reserved-1AAE>..<reserved-1AFF> 1B4C..1B4F ; Cn # [4] <reserved-1B4C>..<reserved-1B4F> 1B7D..1B7F ; Cn # [3] <reserved-1B7D>..<reserved-1B7F> -1BAB..1BAD ; Cn # [3] <reserved-1BAB>..<reserved-1BAD> -1BBA..1BBF ; Cn # [6] <reserved-1BBA>..<reserved-1BBF> 1BF4..1BFB ; Cn # [8] <reserved-1BF4>..<reserved-1BFB> 1C38..1C3A ; Cn # [3] <reserved-1C38>..<reserved-1C3A> 1C4A..1C4C ; Cn # [3] <reserved-1C4A>..<reserved-1C4C> -1C80..1CCF ; Cn # [80] <reserved-1C80>..<reserved-1CCF> -1CF3..1CFF ; Cn # [13] <reserved-1CF3>..<reserved-1CFF> +1C80..1CBF ; Cn # [64] <reserved-1C80>..<reserved-1CBF> +1CC8..1CCF ; Cn # [8] <reserved-1CC8>..<reserved-1CCF> +1CF7..1CFF ; Cn # [9] <reserved-1CF7>..<reserved-1CFF> 1DE7..1DFB ; Cn # [21] <reserved-1DE7>..<reserved-1DFB> 1F16..1F17 ; Cn # [2] <reserved-1F16>..<reserved-1F17> 1F1E..1F1F ; Cn # [2] <reserved-1F1E>..<reserved-1F1F> @@ -279,15 +282,15 @@ 2427..243F ; Cn # [25] <reserved-2427>..<reserved-243F> 244B..245F ; Cn # [21] <reserved-244B>..<reserved-245F> 2700 ; Cn # <reserved-2700> -27CB ; Cn # <reserved-27CB> -27CD ; Cn # <reserved-27CD> 2B4D..2B4F ; Cn # [3] <reserved-2B4D>..<reserved-2B4F> 2B5A..2BFF ; Cn # [166] <reserved-2B5A>..<reserved-2BFF> 2C2F ; Cn # <reserved-2C2F> 2C5F ; Cn # <reserved-2C5F> -2CF2..2CF8 ; Cn # [7] <reserved-2CF2>..<reserved-2CF8> -2D26..2D2F ; Cn # [10] <reserved-2D26>..<reserved-2D2F> -2D66..2D6E ; Cn # [9] <reserved-2D66>..<reserved-2D6E> +2CF4..2CF8 ; Cn # [5] 
<reserved-2CF4>..<reserved-2CF8> +2D26 ; Cn # <reserved-2D26> +2D28..2D2C ; Cn # [5] <reserved-2D28>..<reserved-2D2C> +2D2E..2D2F ; Cn # [2] <reserved-2D2E>..<reserved-2D2F> +2D68..2D6E ; Cn # [7] <reserved-2D68>..<reserved-2D6E> 2D71..2D7E ; Cn # [14] <reserved-2D71>..<reserved-2D7E> 2D97..2D9F ; Cn # [9] <reserved-2D97>..<reserved-2D9F> 2DA7 ; Cn # <reserved-2DA7> @@ -298,7 +301,7 @@ 2DCF ; Cn # <reserved-2DCF> 2DD7 ; Cn # <reserved-2DD7> 2DDF ; Cn # <reserved-2DDF> -2E32..2E7F ; Cn # [78] <reserved-2E32>..<reserved-2E7F> +2E3C..2E7F ; Cn # [68] <reserved-2E3C>..<reserved-2E7F> 2E9A ; Cn # <reserved-2E9A> 2EF4..2EFF ; Cn # [12] <reserved-2EF4>..<reserved-2EFF> 2FD6..2FEF ; Cn # [26] <reserved-2FD6>..<reserved-2FEF> @@ -313,16 +316,15 @@ 321F ; Cn # <reserved-321F> 32FF ; Cn # <reserved-32FF> 4DB6..4DBF ; Cn # [10] <reserved-4DB6>..<reserved-4DBF> -9FCC..9FFF ; Cn # [52] <reserved-9FCC>..<reserved-9FFF> +9FCD..9FFF ; Cn # [51] <reserved-9FCD>..<reserved-9FFF> A48D..A48F ; Cn # [3] <reserved-A48D>..<reserved-A48F> A4C7..A4CF ; Cn # [9] <reserved-A4C7>..<reserved-A4CF> A62C..A63F ; Cn # [20] <reserved-A62C>..<reserved-A63F> -A674..A67B ; Cn # [8] <reserved-A674>..<reserved-A67B> -A698..A69F ; Cn # [8] <reserved-A698>..<reserved-A69F> +A698..A69E ; Cn # [7] <reserved-A698>..<reserved-A69E> A6F8..A6FF ; Cn # [8] <reserved-A6F8>..<reserved-A6FF> A78F ; Cn # <reserved-A78F> -A792..A79F ; Cn # [14] <reserved-A792>..<reserved-A79F> -A7AA..A7F9 ; Cn # [80] <reserved-A7AA>..<reserved-A7F9> +A794..A79F ; Cn # [12] <reserved-A794>..<reserved-A79F> +A7AB..A7F7 ; Cn # [77] <reserved-A7AB>..<reserved-A7F7> A82C..A82F ; Cn # [4] <reserved-A82C>..<reserved-A82F> A83A..A83F ; Cn # [6] <reserved-A83A>..<reserved-A83F> A878..A87F ; Cn # [8] <reserved-A878>..<reserved-A87F> @@ -339,7 +341,7 @@ AA4E..AA4F ; Cn # [2] <reserved-AA4E>..<reserved-AA4F> AA5A..AA5B ; Cn # [2] <reserved-AA5A>..<reserved-AA5B> AA7C..AA7F ; Cn # [4] <reserved-AA7C>..<reserved-AA7F> AAC3..AADA ; Cn # [24] 
<reserved-AAC3>..<reserved-AADA> -AAE0..AB00 ; Cn # [33] <reserved-AAE0>..<reserved-AB00> +AAF7..AB00 ; Cn # [10] <reserved-AAF7>..<reserved-AB00> AB07..AB08 ; Cn # [2] <reserved-AB07>..<reserved-AB08> AB0F..AB10 ; Cn # [2] <reserved-AB0F>..<reserved-AB10> AB17..AB1F ; Cn # [9] <reserved-AB17>..<reserved-AB1F> @@ -350,7 +352,6 @@ ABFA..ABFF ; Cn # [6] <reserved-ABFA>..<reserved-ABFF> D7A4..D7AF ; Cn # [12] <reserved-D7A4>..<reserved-D7AF> D7C7..D7CA ; Cn # [4] <reserved-D7C7>..<reserved-D7CA> D7FC..D7FF ; Cn # [4] <reserved-D7FC>..<reserved-D7FF> -FA2E..FA2F ; Cn # [2] <reserved-FA2E>..<reserved-FA2F> FA6E..FA6F ; Cn # [2] <reserved-FA6E>..<reserved-FA6F> FADA..FAFF ; Cn # [38] <reserved-FADA>..<reserved-FAFF> FB07..FB12 ; Cn # [12] <reserved-FB07>..<reserved-FB12> @@ -412,7 +413,9 @@ FFFE..FFFF ; Cn # [2] <noncharacter-FFFE>..<noncharacter-FFFF> 10860..108FF ; Cn # [160] <reserved-10860>..<reserved-108FF> 1091C..1091E ; Cn # [3] <reserved-1091C>..<reserved-1091E> 1093A..1093E ; Cn # [5] <reserved-1093A>..<reserved-1093E> -10940..109FF ; Cn # [192] <reserved-10940>..<reserved-109FF> +10940..1097F ; Cn # [64] <reserved-10940>..<reserved-1097F> +109B8..109BD ; Cn # [6] <reserved-109B8>..<reserved-109BD> +109C0..109FF ; Cn # [64] <reserved-109C0>..<reserved-109FF> 10A04 ; Cn # <reserved-10A04> 10A07..10A0B ; Cn # [5] <reserved-10A07>..<reserved-10A0B> 10A14 ; Cn # <reserved-10A14> @@ -430,12 +433,23 @@ FFFE..FFFF ; Cn # [2] <noncharacter-FFFE>..<noncharacter-FFFF> 10E7F..10FFF ; Cn # [385] <reserved-10E7F>..<reserved-10FFF> 1104E..11051 ; Cn # [4] <reserved-1104E>..<reserved-11051> 11070..1107F ; Cn # [16] <reserved-11070>..<reserved-1107F> -110C2..11FFF ; Cn # [3902] <reserved-110C2>..<reserved-11FFF> +110C2..110CF ; Cn # [14] <reserved-110C2>..<reserved-110CF> +110E9..110EF ; Cn # [7] <reserved-110E9>..<reserved-110EF> +110FA..110FF ; Cn # [6] <reserved-110FA>..<reserved-110FF> +11135 ; Cn # <reserved-11135> +11144..1117F ; Cn # [60] 
<reserved-11144>..<reserved-1117F> +111C9..111CF ; Cn # [7] <reserved-111C9>..<reserved-111CF> +111DA..1167F ; Cn # [1190] <reserved-111DA>..<reserved-1167F> +116B8..116BF ; Cn # [8] <reserved-116B8>..<reserved-116BF> +116CA..11FFF ; Cn # [2358] <reserved-116CA>..<reserved-11FFF> 1236F..123FF ; Cn # [145] <reserved-1236F>..<reserved-123FF> 12463..1246F ; Cn # [13] <reserved-12463>..<reserved-1246F> 12474..12FFF ; Cn # [2956] <reserved-12474>..<reserved-12FFF> 1342F..167FF ; Cn # [13265] <reserved-1342F>..<reserved-167FF> -16A39..1AFFF ; Cn # [17863] <reserved-16A39>..<reserved-1AFFF> +16A39..16EFF ; Cn # [1223] <reserved-16A39>..<reserved-16EFF> +16F45..16F4F ; Cn # [11] <reserved-16F45>..<reserved-16F4F> +16F7F..16F8E ; Cn # [16] <reserved-16F7F>..<reserved-16F8E> +16FA0..1AFFF ; Cn # [16480] <reserved-16FA0>..<reserved-1AFFF> 1B002..1CFFF ; Cn # [8190] <reserved-1B002>..<reserved-1CFFF> 1D0F6..1D0FF ; Cn # [10] <reserved-1D0F6>..<reserved-1D0FF> 1D127..1D128 ; Cn # [2] <reserved-1D127>..<reserved-1D128> @@ -463,7 +477,41 @@ FFFE..FFFF ; Cn # [2] <noncharacter-FFFE>..<noncharacter-FFFF> 1D551 ; Cn # <reserved-1D551> 1D6A6..1D6A7 ; Cn # [2] <reserved-1D6A6>..<reserved-1D6A7> 1D7CC..1D7CD ; Cn # [2] <reserved-1D7CC>..<reserved-1D7CD> -1D800..1EFFF ; Cn # [6144] <reserved-1D800>..<reserved-1EFFF> +1D800..1EDFF ; Cn # [5632] <reserved-1D800>..<reserved-1EDFF> +1EE04 ; Cn # <reserved-1EE04> +1EE20 ; Cn # <reserved-1EE20> +1EE23 ; Cn # <reserved-1EE23> +1EE25..1EE26 ; Cn # [2] <reserved-1EE25>..<reserved-1EE26> +1EE28 ; Cn # <reserved-1EE28> +1EE33 ; Cn # <reserved-1EE33> +1EE38 ; Cn # <reserved-1EE38> +1EE3A ; Cn # <reserved-1EE3A> +1EE3C..1EE41 ; Cn # [6] <reserved-1EE3C>..<reserved-1EE41> +1EE43..1EE46 ; Cn # [4] <reserved-1EE43>..<reserved-1EE46> +1EE48 ; Cn # <reserved-1EE48> +1EE4A ; Cn # <reserved-1EE4A> +1EE4C ; Cn # <reserved-1EE4C> +1EE50 ; Cn # <reserved-1EE50> +1EE53 ; Cn # <reserved-1EE53> +1EE55..1EE56 ; Cn # [2] <reserved-1EE55>..<reserved-1EE56> +1EE58 ; 
Cn # <reserved-1EE58> +1EE5A ; Cn # <reserved-1EE5A> +1EE5C ; Cn # <reserved-1EE5C> +1EE5E ; Cn # <reserved-1EE5E> +1EE60 ; Cn # <reserved-1EE60> +1EE63 ; Cn # <reserved-1EE63> +1EE65..1EE66 ; Cn # [2] <reserved-1EE65>..<reserved-1EE66> +1EE6B ; Cn # <reserved-1EE6B> +1EE73 ; Cn # <reserved-1EE73> +1EE78 ; Cn # <reserved-1EE78> +1EE7D ; Cn # <reserved-1EE7D> +1EE7F ; Cn # <reserved-1EE7F> +1EE8A ; Cn # <reserved-1EE8A> +1EE9C..1EEA0 ; Cn # [5] <reserved-1EE9C>..<reserved-1EEA0> +1EEA4 ; Cn # <reserved-1EEA4> +1EEAA ; Cn # <reserved-1EEAA> +1EEBC..1EEEF ; Cn # [52] <reserved-1EEBC>..<reserved-1EEEF> +1EEF2..1EFFF ; Cn # [270] <reserved-1EEF2>..<reserved-1EFFF> 1F02C..1F02F ; Cn # [4] <reserved-1F02C>..<reserved-1F02F> 1F094..1F09F ; Cn # [12] <reserved-1F094>..<reserved-1F09F> 1F0AF..1F0B0 ; Cn # [2] <reserved-1F0AF>..<reserved-1F0B0> @@ -472,7 +520,7 @@ FFFE..FFFF ; Cn # [2] <noncharacter-FFFE>..<noncharacter-FFFF> 1F0E0..1F0FF ; Cn # [32] <reserved-1F0E0>..<reserved-1F0FF> 1F10B..1F10F ; Cn # [5] <reserved-1F10B>..<reserved-1F10F> 1F12F ; Cn # <reserved-1F12F> -1F16A..1F16F ; Cn # [6] <reserved-1F16A>..<reserved-1F16F> +1F16C..1F16F ; Cn # [4] <reserved-1F16C>..<reserved-1F16F> 1F19B..1F1E5 ; Cn # [75] <reserved-1F19B>..<reserved-1F1E5> 1F203..1F20F ; Cn # [13] <reserved-1F203>..<reserved-1F20F> 1F23B..1F23F ; Cn # [5] <reserved-1F23B>..<reserved-1F23F> @@ -489,19 +537,9 @@ FFFE..FFFF ; Cn # [2] <noncharacter-FFFE>..<noncharacter-FFFF> 1F441 ; Cn # <reserved-1F441> 1F4F8 ; Cn # <reserved-1F4F8> 1F4FD..1F4FF ; Cn # [3] <reserved-1F4FD>..<reserved-1F4FF> -1F53E..1F54F ; Cn # [18] <reserved-1F53E>..<reserved-1F54F> +1F53E..1F53F ; Cn # [2] <reserved-1F53E>..<reserved-1F53F> +1F544..1F54F ; Cn # [12] <reserved-1F544>..<reserved-1F54F> 1F568..1F5FA ; Cn # [147] <reserved-1F568>..<reserved-1F5FA> -1F600 ; Cn # <reserved-1F600> -1F611 ; Cn # <reserved-1F611> -1F615 ; Cn # <reserved-1F615> -1F617 ; Cn # <reserved-1F617> -1F619 ; Cn # <reserved-1F619> -1F61B ; Cn # 
<reserved-1F61B> -1F61F ; Cn # <reserved-1F61F> -1F626..1F627 ; Cn # [2] <reserved-1F626>..<reserved-1F627> -1F62C ; Cn # <reserved-1F62C> -1F62E..1F62F ; Cn # [2] <reserved-1F62E>..<reserved-1F62F> -1F634 ; Cn # <reserved-1F634> 1F641..1F644 ; Cn # [4] <reserved-1F641>..<reserved-1F644> 1F650..1F67F ; Cn # [48] <reserved-1F650>..<reserved-1F67F> 1F6C6..1F6FF ; Cn # [58] <reserved-1F6C6>..<reserved-1F6FF> @@ -516,7 +554,7 @@ E01F0..EFFFF ; Cn # [65040] <reserved-E01F0>..<noncharacter-EFFFF> FFFFE..FFFFF ; Cn # [2] <noncharacter-FFFFE>..<noncharacter-FFFFF> 10FFFE..10FFFF; Cn # [2] <noncharacter-10FFFE>..<noncharacter-10FFFF> -# Total code points: 865147 +# Total code points: 864415 # ================================================ @@ -790,6 +828,8 @@ FFFFE..FFFFF ; Cn # [2] <noncharacter-FFFFE>..<noncharacter-FFFFF> 0526 ; Lu # CYRILLIC CAPITAL LETTER SHHA WITH DESCENDER 0531..0556 ; Lu # [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH 10A0..10C5 ; Lu # [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; Lu # GEORGIAN CAPITAL LETTER YN +10CD ; Lu # GEORGIAN CAPITAL LETTER AEN 1E00 ; Lu # LATIN CAPITAL LETTER A WITH RING BELOW 1E02 ; Lu # LATIN CAPITAL LETTER B WITH DOT ABOVE 1E04 ; Lu # LATIN CAPITAL LETTER B WITH DOT BELOW @@ -1004,6 +1044,7 @@ FFFFE..FFFFF ; Cn # [2] <noncharacter-FFFFE>..<noncharacter-FFFFF> 2CE2 ; Lu # COPTIC CAPITAL LETTER OLD NUBIAN WAU 2CEB ; Lu # COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI 2CED ; Lu # COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA +2CF2 ; Lu # COPTIC CAPITAL LETTER BOHAIRIC KHEI A640 ; Lu # CYRILLIC CAPITAL LETTER ZEMLYA A642 ; Lu # CYRILLIC CAPITAL LETTER DZELO A644 ; Lu # CYRILLIC CAPITAL LETTER REVERSED DZE @@ -1087,11 +1128,13 @@ A786 ; Lu # LATIN CAPITAL LETTER INSULAR T A78B ; Lu # LATIN CAPITAL LETTER SALTILLO A78D ; Lu # LATIN CAPITAL LETTER TURNED H A790 ; Lu # LATIN CAPITAL LETTER N WITH DESCENDER +A792 ; Lu # LATIN CAPITAL LETTER C WITH BAR A7A0 ; Lu # LATIN CAPITAL LETTER G WITH OBLIQUE 
STROKE A7A2 ; Lu # LATIN CAPITAL LETTER K WITH OBLIQUE STROKE A7A4 ; Lu # LATIN CAPITAL LETTER N WITH OBLIQUE STROKE A7A6 ; Lu # LATIN CAPITAL LETTER R WITH OBLIQUE STROKE A7A8 ; Lu # LATIN CAPITAL LETTER S WITH OBLIQUE STROKE +A7AA ; Lu # LATIN CAPITAL LETTER H WITH HOOK FF21..FF3A ; Lu # [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z 10400..10427 ; Lu # [40] DESERET CAPITAL LETTER LONG I..DESERET CAPITAL LETTER EW 1D400..1D419 ; Lu # [26] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL BOLD CAPITAL Z @@ -1126,16 +1169,14 @@ FF21..FF3A ; Lu # [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAP 1D790..1D7A8 ; Lu # [25] MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA 1D7CA ; Lu # MATHEMATICAL BOLD CAPITAL DIGAMMA -# Total code points: 1436 +# Total code points: 1441 # ================================================ # General_Category=Lowercase_Letter 0061..007A ; Ll # [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z -00AA ; Ll # FEMININE ORDINAL INDICATOR 00B5 ; Ll # MICRO SIGN -00BA ; Ll # MASCULINE ORDINAL INDICATOR 00DF..00F6 ; Ll # [24] LATIN SMALL LETTER SHARP S..LATIN SMALL LETTER O WITH DIAERESIS 00F8..00FF ; Ll # [8] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER Y WITH DIAERESIS 0101 ; Ll # LATIN SMALL LETTER A WITH MACRON @@ -1401,7 +1442,7 @@ FF21..FF3A ; Lu # [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAP 0527 ; Ll # CYRILLIC SMALL LETTER SHHA WITH DESCENDER 0561..0587 ; Ll # [39] ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LIGATURE ECH YIWN 1D00..1D2B ; Ll # [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL -1D62..1D77 ; Ll # [22] LATIN SUBSCRIPT SMALL LETTER I..LATIN SMALL LETTER TURNED G +1D6B..1D77 ; Ll # [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G 1D79..1D9A ; Ll # [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK 1E01 ; Ll # LATIN SMALL LETTER A WITH RING BELOW 1E03 ; Ll # LATIN SMALL LETTER B WITH 
DOT ABOVE @@ -1565,7 +1606,7 @@ FF21..FF3A ; Lu # [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAP 2C6C ; Ll # LATIN SMALL LETTER Z WITH DESCENDER 2C71 ; Ll # LATIN SMALL LETTER V WITH RIGHT HOOK 2C73..2C74 ; Ll # [2] LATIN SMALL LETTER W WITH HOOK..LATIN SMALL LETTER V WITH CURL -2C76..2C7C ; Ll # [7] LATIN SMALL LETTER HALF H..LATIN SUBSCRIPT SMALL LETTER J +2C76..2C7B ; Ll # [6] LATIN SMALL LETTER HALF H..LATIN LETTER SMALL CAPITAL TURNED E 2C81 ; Ll # COPTIC SMALL LETTER ALFA 2C83 ; Ll # COPTIC SMALL LETTER VIDA 2C85 ; Ll # COPTIC SMALL LETTER GAMMA @@ -1618,7 +1659,10 @@ FF21..FF3A ; Lu # [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAP 2CE3..2CE4 ; Ll # [2] COPTIC SMALL LETTER OLD NUBIAN WAU..COPTIC SYMBOL KAI 2CEC ; Ll # COPTIC SMALL LETTER CRYPTOGRAMMIC SHEI 2CEE ; Ll # COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CF3 ; Ll # COPTIC SMALL LETTER BOHAIRIC KHEI 2D00..2D25 ; Ll # [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE +2D27 ; Ll # GEORGIAN SMALL LETTER YN +2D2D ; Ll # GEORGIAN SMALL LETTER AEN A641 ; Ll # CYRILLIC SMALL LETTER ZEMLYA A643 ; Ll # CYRILLIC SMALL LETTER DZELO A645 ; Ll # CYRILLIC SMALL LETTER REVERSED DZE @@ -1703,6 +1747,7 @@ A787 ; Ll # LATIN SMALL LETTER INSULAR T A78C ; Ll # LATIN SMALL LETTER SALTILLO A78E ; Ll # LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A791 ; Ll # LATIN SMALL LETTER N WITH DESCENDER +A793 ; Ll # LATIN SMALL LETTER C WITH BAR A7A1 ; Ll # LATIN SMALL LETTER G WITH OBLIQUE STROKE A7A3 ; Ll # LATIN SMALL LETTER K WITH OBLIQUE STROKE A7A5 ; Ll # LATIN SMALL LETTER N WITH OBLIQUE STROKE @@ -1742,7 +1787,7 @@ FF41..FF5A ; Ll # [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL 1D7C4..1D7C9 ; Ll # [6] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC PI SYMBOL 1D7CB ; Ll # MATHEMATICAL BOLD SMALL DIGAMMA -# Total code points: 1759 +# Total code points: 1751 # ================================================ @@ -1788,13 +1833,13 @@ FF41..FF5A ; 
Ll # [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL 1843 ; Lm # MONGOLIAN LETTER TODO LONG VOWEL SIGN 1AA7 ; Lm # TAI THAM SIGN MAI YAMOK 1C78..1C7D ; Lm # [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD -1D2C..1D61 ; Lm # [54] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI +1D2C..1D6A ; Lm # [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI 1D78 ; Lm # MODIFIER LETTER CYRILLIC EN 1D9B..1DBF ; Lm # [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA 2071 ; Lm # SUPERSCRIPT LATIN SMALL LETTER I 207F ; Lm # SUPERSCRIPT LATIN SMALL LETTER N 2090..209C ; Lm # [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T -2C7D ; Lm # MODIFIER LETTER CAPITAL V +2C7C..2C7D ; Lm # [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V 2D6F ; Lm # TIFINAGH MODIFIER LETTER LABIALIZATION MARK 2E2F ; Lm # VERTICAL TILDE 3005 ; Lm # IDEOGRAPHIC ITERATION MARK @@ -1809,18 +1854,23 @@ A67F ; Lm # CYRILLIC PAYEROK A717..A71F ; Lm # [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK A770 ; Lm # MODIFIER LETTER US A788 ; Lm # MODIFIER LETTER LOW CIRCUMFLEX ACCENT +A7F8..A7F9 ; Lm # [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE A9CF ; Lm # JAVANESE PANGRANGKEP AA70 ; Lm # MYANMAR MODIFIER LETTER KHAMTI REDUPLICATION AADD ; Lm # TAI VIET SYMBOL SAM +AAF3..AAF4 ; Lm # [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK FF70 ; Lm # HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK +16F93..16F9F ; Lm # [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 -# Total code points: 210 +# Total code points: 237 # ================================================ # General_Category=Other_Letter +00AA ; Lo # FEMININE ORDINAL INDICATOR +00BA ; Lo # MASCULINE ORDINAL INDICATOR 01BB ; Lo # LATIN LETTER TWO WITH STROKE 01C0..01C3 ; Lo # [4] LATIN LETTER DENTAL 
CLICK..LATIN LETTER RETROFLEX CLICK 0294 ; Lo # LATIN LETTER GLOTTAL STOP @@ -1841,6 +1891,8 @@ FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK 07CA..07EA ; Lo # [33] NKO LETTER A..NKO LETTER JONA RA 0800..0815 ; Lo # [22] SAMARITAN LETTER ALAF..SAMARITAN LETTER TAAF 0840..0858 ; Lo # [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN +08A0 ; Lo # ARABIC LETTER BEH WITH SMALL V BELOW +08A2..08AC ; Lo # [11] ARABIC LETTER JEEM WITH TWO DOTS ABOVE..ARABIC LETTER ROHINGYA YEH 0904..0939 ; Lo # [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA 093D ; Lo # DEVANAGARI SIGN AVAGRAHA 0950 ; Lo # DEVANAGARI OM @@ -1945,7 +1997,7 @@ FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK 0EB2..0EB3 ; Lo # [2] LAO VOWEL SIGN AA..LAO VOWEL SIGN AM 0EBD ; Lo # LAO SEMIVOWEL SIGN NYO 0EC0..0EC4 ; Lo # [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI -0EDC..0EDD ; Lo # [2] LAO HO NO..LAO HO MO +0EDC..0EDF ; Lo # [4] LAO HO NO..LAO LETTER KHMU NYO 0F00 ; Lo # TIBETAN SYLLABLE OM 0F40..0F47 ; Lo # [8] TIBETAN LETTER KA..TIBETAN LETTER JA 0F49..0F6C ; Lo # [36] TIBETAN LETTER NYA..TIBETAN LETTER RRA @@ -1960,7 +2012,7 @@ FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK 1075..1081 ; Lo # [13] MYANMAR LETTER SHAN KA..MYANMAR LETTER SHAN HA 108E ; Lo # MYANMAR LETTER RUMAI PALAUNG FA 10D0..10FA ; Lo # [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN -1100..1248 ; Lo # [329] HANGUL CHOSEONG KIYEOK..ETHIOPIC SYLLABLE QWA +10FD..1248 ; Lo # [332] GEORGIAN LETTER AEN..ETHIOPIC SYLLABLE QWA 124A..124D ; Lo # [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE 1250..1256 ; Lo # [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO 1258 ; Lo # ETHIOPIC SYLLABLE QHWA @@ -2006,14 +2058,15 @@ FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK 1B45..1B4B ; Lo # [7] BALINESE LETTER KAF SASAK..BALINESE LETTER ASYURA SASAK 1B83..1BA0 ; Lo # [30] SUNDANESE LETTER A..SUNDANESE LETTER HA 1BAE..1BAF ; Lo # [2] SUNDANESE LETTER 
KHA..SUNDANESE LETTER SYA -1BC0..1BE5 ; Lo # [38] BATAK LETTER A..BATAK LETTER U +1BBA..1BE5 ; Lo # [44] SUNDANESE AVAGRAHA..BATAK LETTER U 1C00..1C23 ; Lo # [36] LEPCHA LETTER KA..LEPCHA LETTER A 1C4D..1C4F ; Lo # [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA 1C5A..1C77 ; Lo # [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH 1CE9..1CEC ; Lo # [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL 1CEE..1CF1 ; Lo # [4] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ANUSVARA UBHAYATO MUKHA +1CF5..1CF6 ; Lo # [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA 2135..2138 ; Lo # [4] ALEF SYMBOL..DALET SYMBOL -2D30..2D65 ; Lo # [54] TIFINAGH LETTER YA..TIFINAGH LETTER YAZZ +2D30..2D67 ; Lo # [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO 2D80..2D96 ; Lo # [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE 2DA0..2DA6 ; Lo # [7] ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO 2DA8..2DAE ; Lo # [7] ETHIOPIC SYLLABLE CCA..ETHIOPIC SYLLABLE CCO @@ -2034,7 +2087,7 @@ FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK 31A0..31BA ; Lo # [27] BOPOMOFO LETTER BU..BOPOMOFO LETTER ZY 31F0..31FF ; Lo # [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO 3400..4DB5 ; Lo # [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5 -4E00..9FCB ; Lo # [20940] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCB +4E00..9FCC ; Lo # [20941] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCC A000..A014 ; Lo # [21] YI SYLLABLE IT..YI SYLLABLE E A016..A48C ; Lo # [1143] YI SYLLABLE BIT..YI SYLLABLE YYR A4D0..A4F7 ; Lo # [40] LISU LETTER BA..LISU LETTER OE @@ -2068,6 +2121,8 @@ AAB9..AABD ; Lo # [5] TAI VIET VOWEL UEA..TAI VIET VOWEL AN AAC0 ; Lo # TAI VIET TONE MAI NUENG AAC2 ; Lo # TAI VIET TONE MAI SONG AADB..AADC ; Lo # [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG +AAE0..AAEA ; Lo # [11] MEETEI MAYEK LETTER E..MEETEI MAYEK LETTER SSA +AAF2 ; Lo # MEETEI MAYEK ANJI AB01..AB06 ; Lo # [6] ETHIOPIC SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO AB09..AB0E 
; Lo # [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO AB11..AB16 ; Lo # [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO @@ -2077,8 +2132,7 @@ ABC0..ABE2 ; Lo # [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM AC00..D7A3 ; Lo # [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH D7B0..D7C6 ; Lo # [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E D7CB..D7FB ; Lo # [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH -F900..FA2D ; Lo # [302] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA2D -FA30..FA6D ; Lo # [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D +F900..FA6D ; Lo # [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D FA70..FAD9 ; Lo # [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 FB1D ; Lo # HEBREW LETTER YOD WITH HIRIQ FB1F..FB28 ; Lo # [10] HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBREW LETTER WIDE TAV @@ -2125,6 +2179,8 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 1083F..10855 ; Lo # [23] CYPRIOT SYLLABLE ZO..IMPERIAL ARAMAIC LETTER TAW 10900..10915 ; Lo # [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU 10920..10939 ; Lo # [26] LYDIAN LETTER A..LYDIAN LETTER C +10980..109B7 ; Lo # [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA +109BE..109BF ; Lo # [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN 10A00 ; Lo # KHAROSHTHI LETTER A 10A10..10A13 ; Lo # [4] KHAROSHTHI LETTER KA..KHAROSHTHI LETTER GHA 10A15..10A17 ; Lo # [3] KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA @@ -2136,16 +2192,56 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 10C00..10C48 ; Lo # [73] OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH 11003..11037 ; Lo # [53] BRAHMI SIGN JIHVAMULIYA..BRAHMI LETTER OLD TAMIL NNNA 11083..110AF ; Lo # [45] KAITHI LETTER A..KAITHI LETTER HA +110D0..110E8 ; Lo # [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE +11103..11126 ; Lo # 
[36] CHAKMA LETTER AA..CHAKMA LETTER HAA +11183..111B2 ; Lo # [48] SHARADA LETTER A..SHARADA LETTER HA +111C1..111C4 ; Lo # [4] SHARADA SIGN AVAGRAHA..SHARADA OM +11680..116AA ; Lo # [43] TAKRI LETTER A..TAKRI LETTER RRA 12000..1236E ; Lo # [879] CUNEIFORM SIGN A..CUNEIFORM SIGN ZUM 13000..1342E ; Lo # [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032 16800..16A38 ; Lo # [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ +16F00..16F44 ; Lo # [69] MIAO LETTER PA..MIAO LETTER HHA +16F50 ; Lo # MIAO LETTER NASALIZATION 1B000..1B001 ; Lo # [2] KATAKANA LETTER ARCHAIC E..HIRAGANA LETTER ARCHAIC YE +1EE00..1EE03 ; Lo # [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; Lo # [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; Lo # [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; Lo # ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; Lo # ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; Lo # [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; Lo # [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; Lo # ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; Lo # ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; Lo # ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; Lo # ARABIC MATHEMATICAL TAILED HAH +1EE49 ; Lo # ARABIC MATHEMATICAL TAILED YEH +1EE4B ; Lo # ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; Lo # [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; Lo # [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; Lo # ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; Lo # ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; Lo # ARABIC MATHEMATICAL TAILED DAD +1EE5B ; Lo # ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; Lo # ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; Lo # ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; Lo # [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; Lo 
# ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; Lo # [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; Lo # [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; Lo # [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; Lo # [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; Lo # ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; Lo # [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; Lo # [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; Lo # [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; Lo # [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; Lo # [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN 20000..2A6D6 ; Lo # [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6 2A700..2B734 ; Lo # [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734 2B740..2B81D ; Lo # [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D 2F800..2FA1D ; Lo # [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D -# Total code points: 97084 +# Total code points: 97553 # ================================================ @@ -2174,6 +2270,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 0825..0827 ; Mn # [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U 0829..082D ; Mn # [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA 0859..085B ; Mn # [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK +08E4..08FE ; Mn # [27] ARABIC CURLY FATHA..ARABIC DAMMA WITH DOT 0900..0902 ; Mn # [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA 093A ; Mn # DEVANAGARI VOWEL SIGN OE 093C ; Mn # DEVANAGARI SIGN NUKTA @@ -2259,6 +2356,7 @@ FFDA..FFDC ; 
Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 1732..1734 ; Mn # [3] HANUNOO VOWEL SIGN I..HANUNOO SIGN PAMUDPOD 1752..1753 ; Mn # [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U 1772..1773 ; Mn # [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U +17B4..17B5 ; Mn # [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA 17B7..17BD ; Mn # [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA 17C6 ; Mn # KHMER SIGN NIKAHIT 17C9..17D3 ; Mn # [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT @@ -2286,6 +2384,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 1B80..1B81 ; Mn # [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR 1BA2..1BA5 ; Mn # [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU 1BA8..1BA9 ; Mn # [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG +1BAB ; Mn # SUNDANESE SIGN VIRAMA 1BE6 ; Mn # BATAK SIGN TOMPI 1BE8..1BE9 ; Mn # [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE 1BED ; Mn # BATAK VOWEL SIGN KARO O @@ -2296,6 +2395,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 1CD4..1CE0 ; Mn # [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA 1CE2..1CE8 ; Mn # [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL 1CED ; Mn # VEDIC SIGN TIRYAK +1CF4 ; Mn # VEDIC TONE CANDRA ABOVE 1DC0..1DE6 ; Mn # [39] COMBINING DOTTED GRAVE ACCENT..COMBINING LATIN SMALL LETTER Z 1DFC..1DFF ; Mn # [4] COMBINING DOUBLE INVERTED BREVE BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW 20D0..20DC ; Mn # [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE @@ -2304,10 +2404,11 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 2CEF..2CF1 ; Mn # [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS 2D7F ; Mn # TIFINAGH CONSONANT JOINER 2DE0..2DFF ; Mn # [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS -302A..302F ; Mn # [6] IDEOGRAPHIC LEVEL 
TONE MARK..HANGUL DOUBLE DOT TONE MARK +302A..302D ; Mn # [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK 3099..309A ; Mn # [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK A66F ; Mn # COMBINING CYRILLIC VZMET -A67C..A67D ; Mn # [2] COMBINING CYRILLIC KAVYKA..COMBINING CYRILLIC PAYEROK +A674..A67D ; Mn # [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK +A69F ; Mn # COMBINING CYRILLIC LETTER IOTIFIED E A6F0..A6F1 ; Mn # [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS A802 ; Mn # SYLOTI NAGRI SIGN DVISVARA A806 ; Mn # SYLOTI NAGRI SIGN HASANTA @@ -2331,6 +2432,8 @@ AAB2..AAB4 ; Mn # [3] TAI VIET VOWEL I..TAI VIET VOWEL U AAB7..AAB8 ; Mn # [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA AABE..AABF ; Mn # [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK AAC1 ; Mn # TAI VIET TONE MAI THO +AAEC..AAED ; Mn # [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI +AAF6 ; Mn # MEETEI MAYEK VIRAMA ABE5 ; Mn # MEETEI MAYEK VOWEL SIGN ANAP ABE8 ; Mn # MEETEI MAYEK VOWEL SIGN UNAP ABED ; Mn # MEETEI MAYEK APUN IYEK @@ -2348,6 +2451,16 @@ FE20..FE26 ; Mn # [7] COMBINING LIGATURE LEFT HALF..COMBINING CONJOINING MA 11080..11081 ; Mn # [2] KAITHI SIGN CANDRABINDU..KAITHI SIGN ANUSVARA 110B3..110B6 ; Mn # [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI 110B9..110BA ; Mn # [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA +11100..11102 ; Mn # [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11127..1112B ; Mn # [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112D..11134 ; Mn # [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA +11180..11181 ; Mn # [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +111B6..111BE ; Mn # [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +116AB ; Mn # TAKRI SIGN ANUSVARA +116AD ; Mn # TAKRI VOWEL SIGN AA +116B0..116B5 ; Mn # [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +116B7 ; Mn # TAKRI SIGN NUKTA +16F8F..16F92 ; Mn # [4] MIAO TONE RIGHT..MIAO TONE BELOW 1D167..1D169 ; Mn # 
[3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 1D17B..1D182 ; Mn # [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE 1D185..1D18B ; Mn # [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE @@ -2355,7 +2468,7 @@ FE20..FE26 ; Mn # [7] COMBINING LIGATURE LEFT HALF..COMBINING CONJOINING MA 1D242..1D244 ; Mn # [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME E0100..E01EF ; Mn # [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 1199 +# Total code points: 1280 # ================================================ @@ -2453,6 +2566,7 @@ A670..A672 ; Me # [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRIL 1BA1 ; Mc # SUNDANESE CONSONANT SIGN PAMINGKAL 1BA6..1BA7 ; Mc # [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG 1BAA ; Mc # SUNDANESE SIGN PAMAAEH +1BAC..1BAD ; Mc # [2] SUNDANESE CONSONANT SIGN PASANGAN MA..SUNDANESE CONSONANT SIGN PASANGAN WA 1BE7 ; Mc # BATAK VOWEL SIGN E 1BEA..1BEC ; Mc # [3] BATAK VOWEL SIGN I..BATAK VOWEL SIGN O 1BEE ; Mc # BATAK VOWEL SIGN U @@ -2460,7 +2574,8 @@ A670..A672 ; Me # [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRIL 1C24..1C2B ; Mc # [8] LEPCHA SUBJOINED LETTER YA..LEPCHA VOWEL SIGN UU 1C34..1C35 ; Mc # [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG 1CE1 ; Mc # VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA -1CF2 ; Mc # VEDIC SIGN ARDHAVISARGA +1CF2..1CF3 ; Mc # [2] VEDIC SIGN ARDHAVISARGA..VEDIC SIGN ROTATED ARDHAVISARGA +302E..302F ; Mc # [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK A823..A824 ; Mc # [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I A827 ; Mc # SYLOTI NAGRI VOWEL SIGN OO A880..A881 ; Mc # [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA @@ -2474,6 +2589,9 @@ AA2F..AA30 ; Mc # [2] CHAM VOWEL SIGN O..CHAM VOWEL SIGN AI AA33..AA34 ; Mc # [2] CHAM CONSONANT SIGN YA..CHAM CONSONANT SIGN RA AA4D ; Mc # CHAM CONSONANT SIGN FINAL H AA7B ; Mc # 
MYANMAR SIGN PAO KAREN TONE +AAEB ; Mc # MEETEI MAYEK VOWEL SIGN II +AAEE..AAEF ; Mc # [2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU +AAF5 ; Mc # MEETEI MAYEK VOWEL SIGN VISARGA ABE3..ABE4 ; Mc # [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP ABE6..ABE7 ; Mc # [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP ABE9..ABEA ; Mc # [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEETEI MAYEK VOWEL SIGN NUNG @@ -2483,10 +2601,18 @@ ABEC ; Mc # MEETEI MAYEK LUM IYEK 11082 ; Mc # KAITHI SIGN VISARGA 110B0..110B2 ; Mc # [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II 110B7..110B8 ; Mc # [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU +1112C ; Mc # CHAKMA VOWEL SIGN E +11182 ; Mc # SHARADA SIGN VISARGA +111B3..111B5 ; Mc # [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II +111BF..111C0 ; Mc # [2] SHARADA VOWEL SIGN AU..SHARADA SIGN VIRAMA +116AC ; Mc # TAKRI SIGN VISARGA +116AE..116AF ; Mc # [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II +116B6 ; Mc # TAKRI SIGN VIRAMA +16F51..16F7E ; Mc # [46] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN NG 1D165..1D166 ; Mc # [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM 1D16D..1D172 ; Mc # [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5 -# Total code points: 287 +# Total code points: 353 # ================================================ @@ -2529,9 +2655,13 @@ ABF0..ABF9 ; Nd # [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE FF10..FF19 ; Nd # [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE 104A0..104A9 ; Nd # [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE 11066..1106F ; Nd # [10] BRAHMI DIGIT ZERO..BRAHMI DIGIT NINE +110F0..110F9 ; Nd # [10] SORA SOMPENG DIGIT ZERO..SORA SOMPENG DIGIT NINE +11136..1113F ; Nd # [10] CHAKMA DIGIT ZERO..CHAKMA DIGIT NINE +111D0..111D9 ; Nd # [10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE +116C0..116C9 ; Nd # [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE 1D7CE..1D7FF ; Nd # [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE 
DIGIT NINE -# Total code points: 420 +# Total code points: 460 # ================================================ @@ -2579,6 +2709,7 @@ A6E6..A6EF ; Nl # [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM 2CFD ; No # COPTIC FRACTION ONE HALF 3192..3195 ; No # [4] IDEOGRAPHIC ANNOTATION ONE MARK..IDEOGRAPHIC ANNOTATION FOUR MARK 3220..3229 ; No # [10] PARENTHESIZED IDEOGRAPH ONE..PARENTHESIZED IDEOGRAPH TEN +3248..324F ; No # [8] CIRCLED NUMBER TEN ON BLACK SQUARE..CIRCLED NUMBER EIGHTY ON BLACK SQUARE 3251..325F ; No # [15] CIRCLED NUMBER TWENTY ONE..CIRCLED NUMBER THIRTY FIVE 3280..3289 ; No # [10] CIRCLED IDEOGRAPH ONE..CIRCLED IDEOGRAPH TEN 32B1..32BF ; No # [15] CIRCLED NUMBER THIRTY SIX..CIRCLED NUMBER FIFTY @@ -2598,7 +2729,7 @@ A830..A835 ; No # [6] NORTH INDIC FRACTION ONE QUARTER..NORTH INDIC FRACTIO 1D360..1D371 ; No # [18] COUNTING ROD UNIT DIGIT ONE..COUNTING ROD TENS DIGIT NINE 1F100..1F10A ; No # [11] DIGIT ZERO FULL STOP..DIGIT NINE COMMA -# Total code points: 456 +# Total code points: 464 # ================================================ @@ -2645,10 +2776,9 @@ A830..A835 ; No # [6] NORTH INDIC FRACTION ONE QUARTER..NORTH INDIC FRACTIO # General_Category=Format 00AD ; Cf # SOFT HYPHEN -0600..0603 ; Cf # [4] ARABIC NUMBER SIGN..ARABIC SIGN SAFHA +0600..0604 ; Cf # [5] ARABIC NUMBER SIGN..ARABIC SIGN SAMVAT 06DD ; Cf # ARABIC END OF AYAH 070F ; Cf # SYRIAC ABBREVIATION MARK -17B4..17B5 ; Cf # [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA 200B..200F ; Cf # [5] ZERO WIDTH SPACE..RIGHT-TO-LEFT MARK 202A..202E ; Cf # [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE 2060..2064 ; Cf # [5] WORD JOINER..INVISIBLE PLUS @@ -2660,7 +2790,7 @@ FFF9..FFFB ; Cf # [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATION E0001 ; Cf # LANGUAGE TAG E0020..E007F ; Cf # [96] TAG SPACE..CANCEL TAG -# Total code points: 140 +# Total code points: 139 # ================================================ @@ -2692,6 +2822,7 @@ D800..DFFF ; Cs # [2048] 
<surrogate-D800>..<surrogate-DFFF> 2010..2015 ; Pd # [6] HYPHEN..HORIZONTAL BAR 2E17 ; Pd # DOUBLE OBLIQUE HYPHEN 2E1A ; Pd # HYPHEN WITH DIAERESIS +2E3A..2E3B ; Pd # [2] TWO-EM DASH..THREE-EM DASH 301C ; Pd # WAVE DASH 3030 ; Pd # WAVY DASH 30A0 ; Pd # KATAKANA-HIRAGANA DOUBLE HYPHEN @@ -2700,7 +2831,7 @@ FE58 ; Pd # SMALL EM DASH FE63 ; Pd # SMALL HYPHEN-MINUS FF0D ; Pd # FULLWIDTH HYPHEN-MINUS -# Total code points: 21 +# Total code points: 23 # ================================================ @@ -2884,7 +3015,8 @@ FF3F ; Pc # FULLWIDTH LOW LINE 003F..0040 ; Po # [2] QUESTION MARK..COMMERCIAL AT 005C ; Po # REVERSE SOLIDUS 00A1 ; Po # INVERTED EXCLAMATION MARK -00B7 ; Po # MIDDLE DOT +00A7 ; Po # SECTION SIGN +00B6..00B7 ; Po # [2] PILCROW SIGN..MIDDLE DOT 00BF ; Po # INVERTED QUESTION MARK 037E ; Po # GREEK QUESTION MARK 0387 ; Po # GREEK ANO TELEIA @@ -2906,16 +3038,18 @@ FF3F ; Pc # FULLWIDTH LOW LINE 085E ; Po # MANDAIC PUNCTUATION 0964..0965 ; Po # [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA 0970 ; Po # DEVANAGARI ABBREVIATION SIGN +0AF0 ; Po # GUJARATI ABBREVIATION SIGN 0DF4 ; Po # SINHALA PUNCTUATION KUNDDALIYA 0E4F ; Po # THAI CHARACTER FONGMAN 0E5A..0E5B ; Po # [2] THAI CHARACTER ANGKHANKHU..THAI CHARACTER KHOMUT 0F04..0F12 ; Po # [15] TIBETAN MARK INITIAL YIG MGO MDUN MA..TIBETAN MARK RGYA GRAM SHAD +0F14 ; Po # TIBETAN MARK GTER TSHEG 0F85 ; Po # TIBETAN MARK PALUTA 0FD0..0FD4 ; Po # [5] TIBETAN MARK BSKA- SHOG GI MGO RGYAN..TIBETAN MARK CLOSING BRDA RNYING YIG MGO SGAB MA 0FD9..0FDA ; Po # [2] TIBETAN MARK LEADING MCHAN RTAGS..TIBETAN MARK TRAILING MCHAN RTAGS 104A..104F ; Po # [6] MYANMAR SIGN LITTLE SECTION..MYANMAR SYMBOL GENITIVE 10FB ; Po # GEORGIAN PARAGRAPH SEPARATOR -1361..1368 ; Po # [8] ETHIOPIC WORDSPACE..ETHIOPIC PARAGRAPH SEPARATOR +1360..1368 ; Po # [9] ETHIOPIC SECTION MARK..ETHIOPIC PARAGRAPH SEPARATOR 166D..166E ; Po # [2] CANADIAN SYLLABICS CHI SIGN..CANADIAN SYLLABICS FULL STOP 16EB..16ED ; Po # [3] RUNIC SINGLE PUNCTUATION..RUNIC 
CROSS PUNCTUATION 1735..1736 ; Po # [2] PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DOUBLE PUNCTUATION @@ -2931,6 +3065,7 @@ FF3F ; Pc # FULLWIDTH LOW LINE 1BFC..1BFF ; Po # [4] BATAK SYMBOL BINDU NA METEK..BATAK SYMBOL BINDU PANGOLAT 1C3B..1C3F ; Po # [5] LEPCHA PUNCTUATION TA-ROL..LEPCHA PUNCTUATION TSHOOK 1C7E..1C7F ; Po # [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD +1CC0..1CC7 ; Po # [8] SUNDANESE PUNCTUATION BINDU SURYA..SUNDANESE PUNCTUATION BINDU BA SATANGA 1CD3 ; Po # VEDIC SIGN NIHSHVASA 2016..2017 ; Po # [2] DOUBLE VERTICAL LINE..DOUBLE LOW LINE 2020..2027 ; Po # [8] DAGGER..HYPHENATION POINT @@ -2951,7 +3086,7 @@ FF3F ; Pc # FULLWIDTH LOW LINE 2E1B ; Po # TILDE WITH RING ABOVE 2E1E..2E1F ; Po # [2] TILDE WITH DOT ABOVE..TILDE WITH DOT BELOW 2E2A..2E2E ; Po # [5] TWO DOTS OVER ONE DOT PUNCTUATION..REVERSED QUESTION MARK -2E30..2E31 ; Po # [2] RING POINT..WORD SEPARATOR MIDDLE DOT +2E30..2E39 ; Po # [10] RING POINT..TOP HALF SECTION SIGN 3001..3003 ; Po # [3] IDEOGRAPHIC COMMA..DITTO MARK 303D ; Po # PART ALTERNATION MARK 30FB ; Po # KATAKANA MIDDLE DOT @@ -2969,6 +3104,7 @@ A9C1..A9CD ; Po # [13] JAVANESE LEFT RERENGGAN..JAVANESE TURNED PADA PISELEH A9DE..A9DF ; Po # [2] JAVANESE PADA TIRTA TUMETES..JAVANESE PADA ISEN-ISEN AA5C..AA5F ; Po # [4] CHAM PUNCTUATION SPIRAL..CHAM PUNCTUATION TRIPLE DANDA AADE..AADF ; Po # [2] TAI VIET SYMBOL HO HOI..TAI VIET SYMBOL KOI KOI +AAF0..AAF1 ; Po # [2] MEETEI MAYEK CHEIKHAN..MEETEI MAYEK AHANG KHUDAM ABEB ; Po # MEETEI MAYEK CHEIKHEI FE10..FE16 ; Po # [7] PRESENTATION FORM FOR VERTICAL COMMA..PRESENTATION FORM FOR VERTICAL QUESTION MARK FE19 ; Po # PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS @@ -2990,7 +3126,7 @@ FF1F..FF20 ; Po # [2] FULLWIDTH QUESTION MARK..FULLWIDTH COMMERCIAL AT FF3C ; Po # FULLWIDTH REVERSE SOLIDUS FF61 ; Po # HALFWIDTH IDEOGRAPHIC FULL STOP FF64..FF65 ; Po # [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDLE DOT -10100..10101 ; Po # [2] AEGEAN WORD SEPARATOR 
LINE..AEGEAN WORD SEPARATOR DOT +10100..10102 ; Po # [3] AEGEAN WORD SEPARATOR LINE..AEGEAN CHECK MARK 1039F ; Po # UGARITIC WORD DIVIDER 103D0 ; Po # OLD PERSIAN WORD DIVIDER 10857 ; Po # IMPERIAL ARAMAIC SECTION SIGN @@ -3002,9 +3138,11 @@ FF64..FF65 ; Po # [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDL 11047..1104D ; Po # [7] BRAHMI DANDA..BRAHMI PUNCTUATION LOTUS 110BB..110BC ; Po # [2] KAITHI ABBREVIATION SIGN..KAITHI ENUMERATION SIGN 110BE..110C1 ; Po # [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA +11140..11143 ; Po # [4] CHAKMA SECTION MARK..CHAKMA QUESTION MARK +111C5..111C8 ; Po # [4] SHARADA DANDA..SHARADA SEPARATOR 12470..12473 ; Po # [4] CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER..CUNEIFORM PUNCTUATION SIGN DIAGONAL TRICOLON -# Total code points: 402 +# Total code points: 434 # ================================================ @@ -3047,9 +3185,7 @@ FF64..FF65 ; Po # [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDL 25F8..25FF ; Sm # [8] UPPER LEFT TRIANGLE..LOWER RIGHT TRIANGLE 266F ; Sm # MUSIC SHARP SIGN 27C0..27C4 ; Sm # [5] THREE DIMENSIONAL ANGLE..OPEN SUPERSET -27C7..27CA ; Sm # [4] OR WITH DOT INSIDE..VERTICAL BAR WITH HORIZONTAL STROKE -27CC ; Sm # LONG DIVISION -27CE..27E5 ; Sm # [24] SQUARED LOGICAL AND..WHITE SQUARE WITH RIGHTWARDS TICK +27C7..27E5 ; Sm # [31] OR WITH DOT INSIDE..WHITE SQUARE WITH RIGHTWARDS TICK 27F0..27FF ; Sm # [16] UPWARDS QUADRUPLE ARROW..LONG RIGHTWARDS SQUIGGLE ARROW 2900..2982 ; Sm # [131] RIGHTWARDS TWO-HEADED ARROW WITH VERTICAL STROKE..Z NOTATION TYPE COLON 2999..29D7 ; Sm # [63] DOTTED FENCE..BLACK HOURGLASS @@ -3076,8 +3212,9 @@ FFE9..FFEC ; Sm # [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS ARROW 1D789 ; Sm # MATHEMATICAL SANS-SERIF BOLD PARTIAL DIFFERENTIAL 1D7A9 ; Sm # MATHEMATICAL SANS-SERIF BOLD ITALIC NABLA 1D7C3 ; Sm # MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL +1EEF0..1EEF1 ; Sm # [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC 
MATHEMATICAL OPERATOR HAH WITH DAL -# Total code points: 948 +# Total code points: 952 # ================================================ @@ -3085,6 +3222,7 @@ FFE9..FFEC ; Sm # [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS ARROW 0024 ; Sc # DOLLAR SIGN 00A2..00A5 ; Sc # [4] CENT SIGN..YEN SIGN +058F ; Sc # ARMENIAN DRAM SIGN 060B ; Sc # AFGHANI SIGN 09F2..09F3 ; Sc # [2] BENGALI RUPEE MARK..BENGALI RUPEE SIGN 09FB ; Sc # BENGALI GANDA MARK @@ -3100,7 +3238,7 @@ FF04 ; Sc # FULLWIDTH DOLLAR SIGN FFE0..FFE1 ; Sc # [2] FULLWIDTH CENT SIGN..FULLWIDTH POUND SIGN FFE5..FFE6 ; Sc # [2] FULLWIDTH YEN SIGN..FULLWIDTH WON SIGN -# Total code points: 47 +# Total code points: 48 # ================================================ @@ -3140,11 +3278,10 @@ FFE3 ; Sk # FULLWIDTH MACRON # General_Category=Other_Symbol -00A6..00A7 ; So # [2] BROKEN BAR..SECTION SIGN +00A6 ; So # BROKEN BAR 00A9 ; So # COPYRIGHT SIGN 00AE ; So # REGISTERED SIGN 00B0 ; So # DEGREE SIGN -00B6 ; So # PILCROW SIGN 0482 ; So # CYRILLIC THOUSANDS SIGN 060E..060F ; So # [2] ARABIC POETIC VERSE SIGN..ARABIC SIGN MISRA 06DE ; So # ARABIC START OF RUB EL HIZB @@ -3158,7 +3295,8 @@ FFE3 ; Sk # FULLWIDTH MACRON 0C7F ; So # TELUGU SIGN TUUMU 0D79 ; So # MALAYALAM DATE MARK 0F01..0F03 ; So # [3] TIBETAN MARK GTER YIG MGO TRUNCATED A..TIBETAN MARK GTER YIG MGO -UM GTER TSHEG MA -0F13..0F17 ; So # [5] TIBETAN MARK CARET -DZUD RTAGS ME LONG CAN..TIBETAN ASTROLOGICAL SIGN SGRA GCAN -CHAR RTAGS +0F13 ; So # TIBETAN MARK CARET -DZUD RTAGS ME LONG CAN +0F15..0F17 ; So # [3] TIBETAN LOGOTYPE SIGN CHAD RTAGS..TIBETAN ASTROLOGICAL SIGN SGRA GCAN -CHAR RTAGS 0F1A..0F1F ; So # [6] TIBETAN SIGN RDEL DKAR GCIG..TIBETAN SIGN RDEL DKAR RDEL NAG 0F34 ; So # TIBETAN MARK BSDUS RTAGS 0F36 ; So # TIBETAN MARK CARET -DZUD RTAGS BZHI MIG CAN @@ -3168,7 +3306,6 @@ FFE3 ; Sk # FULLWIDTH MACRON 0FCE..0FCF ; So # [2] TIBETAN SIGN RDEL NAG RDEL DKAR..TIBETAN SIGN RDEL NAG GSUM 0FD5..0FD8 ; So # [4] RIGHT-FACING SVASTI SIGN..LEFT-FACING 
SVASTI SIGN WITH DOTS 109E..109F ; So # [2] MYANMAR SYMBOL SHAN ONE..MYANMAR SYMBOL SHAN EXCLAMATION -1360 ; So # ETHIOPIC SECTION MARK 1390..1399 ; So # [10] ETHIOPIC TONAL MARK YIZET..ETHIOPIC TONAL MARK KURT 1940 ; So # LIMBU SIGN LOO 19DE..19FF ; So # [34] NEW TAI LUE SIGN LAE..KHMER SYMBOL DAP-PRAM ROC @@ -3232,7 +3369,8 @@ FFE3 ; Sk # FULLWIDTH MACRON 3196..319F ; So # [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK 31C0..31E3 ; So # [36] CJK STROKE T..CJK STROKE Q 3200..321E ; So # [31] PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED KOREAN CHARACTER O HU -322A..3250 ; So # [39] PARENTHESIZED IDEOGRAPH MOON..PARTNERSHIP SIGN +322A..3247 ; So # [30] PARENTHESIZED IDEOGRAPH MOON..CIRCLED IDEOGRAPH KOTO +3250 ; So # PARTNERSHIP SIGN 3260..327F ; So # [32] CIRCLED HANGUL KIYEOK..KOREAN STANDARD SYMBOL 328A..32B0 ; So # [39] CIRCLED IDEOGRAPH MOON..CIRCLED IDEOGRAPH NIGHT 32C0..32FE ; So # [63] IDEOGRAPHIC TELEGRAPH SYMBOL FOR JANUARY..CIRCLED KATAKANA WO @@ -3248,7 +3386,6 @@ FFE4 ; So # FULLWIDTH BROKEN BAR FFE8 ; So # HALFWIDTH FORMS LIGHT VERTICAL FFED..FFEE ; So # [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE FFFC..FFFD ; So # [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER -10102 ; So # AEGEAN CHECK MARK 10137..1013F ; So # [9] AEGEAN WEIGHT BASE UNIT..AEGEAN MEASURE THIRD SUBUNIT 10179..10189 ; So # [17] GREEK YEAR SIGN..GREEK TRYBLION BASE SIGN 10190..1019B ; So # [12] ROMAN SEXTANS SIGN..ROMAN CENTURIAL SIGN @@ -3270,7 +3407,7 @@ FFFC..FFFD ; So # [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 1F0C1..1F0CF ; So # [15] PLAYING CARD ACE OF DIAMONDS..PLAYING CARD BLACK JOKER 1F0D1..1F0DF ; So # [15] PLAYING CARD ACE OF CLUBS..PLAYING CARD WHITE JOKER 1F110..1F12E ; So # [31] PARENTHESIZED LATIN CAPITAL LETTER A..CIRCLED WZ -1F130..1F169 ; So # [58] SQUARED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z +1F130..1F16B ; So # [60] SQUARED LATIN CAPITAL LETTER A..RAISED MD SIGN 1F170..1F19A ; So # [43] 
NEGATIVE SQUARED LATIN CAPITAL LETTER A..SQUARED VS 1F1E6..1F202 ; So # [29] REGIONAL INDICATOR SYMBOL LETTER A..SQUARED KATAKANA SA 1F210..1F23A ; So # [43] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-55B6 @@ -3288,24 +3425,14 @@ FFFC..FFFD ; So # [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 1F442..1F4F7 ; So # [182] EAR..CAMERA 1F4F9..1F4FC ; So # [4] VIDEO CAMERA..VIDEOCASSETTE 1F500..1F53D ; So # [62] TWISTED RIGHTWARDS ARROWS..DOWN-POINTING SMALL RED TRIANGLE +1F540..1F543 ; So # [4] CIRCLED CROSS POMMEE..NOTCHED LEFT SEMICIRCLE WITH THREE DOTS 1F550..1F567 ; So # [24] CLOCK FACE ONE OCLOCK..CLOCK FACE TWELVE-THIRTY -1F5FB..1F5FF ; So # [5] MOUNT FUJI..MOYAI -1F601..1F610 ; So # [16] GRINNING FACE WITH SMILING EYES..NEUTRAL FACE -1F612..1F614 ; So # [3] UNAMUSED FACE..PENSIVE FACE -1F616 ; So # CONFOUNDED FACE -1F618 ; So # FACE THROWING A KISS -1F61A ; So # KISSING FACE WITH CLOSED EYES -1F61C..1F61E ; So # [3] FACE WITH STUCK-OUT TONGUE AND WINKING EYE..DISAPPOINTED FACE -1F620..1F625 ; So # [6] ANGRY FACE..DISAPPOINTED BUT RELIEVED FACE -1F628..1F62B ; So # [4] FEARFUL FACE..TIRED FACE -1F62D ; So # LOUDLY CRYING FACE -1F630..1F633 ; So # [4] FACE WITH OPEN MOUTH AND COLD SWEAT..FLUSHED FACE -1F635..1F640 ; So # [12] DIZZY FACE..WEARY CAT FACE +1F5FB..1F640 ; So # [70] MOUNT FUJI..WEARY CAT FACE 1F645..1F64F ; So # [11] FACE WITH NO GOOD GESTURE..PERSON WITH FOLDED HANDS 1F680..1F6C5 ; So # [70] ROCKET..LEFT LUGGAGE 1F700..1F773 ; So # [116] ALCHEMICAL SYMBOL FOR QUINTESSENCE..ALCHEMICAL SYMBOL FOR HALF OUNCE -# Total code points: 4398 +# Total code points: 4404 # ================================================ diff --git a/lib/unicore/extracted/DJoinGroup.txt b/lib/unicore/extracted/DJoinGroup.txt index 5958abbb84..bf3f10c8eb 100644 --- a/lib/unicore/extracted/DJoinGroup.txt +++ b/lib/unicore/extracted/DJoinGroup.txt @@ -1,8 +1,8 @@ -# DerivedJoiningGroup-6.0.0.txt -# Date: 2010-07-17, 22:46:14 GMT [MD] +# 
DerivedJoiningGroup-6.1.0.txt +# Date: 2011-07-25, 00:54:14 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ @@ -48,8 +48,9 @@ 066E ; Beh # Lo ARABIC LETTER DOTLESS BEH 0679..0680 ; Beh # Lo [8] ARABIC LETTER TTEH..ARABIC LETTER BEHEH 0750..0756 ; Beh # Lo [7] ARABIC LETTER BEH WITH THREE DOTS HORIZONTALLY BELOW..ARABIC LETTER BEH WITH SMALL V +08A0 ; Beh # Lo ARABIC LETTER BEH WITH SMALL V BELOW -# Total code points: 19 +# Total code points: 20 # ================================================ @@ -86,8 +87,9 @@ 0641 ; Feh # Lo ARABIC LETTER FEH 06A1..06A6 ; Feh # Lo [6] ARABIC LETTER DOTLESS FEH..ARABIC LETTER PEHEH 0760..0761 ; Feh # Lo [2] ARABIC LETTER FEH WITH TWO DOTS BELOW..ARABIC LETTER FEH WITH THREE DOTS POINTING UPWARDS BELOW +08A4 ; Feh # Lo ARABIC LETTER FEH WITH DOT BELOW AND THREE DOTS ABOVE -# Total code points: 9 +# Total code points: 10 # ================================================ @@ -121,8 +123,9 @@ 076E..076F ; Hah # Lo [2] ARABIC LETTER HAH WITH SMALL ARABIC LETTER TAH BELOW..ARABIC LETTER HAH WITH SMALL ARABIC LETTER TAH AND TWO DOTS 0772 ; Hah # Lo ARABIC LETTER HAH WITH SMALL ARABIC LETTER TAH ABOVE 077C ; Hah # Lo ARABIC LETTER HAH WITH EXTENDED ARABIC-INDIC DIGIT FOUR BELOW +08A2 ; Hah # Lo ARABIC LETTER JEEM WITH TWO DOTS ABOVE -# Total code points: 17 +# Total code points: 18 # ================================================ @@ -180,8 +183,9 @@ 0644 ; Lam # Lo ARABIC LETTER LAM 06B5..06B8 ; Lam # Lo [4] ARABIC LETTER LAM WITH SMALL V..ARABIC LETTER LAM WITH THREE DOTS BELOW 076A ; Lam # Lo ARABIC LETTER LAM WITH BAR +08A6 ; Lam # Lo ARABIC LETTER LAM WITH DOUBLE BAR -# Total code points: 6 +# Total code points: 7 # ================================================ @@ -193,8 +197,9 @@ 0645 ; Meem # Lo ARABIC LETTER MEEM 0765..0766 ; 
Meem # Lo [2] ARABIC LETTER MEEM WITH DOT ABOVE..ARABIC LETTER MEEM WITH DOT BELOW +08A7 ; Meem # Lo ARABIC LETTER MEEM WITH THREE DOTS ABOVE -# Total code points: 3 +# Total code points: 4 # ================================================ @@ -227,8 +232,9 @@ 0642 ; Qaf # Lo ARABIC LETTER QAF 066F ; Qaf # Lo ARABIC LETTER DOTLESS QAF 06A7..06A8 ; Qaf # Lo [2] ARABIC LETTER QAF WITH DOT ABOVE..ARABIC LETTER QAF WITH THREE DOTS ABOVE +08A5 ; Qaf # Lo ARABIC LETTER QAF WITH DOT BELOW -# Total code points: 4 +# Total code points: 5 # ================================================ @@ -244,8 +250,9 @@ 075B ; Reh # Lo ARABIC LETTER REH WITH STROKE 076B..076C ; Reh # Lo [2] ARABIC LETTER REH WITH TWO DOTS VERTICALLY ABOVE..ARABIC LETTER REH WITH HAMZA ABOVE 0771 ; Reh # Lo ARABIC LETTER REH WITH SMALL ARABIC LETTER TAH AND TWO DOTS +08AA ; Reh # Lo ARABIC LETTER REH WITH LOOP -# Total code points: 16 +# Total code points: 17 # ================================================ @@ -301,8 +308,9 @@ 0637..0638 ; Tah # Lo [2] ARABIC LETTER TAH..ARABIC LETTER ZAH 069F ; Tah # Lo ARABIC LETTER TAH WITH THREE DOTS ABOVE +08A3 ; Tah # Lo ARABIC LETTER TAH WITH TWO DOTS ABOVE -# Total code points: 3 +# Total code points: 4 # ================================================ @@ -332,8 +340,9 @@ 06C4..06CB ; Waw # Lo [8] ARABIC LETTER WAW WITH RING..ARABIC LETTER VE 06CF ; Waw # Lo ARABIC LETTER WAW WITH DOT ABOVE 0778..0779 ; Waw # Lo [2] ARABIC LETTER WAW WITH EXTENDED ARABIC-INDIC DIGIT TWO ABOVE..ARABIC LETTER WAW WITH EXTENDED ARABIC-INDIC DIGIT THREE ABOVE +08AB ; Waw # Lo ARABIC LETTER WAW WITH DOT WITHIN -# Total code points: 15 +# Total code points: 16 # ================================================ @@ -349,8 +358,9 @@ 0678 ; Yeh # Lo ARABIC LETTER HIGH HAMZA YEH 06D0..06D1 ; Yeh # Lo [2] ARABIC LETTER E..ARABIC LETTER YEH WITH THREE DOTS BELOW 0777 ; Yeh # Lo ARABIC LETTER FARSI YEH WITH EXTENDED ARABIC-INDIC DIGIT FOUR BELOW +08A8..08A9 ; Yeh # Lo [2] ARABIC LETTER YEH 
WITH TWO DOTS BELOW AND HAMZA ABOVE..ARABIC LETTER YEH WITH TWO DOTS BELOW AND DOT ABOVE -# Total code points: 8 +# Total code points: 10 # ================================================ @@ -421,4 +431,10 @@ # Total code points: 1 +# ================================================ + +08AC ; Rohingya_Yeh # Lo ARABIC LETTER ROHINGYA YEH + +# Total code points: 1 + # EOF diff --git a/lib/unicore/extracted/DJoinType.txt b/lib/unicore/extracted/DJoinType.txt index 32272c7894..f9d7c7af9c 100644 --- a/lib/unicore/extracted/DJoinType.txt +++ b/lib/unicore/extracted/DJoinType.txt @@ -1,8 +1,8 @@ -# DerivedJoiningType-6.0.0.txt -# Date: 2010-08-19, 00:48:10 GMT [MD] +# DerivedJoiningType-6.1.0.txt +# Date: 2011-11-27, 05:10:23 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ @@ -58,8 +58,15 @@ 0775..0777 ; D # Lo [3] ARABIC LETTER FARSI YEH WITH EXTENDED ARABIC-INDIC DIGIT TWO ABOVE..ARABIC LETTER FARSI YEH WITH EXTENDED ARABIC-INDIC DIGIT FOUR BELOW 077A..077F ; D # Lo [6] ARABIC LETTER YEH BARREE WITH EXTENDED ARABIC-INDIC DIGIT TWO ABOVE..ARABIC LETTER KAF WITH TWO DOTS ABOVE 07CA..07EA ; D # Lo [33] NKO LETTER A..NKO LETTER JONA RA +0841..0845 ; D # Lo [5] MANDAIC LETTER AB..MANDAIC LETTER USHENNA +0847..0848 ; D # Lo [2] MANDAIC LETTER IT..MANDAIC LETTER ATT +084A..084E ; D # Lo [5] MANDAIC LETTER AK..MANDAIC LETTER AS +0850..0853 ; D # Lo [4] MANDAIC LETTER AP..MANDAIC LETTER AR +0855 ; D # Lo MANDAIC LETTER AT +08A0 ; D # Lo ARABIC LETTER BEH WITH SMALL V BELOW +08A2..08A9 ; D # Lo [8] ARABIC LETTER JEEM WITH TWO DOTS ABOVE..ARABIC LETTER YEH WITH TWO DOTS BELOW AND DOT ABOVE -# Total code points: 189 +# Total code points: 215 # ================================================ @@ -93,8 +100,14 @@ 0771 ; R # Lo ARABIC LETTER REH WITH SMALL ARABIC LETTER TAH AND TWO DOTS 
0773..0774 ; R # Lo [2] ARABIC LETTER ALEF WITH EXTENDED ARABIC-INDIC DIGIT TWO ABOVE..ARABIC LETTER ALEF WITH EXTENDED ARABIC-INDIC DIGIT THREE ABOVE 0778..0779 ; R # Lo [2] ARABIC LETTER WAW WITH EXTENDED ARABIC-INDIC DIGIT TWO ABOVE..ARABIC LETTER WAW WITH EXTENDED ARABIC-INDIC DIGIT THREE ABOVE +0840 ; R # Lo MANDAIC LETTER HALQA +0846 ; R # Lo MANDAIC LETTER AZ +0849 ; R # Lo MANDAIC LETTER AKSA +084F ; R # Lo MANDAIC LETTER IN +0854 ; R # Lo MANDAIC LETTER ASH +08AA..08AC ; R # Lo [3] ARABIC LETTER REH WITH LOOP..ARABIC LETTER ROHINGYA YEH -# Total code points: 74 +# Total code points: 82 # ================================================ @@ -126,6 +139,7 @@ 0825..0827 ; T # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U 0829..082D ; T # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA 0859..085B ; T # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK +08E4..08FE ; T # Mn [27] ARABIC CURLY FATHA..ARABIC DAMMA WITH DOT 0900..0902 ; T # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA 093A ; T # Mn DEVANAGARI VOWEL SIGN OE 093C ; T # Mn DEVANAGARI SIGN NUKTA @@ -211,7 +225,7 @@ 1732..1734 ; T # Mn [3] HANUNOO VOWEL SIGN I..HANUNOO SIGN PAMUDPOD 1752..1753 ; T # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U 1772..1773 ; T # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U -17B4..17B5 ; T # Cf [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA +17B4..17B5 ; T # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA 17B7..17BD ; T # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA 17C6 ; T # Mn KHMER SIGN NIKAHIT 17C9..17D3 ; T # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT @@ -239,6 +253,7 @@ 1B80..1B81 ; T # Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR 1BA2..1BA5 ; T # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU 1BA8..1BA9 ; T # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG +1BAB ; T # Mn SUNDANESE SIGN VIRAMA 1BE6 ; T # Mn BATAK SIGN 
TOMPI 1BE8..1BE9 ; T # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE 1BED ; T # Mn BATAK VOWEL SIGN KARO O @@ -249,6 +264,7 @@ 1CD4..1CE0 ; T # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA 1CE2..1CE8 ; T # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL 1CED ; T # Mn VEDIC SIGN TIRYAK +1CF4 ; T # Mn VEDIC TONE CANDRA ABOVE 1DC0..1DE6 ; T # Mn [39] COMBINING DOTTED GRAVE ACCENT..COMBINING LATIN SMALL LETTER Z 1DFC..1DFF ; T # Mn [4] COMBINING DOUBLE INVERTED BREVE BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW 200B ; T # Cf ZERO WIDTH SPACE @@ -264,11 +280,12 @@ 2CEF..2CF1 ; T # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS 2D7F ; T # Mn TIFINAGH CONSONANT JOINER 2DE0..2DFF ; T # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS -302A..302F ; T # Mn [6] IDEOGRAPHIC LEVEL TONE MARK..HANGUL DOUBLE DOT TONE MARK +302A..302D ; T # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK 3099..309A ; T # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK A66F ; T # Mn COMBINING CYRILLIC VZMET A670..A672 ; T # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN -A67C..A67D ; T # Mn [2] COMBINING CYRILLIC KAVYKA..COMBINING CYRILLIC PAYEROK +A674..A67D ; T # Mn [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK +A69F ; T # Mn COMBINING CYRILLIC LETTER IOTIFIED E A6F0..A6F1 ; T # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS A802 ; T # Mn SYLOTI NAGRI SIGN DVISVARA A806 ; T # Mn SYLOTI NAGRI SIGN HASANTA @@ -292,6 +309,8 @@ AAB2..AAB4 ; T # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U AAB7..AAB8 ; T # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA AABE..AABF ; T # Mn [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK AAC1 ; T # Mn TAI VIET TONE MAI THO +AAEC..AAED ; T # Mn [2] MEETEI MAYEK VOWEL SIGN 
UU..MEETEI MAYEK VOWEL SIGN AAI +AAF6 ; T # Mn MEETEI MAYEK VIRAMA ABE5 ; T # Mn MEETEI MAYEK VOWEL SIGN ANAP ABE8 ; T # Mn MEETEI MAYEK VOWEL SIGN UNAP ABED ; T # Mn MEETEI MAYEK APUN IYEK @@ -312,6 +331,16 @@ FFF9..FFFB ; T # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATI 110B3..110B6 ; T # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI 110B9..110BA ; T # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA 110BD ; T # Cf KAITHI NUMBER SIGN +11100..11102 ; T # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11127..1112B ; T # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112D..11134 ; T # Mn [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA +11180..11181 ; T # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +111B6..111BE ; T # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +116AB ; T # Mn TAKRI SIGN ANUSVARA +116AD ; T # Mn TAKRI VOWEL SIGN AA +116B0..116B5 ; T # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +116B7 ; T # Mn TAKRI SIGN NUKTA +16F8F..16F92 ; T # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW 1D167..1D169 ; T # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 1D173..1D17A ; T # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE 1D17B..1D182 ; T # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE @@ -322,6 +351,6 @@ E0001 ; T # Cf LANGUAGE TAG E0020..E007F ; T # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; T # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 1344 +# Total code points: 1423 # EOF diff --git a/lib/unicore/extracted/DLineBreak.txt b/lib/unicore/extracted/DLineBreak.txt index 296b31d203..c2bae071d5 100644 --- a/lib/unicore/extracted/DLineBreak.txt +++ b/lib/unicore/extracted/DLineBreak.txt @@ -1,8 +1,8 @@ -# DerivedLineBreak-6.0.0.txt -# Date: 2010-08-19, 00:48:10 GMT [MD] +# DerivedLineBreak-6.1.0.txt +# Date: 2011-11-27, 05:10:24 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. 
+# Copyright (c) 1991-2011 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ @@ -23,8 +23,8 @@ E000..F8FF ; XX # Co [6400] <private-use-E000>..<private-use-F8FF> F0000..FFFFD ; XX # Co [65534] <private-use-F0000>..<private-use-FFFFD> 100000..10FFFD; XX # Co [65534] <private-use-100000>..<private-use-10FFFD> -# The above property value applies to 781599 code points not listed here. -# Total code points: 919067 +# The above property value applies to 780870 code points not listed here. +# Total code points: 918338 # ================================================ @@ -263,43 +263,18 @@ FF64 ; CL # Po HALFWIDTH IDEOGRAPHIC COMMA 301C ; NS # Pd WAVE DASH 303B ; NS # Lm VERTICAL IDEOGRAPHIC ITERATION MARK 303C ; NS # Lo MASU MARK -3041 ; NS # Lo HIRAGANA LETTER SMALL A -3043 ; NS # Lo HIRAGANA LETTER SMALL I -3045 ; NS # Lo HIRAGANA LETTER SMALL U -3047 ; NS # Lo HIRAGANA LETTER SMALL E -3049 ; NS # Lo HIRAGANA LETTER SMALL O -3063 ; NS # Lo HIRAGANA LETTER SMALL TU -3083 ; NS # Lo HIRAGANA LETTER SMALL YA -3085 ; NS # Lo HIRAGANA LETTER SMALL YU -3087 ; NS # Lo HIRAGANA LETTER SMALL YO -308E ; NS # Lo HIRAGANA LETTER SMALL WA -3095..3096 ; NS # Lo [2] HIRAGANA LETTER SMALL KA..HIRAGANA LETTER SMALL KE 309B..309C ; NS # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK 309D..309E ; NS # Lm [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK 30A0 ; NS # Pd KATAKANA-HIRAGANA DOUBLE HYPHEN -30A1 ; NS # Lo KATAKANA LETTER SMALL A -30A3 ; NS # Lo KATAKANA LETTER SMALL I -30A5 ; NS # Lo KATAKANA LETTER SMALL U -30A7 ; NS # Lo KATAKANA LETTER SMALL E -30A9 ; NS # Lo KATAKANA LETTER SMALL O -30C3 ; NS # Lo KATAKANA LETTER SMALL TU -30E3 ; NS # Lo KATAKANA LETTER SMALL YA -30E5 ; NS # Lo KATAKANA LETTER SMALL YU -30E7 ; NS # Lo KATAKANA LETTER SMALL YO -30EE ; NS # Lo KATAKANA LETTER SMALL WA -30F5..30F6 ; NS # Lo [2] KATAKANA LETTER SMALL 
KA..KATAKANA LETTER SMALL KE 30FB ; NS # Po KATAKANA MIDDLE DOT -30FC..30FE ; NS # Lm [3] KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA VOICED ITERATION MARK -31F0..31FF ; NS # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO +30FD..30FE ; NS # Lm [2] KATAKANA ITERATION MARK..KATAKANA VOICED ITERATION MARK A015 ; NS # Lm YI SYLLABLE WU FE54..FE55 ; NS # Po [2] SMALL SEMICOLON..SMALL COLON FF1A..FF1B ; NS # Po [2] FULLWIDTH COLON..FULLWIDTH SEMICOLON FF65 ; NS # Po HALFWIDTH KATAKANA MIDDLE DOT -FF67..FF6F ; NS # Lo [9] HALFWIDTH KATAKANA LETTER SMALL A..HALFWIDTH KATAKANA LETTER SMALL TU -FF70 ; NS # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK FF9E..FF9F ; NS # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK -# Total code points: 77 +# Total code points: 26 # ================================================ @@ -313,7 +288,7 @@ FF9E..FF9F ; NS # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KA 06D4 ; EX # Po ARABIC FULL STOP 07F9 ; EX # Po NKO EXCLAMATION MARK 0F0D..0F11 ; EX # Po [5] TIBETAN MARK SHAD..TIBETAN MARK RIN CHEN SPUNGS SHAD -0F14 ; EX # So TIBETAN MARK GTER TSHEG +0F14 ; EX # Po TIBETAN MARK GTER TSHEG 1802..1803 ; EX # Po [2] MONGOLIAN COMMA..MONGOLIAN FULL STOP 1808..1809 ; EX # Po [2] MONGOLIAN MANCHU COMMA..MONGOLIAN MANCHU FULL STOP 1944..1945 ; EX # Po [2] LIMBU EXCLAMATION MARK..LIMBU QUESTION MARK @@ -364,6 +339,7 @@ FE13..FE14 ; IS # Po [2] PRESENTATION FORM FOR VERTICAL COLON..PRESENTATION 005C ; PR # Po REVERSE SOLIDUS 00A3..00A5 ; PR # Sc [3] POUND SIGN..YEN SIGN 00B1 ; PR # Sm PLUS-MINUS SIGN +058F ; PR # Sc ARMENIAN DRAM SIGN 09FB ; PR # Sc BENGALI GANDA MARK 0AF1 ; PR # Sc GUJARATI RUPEE SIGN 0BF9 ; PR # Sc TAMIL RUPEE SIGN @@ -379,7 +355,7 @@ FF04 ; PR # Sc FULLWIDTH DOLLAR SIGN FFE1 ; PR # Sc FULLWIDTH POUND SIGN FFE5..FFE6 ; PR # Sc [2] FULLWIDTH YEN SIGN..FULLWIDTH WON SIGN -# Total code points: 44 +# Total code points: 45 # 
================================================ @@ -448,9 +424,13 @@ AA50..AA59 ; NU # Nd [10] CHAM DIGIT ZERO..CHAM DIGIT NINE ABF0..ABF9 ; NU # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE 104A0..104A9 ; NU # Nd [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE 11066..1106F ; NU # Nd [10] BRAHMI DIGIT ZERO..BRAHMI DIGIT NINE +110F0..110F9 ; NU # Nd [10] SORA SOMPENG DIGIT ZERO..SORA SOMPENG DIGIT NINE +11136..1113F ; NU # Nd [10] CHAKMA DIGIT ZERO..CHAKMA DIGIT NINE +111D0..111D9 ; NU # Nd [10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE +116C0..116C9 ; NU # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE 1D7CE..1D7FF ; NU # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE -# Total code points: 412 +# Total code points: 452 # ================================================ @@ -519,10 +499,8 @@ ABF0..ABF9 ; NU # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE 0561..0587 ; AL # L& [39] ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LIGATURE ECH YIWN 05C0 ; AL # Po HEBREW PUNCTUATION PASEQ 05C3 ; AL # Po HEBREW PUNCTUATION SOF PASUQ -05D0..05EA ; AL # Lo [27] HEBREW LETTER ALEF..HEBREW LETTER TAV -05F0..05F2 ; AL # Lo [3] HEBREW LIGATURE YIDDISH DOUBLE VAV..HEBREW LIGATURE YIDDISH DOUBLE YOD 05F3..05F4 ; AL # Po [2] HEBREW PUNCTUATION GERESH..HEBREW PUNCTUATION GERSHAYIM -0600..0603 ; AL # Cf [4] ARABIC NUMBER SIGN..ARABIC SIGN SAFHA +0600..0604 ; AL # Cf [5] ARABIC NUMBER SIGN..ARABIC SIGN SAMVAT 0606..0608 ; AL # Sm [3] ARABIC-INDIC CUBE ROOT..ARABIC RAY 060E..060F ; AL # So [2] ARABIC POETIC VERSE SIGN..ARABIC SIGN MISRA 0620..063F ; AL # Lo [32] ARABIC LETTER KASHMIRI YEH..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE @@ -558,6 +536,8 @@ ABF0..ABF9 ; NU # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE 0830..083E ; AL # Po [15] SAMARITAN PUNCTUATION NEQUDAA..SAMARITAN PUNCTUATION ANNAAU 0840..0858 ; AL # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN 085E ; AL # Po MANDAIC PUNCTUATION +08A0 ; AL # Lo ARABIC LETTER BEH WITH 
SMALL V BELOW +08A2..08AC ; AL # Lo [11] ARABIC LETTER JEEM WITH TWO DOTS ABOVE..ARABIC LETTER ROHINGYA YEH 0904..0939 ; AL # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA 093D ; AL # Lo DEVANAGARI SIGN AVAGRAHA 0950 ; AL # Lo DEVANAGARI OM @@ -598,6 +578,7 @@ ABF0..ABF9 ; NU # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE 0ABD ; AL # Lo GUJARATI SIGN AVAGRAHA 0AD0 ; AL # Lo GUJARATI OM 0AE0..0AE1 ; AL # Lo [2] GUJARATI LETTER VOCALIC RR..GUJARATI LETTER VOCALIC LL +0AF0 ; AL # Po GUJARATI ABBREVIATION SIGN 0B05..0B0C ; AL # Lo [8] ORIYA LETTER A..ORIYA LETTER VOCALIC L 0B0F..0B10 ; AL # Lo [2] ORIYA LETTER E..ORIYA LETTER AI 0B13..0B28 ; AL # Lo [22] ORIYA LETTER O..ORIYA LETTER NA @@ -676,9 +657,12 @@ ABF0..ABF9 ; NU # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE 0FD5..0FD8 ; AL # So [4] RIGHT-FACING SVASTI SIGN..LEFT-FACING SVASTI SIGN WITH DOTS 104C..104F ; AL # Po [4] MYANMAR SYMBOL LOCATIVE..MYANMAR SYMBOL GENITIVE 10A0..10C5 ; AL # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; AL # L& GEORGIAN CAPITAL LETTER YN +10CD ; AL # L& GEORGIAN CAPITAL LETTER AEN 10D0..10FA ; AL # Lo [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN 10FB ; AL # Po GEORGIAN PARAGRAPH SEPARATOR 10FC ; AL # Lm MODIFIER LETTER GEORGIAN NAR +10FD..10FF ; AL # Lo [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN 1200..1248 ; AL # Lo [73] ETHIOPIC SYLLABLE HA..ETHIOPIC SYLLABLE QWA 124A..124D ; AL # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE 1250..1256 ; AL # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO @@ -695,7 +679,7 @@ ABF0..ABF9 ; NU # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE 12D8..1310 ; AL # Lo [57] ETHIOPIC SYLLABLE ZA..ETHIOPIC SYLLABLE GWA 1312..1315 ; AL # Lo [4] ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE 1318..135A ; AL # Lo [67] ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA -1360 ; AL # So ETHIOPIC SECTION MARK +1360 ; AL # Po ETHIOPIC SECTION MARK 1362..1368 ; AL # Po [7] ETHIOPIC FULL 
STOP..ETHIOPIC PARAGRAPH SEPARATOR 1369..137C ; AL # No [20] ETHIOPIC DIGIT ONE..ETHIOPIC NUMBER TEN THOUSAND 1380..138F ; AL # Lo [16] ETHIOPIC SYLLABLE SEBATBEIT MWA..ETHIOPIC SYLLABLE PWE @@ -736,18 +720,20 @@ ABF0..ABF9 ; NU # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE 1B74..1B7C ; AL # So [9] BALINESE MUSICAL SYMBOL RIGHT-HAND OPEN DUG..BALINESE MUSICAL SYMBOL LEFT-HAND OPEN PING 1B83..1BA0 ; AL # Lo [30] SUNDANESE LETTER A..SUNDANESE LETTER HA 1BAE..1BAF ; AL # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA -1BC0..1BE5 ; AL # Lo [38] BATAK LETTER A..BATAK LETTER U +1BBA..1BE5 ; AL # Lo [44] SUNDANESE AVAGRAHA..BATAK LETTER U 1BFC..1BFF ; AL # Po [4] BATAK SYMBOL BINDU NA METEK..BATAK SYMBOL BINDU PANGOLAT 1C00..1C23 ; AL # Lo [36] LEPCHA LETTER KA..LEPCHA LETTER A 1C4D..1C4F ; AL # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA 1C5A..1C77 ; AL # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH 1C78..1C7D ; AL # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD +1CC0..1CC7 ; AL # Po [8] SUNDANESE PUNCTUATION BINDU SURYA..SUNDANESE PUNCTUATION BINDU BA SATANGA 1CD3 ; AL # Po VEDIC SIGN NIHSHVASA 1CE9..1CEC ; AL # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL 1CEE..1CF1 ; AL # Lo [4] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ANUSVARA UBHAYATO MUKHA +1CF5..1CF6 ; AL # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA 1D00..1D2B ; AL # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL -1D2C..1D61 ; AL # Lm [54] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI -1D62..1D77 ; AL # L& [22] LATIN SUBSCRIPT SMALL LETTER I..LATIN SMALL LETTER TURNED G +1D2C..1D6A ; AL # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1D6B..1D77 ; AL # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G 1D78 ; AL # Lm MODIFIER LETTER CYRILLIC EN 1D79..1D9A ; AL # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK 1D9B..1DBF ; AL # Lm [37] MODIFIER LETTER 
SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA @@ -941,9 +927,7 @@ ABF0..ABF9 ; NU # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE 2764..2767 ; AL # So [4] HEAVY BLACK HEART..ROTATED FLORAL HEART BULLET 2794..27BF ; AL # So [44] HEAVY WIDE-HEADED RIGHTWARDS ARROW..DOUBLE CURLY LOOP 27C0..27C4 ; AL # Sm [5] THREE DIMENSIONAL ANGLE..OPEN SUPERSET -27C7..27CA ; AL # Sm [4] OR WITH DOT INSIDE..VERTICAL BAR WITH HORIZONTAL STROKE -27CC ; AL # Sm LONG DIVISION -27CE..27E5 ; AL # Sm [24] SQUARED LOGICAL AND..WHITE SQUARE WITH RIGHTWARDS TICK +27C7..27E5 ; AL # Sm [31] OR WITH DOT INSIDE..WHITE SQUARE WITH RIGHTWARDS TICK 27F0..27FF ; AL # Sm [16] UPWARDS QUADRUPLE ARROW..LONG RIGHTWARDS SQUIGGLE ARROW 2800..28FF ; AL # So [256] BRAILLE PATTERN BLANK..BRAILLE PATTERN DOTS-12345678 2900..2982 ; AL # Sm [131] RIGHTWARDS TWO-HEADED ARROW WITH VERTICAL STROKE..Z NOTATION TYPE COLON @@ -957,14 +941,17 @@ ABF0..ABF9 ; NU # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE 2B50..2B54 ; AL # So [5] WHITE MEDIUM STAR..WHITE RIGHT-POINTING PENTAGON 2C00..2C2E ; AL # L& [47] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE 2C30..2C5E ; AL # L& [47] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER LATINATE MYSLITE -2C60..2C7C ; AL # L& [29] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN SUBSCRIPT SMALL LETTER J -2C7D ; AL # Lm MODIFIER LETTER CAPITAL V +2C60..2C7B ; AL # L& [28] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN LETTER SMALL CAPITAL TURNED E +2C7C..2C7D ; AL # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V 2C7E..2CE4 ; AL # L& [103] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SYMBOL KAI 2CE5..2CEA ; AL # So [6] COPTIC SYMBOL MI RO..COPTIC SYMBOL SHIMA SIMA 2CEB..2CEE ; AL # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CF2..2CF3 ; AL # L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI 2CFD ; AL # No COPTIC FRACTION ONE HALF 
2D00..2D25 ; AL # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE -2D30..2D65 ; AL # Lo [54] TIFINAGH LETTER YA..TIFINAGH LETTER YAZZ +2D27 ; AL # L& GEORGIAN SMALL LETTER YN +2D2D ; AL # L& GEORGIAN SMALL LETTER AEN +2D30..2D67 ; AL # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO 2D6F ; AL # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK 2D80..2D96 ; AL # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE 2DA0..2DA6 ; AL # Lo [7] ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO @@ -980,6 +967,8 @@ ABF0..ABF9 ; NU # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE 2E1B ; AL # Po TILDE WITH RING ABOVE 2E1E..2E1F ; AL # Po [2] TILDE WITH DOT ABOVE..TILDE WITH DOT BELOW 2E2F ; AL # Lm VERTICAL TILDE +2E32 ; AL # Po TURNED COMMA +2E35..2E39 ; AL # Po [5] TURNED SEMICOLON..TOP HALF SECTION SIGN 4DC0..4DFF ; AL # So [64] HEXAGRAM FOR THE CREATIVE HEAVEN..HEXAGRAM FOR BEFORE COMPLETION A4D0..A4F7 ; AL # Lo [40] LISU LETTER BA..LISU LETTER OE A4F8..A4FD ; AL # Lm [6] LISU LETTER TONE MYA TI..LISU LETTER TONE MYA JEU @@ -1005,8 +994,9 @@ A771..A787 ; AL # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR A788 ; AL # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A789..A78A ; AL # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN A78B..A78E ; AL # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT -A790..A791 ; AL # L& [2] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER N WITH DESCENDER -A7A0..A7A9 ; AL # L& [10] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN SMALL LETTER S WITH OBLIQUE STROKE +A790..A793 ; AL # L& [4] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER C WITH BAR +A7A0..A7AA ; AL # L& [11] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN CAPITAL LETTER H WITH HOOK +A7F8..A7F9 ; AL # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE A7FA ; AL # L& LATIN LETTER SMALL CAPITAL TURNED M A7FB..A801 ; AL # Lo [7] LATIN EPIGRAPHIC LETTER 
REVERSED F..SYLOTI NAGRI LETTER I A803..A805 ; AL # Lo [3] SYLOTI NAGRI LETTER U..SYLOTI NAGRI LETTER O @@ -1033,6 +1023,9 @@ AA00..AA28 ; AL # Lo [41] CHAM LETTER A..CHAM LETTER HA AA40..AA42 ; AL # Lo [3] CHAM LETTER FINAL K..CHAM LETTER FINAL NG AA44..AA4B ; AL # Lo [8] CHAM LETTER FINAL CH..CHAM LETTER FINAL SS AA5C ; AL # Po CHAM PUNCTUATION SPIRAL +AAE0..AAEA ; AL # Lo [11] MEETEI MAYEK LETTER E..MEETEI MAYEK LETTER SSA +AAF2 ; AL # Lo MEETEI MAYEK ANJI +AAF3..AAF4 ; AL # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK AB01..AB06 ; AL # Lo [6] ETHIOPIC SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO AB09..AB0E ; AL # Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO AB11..AB16 ; AL # Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO @@ -1041,15 +1034,8 @@ AB28..AB2E ; AL # Lo [7] ETHIOPIC SYLLABLE BBA..ETHIOPIC SYLLABLE BBO ABC0..ABE2 ; AL # Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM FB00..FB06 ; AL # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST FB13..FB17 ; AL # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH -FB1D ; AL # Lo HEBREW LETTER YOD WITH HIRIQ -FB1F..FB28 ; AL # Lo [10] HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBREW LETTER WIDE TAV FB29 ; AL # Sm HEBREW LETTER ALTERNATIVE PLUS SIGN -FB2A..FB36 ; AL # Lo [13] HEBREW LETTER SHIN WITH SHIN DOT..HEBREW LETTER ZAYIN WITH DAGESH -FB38..FB3C ; AL # Lo [5] HEBREW LETTER TET WITH DAGESH..HEBREW LETTER LAMED WITH DAGESH -FB3E ; AL # Lo HEBREW LETTER MEM WITH DAGESH -FB40..FB41 ; AL # Lo [2] HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER SAMEKH WITH DAGESH -FB43..FB44 ; AL # Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH -FB46..FBB1 ; AL # Lo [108] HEBREW LETTER TSADI WITH DAGESH..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM +FB50..FBB1 ; AL # Lo [98] ARABIC LETTER ALEF WASLA ISOLATED FORM..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM FBB2..FBC1 ; AL # Sk [16] ARABIC SYMBOL DOT ABOVE..ARABIC 
SYMBOL SMALL TAH BELOW FBD3..FD3D ; AL # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM FD50..FD8F ; AL # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM @@ -1108,6 +1094,8 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 10916..1091B ; AL # No [6] PHOENICIAN NUMBER ONE..PHOENICIAN NUMBER THREE 10920..10939 ; AL # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C 1093F ; AL # Po LYDIAN TRIANGULAR MARK +10980..109B7 ; AL # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA +109BE..109BF ; AL # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN 10A00 ; AL # Lo KHAROSHTHI LETTER A 10A10..10A13 ; AL # Lo [4] KHAROSHTHI LETTER KA..KHAROSHTHI LETTER GHA 10A15..10A17 ; AL # Lo [3] KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA @@ -1130,6 +1118,12 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 11083..110AF ; AL # Lo [45] KAITHI LETTER A..KAITHI LETTER HA 110BB..110BC ; AL # Po [2] KAITHI ABBREVIATION SIGN..KAITHI ENUMERATION SIGN 110BD ; AL # Cf KAITHI NUMBER SIGN +110D0..110E8 ; AL # Lo [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE +11103..11126 ; AL # Lo [36] CHAKMA LETTER AA..CHAKMA LETTER HAA +11183..111B2 ; AL # Lo [48] SHARADA LETTER A..SHARADA LETTER HA +111C1..111C4 ; AL # Lo [4] SHARADA SIGN AVAGRAHA..SHARADA OM +111C7 ; AL # Po SHARADA ABBREVIATION SIGN +11680..116AA ; AL # Lo [43] TAKRI LETTER A..TAKRI LETTER RRA 12000..1236E ; AL # Lo [879] CUNEIFORM SIGN A..CUNEIFORM SIGN ZUM 12400..12462 ; AL # Nl [99] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN OLD ASSYRIAN ONE QUARTER 13000..13257 ; AL # Lo [600] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH O006 @@ -1138,6 +1132,9 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1328A..13378 ; AL # Lo [239] EGYPTIAN HIEROGLYPH O037..EGYPTIAN HIEROGLYPH V011 1337C..1342E ; AL # Lo [179] EGYPTIAN 
HIEROGLYPH V012..EGYPTIAN HIEROGLYPH AA032 16800..16A38 ; AL # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ +16F00..16F44 ; AL # Lo [69] MIAO LETTER PA..MIAO LETTER HHA +16F50 ; AL # Lo MIAO LETTER NASALIZATION +16F93..16F9F ; AL # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 1D000..1D0F5 ; AL # So [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO 1D100..1D126 ; AL # So [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2 1D129..1D164 ; AL # So [60] MUSICAL SYMBOL MULTIPLE MEASURE REST..MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE @@ -1189,6 +1186,40 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1D7AA..1D7C2 ; AL # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA 1D7C3 ; AL # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL 1D7C4..1D7CB ; AL # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA +1EE00..1EE03 ; AL # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; AL # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; AL # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; AL # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; AL # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; AL # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; AL # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; AL # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; AL # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; AL # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; AL # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; AL # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; AL # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; AL # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; AL # Lo [2] ARABIC MATHEMATICAL 
TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; AL # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; AL # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; AL # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; AL # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; AL # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; AL # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; AL # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; AL # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; AL # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; AL # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; AL # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; AL # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; AL # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; AL # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; AL # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; AL # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; AL # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; AL # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN +1EEF0..1EEF1 ; AL # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL 1F000..1F02B ; AL # So [44] MAHJONG TILE EAST WIND..MAHJONG TILE BACK 1F030..1F093 ; AL # So [100] DOMINO TILE HORIZONTAL BACK..DOMINO TILE VERTICAL-06-06 1F0A0..1F0AE ; AL # So [15] PLAYING CARD BACK..PLAYING CARD KING OF SPADES @@ -1196,6 +1227,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1F0C1..1F0CF ; AL # So [15] PLAYING CARD ACE OF DIAMONDS..PLAYING CARD BLACK JOKER 
1F0D1..1F0DF ; AL # So [15] PLAYING CARD ACE OF CLUBS..PLAYING CARD WHITE JOKER 1F12E ; AL # So CIRCLED WZ +1F16A..1F16B ; AL # So [2] RAISED MC SIGN..RAISED MD SIGN 1F1E6..1F1FF ; AL # So [26] REGIONAL INDICATOR SYMBOL LETTER A..REGIONAL INDICATOR SYMBOL LETTER Z 1F300..1F320 ; AL # So [33] CYCLONE..SHOOTING STAR 1F330..1F335 ; AL # So [6] CHESTNUT..CACTUS @@ -1209,24 +1241,14 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1F442..1F4F7 ; AL # So [182] EAR..CAMERA 1F4F9..1F4FC ; AL # So [4] VIDEO CAMERA..VIDEOCASSETTE 1F500..1F53D ; AL # So [62] TWISTED RIGHTWARDS ARROWS..DOWN-POINTING SMALL RED TRIANGLE +1F540..1F543 ; AL # So [4] CIRCLED CROSS POMMEE..NOTCHED LEFT SEMICIRCLE WITH THREE DOTS 1F550..1F567 ; AL # So [24] CLOCK FACE ONE OCLOCK..CLOCK FACE TWELVE-THIRTY -1F5FB..1F5FF ; AL # So [5] MOUNT FUJI..MOYAI -1F601..1F610 ; AL # So [16] GRINNING FACE WITH SMILING EYES..NEUTRAL FACE -1F612..1F614 ; AL # So [3] UNAMUSED FACE..PENSIVE FACE -1F616 ; AL # So CONFOUNDED FACE -1F618 ; AL # So FACE THROWING A KISS -1F61A ; AL # So KISSING FACE WITH CLOSED EYES -1F61C..1F61E ; AL # So [3] FACE WITH STUCK-OUT TONGUE AND WINKING EYE..DISAPPOINTED FACE -1F620..1F625 ; AL # So [6] ANGRY FACE..DISAPPOINTED BUT RELIEVED FACE -1F628..1F62B ; AL # So [4] FEARFUL FACE..TIRED FACE -1F62D ; AL # So LOUDLY CRYING FACE -1F630..1F633 ; AL # So [4] FACE WITH OPEN MOUTH AND COLD SWEAT..FLUSHED FACE -1F635..1F640 ; AL # So [12] DIZZY FACE..WEARY CAT FACE +1F5FB..1F640 ; AL # So [70] MOUNT FUJI..WEARY CAT FACE 1F645..1F64F ; AL # So [11] FACE WITH NO GOOD GESTURE..PERSON WITH FOLDED HANDS 1F680..1F6C5 ; AL # So [70] ROCKET..LEFT LUGGAGE 1F700..1F773 ; AL # So [116] ALCHEMICAL SYMBOL FOR QUINTESSENCE..ALCHEMICAL SYMBOL FOR HALF OUNCE -# Total code points: 15797 +# Total code points: 16251 # ================================================ @@ -1293,14 +1315,12 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 3300..33FF ; ID # So [256] 
SQUARE APAATO..SQUARE GAL 3400..4DB5 ; ID # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5 4DB6..4DBF ; ID # Cn [10] <reserved-4DB6>..<reserved-4DBF> -4E00..9FCB ; ID # Lo [20940] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCB -9FCC..9FFF ; ID # Cn [52] <reserved-9FCC>..<reserved-9FFF> +4E00..9FCC ; ID # Lo [20941] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCC +9FCD..9FFF ; ID # Cn [51] <reserved-9FCD>..<reserved-9FFF> A000..A014 ; ID # Lo [21] YI SYLLABLE IT..YI SYLLABLE E A016..A48C ; ID # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR A490..A4C6 ; ID # So [55] YI RADICAL QOT..YI RADICAL KE -F900..FA2D ; ID # Lo [302] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA2D -FA2E..FA2F ; ID # Cn [2] <reserved-FA2E>..<reserved-FA2F> -FA30..FA6D ; ID # Lo [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D +F900..FA6D ; ID # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D FA6E..FA6F ; ID # Cn [2] <reserved-FA6E>..<reserved-FA6F> FA70..FAD9 ; ID # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 FADA..FAFF ; ID # Cn [38] <reserved-FADA>..<reserved-FAFF> @@ -1406,6 +1426,7 @@ FE19 ; IN # Po PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS 0825..0827 ; CM # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U 0829..082D ; CM # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA 0859..085B ; CM # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK +08E4..08FE ; CM # Mn [27] ARABIC CURLY FATHA..ARABIC DAMMA WITH DOT 0900..0902 ; CM # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA 0903 ; CM # Mc DEVANAGARI SIGN VISARGA 093A ; CM # Mn DEVANAGARI VOWEL SIGN OE @@ -1549,6 +1570,8 @@ FE19 ; IN # Po PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS 1BA6..1BA7 ; CM # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG 1BA8..1BA9 ; CM # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN 
PANEULEUNG 1BAA ; CM # Mc SUNDANESE SIGN PAMAAEH +1BAB ; CM # Mn SUNDANESE SIGN VIRAMA +1BAC..1BAD ; CM # Mc [2] SUNDANESE CONSONANT SIGN PASANGAN MA..SUNDANESE CONSONANT SIGN PASANGAN WA 1BE6 ; CM # Mn BATAK SIGN TOMPI 1BE7 ; CM # Mc BATAK VOWEL SIGN E 1BE8..1BE9 ; CM # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE @@ -1566,7 +1589,8 @@ FE19 ; IN # Po PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS 1CE1 ; CM # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA 1CE2..1CE8 ; CM # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL 1CED ; CM # Mn VEDIC SIGN TIRYAK -1CF2 ; CM # Mc VEDIC SIGN ARDHAVISARGA +1CF2..1CF3 ; CM # Mc [2] VEDIC SIGN ARDHAVISARGA..VEDIC SIGN ROTATED ARDHAVISARGA +1CF4 ; CM # Mn VEDIC TONE CANDRA ABOVE 1DC0..1DE6 ; CM # Mn [39] COMBINING DOTTED GRAVE ACCENT..COMBINING LATIN SMALL LETTER Z 1DFC..1DFF ; CM # Mn [4] COMBINING DOUBLE INVERTED BREVE BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW 200C..200F ; CM # Cf [4] ZERO WIDTH NON-JOINER..RIGHT-TO-LEFT MARK @@ -1580,11 +1604,13 @@ FE19 ; IN # Po PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS 2CEF..2CF1 ; CM # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS 2D7F ; CM # Mn TIFINAGH CONSONANT JOINER 2DE0..2DFF ; CM # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS -302A..302F ; CM # Mn [6] IDEOGRAPHIC LEVEL TONE MARK..HANGUL DOUBLE DOT TONE MARK +302A..302D ; CM # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK +302E..302F ; CM # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK 3099..309A ; CM # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK A66F ; CM # Mn COMBINING CYRILLIC VZMET A670..A672 ; CM # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN -A67C..A67D ; CM # Mn [2] COMBINING CYRILLIC KAVYKA..COMBINING CYRILLIC PAYEROK +A674..A67D ; CM # Mn [10] COMBINING CYRILLIC LETTER 
UKRAINIAN IE..COMBINING CYRILLIC PAYEROK +A69F ; CM # Mn COMBINING CYRILLIC LETTER IOTIFIED E A6F0..A6F1 ; CM # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS A802 ; CM # Mn SYLOTI NAGRI SIGN DVISVARA A806 ; CM # Mn SYLOTI NAGRI SIGN HASANTA @@ -1615,6 +1641,11 @@ AA35..AA36 ; CM # Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONANT SIGN WA AA43 ; CM # Mn CHAM CONSONANT SIGN FINAL NG AA4C ; CM # Mn CHAM CONSONANT SIGN FINAL M AA4D ; CM # Mc CHAM CONSONANT SIGN FINAL H +AAEB ; CM # Mc MEETEI MAYEK VOWEL SIGN II +AAEC..AAED ; CM # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI +AAEE..AAEF ; CM # Mc [2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU +AAF5 ; CM # Mc MEETEI MAYEK VOWEL SIGN VISARGA +AAF6 ; CM # Mn MEETEI MAYEK VIRAMA ABE3..ABE4 ; CM # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP ABE5 ; CM # Mn MEETEI MAYEK VOWEL SIGN ANAP ABE6..ABE7 ; CM # Mc [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP @@ -1642,6 +1673,24 @@ FFF9..FFFB ; CM # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTAT 110B3..110B6 ; CM # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI 110B7..110B8 ; CM # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU 110B9..110BA ; CM # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA +11100..11102 ; CM # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11127..1112B ; CM # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112C ; CM # Mc CHAKMA VOWEL SIGN E +1112D..11134 ; CM # Mn [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA +11180..11181 ; CM # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +11182 ; CM # Mc SHARADA SIGN VISARGA +111B3..111B5 ; CM # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II +111B6..111BE ; CM # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +111BF..111C0 ; CM # Mc [2] SHARADA VOWEL SIGN AU..SHARADA SIGN VIRAMA +116AB ; CM # Mn TAKRI SIGN ANUSVARA +116AC ; CM # Mc TAKRI SIGN VISARGA +116AD ; CM # Mn TAKRI VOWEL SIGN AA +116AE..116AF ; CM 
# Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II +116B0..116B5 ; CM # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +116B6 ; CM # Mc TAKRI SIGN VIRAMA +116B7 ; CM # Mn TAKRI SIGN NUKTA +16F51..16F7E ; CM # Mc [46] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN NG +16F8F..16F92 ; CM # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW 1D165..1D166 ; CM # Mc [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM 1D167..1D169 ; CM # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 1D16D..1D172 ; CM # Mc [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5 @@ -1654,7 +1703,7 @@ E0001 ; CM # Cf LANGUAGE TAG E0020..E007F ; CM # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; CM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 1483 +# Total code points: 1628 # ================================================ @@ -1724,6 +1773,7 @@ A874..A875 ; BB # Po [2] PHAGS-PA SINGLE HEAD MARK..PHAGS-PA DOUBLE HEAD MA 2E19 ; BA # Po PALM BRANCH 2E2A..2E2D ; BA # Po [4] TWO DOTS OVER ONE DOT PUNCTUATION..FIVE DOT MARK 2E30..2E31 ; BA # Po [2] RING POINT..WORD SEPARATOR MIDDLE DOT +2E33..2E34 ; BA # Po [2] RAISED DOT..RAISED COMMA A4FE..A4FF ; BA # Po [2] LISU PUNCTUATION COMMA..LISU PUNCTUATION FULL STOP A60D ; BA # Po VAI COMMA A60F ; BA # Po VAI QUESTION MARK @@ -1732,9 +1782,9 @@ A8CE..A8CF ; BA # Po [2] SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA A92E..A92F ; BA # Po [2] KAYAH LI SIGN CWI..KAYAH LI SIGN SHYA A9C7..A9C9 ; BA # Po [3] JAVANESE PADA PANGKAT..JAVANESE PADA LUNGSI AA5D..AA5F ; BA # Po [3] CHAM PUNCTUATION DANDA..CHAM PUNCTUATION TRIPLE DANDA +AAF0..AAF1 ; BA # Po [2] MEETEI MAYEK CHEIKHAN..MEETEI MAYEK AHANG KHUDAM ABEB ; BA # Po MEETEI MAYEK CHEIKHEI -10100..10101 ; BA # Po [2] AEGEAN WORD SEPARATOR LINE..AEGEAN WORD SEPARATOR DOT -10102 ; BA # So AEGEAN CHECK MARK +10100..10102 ; BA # Po [3] AEGEAN WORD SEPARATOR LINE..AEGEAN CHECK MARK 1039F ; BA # Po UGARITIC WORD DIVIDER 103D0 ; BA # 
Po OLD PERSIAN WORD DIVIDER 10857 ; BA # Po IMPERIAL ARAMAIC SECTION SIGN @@ -1743,9 +1793,12 @@ ABEB ; BA # Po MEETEI MAYEK CHEIKHEI 10B39..10B3F ; BA # Po [7] AVESTAN ABBREVIATION MARK..LARGE ONE RING OVER TWO RINGS PUNCTUATION 11047..11048 ; BA # Po [2] BRAHMI DANDA..BRAHMI DOUBLE DANDA 110BE..110C1 ; BA # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA +11140..11143 ; BA # Po [4] CHAKMA SECTION MARK..CHAKMA QUESTION MARK +111C5..111C6 ; BA # Po [2] SHARADA DANDA..SHARADA DOUBLE DANDA +111C8 ; BA # Po SHARADA SEPARATOR 12470..12473 ; BA # Po [4] CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER..CUNEIFORM PUNCTUATION SIGN DIAGONAL TRICOLON -# Total code points: 140 +# Total code points: 151 # ================================================ @@ -1820,7 +1873,7 @@ FFFC ; CB # So OBJECT REPLACEMENT CHARACTER 0EC0..0EC4 ; SA # Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI 0EC6 ; SA # Lm LAO KO LA 0EC8..0ECD ; SA # Mn [6] LAO TONE MAI EK..LAO NIGGAHITA -0EDC..0EDD ; SA # Lo [2] LAO HO NO..LAO HO MO +0EDC..0EDF ; SA # Lo [4] LAO HO NO..LAO LETTER KHMU NYO 1000..102A ; SA # Lo [43] MYANMAR LETTER KA..MYANMAR LETTER AU 102B..102C ; SA # Mc [2] MYANMAR VOWEL SIGN TALL AA..MYANMAR VOWEL SIGN AA 102D..1030 ; SA # Mn [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU @@ -1854,7 +1907,7 @@ FFFC ; CB # So OBJECT REPLACEMENT CHARACTER 109D ; SA # Mn MYANMAR VOWEL SIGN AITON AI 109E..109F ; SA # So [2] MYANMAR SYMBOL SHAN ONE..MYANMAR SYMBOL SHAN EXCLAMATION 1780..17B3 ; SA # Lo [52] KHMER LETTER KA..KHMER INDEPENDENT VOWEL QAU -17B4..17B5 ; SA # Cf [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA +17B4..17B5 ; SA # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA 17B6 ; SA # Mc KHMER VOWEL SIGN AA 17B7..17BD ; SA # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA 17BE..17C5 ; SA # Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU @@ -1908,21 +1961,20 @@ AADB..AADC ; SA # Lo [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG AADD ; SA # Lm TAI VIET SYMBOL SAM AADE..AADF ; SA # 
Po [2] TAI VIET SYMBOL HO HOI..TAI VIET SYMBOL KOI KOI -# Total code points: 663 +# Total code points: 665 # ================================================ # Line_Break=Ambiguous -00A7 ; AI # So SECTION SIGN +00A7 ; AI # Po SECTION SIGN 00A8 ; AI # Sk DIAERESIS -00AA ; AI # L& FEMININE ORDINAL INDICATOR +00AA ; AI # Lo FEMININE ORDINAL INDICATOR 00B2..00B3 ; AI # No [2] SUPERSCRIPT TWO..SUPERSCRIPT THREE -00B6 ; AI # So PILCROW SIGN -00B7 ; AI # Po MIDDLE DOT +00B6..00B7 ; AI # Po [2] PILCROW SIGN..MIDDLE DOT 00B8 ; AI # Sk CEDILLA 00B9 ; AI # No SUPERSCRIPT ONE -00BA ; AI # L& MASCULINE ORDINAL INDICATOR +00BA ; AI # Lo MASCULINE ORDINAL INDICATOR 00BC..00BE ; AI # No [3] VULGAR FRACTION ONE QUARTER..VULGAR FRACTION THREE QUARTERS 00D7 ; AI # Sm MULTIPLICATION SIGN 00F7 ; AI # Sm DIVISION SIGN @@ -2024,7 +2076,7 @@ AADE..AADF ; SA # Po [2] TAI VIET SYMBOL HO HOI..TAI VIET SYMBOL KOI KOI 2757 ; AI # So HEAVY EXCLAMATION MARK SYMBOL 2776..2793 ; AI # No [30] DINGBAT NEGATIVE CIRCLED DIGIT ONE..DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN 2B55..2B59 ; AI # So [5] HEAVY LARGE CIRCLE..HEAVY CIRCLED SALTIRE -3248..324F ; AI # So [8] CIRCLED NUMBER TEN ON BLACK SQUARE..CIRCLED NUMBER EIGHTY ON BLACK SQUARE +3248..324F ; AI # No [8] CIRCLED NUMBER TEN ON BLACK SQUARE..CIRCLED NUMBER EIGHTY ON BLACK SQUARE FFFD ; AI # So REPLACEMENT CHARACTER 1F100..1F10A ; AI # No [11] DIGIT ZERO FULL STOP..DIGIT NINE COMMA 1F110..1F12D ; AI # So [30] PARENTHESIZED LATIN CAPITAL LETTER A..CIRCLED CD @@ -2038,8 +2090,9 @@ FFFD ; AI # So REPLACEMENT CHARACTER # Line_Break=Break_Both 2014 ; B2 # Pd EM DASH +2E3A..2E3B ; B2 # Pd [2] TWO-EM DASH..THREE-EM DASH -# Total code points: 1 +# Total code points: 3 # ================================================ @@ -2922,4 +2975,54 @@ D789..D7A3 ; H3 # Lo [27] HANGUL SYLLABLE HIG..HANGUL SYLLABLE HIH # Total code points: 2 +# ================================================ + +# Line_Break=Hebrew_Letter + +05D0..05EA ; HL # Lo [27] HEBREW 
LETTER ALEF..HEBREW LETTER TAV +05F0..05F2 ; HL # Lo [3] HEBREW LIGATURE YIDDISH DOUBLE VAV..HEBREW LIGATURE YIDDISH DOUBLE YOD +FB1D ; HL # Lo HEBREW LETTER YOD WITH HIRIQ +FB1F..FB28 ; HL # Lo [10] HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBREW LETTER WIDE TAV +FB2A..FB36 ; HL # Lo [13] HEBREW LETTER SHIN WITH SHIN DOT..HEBREW LETTER ZAYIN WITH DAGESH +FB38..FB3C ; HL # Lo [5] HEBREW LETTER TET WITH DAGESH..HEBREW LETTER LAMED WITH DAGESH +FB3E ; HL # Lo HEBREW LETTER MEM WITH DAGESH +FB40..FB41 ; HL # Lo [2] HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER SAMEKH WITH DAGESH +FB43..FB44 ; HL # Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH +FB46..FB4F ; HL # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATURE ALEF LAMED + +# Total code points: 74 + +# ================================================ + +# Line_Break=Conditional_Japanese_Starter + +3041 ; CJ # Lo HIRAGANA LETTER SMALL A +3043 ; CJ # Lo HIRAGANA LETTER SMALL I +3045 ; CJ # Lo HIRAGANA LETTER SMALL U +3047 ; CJ # Lo HIRAGANA LETTER SMALL E +3049 ; CJ # Lo HIRAGANA LETTER SMALL O +3063 ; CJ # Lo HIRAGANA LETTER SMALL TU +3083 ; CJ # Lo HIRAGANA LETTER SMALL YA +3085 ; CJ # Lo HIRAGANA LETTER SMALL YU +3087 ; CJ # Lo HIRAGANA LETTER SMALL YO +308E ; CJ # Lo HIRAGANA LETTER SMALL WA +3095..3096 ; CJ # Lo [2] HIRAGANA LETTER SMALL KA..HIRAGANA LETTER SMALL KE +30A1 ; CJ # Lo KATAKANA LETTER SMALL A +30A3 ; CJ # Lo KATAKANA LETTER SMALL I +30A5 ; CJ # Lo KATAKANA LETTER SMALL U +30A7 ; CJ # Lo KATAKANA LETTER SMALL E +30A9 ; CJ # Lo KATAKANA LETTER SMALL O +30C3 ; CJ # Lo KATAKANA LETTER SMALL TU +30E3 ; CJ # Lo KATAKANA LETTER SMALL YA +30E5 ; CJ # Lo KATAKANA LETTER SMALL YU +30E7 ; CJ # Lo KATAKANA LETTER SMALL YO +30EE ; CJ # Lo KATAKANA LETTER SMALL WA +30F5..30F6 ; CJ # Lo [2] KATAKANA LETTER SMALL KA..KATAKANA LETTER SMALL KE +30FC ; CJ # Lm KATAKANA-HIRAGANA PROLONGED SOUND MARK +31F0..31FF ; CJ # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO +FF67..FF6F ; CJ 
# Lo [9] HALFWIDTH KATAKANA LETTER SMALL A..HALFWIDTH KATAKANA LETTER SMALL TU +FF70 ; CJ # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK + +# Total code points: 51 + # EOF diff --git a/lib/unicore/extracted/DNumType.txt b/lib/unicore/extracted/DNumType.txt index e1595fa29c..92866603e7 100644 --- a/lib/unicore/extracted/DNumType.txt +++ b/lib/unicore/extracted/DNumType.txt @@ -1,14 +1,22 @@ -# DerivedNumericType-6.0.0.txt -# Date: 2010-08-19, 00:48:13 GMT [MD] +# DerivedNumericType-6.1.0.txt +# Date: 2011-08-23, 00:47:14 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ # ================================================ -# Numeric Type (from UnicodeData.txt, field 6/7/8 plus Unihan Database: see UAX #44: http://www.unicode.org/reports/tr44/) +# Derived Property: Numeric_Type +# The values are based on fields 6-8 of UnicodeData.txt, plus the fields +# kAccountingNumeric, kOtherNumeric, kPrimaryNumeric in the Unicode Han Database (Unihan). +# The derivations for these values are as follows. +# Numeric_Type=Decimal: When there is a value in field 6. +# Numeric_Type=Digit: When there is a value in field 7, but not in field 6. +# Numeric_Type=Numeric: When there are values for kAccountingNumeric, kOtherNumeric, kPrimaryNumeric, +# or there is a value in field 8, but not in field 7. +# Numeric_Type=None: Otherwise # All code points not explicitly listed for Numeric_Type # have the value None. 
@@ -45,6 +53,7 @@ 3038..303A ; Numeric # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY 3192..3195 ; Numeric # No [4] IDEOGRAPHIC ANNOTATION ONE MARK..IDEOGRAPHIC ANNOTATION FOUR MARK 3220..3229 ; Numeric # No [10] PARENTHESIZED IDEOGRAPH ONE..PARENTHESIZED IDEOGRAPH TEN +3248..324F ; Numeric # No [8] CIRCLED NUMBER TEN ON BLACK SQUARE..CIRCLED NUMBER EIGHTY ON BLACK SQUARE 3251..325F ; Numeric # No [15] CIRCLED NUMBER TWENTY ONE..CIRCLED NUMBER THIRTY FIVE 3280..3289 ; Numeric # No [10] CIRCLED IDEOGRAPH ONE..CIRCLED IDEOGRAPH TEN 32B1..32BF ; Numeric # No [15] CIRCLED NUMBER THIRTY SIX..CIRCLED NUMBER FIFTY @@ -143,7 +152,7 @@ F9FD ; Numeric # Lo CJK COMPATIBILITY IDEOGRAPH-F9FD 2626D ; Numeric # Lo CJK UNIFIED IDEOGRAPH-2626D 2F890 ; Numeric # Lo CJK COMPATIBILITY IDEOGRAPH-2F890 -# Total code points: 629 +# Total code points: 637 # ================================================ @@ -209,8 +218,12 @@ ABF0..ABF9 ; Decimal # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT N FF10..FF19 ; Decimal # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE 104A0..104A9 ; Decimal # Nd [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE 11066..1106F ; Decimal # Nd [10] BRAHMI DIGIT ZERO..BRAHMI DIGIT NINE +110F0..110F9 ; Decimal # Nd [10] SORA SOMPENG DIGIT ZERO..SORA SOMPENG DIGIT NINE +11136..1113F ; Decimal # Nd [10] CHAKMA DIGIT ZERO..CHAKMA DIGIT NINE +111D0..111D9 ; Decimal # Nd [10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE +116C0..116C9 ; Decimal # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE 1D7CE..1D7FF ; Decimal # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE -# Total code points: 420 +# Total code points: 460 # EOF diff --git a/lib/unicore/extracted/DNumValues.txt b/lib/unicore/extracted/DNumValues.txt index 654bb86b67..02d408eb4b 100644 --- a/lib/unicore/extracted/DNumValues.txt +++ b/lib/unicore/extracted/DNumValues.txt @@ -1,19 +1,28 @@ -# DerivedNumericValues-6.0.0.txt -# Date: 2010-08-19, 00:48:14 GMT [MD] +# 
DerivedNumericValues-6.1.0.txt +# Date: 2011-08-19, 17:58:36 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ # ================================================ -# Numeric Values (from UnicodeData.txt, field 6/7/8) +# Derived Property: Numeric_Value +# The values are based on field 8 of UnicodeData.txt, plus the fields +# kAccountingNumeric, kOtherNumeric, kPrimaryNumeric in the Unicode Han Database (Unihan). +# The derivations for these values are as follows. +# Numeric_Value = the value of kAccountingNumeric, kOtherNumeric, or kPrimaryNumeric, if they exist; otherwise +# Numeric_Value = the value of field 8, if it exists; otherwise +# Numeric_Value = NaN +# # WARNING: Certain values, such as 0.16666667, are repeating fractions # Although they are only printed with a limited number of decimal places # in this file, they should be expressed to the limits of the precision # available when used. +# # The third field is empty; it used to be a copy of the numeric type. +# # A fourth field was added to this extracted data as of # Unicode 5.1.0, expressing the same numeric value either as # a whole integer where possible or as a rational fraction, e.g. "1/6". 
@@ -77,6 +86,10 @@ FF10 ; 0.0 ; ; 0 # Nd FULLWIDTH DIGIT ZERO 1018A ; 0.0 ; ; 0 # No GREEK ZERO SIGN 104A0 ; 0.0 ; ; 0 # Nd OSMANYA DIGIT ZERO 11066 ; 0.0 ; ; 0 # Nd BRAHMI DIGIT ZERO +110F0 ; 0.0 ; ; 0 # Nd SORA SOMPENG DIGIT ZERO +11136 ; 0.0 ; ; 0 # Nd CHAKMA DIGIT ZERO +111D0 ; 0.0 ; ; 0 # Nd SHARADA DIGIT ZERO +116C0 ; 0.0 ; ; 0 # Nd TAKRI DIGIT ZERO 1D7CE ; 0.0 ; ; 0 # Nd MATHEMATICAL BOLD DIGIT ZERO 1D7D8 ; 0.0 ; ; 0 # Nd MATHEMATICAL DOUBLE-STRUCK DIGIT ZERO 1D7E2 ; 0.0 ; ; 0 # Nd MATHEMATICAL SANS-SERIF DIGIT ZERO @@ -84,7 +97,7 @@ FF10 ; 0.0 ; ; 0 # Nd FULLWIDTH DIGIT ZERO 1D7F6 ; 0.0 ; ; 0 # Nd MATHEMATICAL MONOSPACE DIGIT ZERO 1F100..1F101 ; 0.0 ; ; 0 # No [2] DIGIT ZERO FULL STOP..DIGIT ZERO COMMA -# Total code points: 56 +# Total code points: 60 # ================================================ @@ -323,6 +336,10 @@ FF11 ; 1.0 ; ; 1 # Nd FULLWIDTH DIGIT ONE 10E60 ; 1.0 ; ; 1 # No RUMI DIGIT ONE 11052 ; 1.0 ; ; 1 # No BRAHMI NUMBER ONE 11067 ; 1.0 ; ; 1 # Nd BRAHMI DIGIT ONE +110F1 ; 1.0 ; ; 1 # Nd SORA SOMPENG DIGIT ONE +11137 ; 1.0 ; ; 1 # Nd CHAKMA DIGIT ONE +111D1 ; 1.0 ; ; 1 # Nd SHARADA DIGIT ONE +116C1 ; 1.0 ; ; 1 # Nd TAKRI DIGIT ONE 12415 ; 1.0 ; ; 1 # Nl CUNEIFORM NUMERIC SIGN ONE GESH2 1241E ; 1.0 ; ; 1 # Nl CUNEIFORM NUMERIC SIGN ONE GESHU 1242C ; 1.0 ; ; 1 # Nl CUNEIFORM NUMERIC SIGN ONE SHARU @@ -338,7 +355,7 @@ FF11 ; 1.0 ; ; 1 # Nd FULLWIDTH DIGIT ONE 1F102 ; 1.0 ; ; 1 # No DIGIT ONE COMMA 2092A ; 1.0 ; ; 1 # Lo CJK UNIFIED IDEOGRAPH-2092A -# Total code points: 93 +# Total code points: 97 # ================================================ @@ -424,6 +441,10 @@ FF12 ; 2.0 ; ; 2 # Nd FULLWIDTH DIGIT TWO 10E61 ; 2.0 ; ; 2 # No RUMI DIGIT TWO 11053 ; 2.0 ; ; 2 # No BRAHMI NUMBER TWO 11068 ; 2.0 ; ; 2 # Nd BRAHMI DIGIT TWO +110F2 ; 2.0 ; ; 2 # Nd SORA SOMPENG DIGIT TWO +11138 ; 2.0 ; ; 2 # Nd CHAKMA DIGIT TWO +111D2 ; 2.0 ; ; 2 # Nd SHARADA DIGIT TWO +116C2 ; 2.0 ; ; 2 # Nd TAKRI DIGIT TWO 12400 ; 2.0 ; ; 2 # Nl CUNEIFORM NUMERIC SIGN TWO ASH 
12416 ; 2.0 ; ; 2 # Nl CUNEIFORM NUMERIC SIGN TWO GESH2 1241F ; 2.0 ; ; 2 # Nl CUNEIFORM NUMERIC SIGN TWO GESHU @@ -442,7 +463,7 @@ FF12 ; 2.0 ; ; 2 # Nd FULLWIDTH DIGIT TWO 1F103 ; 2.0 ; ; 2 # No DIGIT TWO COMMA 22390 ; 2.0 ; ; 2 # Lo CJK UNIFIED IDEOGRAPH-22390 -# Total code points: 96 +# Total code points: 100 # ================================================ @@ -522,6 +543,10 @@ FF13 ; 3.0 ; ; 3 # Nd FULLWIDTH DIGIT THREE 10E62 ; 3.0 ; ; 3 # No RUMI DIGIT THREE 11054 ; 3.0 ; ; 3 # No BRAHMI NUMBER THREE 11069 ; 3.0 ; ; 3 # Nd BRAHMI DIGIT THREE +110F3 ; 3.0 ; ; 3 # Nd SORA SOMPENG DIGIT THREE +11139 ; 3.0 ; ; 3 # Nd CHAKMA DIGIT THREE +111D3 ; 3.0 ; ; 3 # Nd SHARADA DIGIT THREE +116C3 ; 3.0 ; ; 3 # Nd TAKRI DIGIT THREE 12401 ; 3.0 ; ; 3 # Nl CUNEIFORM NUMERIC SIGN THREE ASH 12408 ; 3.0 ; ; 3 # Nl CUNEIFORM NUMERIC SIGN THREE DISH 12417 ; 3.0 ; ; 3 # Nl CUNEIFORM NUMERIC SIGN THREE GESH2 @@ -544,7 +569,7 @@ FF13 ; 3.0 ; ; 3 # Nd FULLWIDTH DIGIT THREE 22998 ; 3.0 ; ; 3 # Lo CJK UNIFIED IDEOGRAPH-22998 23B1B ; 3.0 ; ; 3 # Lo CJK UNIFIED IDEOGRAPH-23B1B -# Total code points: 98 +# Total code points: 102 # ================================================ @@ -618,6 +643,10 @@ FF14 ; 4.0 ; ; 4 # Nd FULLWIDTH DIGIT FOUR 10E63 ; 4.0 ; ; 4 # No RUMI DIGIT FOUR 11055 ; 4.0 ; ; 4 # No BRAHMI NUMBER FOUR 1106A ; 4.0 ; ; 4 # Nd BRAHMI DIGIT FOUR +110F4 ; 4.0 ; ; 4 # Nd SORA SOMPENG DIGIT FOUR +1113A ; 4.0 ; ; 4 # Nd CHAKMA DIGIT FOUR +111D4 ; 4.0 ; ; 4 # Nd SHARADA DIGIT FOUR +116C4 ; 4.0 ; ; 4 # Nd TAKRI DIGIT FOUR 12402 ; 4.0 ; ; 4 # Nl CUNEIFORM NUMERIC SIGN FOUR ASH 12409 ; 4.0 ; ; 4 # Nl CUNEIFORM NUMERIC SIGN FOUR DISH 1240F ; 4.0 ; ; 4 # Nl CUNEIFORM NUMERIC SIGN FOUR U @@ -640,7 +669,7 @@ FF14 ; 4.0 ; ; 4 # Nd FULLWIDTH DIGIT FOUR 200E2 ; 4.0 ; ; 4 # Lo CJK UNIFIED IDEOGRAPH-200E2 2626D ; 4.0 ; ; 4 # Lo CJK UNIFIED IDEOGRAPH-2626D -# Total code points: 89 +# Total code points: 93 # ================================================ @@ -717,6 +746,10 @@ FF15 ; 5.0 ; ; 
5 # Nd FULLWIDTH DIGIT FIVE 10E64 ; 5.0 ; ; 5 # No RUMI DIGIT FIVE 11056 ; 5.0 ; ; 5 # No BRAHMI NUMBER FIVE 1106B ; 5.0 ; ; 5 # Nd BRAHMI DIGIT FIVE +110F5 ; 5.0 ; ; 5 # Nd SORA SOMPENG DIGIT FIVE +1113B ; 5.0 ; ; 5 # Nd CHAKMA DIGIT FIVE +111D5 ; 5.0 ; ; 5 # Nd SHARADA DIGIT FIVE +116C5 ; 5.0 ; ; 5 # Nd TAKRI DIGIT FIVE 12403 ; 5.0 ; ; 5 # Nl CUNEIFORM NUMERIC SIGN FIVE ASH 1240A ; 5.0 ; ; 5 # Nl CUNEIFORM NUMERIC SIGN FIVE DISH 12410 ; 5.0 ; ; 5 # Nl CUNEIFORM NUMERIC SIGN FIVE U @@ -736,7 +769,7 @@ FF15 ; 5.0 ; ; 5 # Nd FULLWIDTH DIGIT FIVE 1F106 ; 5.0 ; ; 5 # No DIGIT FIVE COMMA 20121 ; 5.0 ; ; 5 # Lo CJK UNIFIED IDEOGRAPH-20121 -# Total code points: 86 +# Total code points: 90 # ================================================ @@ -809,6 +842,10 @@ FF16 ; 6.0 ; ; 6 # Nd FULLWIDTH DIGIT SIX 10E65 ; 6.0 ; ; 6 # No RUMI DIGIT SIX 11057 ; 6.0 ; ; 6 # No BRAHMI NUMBER SIX 1106C ; 6.0 ; ; 6 # Nd BRAHMI DIGIT SIX +110F6 ; 6.0 ; ; 6 # Nd SORA SOMPENG DIGIT SIX +1113C ; 6.0 ; ; 6 # Nd CHAKMA DIGIT SIX +111D6 ; 6.0 ; ; 6 # Nd SHARADA DIGIT SIX +116C6 ; 6.0 ; ; 6 # Nd TAKRI DIGIT SIX 12404 ; 6.0 ; ; 6 # Nl CUNEIFORM NUMERIC SIGN SIX ASH 1240B ; 6.0 ; ; 6 # Nl CUNEIFORM NUMERIC SIGN SIX DISH 12411 ; 6.0 ; ; 6 # Nl CUNEIFORM NUMERIC SIGN SIX U @@ -825,7 +862,7 @@ FF16 ; 6.0 ; ; 6 # Nd FULLWIDTH DIGIT SIX 1F107 ; 6.0 ; ; 6 # No DIGIT SIX COMMA 20AEA ; 6.0 ; ; 6 # Lo CJK UNIFIED IDEOGRAPH-20AEA -# Total code points: 78 +# Total code points: 82 # ================================================ @@ -896,6 +933,10 @@ FF17 ; 7.0 ; ; 7 # Nd FULLWIDTH DIGIT SEVEN 10E66 ; 7.0 ; ; 7 # No RUMI DIGIT SEVEN 11058 ; 7.0 ; ; 7 # No BRAHMI NUMBER SEVEN 1106D ; 7.0 ; ; 7 # Nd BRAHMI DIGIT SEVEN +110F7 ; 7.0 ; ; 7 # Nd SORA SOMPENG DIGIT SEVEN +1113D ; 7.0 ; ; 7 # Nd CHAKMA DIGIT SEVEN +111D7 ; 7.0 ; ; 7 # Nd SHARADA DIGIT SEVEN +116C7 ; 7.0 ; ; 7 # Nd TAKRI DIGIT SEVEN 12405 ; 7.0 ; ; 7 # Nl CUNEIFORM NUMERIC SIGN SEVEN ASH 1240C ; 7.0 ; ; 7 # Nl CUNEIFORM NUMERIC SIGN SEVEN DISH 12412 ; 
7.0 ; ; 7 # Nl CUNEIFORM NUMERIC SIGN SEVEN U @@ -911,7 +952,7 @@ FF17 ; 7.0 ; ; 7 # Nd FULLWIDTH DIGIT SEVEN 1F108 ; 7.0 ; ; 7 # No DIGIT SEVEN COMMA 20001 ; 7.0 ; ; 7 # Lo CJK UNIFIED IDEOGRAPH-20001 -# Total code points: 77 +# Total code points: 81 # ================================================ @@ -980,6 +1021,10 @@ FF18 ; 8.0 ; ; 8 # Nd FULLWIDTH DIGIT EIGHT 10E67 ; 8.0 ; ; 8 # No RUMI DIGIT EIGHT 11059 ; 8.0 ; ; 8 # No BRAHMI NUMBER EIGHT 1106E ; 8.0 ; ; 8 # Nd BRAHMI DIGIT EIGHT +110F8 ; 8.0 ; ; 8 # Nd SORA SOMPENG DIGIT EIGHT +1113E ; 8.0 ; ; 8 # Nd CHAKMA DIGIT EIGHT +111D8 ; 8.0 ; ; 8 # Nd SHARADA DIGIT EIGHT +116C8 ; 8.0 ; ; 8 # Nd TAKRI DIGIT EIGHT 12406 ; 8.0 ; ; 8 # Nl CUNEIFORM NUMERIC SIGN EIGHT ASH 1240D ; 8.0 ; ; 8 # Nl CUNEIFORM NUMERIC SIGN EIGHT DISH 12413 ; 8.0 ; ; 8 # Nl CUNEIFORM NUMERIC SIGN EIGHT U @@ -994,7 +1039,7 @@ FF18 ; 8.0 ; ; 8 # Nd FULLWIDTH DIGIT EIGHT 1D7FE ; 8.0 ; ; 8 # Nd MATHEMATICAL MONOSPACE DIGIT EIGHT 1F109 ; 8.0 ; ; 8 # No DIGIT EIGHT COMMA -# Total code points: 73 +# Total code points: 77 # ================================================ @@ -1064,6 +1109,10 @@ FF19 ; 9.0 ; ; 9 # Nd FULLWIDTH DIGIT NINE 10E68 ; 9.0 ; ; 9 # No RUMI DIGIT NINE 1105A ; 9.0 ; ; 9 # No BRAHMI NUMBER NINE 1106F ; 9.0 ; ; 9 # Nd BRAHMI DIGIT NINE +110F9 ; 9.0 ; ; 9 # Nd SORA SOMPENG DIGIT NINE +1113F ; 9.0 ; ; 9 # Nd CHAKMA DIGIT NINE +111D9 ; 9.0 ; ; 9 # Nd SHARADA DIGIT NINE +116C9 ; 9.0 ; ; 9 # Nd TAKRI DIGIT NINE 12407 ; 9.0 ; ; 9 # Nl CUNEIFORM NUMERIC SIGN NINE ASH 1240E ; 9.0 ; ; 9 # Nl CUNEIFORM NUMERIC SIGN NINE DISH 12414 ; 9.0 ; ; 9 # Nl CUNEIFORM NUMERIC SIGN NINE U @@ -1079,7 +1128,7 @@ FF19 ; 9.0 ; ; 9 # Nd FULLWIDTH DIGIT NINE 1F10A ; 9.0 ; ; 9 # No DIGIT NINE COMMA 2F890 ; 9.0 ; ; 9 # Lo CJK COMPATIBILITY IDEOGRAPH-2F890 -# Total code points: 77 +# Total code points: 81 # ================================================ @@ -1097,6 +1146,7 @@ FF19 ; 9.0 ; ; 9 # Nd FULLWIDTH DIGIT NINE 2793 ; 10.0 ; ; 10 # No DINGBAT NEGATIVE 
CIRCLED SANS-SERIF NUMBER TEN 3038 ; 10.0 ; ; 10 # Nl HANGZHOU NUMERAL TEN 3229 ; 10.0 ; ; 10 # No PARENTHESIZED IDEOGRAPH TEN +3248 ; 10.0 ; ; 10 # No CIRCLED NUMBER TEN ON BLACK SQUARE 3289 ; 10.0 ; ; 10 # No CIRCLED IDEOGRAPH TEN 4EC0 ; 10.0 ; ; 10 # Lo CJK UNIFIED IDEOGRAPH-4EC0 5341 ; 10.0 ; ; 10 # Lo CJK UNIFIED IDEOGRAPH-5341 @@ -1119,7 +1169,7 @@ F9FD ; 10.0 ; ; 10 # Lo CJK COMPATIBILITY IDEOGRAPH-F9FD 1105B ; 10.0 ; ; 10 # No BRAHMI NUMBER TEN 1D369 ; 10.0 ; ; 10 # No COUNTING ROD TENS DIGIT ONE -# Total code points: 39 +# Total code points: 40 # ================================================ @@ -1218,6 +1268,7 @@ F9FD ; 10.0 ; ; 10 # Lo CJK COMPATIBILITY IDEOGRAPH-F9FD 249B ; 20.0 ; ; 20 # No NUMBER TWENTY FULL STOP 24F4 ; 20.0 ; ; 20 # No NEGATIVE CIRCLED NUMBER TWENTY 3039 ; 20.0 ; ; 20 # Nl HANGZHOU NUMERAL TWENTY +3249 ; 20.0 ; ; 20 # No CIRCLED NUMBER TWENTY ON BLACK SQUARE 5344 ; 20.0 ; ; 20 # Lo CJK UNIFIED IDEOGRAPH-5344 5EFF ; 20.0 ; ; 20 # Lo CJK UNIFIED IDEOGRAPH-5EFF 10111 ; 20.0 ; ; 20 # No AEGEAN NUMBER TWENTY @@ -1231,7 +1282,7 @@ F9FD ; 10.0 ; ; 10 # Lo CJK COMPATIBILITY IDEOGRAPH-F9FD 1105C ; 20.0 ; ; 20 # No BRAHMI NUMBER TWENTY 1D36A ; 20.0 ; ; 20 # No COUNTING ROD TENS DIGIT TWO -# Total code points: 18 +# Total code points: 19 # ================================================ @@ -1291,6 +1342,7 @@ F9FD ; 10.0 ; ; 10 # Lo CJK COMPATIBILITY IDEOGRAPH-F9FD 1374 ; 30.0 ; ; 30 # No ETHIOPIC NUMBER THIRTY 303A ; 30.0 ; ; 30 # Nl HANGZHOU NUMERAL THIRTY +324A ; 30.0 ; ; 30 # No CIRCLED NUMBER THIRTY ON BLACK SQUARE 325A ; 30.0 ; ; 30 # No CIRCLED NUMBER THIRTY 5345 ; 30.0 ; ; 30 # Lo CJK UNIFIED IDEOGRAPH-5345 10112 ; 30.0 ; ; 30 # No AEGEAN NUMBER THIRTY @@ -1300,7 +1352,7 @@ F9FD ; 10.0 ; ; 10 # Lo CJK COMPATIBILITY IDEOGRAPH-F9FD 1D36B ; 30.0 ; ; 30 # No COUNTING ROD TENS DIGIT THREE 20983 ; 30.0 ; ; 30 # Lo CJK UNIFIED IDEOGRAPH-20983 -# Total code points: 10 +# Total code points: 11 # ================================================ 
@@ -1359,6 +1411,7 @@ F9FD ; 10.0 ; ; 10 # Lo CJK COMPATIBILITY IDEOGRAPH-F9FD # ================================================ 1375 ; 40.0 ; ; 40 # No ETHIOPIC NUMBER FORTY +324B ; 40.0 ; ; 40 # No CIRCLED NUMBER FORTY ON BLACK SQUARE 32B5 ; 40.0 ; ; 40 # No CIRCLED NUMBER FORTY 534C ; 40.0 ; ; 40 # Lo CJK UNIFIED IDEOGRAPH-534C 10113 ; 40.0 ; ; 40 # No AEGEAN NUMBER FORTY @@ -1368,7 +1421,7 @@ F9FD ; 10.0 ; ; 10 # Lo CJK COMPATIBILITY IDEOGRAPH-F9FD 2098C ; 40.0 ; ; 40 # Lo CJK UNIFIED IDEOGRAPH-2098C 2099C ; 40.0 ; ; 40 # Lo CJK UNIFIED IDEOGRAPH-2099C -# Total code points: 9 +# Total code points: 10 # ================================================ @@ -1430,6 +1483,7 @@ F9FD ; 10.0 ; ; 10 # Lo CJK COMPATIBILITY IDEOGRAPH-F9FD 216C ; 50.0 ; ; 50 # Nl ROMAN NUMERAL FIFTY 217C ; 50.0 ; ; 50 # Nl SMALL ROMAN NUMERAL FIFTY 2186 ; 50.0 ; ; 50 # Nl ROMAN NUMERAL FIFTY EARLY FORM +324C ; 50.0 ; ; 50 # No CIRCLED NUMBER FIFTY ON BLACK SQUARE 32BF ; 50.0 ; ; 50 # No CIRCLED NUMBER FIFTY 10114 ; 50.0 ; ; 50 # No AEGEAN NUMBER FIFTY 10144 ; 50.0 ; ; 50 # Nl GREEK ACROPHONIC ATTIC FIFTY @@ -1443,37 +1497,40 @@ F9FD ; 10.0 ; ; 10 # Lo CJK COMPATIBILITY IDEOGRAPH-F9FD 1105F ; 50.0 ; ; 50 # No BRAHMI NUMBER FIFTY 1D36D ; 50.0 ; ; 50 # No COUNTING ROD TENS DIGIT FIVE -# Total code points: 19 +# Total code points: 20 # ================================================ 1377 ; 60.0 ; ; 60 # No ETHIOPIC NUMBER SIXTY +324D ; 60.0 ; ; 60 # No CIRCLED NUMBER SIXTY ON BLACK SQUARE 10115 ; 60.0 ; ; 60 # No AEGEAN NUMBER SIXTY 10E6E ; 60.0 ; ; 60 # No RUMI NUMBER SIXTY 11060 ; 60.0 ; ; 60 # No BRAHMI NUMBER SIXTY 1D36E ; 60.0 ; ; 60 # No COUNTING ROD TENS DIGIT SIX -# Total code points: 5 +# Total code points: 6 # ================================================ 1378 ; 70.0 ; ; 70 # No ETHIOPIC NUMBER SEVENTY +324E ; 70.0 ; ; 70 # No CIRCLED NUMBER SEVENTY ON BLACK SQUARE 10116 ; 70.0 ; ; 70 # No AEGEAN NUMBER SEVENTY 10E6F ; 70.0 ; ; 70 # No RUMI NUMBER SEVENTY 11061 ; 70.0 ; ; 70 # 
No BRAHMI NUMBER SEVENTY 1D36F ; 70.0 ; ; 70 # No COUNTING ROD TENS DIGIT SEVEN -# Total code points: 5 +# Total code points: 6 # ================================================ 1379 ; 80.0 ; ; 80 # No ETHIOPIC NUMBER EIGHTY +324F ; 80.0 ; ; 80 # No CIRCLED NUMBER EIGHTY ON BLACK SQUARE 10117 ; 80.0 ; ; 80 # No AEGEAN NUMBER EIGHTY 10E70 ; 80.0 ; ; 80 # No RUMI NUMBER EIGHTY 11062 ; 80.0 ; ; 80 # No BRAHMI NUMBER EIGHTY 1D370 ; 80.0 ; ; 80 # No COUNTING ROD TENS DIGIT EIGHT -# Total code points: 5 +# Total code points: 6 # ================================================ diff --git a/lib/unicore/mktables b/lib/unicore/mktables index 7824fd4986..2aaaa56b4f 100644 --- a/lib/unicore/mktables +++ b/lib/unicore/mktables @@ -8596,15 +8596,6 @@ sub finish_property_setup { } } - # This entry is still missing as of 6.0, perhaps because no short name for - # it. - if (-e 'NameAliases.txt') { - my $aliases = property_ref('Name_Alias'); - if (! defined $aliases) { - $aliases = Property->new('Name_Alias'); - } - } - # These are used so much, that we set globals for them. $gc = property_ref('General_Category'); $block = property_ref('Block'); @@ -12313,11 +12304,14 @@ sub compile_perl() { $perl_charname->add_duplicate($abbreviations{$value}, $value, Replace => $MULTIPLE_AFTER); } $alias_sentence = <<END; -The Name_Alias property adds duplicate code point entries with a corrected -name. The original (less correct, but still valid) name will be physically -last. +The Name_Alias property adds duplicate code point entries that are +alternatives to the original name. If an addition is a corrected +name, it will be physically first in the table. The original (less correct, +but still valid) name will be next; then any alternatives, in no particular +order; and finally any abbreviations, again in no particular order. 
END } + my $comment; if (@composition <= 2) { # Always at least 2 $comment = join " and ", @composition; @@ -12329,8 +12323,8 @@ END $perl_charname->add_comment(join_lines( <<END This file is for charnames.pm. It is the union of the $comment properties. -Unicode_1_Name entries are used only for otherwise nameless code -points. +Unicode_1_Name entries are used only for nameless code points in the Name +property. $alias_sentence This file doesn't include the algorithmically determinable names. For those, use 'unicore/Name.pm' diff --git a/lib/unicore/version b/lib/unicore/version index 09b254e90c..dfda3e0b4f 100644 --- a/lib/unicore/version +++ b/lib/unicore/version @@ -1 +1 @@ -6.0.0 +6.1.0 diff --git a/pod/perldelta.pod b/pod/perldelta.pod index fdf0a0529b..215be08d49 100644 --- a/pod/perldelta.pod +++ b/pod/perldelta.pod @@ -66,6 +66,70 @@ Full details are in L<perlfunc/fc>. The C<_> character in subroutine prototypes is now allowed before C<@> or C<%>. +=head1 Supports (I<almost>) Unicode 6.1 + +Besides the addition of whole new scripts, and new characters in +existing scripts, this new version of Unicode, as always, makes some +changes to existing characters. One change that may trip up some +applications is that the General Category of two characters in the +Latin-1 range, PILCROW SIGN and SECTION SIGN, has been changed from +Other_Symbol to Other_Punctuation. The same change has been made for +a character in each of Tibetan, Ethiopic, and Aegean. +The code points U+3248..U+324F (CIRCLED NUMBER TEN ON BLACK SQUARE +through CIRCLED NUMBER EIGHTY ON BLACK SQUARE) have had their General +Category changed from Other_Symbol to Other_Numeric. The Line Break +property has changes for Hebrew and Japanese; and as a consequence of +other changes in 6.1, the Perl regular expression construct C<\X> now +works differently for some characters in Thai and Lao. 
+ +New aliases (synonyms) have been defined for many property values; +these, along with the previously existing ones, are all cross indexed in +L<perluniprops>. + +The return value of C<charnames::viacode> is affected by other changes. +One of these is that the preferred name (which is what C<viacode> +returns) for the character at U+2118 has been changed from SCRIPT CAPITAL P +to WEIERSTRASS ELLIPTIC FUNCTION. But most of these changes are the +fallout of the mistake Unicode 6.0 made in naming a character used in +Japanese cell phones to be "BELL", which conflicts with the long +standing industry use of (and Unicode's recommendation to use) that name +to mean the ASCII control character at U+0007. As a result, that name +has been deprecated in Perl since v5.14; and any use of it will raise a +warning message (unless turned off). The name "ALERT" is now the +preferred name for this code point, with "BEL" being an acceptable short +form. The name for the new cell phone character, at code point U+1F514, +remains undefined in this version of Perl (hence we don't quite +implement all of Unicode 6.1), but starting in v5.18, BELL will mean +this character, and not U+0007. + +Unicode has taken steps to make sure that this sort of mistake does not +happen again. The Standard now includes all the generally accepted +names and abbreviations for control characters, whereas previously it +didn't. This means that all the names that Perl had previously +deprecated (except BELL) are no longer deprecated, such as FILE +SEPARATOR. Also, the names for four rarely used characters are subtly +different (a hyphen instead of a space) than before: + + Code point Old Name New Name + U+008E SINGLE-SHIFT 2 SINGLE-SHIFT-2 + U+008F SINGLE-SHIFT 3 SINGLE-SHIFT-3 + U+0091 PRIVATE USE 1 PRIVATE USE-1 + U+0092 PRIVATE USE 2 PRIVATE USE-2 + +Perl will accept either name as input, but C<charnames::viacode> now +returns the new name. 
+ +Additional name abbreviations are accepted: +SP for SPACE; +TAB for CHARACTER TABULATION; +NEW LINE, END OF LINE, NL, and EOL for LINE FEED; +LOCKING-SHIFT ONE for SHIFT OUT; +LOCKING-SHIFT ZERO for SHIFT IN; +and ZWNBSP for ZERO WIDTH NO-BREAK SPACE. + +More details on this version of Unicode are provided in +L<http://www.unicode.org/versions/Unicode6.1.0/>. + =head1 Security XXX Any security-related notices go here. In particular, any security @@ -103,6 +167,10 @@ core typemap: T_DATAUNIT and T_CALLBACK. If you are, against all odds, a user of these, please see the instructions on how to regain them in L<perlxstypemap>. +=head2 Unicode 6.1 has incompatibilities with Unicode 6.0 + +These are detailed in L</Supports (almost) Unicode 6.1> above. + =head1 Deprecations XXX Any deprecated features, syntax, modules etc. should be listed here. diff --git a/t/re/pat_advanced.t b/t/re/pat_advanced.t index 4d88190a5e..43db3d4334 100644 --- a/t/re/pat_advanced.t +++ b/t/re/pat_advanced.t @@ -1743,7 +1743,7 @@ EOP my @isPunct = grep {/[[:punct:]]/ != /\p{IsPunct}/} map {chr} 0x80 .. 0xff; - is(join ('', @isPunct), "\xa1\xab\xb7\xbb\xbf", # ¡ « · » ¿ + is(join ('', @isPunct), "\xa1\xa7\xab\xb6\xb7\xbb\xbf", # ¡ § « ¶ · » ¿ 'IsPunct disagrees with [:punct:] outside ASCII'); my @isPunctLatin1 = eval q { |