diff options
56 files changed, 9089 insertions, 2861 deletions
diff --git a/l1_char_class_tab.h b/l1_char_class_tab.h index ce8d53384d..4d2612bb9d 100644 --- a/l1_char_class_tab.h +++ b/l1_char_class_tab.h @@ -172,7 +172,7 @@ /* U+A4 CURRENCY SIGN */ _CC_GRAPH_L1|_CC_PRINT_L1, /* U+A5 YEN SIGN */ _CC_GRAPH_L1|_CC_PRINT_L1, /* U+A6 BROKEN BAR */ _CC_GRAPH_L1|_CC_PRINT_L1, -/* U+A7 SECTION SIGN */ _CC_GRAPH_L1|_CC_PRINT_L1, +/* U+A7 SECTION SIGN */ _CC_GRAPH_L1|_CC_PRINT_L1|_CC_PUNCT_L1, /* U+A8 DIAERESIS */ _CC_GRAPH_L1|_CC_PRINT_L1, /* U+A9 COPYRIGHT SIGN */ _CC_GRAPH_L1|_CC_PRINT_L1, /* U+AA FEMININE ORDINAL INDICATOR */ _CC_ALNUMC_L1|_CC_ALPHA_L1|_CC_CHARNAME_CONT|_CC_GRAPH_L1|_CC_IDFIRST_L1|_CC_LOWER_L1|_CC_PRINT_L1|_CC_WORDCHAR_L1, @@ -187,7 +187,7 @@ /* U+B3 SUPERSCRIPT THREE */ _CC_GRAPH_L1|_CC_PRINT_L1, /* U+B4 ACUTE ACCENT */ _CC_GRAPH_L1|_CC_PRINT_L1, /* U+B5 MICRO SIGN */ _CC_NONLATIN1_FOLD|_CC_ALNUMC_L1|_CC_ALPHA_L1|_CC_CHARNAME_CONT|_CC_GRAPH_L1|_CC_IDFIRST_L1|_CC_LOWER_L1|_CC_PRINT_L1|_CC_WORDCHAR_L1, -/* U+B6 PILCROW SIGN */ _CC_GRAPH_L1|_CC_PRINT_L1, +/* U+B6 PILCROW SIGN */ _CC_GRAPH_L1|_CC_PRINT_L1|_CC_PUNCT_L1, /* U+B7 MIDDLE DOT */ _CC_GRAPH_L1|_CC_PRINT_L1|_CC_PUNCT_L1, /* U+B8 CEDILLA */ _CC_GRAPH_L1|_CC_PRINT_L1, /* U+B9 SUPERSCRIPT ONE */ _CC_GRAPH_L1|_CC_PRINT_L1, diff --git a/lib/Unicode/UCD.pm b/lib/Unicode/UCD.pm index 074284f5fb..a1f16a99ff 100644 --- a/lib/Unicode/UCD.pm +++ b/lib/Unicode/UCD.pm @@ -2252,20 +2252,56 @@ Devanagari, Gurmukhi, and Oriya scripts. The Name_Alias property is of this form. But each scalar consists of two components: 1) the name, and 2) the type of alias this is. They are -separated by a colon and a space. In Unicode 6.0, there are two alias types: -C<"correction">, which indicates that the name is a corrected form for the -original name (which remains valid) for the same code point; and C<"control">, -which adds a new name for a control character. +separated by a colon and a space. 
In Unicode 6.1, there are several alias types: + +=over + +=item C<correction> + +indicates that the name is a corrected form for the +original name (which remains valid) for the same code point. + +=item C<control> + +adds a new name for a control character. + +=item C<alternate> + +is an alternate name for a character. + +=item C<figment> + +is a name for a character that has been documented but was never in any +actual standard. + +=item C<abbreviation> + +is a common abbreviation for a character. + +=back + +The lists are ordered (roughly) so the most preferred names come before less +preferred ones. For example, - @aliases_ranges @alias_maps + @aliases_ranges @alias_maps + ... + 0x009E [ 'PRIVACY MESSAGE: control', 'PM: abbreviation' ] + 0x009F [ 'APPLICATION PROGRAM COMMAND: control', + 'APC: abbreviation' + ] + 0x00A0 'NBSP: abbreviation' + 0x00A1 "" + 0x00AD 'SHY: abbreviation' + 0x00AE "" + 0x01A2 'LATIN CAPITAL LETTER GHA: correction' + 0x01A3 'LATIN SMALL LETTER GHA: correction' + 0x01A4 "" ... - 0x01A2 LATIN CAPITAL LETTER GHA: correction - 0x01A3 LATIN SMALL LETTER GHA: correction -Unicode 6.1 will introduce other types, and some map entries will be lists of -multiple name-alias pairs for a single code point. +A map to the empty string means that there is no alias defined for the code +point. =item C<r> @@ -2409,7 +2445,9 @@ the function L<charnames/charnames::viacode(code)>. Note that for control characters (C<Gc=cc>), Unicode's data files have the string "C<E<lt>controlE<gt>>", but the real name of each of these characters is the empty -string. This function returns that real name, the empty string. +string. This function returns that real name, the empty string. (There are +names for these characters, but they are aliases, not the real name, and are +contained in the C<Name_Alias> property.) 
=item C<d> @@ -3179,6 +3217,9 @@ To convert from new-style to old-style, follow this recipe: gets the lower end of the range (0th element) and then looks up the old name for its block using C<charblock>). +Note that starting in Unicode 6.1, many of the block names have shorter +synonyms. These are always given in the new style. + =head1 BUGS Does not yet support EBCDIC platforms. diff --git a/lib/Unicode/UCD.t b/lib/Unicode/UCD.t index b92dd51e4b..0178eba3af 100644 --- a/lib/Unicode/UCD.t +++ b/lib/Unicode/UCD.t @@ -342,7 +342,7 @@ is($bt->{AL}, 'Right-to-Left Arabic', 'AL is Right-to-Left Arabic'); # If this fails, then maybe one should look at the Unicode changes to see # what else might need to be updated. -is(Unicode::UCD::UnicodeVersion, '6.0.0', 'UnicodeVersion'); +is(Unicode::UCD::UnicodeVersion, '6.1.0', 'UnicodeVersion'); use Unicode::UCD qw(compexcl); @@ -470,7 +470,7 @@ is(Unicode::UCD::_getcode('U+123x'), undef, "_getcode(x123)"); { my $r1 = charscript('Latin'); my $n1 = @$r1; - is($n1, 30, "number of ranges in Latin script (Unicode 6.0.0)"); + is($n1, 30, "number of ranges in Latin script (Unicode 6.1.0)"); shift @$r1 while @$r1; my $r2 = charscript('Latin'); is(@$r2, $n1, "modifying results should not mess up internal caches"); diff --git a/lib/_charnames.pm b/lib/_charnames.pm index 5f64ebf8d3..02dbef056c 100644 --- a/lib/_charnames.pm +++ b/lib/_charnames.pm @@ -65,432 +65,26 @@ $Carp::Internal{ (__PACKAGE__) } = 1; # it alone, but since that is harder for a human to parse, I left it as-is. my %system_aliases = ( - # Synonyms for the icky 3.2 names that have parentheses. 
- 'LINE FEED' => pack("U", 0x0A), # LINE FEED (LF) - 'FORM FEED' => pack("U", 0x0C), # FORM FEED (FF) - 'CARRIAGE RETURN' => pack("U", 0x0D), # CARRIAGE RETURN (CR) - 'NEXT LINE' => pack("U", 0x85), # NEXT LINE (NEL) - # Some variant names from Wikipedia 'SINGLE-SHIFT 2' => pack("U", 0x8E), 'SINGLE-SHIFT 3' => pack("U", 0x8F), 'PRIVATE USE 1' => pack("U", 0x91), 'PRIVATE USE 2' => pack("U", 0x92), - 'START OF PROTECTED AREA' => pack("U", 0x96), - 'END OF PROTECTED AREA' => pack("U", 0x97), - - # Convenience. Standard abbreviations for the controls - 'NUL' => pack("U", 0x00), # NULL - 'SOH' => pack("U", 0x01), # START OF HEADING - 'STX' => pack("U", 0x02), # START OF TEXT - 'ETX' => pack("U", 0x03), # END OF TEXT - 'EOT' => pack("U", 0x04), # END OF TRANSMISSION - 'ENQ' => pack("U", 0x05), # ENQUIRY - 'ACK' => pack("U", 0x06), # ACKNOWLEDGE - 'BEL' => pack("U", 0x07), # ALERT; formerly BELL - 'BS' => pack("U", 0x08), # BACKSPACE - 'HT' => pack("U", 0x09), # HORIZONTAL TABULATION - 'LF' => pack("U", 0x0A), # LINE FEED (LF) - 'VT' => pack("U", 0x0B), # VERTICAL TABULATION - 'FF' => pack("U", 0x0C), # FORM FEED (FF) - 'CR' => pack("U", 0x0D), # CARRIAGE RETURN (CR) - 'SO' => pack("U", 0x0E), # SHIFT OUT - 'SI' => pack("U", 0x0F), # SHIFT IN - 'DLE' => pack("U", 0x10), # DATA LINK ESCAPE - 'DC1' => pack("U", 0x11), # DEVICE CONTROL ONE - 'DC2' => pack("U", 0x12), # DEVICE CONTROL TWO - 'DC3' => pack("U", 0x13), # DEVICE CONTROL THREE - 'DC4' => pack("U", 0x14), # DEVICE CONTROL FOUR - 'NAK' => pack("U", 0x15), # NEGATIVE ACKNOWLEDGE - 'SYN' => pack("U", 0x16), # SYNCHRONOUS IDLE - 'ETB' => pack("U", 0x17), # END OF TRANSMISSION BLOCK - 'CAN' => pack("U", 0x18), # CANCEL - 'EOM' => pack("U", 0x19), # END OF MEDIUM - 'SUB' => pack("U", 0x1A), # SUBSTITUTE - 'ESC' => pack("U", 0x1B), # ESCAPE - 'FS' => pack("U", 0x1C), # FILE SEPARATOR - 'GS' => pack("U", 0x1D), # GROUP SEPARATOR - 'RS' => pack("U", 0x1E), # RECORD SEPARATOR - 'US' => pack("U", 0x1F), # UNIT SEPARATOR - 
'DEL' => pack("U", 0x7F), # DELETE - 'BPH' => pack("U", 0x82), # BREAK PERMITTED HERE - 'NBH' => pack("U", 0x83), # NO BREAK HERE - 'NEL' => pack("U", 0x85), # NEXT LINE (NEL) - 'SSA' => pack("U", 0x86), # START OF SELECTED AREA - 'ESA' => pack("U", 0x87), # END OF SELECTED AREA - 'HTS' => pack("U", 0x88), # CHARACTER TABULATION SET - 'HTJ' => pack("U", 0x89), # CHARACTER TABULATION WITH JUSTIFICATION - 'VTS' => pack("U", 0x8A), # LINE TABULATION SET - 'PLD' => pack("U", 0x8B), # PARTIAL LINE FORWARD - 'PLU' => pack("U", 0x8C), # PARTIAL LINE BACKWARD - 'RI' => pack("U", 0x8D), # REVERSE LINE FEED - 'SS2' => pack("U", 0x8E), # SINGLE SHIFT TWO - 'SS3' => pack("U", 0x8F), # SINGLE SHIFT THREE - 'DCS' => pack("U", 0x90), # DEVICE CONTROL STRING - 'PU1' => pack("U", 0x91), # PRIVATE USE ONE - 'PU2' => pack("U", 0x92), # PRIVATE USE TWO - 'STS' => pack("U", 0x93), # SET TRANSMIT STATE - 'CCH' => pack("U", 0x94), # CANCEL CHARACTER - 'MW' => pack("U", 0x95), # MESSAGE WAITING - 'SPA' => pack("U", 0x96), # START OF GUARDED AREA - 'EPA' => pack("U", 0x97), # END OF GUARDED AREA - 'SOS' => pack("U", 0x98), # START OF STRING - 'SCI' => pack("U", 0x9A), # SINGLE CHARACTER INTRODUCER - 'CSI' => pack("U", 0x9B), # CONTROL SEQUENCE INTRODUCER - 'ST' => pack("U", 0x9C), # STRING TERMINATOR - 'OSC' => pack("U", 0x9D), # OPERATING SYSTEM COMMAND - 'PM' => pack("U", 0x9E), # PRIVACY MESSAGE - 'APC' => pack("U", 0x9F), # APPLICATION PROGRAM COMMAND - - # There are no names for these in the Unicode standard; perhaps should be - # deprecated, but then again there are no alternative names, so am not - # deprecating. And if did, the code would have to change to not recommend - # an alternative for these. 
- 'PADDING CHARACTER' => pack("U", 0x80), - 'PAD' => pack("U", 0x80), - 'HIGH OCTET PRESET' => pack("U", 0x81), - 'HOP' => pack("U", 0x81), - 'INDEX' => pack("U", 0x84), - 'IND' => pack("U", 0x84), - 'SINGLE GRAPHIC CHARACTER INTRODUCER' => pack("U", 0x99), - 'SGC' => pack("U", 0x99), - - # More convenience. For further convenience, it is suggested some way of - # using the NamesList aliases be implemented, but there are ambiguities in - # NamesList.txt - 'BOM' => pack("U", 0xFEFF), # BYTE ORDER MARK - 'BYTE ORDER MARK'=> pack("U", 0xFEFF), - 'CGJ' => pack("U", 0x034F), # COMBINING GRAPHEME JOINER - 'FVS1' => pack("U", 0x180B), # MONGOLIAN FREE VARIATION SELECTOR ONE - 'FVS2' => pack("U", 0x180C), # MONGOLIAN FREE VARIATION SELECTOR TWO - 'FVS3' => pack("U", 0x180D), # MONGOLIAN FREE VARIATION SELECTOR THREE - 'LRE' => pack("U", 0x202A), # LEFT-TO-RIGHT EMBEDDING - 'LRM' => pack("U", 0x200E), # LEFT-TO-RIGHT MARK - 'LRO' => pack("U", 0x202D), # LEFT-TO-RIGHT OVERRIDE - 'MMSP' => pack("U", 0x205F), # MEDIUM MATHEMATICAL SPACE - 'MVS' => pack("U", 0x180E), # MONGOLIAN VOWEL SEPARATOR - 'NBSP' => pack("U", 0x00A0), # NO-BREAK SPACE - 'NNBSP' => pack("U", 0x202F), # NARROW NO-BREAK SPACE - 'PDF' => pack("U", 0x202C), # POP DIRECTIONAL FORMATTING - 'RLE' => pack("U", 0x202B), # RIGHT-TO-LEFT EMBEDDING - 'RLM' => pack("U", 0x200F), # RIGHT-TO-LEFT MARK - 'RLO' => pack("U", 0x202E), # RIGHT-TO-LEFT OVERRIDE - 'SHY' => pack("U", 0x00AD), # SOFT HYPHEN - 'VS1' => pack("U", 0xFE00), # VARIATION SELECTOR-1 - 'VS2' => pack("U", 0xFE01), # VARIATION SELECTOR-2 - 'VS3' => pack("U", 0xFE02), # VARIATION SELECTOR-3 - 'VS4' => pack("U", 0xFE03), # VARIATION SELECTOR-4 - 'VS5' => pack("U", 0xFE04), # VARIATION SELECTOR-5 - 'VS6' => pack("U", 0xFE05), # VARIATION SELECTOR-6 - 'VS7' => pack("U", 0xFE06), # VARIATION SELECTOR-7 - 'VS8' => pack("U", 0xFE07), # VARIATION SELECTOR-8 - 'VS9' => pack("U", 0xFE08), # VARIATION SELECTOR-9 - 'VS10' => pack("U", 0xFE09), # VARIATION SELECTOR-10 
- 'VS11' => pack("U", 0xFE0A), # VARIATION SELECTOR-11 - 'VS12' => pack("U", 0xFE0B), # VARIATION SELECTOR-12 - 'VS13' => pack("U", 0xFE0C), # VARIATION SELECTOR-13 - 'VS14' => pack("U", 0xFE0D), # VARIATION SELECTOR-14 - 'VS15' => pack("U", 0xFE0E), # VARIATION SELECTOR-15 - 'VS16' => pack("U", 0xFE0F), # VARIATION SELECTOR-16 - 'VS17' => pack("U", 0xE0100), # VARIATION SELECTOR-17 - 'VS18' => pack("U", 0xE0101), # VARIATION SELECTOR-18 - 'VS19' => pack("U", 0xE0102), # VARIATION SELECTOR-19 - 'VS20' => pack("U", 0xE0103), # VARIATION SELECTOR-20 - 'VS21' => pack("U", 0xE0104), # VARIATION SELECTOR-21 - 'VS22' => pack("U", 0xE0105), # VARIATION SELECTOR-22 - 'VS23' => pack("U", 0xE0106), # VARIATION SELECTOR-23 - 'VS24' => pack("U", 0xE0107), # VARIATION SELECTOR-24 - 'VS25' => pack("U", 0xE0108), # VARIATION SELECTOR-25 - 'VS26' => pack("U", 0xE0109), # VARIATION SELECTOR-26 - 'VS27' => pack("U", 0xE010A), # VARIATION SELECTOR-27 - 'VS28' => pack("U", 0xE010B), # VARIATION SELECTOR-28 - 'VS29' => pack("U", 0xE010C), # VARIATION SELECTOR-29 - 'VS30' => pack("U", 0xE010D), # VARIATION SELECTOR-30 - 'VS31' => pack("U", 0xE010E), # VARIATION SELECTOR-31 - 'VS32' => pack("U", 0xE010F), # VARIATION SELECTOR-32 - 'VS33' => pack("U", 0xE0110), # VARIATION SELECTOR-33 - 'VS34' => pack("U", 0xE0111), # VARIATION SELECTOR-34 - 'VS35' => pack("U", 0xE0112), # VARIATION SELECTOR-35 - 'VS36' => pack("U", 0xE0113), # VARIATION SELECTOR-36 - 'VS37' => pack("U", 0xE0114), # VARIATION SELECTOR-37 - 'VS38' => pack("U", 0xE0115), # VARIATION SELECTOR-38 - 'VS39' => pack("U", 0xE0116), # VARIATION SELECTOR-39 - 'VS40' => pack("U", 0xE0117), # VARIATION SELECTOR-40 - 'VS41' => pack("U", 0xE0118), # VARIATION SELECTOR-41 - 'VS42' => pack("U", 0xE0119), # VARIATION SELECTOR-42 - 'VS43' => pack("U", 0xE011A), # VARIATION SELECTOR-43 - 'VS44' => pack("U", 0xE011B), # VARIATION SELECTOR-44 - 'VS45' => pack("U", 0xE011C), # VARIATION SELECTOR-45 - 'VS46' => pack("U", 0xE011D), # VARIATION 
SELECTOR-46 - 'VS47' => pack("U", 0xE011E), # VARIATION SELECTOR-47 - 'VS48' => pack("U", 0xE011F), # VARIATION SELECTOR-48 - 'VS49' => pack("U", 0xE0120), # VARIATION SELECTOR-49 - 'VS50' => pack("U", 0xE0121), # VARIATION SELECTOR-50 - 'VS51' => pack("U", 0xE0122), # VARIATION SELECTOR-51 - 'VS52' => pack("U", 0xE0123), # VARIATION SELECTOR-52 - 'VS53' => pack("U", 0xE0124), # VARIATION SELECTOR-53 - 'VS54' => pack("U", 0xE0125), # VARIATION SELECTOR-54 - 'VS55' => pack("U", 0xE0126), # VARIATION SELECTOR-55 - 'VS56' => pack("U", 0xE0127), # VARIATION SELECTOR-56 - 'VS57' => pack("U", 0xE0128), # VARIATION SELECTOR-57 - 'VS58' => pack("U", 0xE0129), # VARIATION SELECTOR-58 - 'VS59' => pack("U", 0xE012A), # VARIATION SELECTOR-59 - 'VS60' => pack("U", 0xE012B), # VARIATION SELECTOR-60 - 'VS61' => pack("U", 0xE012C), # VARIATION SELECTOR-61 - 'VS62' => pack("U", 0xE012D), # VARIATION SELECTOR-62 - 'VS63' => pack("U", 0xE012E), # VARIATION SELECTOR-63 - 'VS64' => pack("U", 0xE012F), # VARIATION SELECTOR-64 - 'VS65' => pack("U", 0xE0130), # VARIATION SELECTOR-65 - 'VS66' => pack("U", 0xE0131), # VARIATION SELECTOR-66 - 'VS67' => pack("U", 0xE0132), # VARIATION SELECTOR-67 - 'VS68' => pack("U", 0xE0133), # VARIATION SELECTOR-68 - 'VS69' => pack("U", 0xE0134), # VARIATION SELECTOR-69 - 'VS70' => pack("U", 0xE0135), # VARIATION SELECTOR-70 - 'VS71' => pack("U", 0xE0136), # VARIATION SELECTOR-71 - 'VS72' => pack("U", 0xE0137), # VARIATION SELECTOR-72 - 'VS73' => pack("U", 0xE0138), # VARIATION SELECTOR-73 - 'VS74' => pack("U", 0xE0139), # VARIATION SELECTOR-74 - 'VS75' => pack("U", 0xE013A), # VARIATION SELECTOR-75 - 'VS76' => pack("U", 0xE013B), # VARIATION SELECTOR-76 - 'VS77' => pack("U", 0xE013C), # VARIATION SELECTOR-77 - 'VS78' => pack("U", 0xE013D), # VARIATION SELECTOR-78 - 'VS79' => pack("U", 0xE013E), # VARIATION SELECTOR-79 - 'VS80' => pack("U", 0xE013F), # VARIATION SELECTOR-80 - 'VS81' => pack("U", 0xE0140), # VARIATION SELECTOR-81 - 'VS82' => pack("U", 
0xE0141), # VARIATION SELECTOR-82 - 'VS83' => pack("U", 0xE0142), # VARIATION SELECTOR-83 - 'VS84' => pack("U", 0xE0143), # VARIATION SELECTOR-84 - 'VS85' => pack("U", 0xE0144), # VARIATION SELECTOR-85 - 'VS86' => pack("U", 0xE0145), # VARIATION SELECTOR-86 - 'VS87' => pack("U", 0xE0146), # VARIATION SELECTOR-87 - 'VS88' => pack("U", 0xE0147), # VARIATION SELECTOR-88 - 'VS89' => pack("U", 0xE0148), # VARIATION SELECTOR-89 - 'VS90' => pack("U", 0xE0149), # VARIATION SELECTOR-90 - 'VS91' => pack("U", 0xE014A), # VARIATION SELECTOR-91 - 'VS92' => pack("U", 0xE014B), # VARIATION SELECTOR-92 - 'VS93' => pack("U", 0xE014C), # VARIATION SELECTOR-93 - 'VS94' => pack("U", 0xE014D), # VARIATION SELECTOR-94 - 'VS95' => pack("U", 0xE014E), # VARIATION SELECTOR-95 - 'VS96' => pack("U", 0xE014F), # VARIATION SELECTOR-96 - 'VS97' => pack("U", 0xE0150), # VARIATION SELECTOR-97 - 'VS98' => pack("U", 0xE0151), # VARIATION SELECTOR-98 - 'VS99' => pack("U", 0xE0152), # VARIATION SELECTOR-99 - 'VS100' => pack("U", 0xE0153), # VARIATION SELECTOR-100 - 'VS101' => pack("U", 0xE0154), # VARIATION SELECTOR-101 - 'VS102' => pack("U", 0xE0155), # VARIATION SELECTOR-102 - 'VS103' => pack("U", 0xE0156), # VARIATION SELECTOR-103 - 'VS104' => pack("U", 0xE0157), # VARIATION SELECTOR-104 - 'VS105' => pack("U", 0xE0158), # VARIATION SELECTOR-105 - 'VS106' => pack("U", 0xE0159), # VARIATION SELECTOR-106 - 'VS107' => pack("U", 0xE015A), # VARIATION SELECTOR-107 - 'VS108' => pack("U", 0xE015B), # VARIATION SELECTOR-108 - 'VS109' => pack("U", 0xE015C), # VARIATION SELECTOR-109 - 'VS110' => pack("U", 0xE015D), # VARIATION SELECTOR-110 - 'VS111' => pack("U", 0xE015E), # VARIATION SELECTOR-111 - 'VS112' => pack("U", 0xE015F), # VARIATION SELECTOR-112 - 'VS113' => pack("U", 0xE0160), # VARIATION SELECTOR-113 - 'VS114' => pack("U", 0xE0161), # VARIATION SELECTOR-114 - 'VS115' => pack("U", 0xE0162), # VARIATION SELECTOR-115 - 'VS116' => pack("U", 0xE0163), # VARIATION SELECTOR-116 - 'VS117' => pack("U", 
0xE0164), # VARIATION SELECTOR-117 - 'VS118' => pack("U", 0xE0165), # VARIATION SELECTOR-118 - 'VS119' => pack("U", 0xE0166), # VARIATION SELECTOR-119 - 'VS120' => pack("U", 0xE0167), # VARIATION SELECTOR-120 - 'VS121' => pack("U", 0xE0168), # VARIATION SELECTOR-121 - 'VS122' => pack("U", 0xE0169), # VARIATION SELECTOR-122 - 'VS123' => pack("U", 0xE016A), # VARIATION SELECTOR-123 - 'VS124' => pack("U", 0xE016B), # VARIATION SELECTOR-124 - 'VS125' => pack("U", 0xE016C), # VARIATION SELECTOR-125 - 'VS126' => pack("U", 0xE016D), # VARIATION SELECTOR-126 - 'VS127' => pack("U", 0xE016E), # VARIATION SELECTOR-127 - 'VS128' => pack("U", 0xE016F), # VARIATION SELECTOR-128 - 'VS129' => pack("U", 0xE0170), # VARIATION SELECTOR-129 - 'VS130' => pack("U", 0xE0171), # VARIATION SELECTOR-130 - 'VS131' => pack("U", 0xE0172), # VARIATION SELECTOR-131 - 'VS132' => pack("U", 0xE0173), # VARIATION SELECTOR-132 - 'VS133' => pack("U", 0xE0174), # VARIATION SELECTOR-133 - 'VS134' => pack("U", 0xE0175), # VARIATION SELECTOR-134 - 'VS135' => pack("U", 0xE0176), # VARIATION SELECTOR-135 - 'VS136' => pack("U", 0xE0177), # VARIATION SELECTOR-136 - 'VS137' => pack("U", 0xE0178), # VARIATION SELECTOR-137 - 'VS138' => pack("U", 0xE0179), # VARIATION SELECTOR-138 - 'VS139' => pack("U", 0xE017A), # VARIATION SELECTOR-139 - 'VS140' => pack("U", 0xE017B), # VARIATION SELECTOR-140 - 'VS141' => pack("U", 0xE017C), # VARIATION SELECTOR-141 - 'VS142' => pack("U", 0xE017D), # VARIATION SELECTOR-142 - 'VS143' => pack("U", 0xE017E), # VARIATION SELECTOR-143 - 'VS144' => pack("U", 0xE017F), # VARIATION SELECTOR-144 - 'VS145' => pack("U", 0xE0180), # VARIATION SELECTOR-145 - 'VS146' => pack("U", 0xE0181), # VARIATION SELECTOR-146 - 'VS147' => pack("U", 0xE0182), # VARIATION SELECTOR-147 - 'VS148' => pack("U", 0xE0183), # VARIATION SELECTOR-148 - 'VS149' => pack("U", 0xE0184), # VARIATION SELECTOR-149 - 'VS150' => pack("U", 0xE0185), # VARIATION SELECTOR-150 - 'VS151' => pack("U", 0xE0186), # VARIATION 
SELECTOR-151 - 'VS152' => pack("U", 0xE0187), # VARIATION SELECTOR-152 - 'VS153' => pack("U", 0xE0188), # VARIATION SELECTOR-153 - 'VS154' => pack("U", 0xE0189), # VARIATION SELECTOR-154 - 'VS155' => pack("U", 0xE018A), # VARIATION SELECTOR-155 - 'VS156' => pack("U", 0xE018B), # VARIATION SELECTOR-156 - 'VS157' => pack("U", 0xE018C), # VARIATION SELECTOR-157 - 'VS158' => pack("U", 0xE018D), # VARIATION SELECTOR-158 - 'VS159' => pack("U", 0xE018E), # VARIATION SELECTOR-159 - 'VS160' => pack("U", 0xE018F), # VARIATION SELECTOR-160 - 'VS161' => pack("U", 0xE0190), # VARIATION SELECTOR-161 - 'VS162' => pack("U", 0xE0191), # VARIATION SELECTOR-162 - 'VS163' => pack("U", 0xE0192), # VARIATION SELECTOR-163 - 'VS164' => pack("U", 0xE0193), # VARIATION SELECTOR-164 - 'VS165' => pack("U", 0xE0194), # VARIATION SELECTOR-165 - 'VS166' => pack("U", 0xE0195), # VARIATION SELECTOR-166 - 'VS167' => pack("U", 0xE0196), # VARIATION SELECTOR-167 - 'VS168' => pack("U", 0xE0197), # VARIATION SELECTOR-168 - 'VS169' => pack("U", 0xE0198), # VARIATION SELECTOR-169 - 'VS170' => pack("U", 0xE0199), # VARIATION SELECTOR-170 - 'VS171' => pack("U", 0xE019A), # VARIATION SELECTOR-171 - 'VS172' => pack("U", 0xE019B), # VARIATION SELECTOR-172 - 'VS173' => pack("U", 0xE019C), # VARIATION SELECTOR-173 - 'VS174' => pack("U", 0xE019D), # VARIATION SELECTOR-174 - 'VS175' => pack("U", 0xE019E), # VARIATION SELECTOR-175 - 'VS176' => pack("U", 0xE019F), # VARIATION SELECTOR-176 - 'VS177' => pack("U", 0xE01A0), # VARIATION SELECTOR-177 - 'VS178' => pack("U", 0xE01A1), # VARIATION SELECTOR-178 - 'VS179' => pack("U", 0xE01A2), # VARIATION SELECTOR-179 - 'VS180' => pack("U", 0xE01A3), # VARIATION SELECTOR-180 - 'VS181' => pack("U", 0xE01A4), # VARIATION SELECTOR-181 - 'VS182' => pack("U", 0xE01A5), # VARIATION SELECTOR-182 - 'VS183' => pack("U", 0xE01A6), # VARIATION SELECTOR-183 - 'VS184' => pack("U", 0xE01A7), # VARIATION SELECTOR-184 - 'VS185' => pack("U", 0xE01A8), # VARIATION SELECTOR-185 - 'VS186' => 
pack("U", 0xE01A9), # VARIATION SELECTOR-186 - 'VS187' => pack("U", 0xE01AA), # VARIATION SELECTOR-187 - 'VS188' => pack("U", 0xE01AB), # VARIATION SELECTOR-188 - 'VS189' => pack("U", 0xE01AC), # VARIATION SELECTOR-189 - 'VS190' => pack("U", 0xE01AD), # VARIATION SELECTOR-190 - 'VS191' => pack("U", 0xE01AE), # VARIATION SELECTOR-191 - 'VS192' => pack("U", 0xE01AF), # VARIATION SELECTOR-192 - 'VS193' => pack("U", 0xE01B0), # VARIATION SELECTOR-193 - 'VS194' => pack("U", 0xE01B1), # VARIATION SELECTOR-194 - 'VS195' => pack("U", 0xE01B2), # VARIATION SELECTOR-195 - 'VS196' => pack("U", 0xE01B3), # VARIATION SELECTOR-196 - 'VS197' => pack("U", 0xE01B4), # VARIATION SELECTOR-197 - 'VS198' => pack("U", 0xE01B5), # VARIATION SELECTOR-198 - 'VS199' => pack("U", 0xE01B6), # VARIATION SELECTOR-199 - 'VS200' => pack("U", 0xE01B7), # VARIATION SELECTOR-200 - 'VS201' => pack("U", 0xE01B8), # VARIATION SELECTOR-201 - 'VS202' => pack("U", 0xE01B9), # VARIATION SELECTOR-202 - 'VS203' => pack("U", 0xE01BA), # VARIATION SELECTOR-203 - 'VS204' => pack("U", 0xE01BB), # VARIATION SELECTOR-204 - 'VS205' => pack("U", 0xE01BC), # VARIATION SELECTOR-205 - 'VS206' => pack("U", 0xE01BD), # VARIATION SELECTOR-206 - 'VS207' => pack("U", 0xE01BE), # VARIATION SELECTOR-207 - 'VS208' => pack("U", 0xE01BF), # VARIATION SELECTOR-208 - 'VS209' => pack("U", 0xE01C0), # VARIATION SELECTOR-209 - 'VS210' => pack("U", 0xE01C1), # VARIATION SELECTOR-210 - 'VS211' => pack("U", 0xE01C2), # VARIATION SELECTOR-211 - 'VS212' => pack("U", 0xE01C3), # VARIATION SELECTOR-212 - 'VS213' => pack("U", 0xE01C4), # VARIATION SELECTOR-213 - 'VS214' => pack("U", 0xE01C5), # VARIATION SELECTOR-214 - 'VS215' => pack("U", 0xE01C6), # VARIATION SELECTOR-215 - 'VS216' => pack("U", 0xE01C7), # VARIATION SELECTOR-216 - 'VS217' => pack("U", 0xE01C8), # VARIATION SELECTOR-217 - 'VS218' => pack("U", 0xE01C9), # VARIATION SELECTOR-218 - 'VS219' => pack("U", 0xE01CA), # VARIATION SELECTOR-219 - 'VS220' => pack("U", 0xE01CB), # 
VARIATION SELECTOR-220 - 'VS221' => pack("U", 0xE01CC), # VARIATION SELECTOR-221 - 'VS222' => pack("U", 0xE01CD), # VARIATION SELECTOR-222 - 'VS223' => pack("U", 0xE01CE), # VARIATION SELECTOR-223 - 'VS224' => pack("U", 0xE01CF), # VARIATION SELECTOR-224 - 'VS225' => pack("U", 0xE01D0), # VARIATION SELECTOR-225 - 'VS226' => pack("U", 0xE01D1), # VARIATION SELECTOR-226 - 'VS227' => pack("U", 0xE01D2), # VARIATION SELECTOR-227 - 'VS228' => pack("U", 0xE01D3), # VARIATION SELECTOR-228 - 'VS229' => pack("U", 0xE01D4), # VARIATION SELECTOR-229 - 'VS230' => pack("U", 0xE01D5), # VARIATION SELECTOR-230 - 'VS231' => pack("U", 0xE01D6), # VARIATION SELECTOR-231 - 'VS232' => pack("U", 0xE01D7), # VARIATION SELECTOR-232 - 'VS233' => pack("U", 0xE01D8), # VARIATION SELECTOR-233 - 'VS234' => pack("U", 0xE01D9), # VARIATION SELECTOR-234 - 'VS235' => pack("U", 0xE01DA), # VARIATION SELECTOR-235 - 'VS236' => pack("U", 0xE01DB), # VARIATION SELECTOR-236 - 'VS237' => pack("U", 0xE01DC), # VARIATION SELECTOR-237 - 'VS238' => pack("U", 0xE01DD), # VARIATION SELECTOR-238 - 'VS239' => pack("U", 0xE01DE), # VARIATION SELECTOR-239 - 'VS240' => pack("U", 0xE01DF), # VARIATION SELECTOR-240 - 'VS241' => pack("U", 0xE01E0), # VARIATION SELECTOR-241 - 'VS242' => pack("U", 0xE01E1), # VARIATION SELECTOR-242 - 'VS243' => pack("U", 0xE01E2), # VARIATION SELECTOR-243 - 'VS244' => pack("U", 0xE01E3), # VARIATION SELECTOR-244 - 'VS245' => pack("U", 0xE01E4), # VARIATION SELECTOR-245 - 'VS246' => pack("U", 0xE01E5), # VARIATION SELECTOR-246 - 'VS247' => pack("U", 0xE01E6), # VARIATION SELECTOR-247 - 'VS248' => pack("U", 0xE01E7), # VARIATION SELECTOR-248 - 'VS249' => pack("U", 0xE01E8), # VARIATION SELECTOR-249 - 'VS250' => pack("U", 0xE01E9), # VARIATION SELECTOR-250 - 'VS251' => pack("U", 0xE01EA), # VARIATION SELECTOR-251 - 'VS252' => pack("U", 0xE01EB), # VARIATION SELECTOR-252 - 'VS253' => pack("U", 0xE01EC), # VARIATION SELECTOR-253 - 'VS254' => pack("U", 0xE01ED), # VARIATION SELECTOR-254 - 
'VS255' => pack("U", 0xE01EE), # VARIATION SELECTOR-255 - 'VS256' => pack("U", 0xE01EF), # VARIATION SELECTOR-256 - 'WJ' => pack("U", 0x2060), # WORD JOINER - 'ZWJ' => pack("U", 0x200D), # ZERO WIDTH JOINER - 'ZWNJ' => pack("U", 0x200C), # ZERO WIDTH NON-JOINER - 'ZWSP' => pack("U", 0x200B), # ZERO WIDTH SPACE ); # These are the aliases above that differ under :loose and :full matching # because the :full versions have blanks or hyphens in them. -my %loose_system_aliases = ( - 'LINEFEED' => pack("U", 0x0A), - 'FORMFEED' => pack("U", 0x0C), - 'CARRIAGERETURN' => pack("U", 0x0D), - 'NEXTLINE' => pack("U", 0x85), - 'SINGLESHIFT2' => pack("U", 0x8E), - 'SINGLESHIFT3' => pack("U", 0x8F), - 'PRIVATEUSE1' => pack("U", 0x91), - 'PRIVATEUSE2' => pack("U", 0x92), - 'STARTOFPROTECTEDAREA' => pack("U", 0x96), - 'ENDOFPROTECTEDAREA' => pack("U", 0x97), - 'PADDINGCHARACTER' => pack("U", 0x80), - 'HIGHOCTETPRESET' => pack("U", 0x81), - 'SINGLEGRAPHICCHARACTERINTRODUCER' => pack("U", 0x99), - 'BYTEORDERMARK' => pack("U", 0xFEFF), -); +#my %loose_system_aliases = ( +#); my %deprecated_aliases = ( - # Pre-3.2 compatibility (only for the first 256 characters). # Use of these gives deprecated message. 
- 'HORIZONTAL TABULATION' => pack("U", 0x09), # CHARACTER TABULATION - 'VERTICAL TABULATION' => pack("U", 0x0B), # LINE TABULATION - 'FILE SEPARATOR' => pack("U", 0x1C), # INFORMATION SEPARATOR FOUR - 'GROUP SEPARATOR' => pack("U", 0x1D), # INFORMATION SEPARATOR THREE - 'RECORD SEPARATOR' => pack("U", 0x1E), # INFORMATION SEPARATOR TWO - 'UNIT SEPARATOR' => pack("U", 0x1F), # INFORMATION SEPARATOR ONE - 'HORIZONTAL TABULATION SET' => pack("U", 0x88), # CHARACTER TABULATION SET - 'HORIZONTAL TABULATION WITH JUSTIFICATION' => pack("U", 0x89), # CHARACTER TABULATION WITH JUSTIFICATION - 'PARTIAL LINE DOWN' => pack("U", 0x8B), # PARTIAL LINE FORWARD - 'PARTIAL LINE UP' => pack("U", 0x8C), # PARTIAL LINE BACKWARD - 'VERTICAL TABULATION SET' => pack("U", 0x8A), # LINE TABULATION SET - 'REVERSE INDEX' => pack("U", 0x8D), # REVERSE LINE FEED - # Unicode 6.0 co-opted this for U+1F514, so deprecate it for now. 'BELL' => pack("U", 0x07), ); -my %loose_deprecated_aliases = ( - 'HORIZONTALTABULATION' => pack("U", 0x09), - 'VERTICALTABULATION' => pack("U", 0x0B), - 'FILESEPARATOR' => pack("U", 0x1C), - 'GROUPSEPARATOR' => pack("U", 0x1D), - 'RECORDSEPARATOR' => pack("U", 0x1E), - 'UNITSEPARATOR' => pack("U", 0x1F), - 'HORIZONTALTABULATIONSET' => pack("U", 0x88), - 'HORIZONTALTABULATIONWITHJUSTIFICATION' => pack("U", 0x89), - 'PARTIALLINEDOWN' => pack("U", 0x8B), - 'PARTIALLINEUP' => pack("U", 0x8C), - 'VERTICALTABULATIONSET' => pack("U", 0x8A), - 'REVERSEINDEX' => pack("U", 0x8D), -); +#my %loose_deprecated_aliases = ( +#); # These are special cased in :loose matching, differing only in a medial # hyphen @@ -720,10 +314,13 @@ sub lookup_name ($$$) { if (exists $system_aliases{$lookup_name}) { $utf8 = $system_aliases{$lookup_name}; } - elsif ($loose && exists $loose_system_aliases{$lookup_name}) { - $utf8 = $loose_system_aliases{$lookup_name}; - } - elsif (exists $deprecated_aliases{$lookup_name}) { + # There are currently no entries in this hash, so don't waste time looking + # 
for them. But the code is retained for the unlikely possibility that + # some will be added in the future. +# elsif ($loose && exists $loose_system_aliases{$lookup_name}) { +# $utf8 = $loose_system_aliases{$lookup_name}; +# } + if (exists $deprecated_aliases{$lookup_name}) { require warnings; warnings::warnif('deprecated', "Unicode character name \"$name\" is deprecated, use \"" @@ -731,14 +328,17 @@ sub lookup_name ($$$) { . "\" instead"); $utf8 = $deprecated_aliases{$lookup_name}; } - elsif ($loose && exists $loose_deprecated_aliases{$lookup_name}) { - require warnings; - warnings::warnif('deprecated', - "Unicode character name \"$name\" is deprecated, use \"" - . viacode(ord $loose_deprecated_aliases{$lookup_name}) - . "\" instead"); - $utf8 = $loose_deprecated_aliases{$lookup_name}; - } + # There are currently no entries in this hash, so don't waste time looking + # for them. But the code is retained for the unlikely possibility that + # some will be added in the future. +# elsif ($loose && exists $loose_deprecated_aliases{$lookup_name}) { +# require warnings; +# warnings::warnif('deprecated', +# "Unicode character name \"$name\" is deprecated, use \"" +# . viacode(ord $loose_deprecated_aliases{$lookup_name}) +# . "\" instead"); +# $utf8 = $loose_deprecated_aliases{$lookup_name}; +# } } my @off; # Offsets into table of pattern match begin and end @@ -1099,6 +699,8 @@ sub viacode { return $viacode{$hex} if exists $viacode{$hex}; + my $return; + # If the code point is above the max in the table, there's no point # looking through it. Checking the length first is slightly faster if (length($hex) <= 5 || CORE::hex($hex) <= 0x10FFFF) { @@ -1119,20 +721,34 @@ sub viacode { # The name starts with the next character and goes up to the # next new-line. 
Using capturing parentheses above instead of # @+ more than doubles the execution time in Perl 5.13 - $viacode{$hex} = substr($txt, $+[0], index($txt, "\n", $+[0]) - $+[0]); - return $viacode{$hex}; + $return = substr($txt, $+[0], index($txt, "\n", $+[0]) - $+[0]); + + # If not one of these 4 code points, return what we've found. + if ($hex !~ / ^ 000 (?: 8[014] | 99 ) $ /x) { + $viacode{$hex} = $return; + return $return; + } + + # For backwards compatibility, we don't return the official name of + # the 4 code points if there are user-defined aliases for them -- so + # continue looking. } } # See if there is a user name for it, before giving up completely. # First get the scoped aliases, give up if have none. my $H_ref = (caller(1))[10]; - return if ! defined $H_ref - || ! exists $H_ref->{charnames_stringified_inverse_ords}; + return if ! defined $return + && (! defined $H_ref + || ! exists $H_ref->{charnames_stringified_inverse_ords}); my %code_point_aliases = split ',', $H_ref->{charnames_stringified_inverse_ords}; if (! exists $code_point_aliases{$hex}) { + + # If there is an official alias, and no user-defined one, return that + return $return if defined $return; + if (CORE::hex($hex) > 0x10FFFF) { carp "Unicode characters only allocated up to U+10FFFF (you asked for U+$hex)"; } diff --git a/lib/charnames.pm b/lib/charnames.pm index 534ed5cd0a..07c1b70cdf 100644 --- a/lib/charnames.pm +++ b/lib/charnames.pm @@ -211,13 +211,8 @@ use variables inside the C<\N{...}>. If you want similar run-time functionality, use L<charnames::string_vianame()|/charnames::string_vianame(I<name>)>. -For the C0 and C1 control characters (U+0000..U+001F, U+0080..U+009F) -there are no official Unicode names but you can use instead the ISO 6429 -names (LINE FEED, ESCAPE, and so forth, and their abbreviations, LF, -ESC, ...). In Unicode 3.2 (as of Perl 5.8) some naming changes took -place, and ISO 6429 was updated, see L</ALIASES>. Since Unicode 6.0, it -is deprecated to use C<BELL>. 
Instead use C<ALERT> (but C<BEL> will continue -to work). +Since Unicode 6.0, it is deprecated to use C<BELL>. Instead use C<ALERT> (but +C<BEL> will continue to work). If the input name is unknown, C<\N{NAME}> raises a warning and substitutes the Unicode REPLACEMENT CHARACTER (U+FFFD). @@ -258,104 +253,15 @@ string_vianame(), since C<\N{...}> look-ups are done at compile time. =head1 ALIASES -A few aliases have been defined for convenience; instead of having -to use the official names, - - LINE FEED (LF) - FORM FEED (FF) - CARRIAGE RETURN (CR) - NEXT LINE (NEL) - -(yes, with parentheses), one can use - - LINE FEED - FORM FEED - CARRIAGE RETURN - NEXT LINE - LF - FF - CR - NEL - -All the other standard abbreviations for the controls, such as C<ACK> for -C<ACKNOWLEDGE> also can be used. - -One can also use - - BYTE ORDER MARK - BOM - -and these abbreviations - - Abbreviation Full Name - - CGJ COMBINING GRAPHEME JOINER - FVS1 MONGOLIAN FREE VARIATION SELECTOR ONE - FVS2 MONGOLIAN FREE VARIATION SELECTOR TWO - FVS3 MONGOLIAN FREE VARIATION SELECTOR THREE - LRE LEFT-TO-RIGHT EMBEDDING - LRM LEFT-TO-RIGHT MARK - LRO LEFT-TO-RIGHT OVERRIDE - MMSP MEDIUM MATHEMATICAL SPACE - MVS MONGOLIAN VOWEL SEPARATOR - NBSP NO-BREAK SPACE - NNBSP NARROW NO-BREAK SPACE - PDF POP DIRECTIONAL FORMATTING - RLE RIGHT-TO-LEFT EMBEDDING - RLM RIGHT-TO-LEFT MARK - RLO RIGHT-TO-LEFT OVERRIDE - SHY SOFT HYPHEN - VS1 VARIATION SELECTOR-1 - . - . - . 
- VS256 VARIATION SELECTOR-256 - WJ WORD JOINER - ZWJ ZERO WIDTH JOINER - ZWNJ ZERO WIDTH NON-JOINER - ZWSP ZERO WIDTH SPACE - -For backward compatibility one can use the old names for -certain C0 and C1 controls - - old new - - FILE SEPARATOR INFORMATION SEPARATOR FOUR - GROUP SEPARATOR INFORMATION SEPARATOR THREE - HORIZONTAL TABULATION CHARACTER TABULATION - HORIZONTAL TABULATION SET CHARACTER TABULATION SET - HORIZONTAL TABULATION WITH JUSTIFICATION CHARACTER TABULATION - WITH JUSTIFICATION - PARTIAL LINE DOWN PARTIAL LINE FORWARD - PARTIAL LINE UP PARTIAL LINE BACKWARD - RECORD SEPARATOR INFORMATION SEPARATOR TWO - REVERSE INDEX REVERSE LINE FEED - UNIT SEPARATOR INFORMATION SEPARATOR ONE - VERTICAL TABULATION LINE TABULATION - VERTICAL TABULATION SET LINE TABULATION SET - -but the old names in addition to giving the character -will also give a warning about being deprecated. - -And finally, certain published variants are usable, including some for -controls that have no Unicode names: - - name character - - END OF PROTECTED AREA END OF GUARDED AREA, U+0097 - HIGH OCTET PRESET U+0081 - HOP U+0081 - IND U+0084 - INDEX U+0084 - PAD U+0080 - PADDING CHARACTER U+0080 - PRIVATE USE 1 PRIVATE USE ONE, U+0091 - PRIVATE USE 2 PRIVATE USE TWO, U+0092 - SGC U+0099 - SINGLE GRAPHIC CHARACTER INTRODUCER U+0099 - SINGLE-SHIFT 2 SINGLE SHIFT TWO, U+008E - SINGLE-SHIFT 3 SINGLE SHIFT THREE, U+008F - START OF PROTECTED AREA START OF GUARDED AREA, U+0096 +Starting in Unicode 6.1 and Perl v5.16, Unicode defines many abbreviations and +names that were formerly Perl extensions, and some additional ones that Perl +did not previously accept. The list is getting too long to reproduce here, +but you can get the complete list from the Unicode web site: +L<http://www.unicode.org/Public/UNIDATA/NameAliases.txt>. + +Earlier versions of Perl accepted almost all the 6.1 names. 
These were most +extensively documented in the v5.14 version of this pod: +L<http://perldoc.perl.org/5.14.0/charnames.html#ALIASES>. =head1 CUSTOM ALIASES @@ -434,8 +340,13 @@ prints "FOUR TEARDROP-SPOKED ASTERISK". The name returned is the official name for the code point, if available; otherwise your custom alias for it. This means that your alias will only be returned for code points that don't have an official -Unicode name (nor a Unicode version 1 name), such as private use code -points, and the 4 control characters U+0080, U+0081, U+0084, and U+0099. +Unicode name (nor alias) such as private use code points. +Until Unicode 6.1, the 4 control characters U+0080, U+0081, U+0084, and U+0099 +did not have names (actually, to be precise they still don't, but they do have +aliases, which for most purposes are indistinguishable from true names). +To preserve backwards compatibility, any alias you define for these code +points will be returned by this function, in preference to the official alias. + If you define more than one name for the code point, it is indeterminate which one will be returned. diff --git a/lib/charnames.t b/lib/charnames.t index 4686b8127c..9d37daa58c 100644 --- a/lib/charnames.t +++ b/lib/charnames.t @@ -292,8 +292,8 @@ is("\N{BOM}", chr(0xFEFF), 'Verify "\N{BOM}" is correct'); is("\N{HORIZONTAL TABULATION}", "\t", 'Verify "\N{HORIZONTAL TABULATION}" eq "\t"'); - my $ok = grep { /"HORIZONTAL TABULATION" is deprecated.*"CHARACTER TABULATION"/ } @WARN; - ok($ok, '... and that gives deprecated warning'); + my $ok = ! grep { /"HORIZONTAL TABULATION" is deprecated.*"CHARACTER TABULATION"/ } @WARN; + ok($ok, '... and doesnt give deprecated warning'); # XXX These tests should be changed for 5.16, when we convert BELL to the # Unicode version.
@@ -444,9 +444,13 @@ is(charnames::viacode("U+00000000000FEED"), "ARABIC LETTER WAW ISOLATED FORM", ' is("\N{VERTICAL TABULATION SET}", "\N{LINE TABULATION SET}", 'Verify "\N{VERTICAL TABULATION SET}" eq "\N{LINE TABULATION SET}"'); is("\N{REVERSE INDEX}", "\N{REVERSE LINE FEED}", 'Verify "\N{REVERSE INDEX}" eq "\N{REVERSE LINE FEED}"'); is("\N{SINGLE-SHIFT 2}", "\N{SINGLE SHIFT TWO}", 'Verify "\N{SINGLE-SHIFT 2}" eq "\N{SINGLE SHIFT TWO}"'); + is("\N{SINGLE-SHIFT-2}", "\N{SINGLE-SHIFT 2}", 'Verify "\N{SINGLE-SHIFT-2}" eq "\N{SINGLE SHIFT 2}"'); is("\N{SINGLE-SHIFT 3}", "\N{SINGLE SHIFT THREE}", 'Verify "\N{SINGLE-SHIFT 3}" eq "\N{SINGLE SHIFT THREE}"'); + is("\N{SINGLE-SHIFT-3}", "\N{SINGLE-SHIFT 3}", 'Verify "\N{SINGLE-SHIFT-3}" eq "\N{SINGLE SHIFT 3}"'); is("\N{PRIVATE USE 1}", "\N{PRIVATE USE ONE}", 'Verify "\N{PRIVATE USE 1}" eq "\N{PRIVATE USE ONE}"'); + is("\N{PRIVATE USE-1}", "\N{PRIVATE USE 1}", 'Verify "\N{PRIVATE USE-1}" eq "\N{PRIVATE USE 1}"'); is("\N{PRIVATE USE 2}", "\N{PRIVATE USE TWO}", 'Verify "\N{PRIVATE USE 2}" eq "\N{PRIVATE USE TWO}"'); + is("\N{PRIVATE USE-2}", "\N{PRIVATE USE 2}", 'Verify "\N{PRIVATE USE-2}" eq "\N{PRIVATE USE 2}"'); is("\N{START OF PROTECTED AREA}", "\N{START OF GUARDED AREA}", 'Verify "\N{START OF PROTECTED AREA}" eq "\N{START OF GUARDED AREA}"'); is("\N{END OF PROTECTED AREA}", "\N{END OF GUARDED AREA}", 'Verify "\N{END OF PROTECTED AREA}" eq "\N{END OF GUARDED AREA}"'); is("\N{VS1}", "\N{VARIATION SELECTOR-1}", 'Verify "\N{VS1}" eq "\N{VARIATION SELECTOR-1}"'); @@ -1056,7 +1060,7 @@ is("\N{U+1D0C5}", "\N{BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS}", 'V chomp; s/^\s*#.*//; next unless $_; - my ($hex, $name) = split ";"; + my ($hex, $name, $type) = split ";"; my $i = CORE::hex $hex; # Make sure that both aliases (the one in UnicodeData, and the one we @@ -1070,7 +1074,7 @@ is("\N{U+1D0C5}", "\N{BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS}", 'V # aliases for the same code point, and viacode should return 
only the # final one. So don't do it here; instead rely on the loop below to # pick up the test. - $names[$i] = $name; + $names[$i] = $name if $type eq 'correction'; } close $fh; @@ -1131,6 +1135,11 @@ is("\N{U+1D0C5}", "\N{BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS}", 'V my $hex = sprintf("%04X", $i); if (! $names[$i]) { + # These four code points now have names, from NameAlias, but + # aren't listed as having names in UnicodeData.txt, so viacode + # returns their alias names, not undef + next if $i == 0x80 || $i == 0x81 || $i == 0x84 || $i == 0x99; + # If there is no name for this code point, all we can # test is that. $all_pass &= ok(! defined charnames::viacode($i), "Verify viacode(0x$hex) is undefined"); diff --git a/lib/unicore/ArabicShaping.txt b/lib/unicore/ArabicShaping.txt index bab6a11f9a..35e79f6e1f 100644 --- a/lib/unicore/ArabicShaping.txt +++ b/lib/unicore/ArabicShaping.txt @@ -1,27 +1,30 @@ -# ArabicShaping-6.0.0.txt -# Date: 2010-04-30, 13:47:00 PDT [KW] +# ArabicShaping-6.1.0.txt +# Date: 2011-04-15, 23:16:00 GMT [KW] # # This file is a normative contributory data file in the # Unicode Character Database. # -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # -# This file defines the shaping classes for Arabic, Syriac, and N'Ko +# This file defines the Joining_Type and Joining_Group +# property values for Arabic, Syriac, N'Ko, and Mandaic # positional shaping, repeating in machine readable form the -# information exemplified in Tables 8-3, 8-7, 8-8, 8-11, 8-12, -# 8-13, and 13-5 of The Unicode Standard, Version 6.0. +# information exemplified in Tables 8-3, 8-8, 8-9, 8-10, 8-13, 8-14, +# 8-15, 13-5, 14-5, and 14-6 of The Unicode Standard, Version 6.1. # -# See sections 8.2, 8.3, and 13.5 of The Unicode Standard, Version 6.0 -# for more information. 
+# See sections 8.2, 8.3, 13.5, and 14.12 of The Unicode Standard, +# Version 6.1 for more information. # # Each line contains four fields, separated by a semicolon. # # Field 0: the code point, in 4-digit hexadecimal -# form, of an Arabic, Syriac, or N'Ko character. +# form, of an Arabic, Syriac, N'Ko, or Mandaic character. # -# Field 1: gives a short schematic name for that character, -# abbreviated from the normative Unicode character name. +# Field 1: gives a short schematic name for that character. +# The schematic name is descriptive of the shape, based as +# consistently as possible on a name for the skeleton and +# then the diacritic marks applied to the skeleton, if any. # Note that this schematic name is considered a comment, # and does not constitute a formal property value. # @@ -65,7 +68,7 @@ # to jg=No_Joining_Group in this data file. Other, more specific # joining group values will be defined only if an explicit proposal # to define those values exactly has been approved by the UTC. This -# is the convention exemplified by the N'Ko script. Only the Arabic +# is the convention exemplified by the N'Ko and Mandaic scripts. Only the Arabic # and Syriac scripts currently have explicit joining group values defined. 
# # Note: Code points that are not explicitly listed in this file are @@ -84,44 +87,45 @@ # Unicode; Schematic Name; Joining Type; Joining Group -# Arabic characters +# Arabic Characters 0600; ARABIC NUMBER SIGN; U; No_Joining_Group 0601; ARABIC SIGN SANAH; U; No_Joining_Group 0602; ARABIC FOOTNOTE MARKER; U; No_Joining_Group 0603; ARABIC SIGN SAFHA; U; No_Joining_Group +0604; ARABIC SIGN SAMVAT; U; No_Joining_Group 0608; ARABIC RAY; U; No_Joining_Group 060B; AFGHANI SIGN; U; No_Joining_Group -0620; YEH WITH RING; D; YEH +0620; DOTLESS YEH WITH SEPARATE RING BELOW; D; YEH 0621; HAMZA; U; No_Joining_Group -0622; MADDA ON ALEF; R; ALEF -0623; HAMZA ON ALEF; R; ALEF -0624; HAMZA ON WAW; R; WAW -0625; HAMZA UNDER ALEF; R; ALEF -0626; HAMZA ON YEH; D; YEH +0622; ALEF WITH MADDA ABOVE; R; ALEF +0623; ALEF WITH HAMZA ABOVE; R; ALEF +0624; WAW WITH HAMZA ABOVE; R; WAW +0625; ALEF WITH HAMZA BELOW; R; ALEF +0626; DOTLESS YEH WITH HAMZA ABOVE; D; YEH 0627; ALEF; R; ALEF 0628; BEH; D; BEH 0629; TEH MARBUTA; R; TEH MARBUTA -062A; TEH; D; BEH -062B; THEH; D; BEH -062C; JEEM; D; HAH +062A; DOTLESS BEH WITH 2 DOTS ABOVE; D; BEH +062B; DOTLESS BEH WITH 3 DOTS ABOVE; D; BEH +062C; HAH WITH DOT BELOW; D; HAH 062D; HAH; D; HAH -062E; KHAH; D; HAH +062E; HAH WITH DOT ABOVE; D; HAH 062F; DAL; R; DAL -0630; THAL; R; DAL +0630; DAL WITH DOT ABOVE; R; DAL 0631; REH; R; REH -0632; ZAIN; R; REH +0632; REH WITH DOT ABOVE; R; REH 0633; SEEN; D; SEEN -0634; SHEEN; D; SEEN +0634; SEEN WITH 3 DOTS ABOVE; D; SEEN 0635; SAD; D; SAD -0636; DAD; D; SAD +0636; SAD WITH DOT ABOVE; D; SAD 0637; TAH; D; TAH -0638; ZAH; D; TAH +0638; TAH WITH DOT ABOVE; D; TAH 0639; AIN; D; AIN -063A; GHAIN; D; AIN +063A; AIN WITH DOT ABOVE; D; AIN 063B; KEHEH WITH 2 DOTS ABOVE; D; GAF -063C; KEHEH WITH 3 DOTS BELOW; D; GAF -063D; FARSI YEH WITH INVERTED V; D; FARSI YEH +063C; KEHEH WITH 3 DOTS BELOW; D; GAF +063D; FARSI YEH WITH INVERTED V ABOVE; D; FARSI YEH 063E; FARSI YEH WITH 2 DOTS ABOVE; D; FARSI YEH 063F; FARSI 
YEH WITH 3 DOTS ABOVE; D; FARSI YEH 0640; TATWEEL; C; No_Joining_Group @@ -133,48 +137,48 @@ 0646; NOON; D; NOON 0647; HEH; D; HEH 0648; WAW; R; WAW -0649; ALEF MAKSURA; D; YEH +0649; DOTLESS YEH; D; YEH 064A; YEH; D; YEH 066E; DOTLESS BEH; D; BEH 066F; DOTLESS QAF; D; QAF -0671; HAMZAT WASL ON ALEF; R; ALEF -0672; WAVY HAMZA ON ALEF; R; ALEF -0673; WAVY HAMZA UNDER ALEF; R; ALEF +0671; ALEF WITH WASLA ABOVE; R; ALEF +0672; ALEF WITH WAVY HAMZA ABOVE; R; ALEF +0673; ALEF WITH WAVY HAMZA BELOW; R; ALEF 0674; HIGH HAMZA; U; No_Joining_Group 0675; HIGH HAMZA ALEF; R; ALEF 0676; HIGH HAMZA WAW; R; WAW -0677; HIGH HAMZA WAW WITH DAMMA; R; WAW -0678; HIGH HAMZA YEH; D; YEH -0679; TEH WITH SMALL TAH; D; BEH -067A; TEH WITH 2 DOTS VERTICAL ABOVE; D; BEH -067B; BEH WITH 2 DOTS VERTICAL BELOW; D; BEH -067C; TEH WITH RING; D; BEH -067D; TEH WITH 3 DOTS ABOVE DOWNWARD; D; BEH -067E; TEH WITH 3 DOTS BELOW; D; BEH -067F; TEH WITH 4 DOTS ABOVE; D; BEH -0680; BEH WITH 4 DOTS BELOW; D; BEH -0681; HAMZA ON HAH; D; HAH -0682; HAH WITH 2 DOTS VERTICAL ABOVE; D; HAH -0683; HAH WITH MIDDLE 2 DOTS; D; HAH -0684; HAH WITH MIDDLE 2 DOTS VERTICAL; D; HAH +0677; HIGH HAMZA WAW WITH DAMMA ABOVE; R; WAW +0678; HIGH HAMZA DOTLESS YEH; D; YEH +0679; DOTLESS BEH WITH TAH ABOVE; D; BEH +067A; DOTLESS BEH WITH VERTICAL 2 DOTS ABOVE; D; BEH +067B; DOTLESS BEH WITH VERTICAL 2 DOTS BELOW; D; BEH +067C; DOTLESS BEH WITH ATTACHED RING BELOW AND 2 DOTS ABOVE; D; BEH +067D; DOTLESS BEH WITH INVERTED 3 DOTS ABOVE; D; BEH +067E; DOTLESS BEH WITH 3 DOTS BELOW; D; BEH +067F; DOTLESS BEH WITH 4 DOTS ABOVE; D; BEH +0680; DOTLESS BEH WITH 4 DOTS BELOW; D; BEH +0681; HAH WITH HAMZA ABOVE; D; HAH +0682; HAH WITH VERTICAL 2 DOTS ABOVE; D; HAH +0683; HAH WITH 2 DOTS BELOW; D; HAH +0684; HAH WITH VERTICAL 2 DOTS BELOW; D; HAH 0685; HAH WITH 3 DOTS ABOVE; D; HAH -0686; HAH WITH MIDDLE 3 DOTS DOWNWARD; D; HAH -0687; HAH WITH MIDDLE 4 DOTS; D; HAH -0688; DAL WITH SMALL TAH; R; DAL -0689; DAL WITH RING; R; DAL +0686; HAH 
WITH 3 DOTS BELOW; D; HAH +0687; HAH WITH 4 DOTS BELOW; D; HAH +0688; DAL WITH TAH ABOVE; R; DAL +0689; DAL WITH ATTACHED RING BELOW; R; DAL 068A; DAL WITH DOT BELOW; R; DAL -068B; DAL WITH DOT BELOW AND SMALL TAH; R; DAL +068B; DAL WITH DOT BELOW AND TAH ABOVE; R; DAL 068C; DAL WITH 2 DOTS ABOVE; R; DAL 068D; DAL WITH 2 DOTS BELOW; R; DAL 068E; DAL WITH 3 DOTS ABOVE; R; DAL -068F; DAL WITH 3 DOTS ABOVE DOWNWARD; R; DAL +068F; DAL WITH INVERTED 3 DOTS ABOVE; R; DAL 0690; DAL WITH 4 DOTS ABOVE; R; DAL -0691; REH WITH SMALL TAH; R; REH -0692; REH WITH SMALL V; R; REH -0693; REH WITH RING; R; REH +0691; REH WITH TAH ABOVE; R; REH +0692; REH WITH V ABOVE; R; REH +0693; REH WITH ATTACHED RING BELOW; R; REH 0694; REH WITH DOT BELOW; R; REH -0695; REH WITH SMALL V BELOW; R; REH -0696; REH WITH DOT BELOW AND DOT ABOVE; R; REH +0695; REH WITH V BELOW; R; REH +0696; REH WITH DOT BELOW AND DOT WITHIN; R; REH 0697; REH WITH 2 DOTS ABOVE; R; REH 0698; REH WITH 3 DOTS ABOVE; R; REH 0699; REH WITH 4 DOTS ABOVE; R; REH @@ -186,66 +190,66 @@ 069F; TAH WITH 3 DOTS ABOVE; D; TAH 06A0; AIN WITH 3 DOTS ABOVE; D; AIN 06A1; DOTLESS FEH; D; FEH -06A2; FEH WITH DOT MOVED BELOW; D; FEH +06A2; DOTLESS FEH WITH DOT BELOW; D; FEH 06A3; FEH WITH DOT BELOW; D; FEH -06A4; FEH WITH 3 DOTS ABOVE; D; FEH -06A5; FEH WITH 3 DOTS BELOW; D; FEH -06A6; FEH WITH 4 DOTS ABOVE; D; FEH -06A7; QAF WITH DOT ABOVE; D; QAF -06A8; QAF WITH 3 DOTS ABOVE; D; QAF +06A4; DOTLESS FEH WITH 3 DOTS ABOVE; D; FEH +06A5; DOTLESS FEH WITH 3 DOTS BELOW; D; FEH +06A6; DOTLESS FEH WITH 4 DOTS ABOVE; D; FEH +06A7; DOTLESS QAF WITH DOT ABOVE; D; QAF +06A8; DOTLESS QAF WITH 3 DOTS ABOVE; D; QAF 06A9; KEHEH; D; GAF 06AA; SWASH KAF; D; SWASH KAF -06AB; KAF WITH RING; D; GAF +06AB; KEHEH WITH ATTACHED RING BELOW; D; GAF 06AC; KAF WITH DOT ABOVE; D; KAF 06AD; KAF WITH 3 DOTS ABOVE; D; KAF 06AE; KAF WITH 3 DOTS BELOW; D; KAF 06AF; GAF; D; GAF -06B0; GAF WITH RING; D; GAF +06B0; GAF WITH ATTACHED RING BELOW; D; GAF 06B1; GAF WITH 2 
DOTS ABOVE; D; GAF 06B2; GAF WITH 2 DOTS BELOW; D; GAF -06B3; GAF WITH 2 DOTS VERTICAL BELOW; D; GAF +06B3; GAF WITH VERTICAL 2 DOTS BELOW; D; GAF 06B4; GAF WITH 3 DOTS ABOVE; D; GAF -06B5; LAM WITH SMALL V; D; LAM +06B5; LAM WITH V ABOVE; D; LAM 06B6; LAM WITH DOT ABOVE; D; LAM 06B7; LAM WITH 3 DOTS ABOVE; D; LAM 06B8; LAM WITH 3 DOTS BELOW; D; LAM 06B9; NOON WITH DOT BELOW; D; NOON 06BA; DOTLESS NOON; D; NOON -06BB; DOTLESS NOON WITH SMALL TAH; D; NOON -06BC; NOON WITH RING; D; NOON +06BB; DOTLESS NOON WITH TAH ABOVE; D; NOON +06BC; NOON WITH ATTACHED RING BELOW; D; NOON 06BD; NYA; D; NYA 06BE; KNOTTED HEH; D; KNOTTED HEH -06BF; HAH WITH MIDDLE 3 DOTS DOWNWARD AND DOT ABOVE; D; HAH -06C0; HAMZA ON HEH; R; TEH MARBUTA +06BF; HAH WITH 3 DOTS BELOW AND DOT ABOVE; D; HAH +06C0; DOTLESS TEH MARBUTA WITH HAMZA ABOVE; R; TEH MARBUTA 06C1; HEH GOAL; D; HEH GOAL -06C2; HAMZA ON HEH GOAL; D; HEH GOAL +06C2; HEH GOAL WITH HAMZA ABOVE; D; HEH GOAL 06C3; TEH MARBUTA GOAL; R; TEH MARBUTA GOAL -06C4; WAW WITH RING; R; WAW +06C4; WAW WITH ATTACHED RING WITHIN; R; WAW 06C5; WAW WITH BAR; R; WAW -06C6; WAW WITH SMALL V; R; WAW -06C7; WAW WITH DAMMA; R; WAW +06C6; WAW WITH V ABOVE; R; WAW +06C7; WAW WITH DAMMA ABOVE; R; WAW 06C8; WAW WITH ALEF ABOVE; R; WAW -06C9; WAW WITH INVERTED SMALL V; R; WAW +06C9; WAW WITH INVERTED V ABOVE; R; WAW 06CA; WAW WITH 2 DOTS ABOVE; R; WAW 06CB; WAW WITH 3 DOTS ABOVE; R; WAW 06CC; FARSI YEH; D; FARSI YEH 06CD; YEH WITH TAIL; R; YEH WITH TAIL -06CE; FARSI YEH WITH SMALL V; D; FARSI YEH +06CE; FARSI YEH WITH V ABOVE; D; FARSI YEH 06CF; WAW WITH DOT ABOVE; R; WAW -06D0; YEH WITH 2 DOTS VERTICAL BELOW; D; YEH -06D1; YEH WITH 3 DOTS BELOW; D; YEH +06D0; DOTLESS YEH WITH VERTICAL 2 DOTS BELOW; D; YEH +06D1; DOTLESS YEH WITH 3 DOTS BELOW; D; YEH 06D2; YEH BARREE; R; YEH BARREE -06D3; HAMZA ON YEH BARREE; R; YEH BARREE -06D5; AE; R; TEH MARBUTA +06D3; YEH BARREE WITH HAMZA ABOVE; R; YEH BARREE +06D5; DOTLESS TEH MARBUTA; R; TEH MARBUTA 06DD; ARABIC END OF 
AYAH; U; No_Joining_Group -06EE; DAL WITH INVERTED V; R; DAL -06EF; REH WITH INVERTED V; R; REH +06EE; DAL WITH INVERTED V ABOVE; R; DAL +06EF; REH WITH INVERTED V ABOVE; R; REH 06FA; SEEN WITH DOT BELOW AND 3 DOTS ABOVE; D; SEEN -06FB; DAD WITH DOT BELOW; D; SAD -06FC; GHAIN WITH DOT BELOW; D; AIN -06FF; HEH WITH INVERTED V; D; KNOTTED HEH +06FB; SAD WITH DOT BELOW AND DOT ABOVE; D; SAD +06FC; AIN WITH DOT BELOW AND DOT ABOVE; D; AIN +06FF; KNOTTED HEH WITH INVERTED V ABOVE; D; KNOTTED HEH -# Syriac characters +# Syriac Characters 0710; ALAPH; R; ALAPH 0712; BETH; D; BETH @@ -282,55 +286,55 @@ 074E; SOGDIAN KHAPH; D; KHAPH 074F; SOGDIAN FE; D; FE -# Arabic supplement characters +# Arabic Supplement Characters -0750; BEH WITH 3 DOTS HORIZONTALLY BELOW; D; BEH -0751; BEH WITH DOT BELOW AND 3 DOTS ABOVE; D; BEH -0752; BEH WITH 3 DOTS POINTING UPWARDS BELOW; D; BEH -0753; BEH WITH 3 DOTS POINTING UPWARDS BELOW AND 2 DOTS ABOVE; D; BEH -0754; BEH WITH 2 DOTS BELOW AND DOT ABOVE; D; BEH -0755; BEH WITH INVERTED SMALL V BELOW; D; BEH -0756; BEH WITH SMALL V; D; BEH +0750; DOTLESS BEH WITH HORIZONTAL 3 DOTS BELOW; D; BEH +0751; BEH WITH 3 DOTS ABOVE; D; BEH +0752; DOTLESS BEH WITH INVERTED 3 DOTS BELOW; D; BEH +0753; DOTLESS BEH WITH INVERTED 3 DOTS BELOW AND 2 DOTS ABOVE; D; BEH +0754; DOTLESS BEH WITH 2 DOTS BELOW AND DOT ABOVE; D; BEH +0755; DOTLESS BEH WITH INVERTED V BELOW; D; BEH +0756; DOTLESS BEH WITH V ABOVE; D; BEH 0757; HAH WITH 2 DOTS ABOVE; D; HAH -0758; HAH WITH 3 DOTS POINTING UPWARDS BELOW; D; HAH -0759; DAL WITH 2 DOTS VERTICALLY BELOW AND SMALL TAH; R; DAL -075A; DAL WITH INVERTED SMALL V BELOW; R; DAL -075B; REH WITH STROKE; R; REH +0758; HAH WITH INVERTED 3 DOTS BELOW; D; HAH +0759; DAL WITH VERTICAL 2 DOTS BELOW AND TAH ABOVE; R; DAL +075A; DAL WITH INVERTED V BELOW; R; DAL +075B; REH WITH BAR; R; REH 075C; SEEN WITH 4 DOTS ABOVE; D; SEEN 075D; AIN WITH 2 DOTS ABOVE; D; AIN -075E; AIN WITH 3 DOTS POINTING DOWNWARDS ABOVE; D; AIN -075F; AIN WITH 2 DOTS 
VERTICALLY ABOVE; D; AIN -0760; FEH WITH 2 DOTS BELOW; D; FEH -0761; FEH WITH 3 DOTS POINTING UPWARDS BELOW; D; FEH +075E; AIN WITH INVERTED 3 DOTS ABOVE; D; AIN +075F; AIN WITH VERTICAL 2 DOTS ABOVE; D; AIN +0760; DOTLESS FEH WITH 2 DOTS BELOW; D; FEH +0761; DOTLESS FEH WITH INVERTED 3 DOTS BELOW; D; FEH 0762; KEHEH WITH DOT ABOVE; D; GAF 0763; KEHEH WITH 3 DOTS ABOVE; D; GAF -0764; KEHEH WITH 3 DOTS POINTING UPWARDS BELOW; D; GAF +0764; KEHEH WITH INVERTED 3 DOTS BELOW; D; GAF 0765; MEEM WITH DOT ABOVE; D; MEEM 0766; MEEM WITH DOT BELOW; D; MEEM 0767; NOON WITH 2 DOTS BELOW; D; NOON -0768; NOON WITH SMALL TAH; D; NOON -0769; NOON WITH SMALL V; D; NOON +0768; NOON WITH TAH ABOVE; D; NOON +0769; NOON WITH V ABOVE; D; NOON 076A; LAM WITH BAR; D; LAM -076B; REH WITH 2 DOTS VERTICALLY ABOVE; R; REH +076B; REH WITH VERTICAL 2 DOTS ABOVE; R; REH 076C; REH WITH HAMZA ABOVE; R; REH -076D; SEEN WITH 2 DOTS VERTICALLY ABOVE; D; SEEN -076E; HAH WITH SMALL TAH BELOW; D; HAH -076F; HAH WITH SMALL TAH AND 2 DOTS; D; HAH -0770; SEEN WITH SMALL TAH AND 2 DOTS; D; SEEN -0771; REH WITH SMALL TAH AND 2 DOTS; R; REH -0772; HAH WITH SMALL TAH ABOVE; D; HAH +076D; SEEN WITH VERTICAL 2 DOTS ABOVE; D; SEEN +076E; HAH WITH TAH BELOW; D; HAH +076F; HAH WITH TAH AND 2 DOTS BELOW; D; HAH +0770; SEEN WITH 2 DOTS AND TAH ABOVE; D; SEEN +0771; REH WITH 2 DOTS AND TAH ABOVE; R; REH +0772; HAH WITH TAH ABOVE; D; HAH 0773; ALEF WITH DIGIT TWO ABOVE; R; ALEF 0774; ALEF WITH DIGIT THREE ABOVE; R; ALEF 0775; FARSI YEH WITH DIGIT TWO ABOVE; D; FARSI YEH 0776; FARSI YEH WITH DIGIT THREE ABOVE; D; FARSI YEH -0777; YEH WITH DIGIT FOUR BELOW; D; YEH +0777; DOTLESS YEH WITH DIGIT FOUR BELOW; D; YEH 0778; WAW WITH DIGIT TWO ABOVE; R; WAW 0779; WAW WITH DIGIT THREE ABOVE; R; WAW -077A; YEH BARREE WITH DIGIT TWO ABOVE; D; BURUSHASKI YEH BARREE -077B; YEH BARREE WITH DIGIT THREE ABOVE; D; BURUSHASKI YEH BARREE +077A; BURUSHASKI YEH BARREE WITH DIGIT TWO ABOVE; D; BURUSHASKI YEH BARREE +077B; BURUSHASKI YEH 
BARREE WITH DIGIT THREE ABOVE; D; BURUSHASKI YEH BARREE 077C; HAH WITH DIGIT FOUR BELOW; D; HAH 077D; SEEN WITH DIGIT FOUR ABOVE; D; SEEN -077E; SEEN WITH INVERTED V; D; SEEN +077E; SEEN WITH INVERTED V ABOVE; D; SEEN 077F; KAF WITH 2 DOTS ABOVE; D; KAF # N'Ko Characters @@ -370,6 +374,49 @@ 07EA; NKO JONA RA; D; No_Joining_Group 07FA; NKO LAJANYALAN; C; No_Joining_Group +# Mandaic Characters + +0840; MANDAIC HALQA; R; No_Joining_Group +0841; MANDAIC AB; D; No_Joining_Group +0842; MANDAIC AG; D; No_Joining_Group +0843; MANDAIC AD; D; No_Joining_Group +0844; MANDAIC AH; D; No_Joining_Group +0845; MANDAIC USHENNA; D; No_Joining_Group +0846; MANDAIC AZ; R; No_Joining_Group +0847; MANDAIC IT; D; No_Joining_Group +0848; MANDAIC ATT; D; No_Joining_Group +0849; MANDAIC AKSA; R; No_Joining_Group +084A; MANDAIC AK; D; No_Joining_Group +084B; MANDAIC AL; D; No_Joining_Group +084C; MANDAIC AM; D; No_Joining_Group +084D; MANDAIC AN; D; No_Joining_Group +084E; MANDAIC AS; D; No_Joining_Group +084F; MANDAIC IN; R; No_Joining_Group +0850; MANDAIC AP; D; No_Joining_Group +0851; MANDAIC ASZ; D; No_Joining_Group +0852; MANDAIC AQ; D; No_Joining_Group +0853; MANDAIC AR; D; No_Joining_Group +0854; MANDAIC ASH; R; No_Joining_Group +0855; MANDAIC AT; D; No_Joining_Group +0856; MANDAIC DUSHENNA; U; No_Joining_Group +0857; MANDAIC KAD; U; No_Joining_Group +0858; MANDAIC AIN; U; No_Joining_Group + +# Arabic Extended-A Characters + +08A0; DOTLESS BEH WITH V BELOW; D; BEH +08A2; HAH WITH DOT BELOW AND 2 DOTS ABOVE; D; HAH +08A3; TAH WITH 2 DOTS ABOVE; D; TAH +08A4; DOTLESS FEH WITH DOT BELOW AND 3 DOTS ABOVE; D; FEH +08A5; QAF WITH DOT BELOW; D; QAF +08A6; LAM WITH DOUBLE BAR; D; LAM +08A7; MEEM WITH 3 DOTS ABOVE; D; MEEM +08A8; YEH WITH HAMZA ABOVE; D; YEH +08A9; YEH WITH DOT ABOVE; D; YEH +08AA; REH WITH LOOP; R; REH +08AB; WAW WITH DOT WITHIN; R; WAW +08AC; ROHINGYA YEH; R; ROHINGYA YEH + # Other 200C; ZERO WIDTH NON-JOINER; U; No_Joining_Group diff --git a/lib/unicore/BidiMirroring.txt 
b/lib/unicore/BidiMirroring.txt index 902f9a6b88..2e719bc1e0 100644 --- a/lib/unicore/BidiMirroring.txt +++ b/lib/unicore/BidiMirroring.txt @@ -1,19 +1,19 @@ -# BidiMirroring-6.0.0.txt -# Date: 2010-06-21, 12:09:00 PDT [KW] +# BidiMirroring-6.1.0.txt +# Date: 2011-12-20, 19:31:00 GMT [KW, LI] # # Bidi_Mirroring_Glyph Property # # This file is an informative contributory data file in the # Unicode Character Database. # -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # -# This data file lists characters that have the Bidi_Mirrored=True property +# This data file lists characters that have the Bidi_Mirrored=Yes property # value, for which there is another Unicode character that typically has a glyph # that is the mirror image of the original character's glyph. # -# The repertoire covered by the file is Unicode 6.0.0. +# The repertoire covered by the file is Unicode 6.1.0. # # The file contains a list of lines with mappings from one code point # to another one for character-based mirroring. @@ -26,10 +26,18 @@ # variable-length hexadecimal value with 4 to 6 digits. # A comment indicates where the characters are "BEST FIT" mirroring. # -# Code points for which Bidi_Mirrored=True, but for which no appropriate +# Code points for which Bidi_Mirrored=Yes, but for which no appropriate # characters exist with mirrored glyphs, are # listed as comments at the end of the file. # +# Note: (2011-12-19) There is an inconsistency between the +# following statement about the default value +# of the Bidi_Mirroring_Glyph property and the +# value of the @missing line for Bidi_Mirroring_Glyph in +# PropertyValueAliases.txt. This inconsistency was discovered too +# late in the release process to be resolved by +# the UTC. The inconsistency will be resolved in a future revision. 
+# # Formally, the default value of the Bidi_Mirroring_Glyph property # for each code point is the code point itself, unless a mapping to # some other character is specified in this data file. When a code @@ -41,9 +49,13 @@ # at http://www.unicode.org/unicode/reports/tr9/ # # This file was originally created by Markus Scherer. -# Extended for Unicode 3.2, 4.0, 4.1, 5.0, 5.1, 5.2, and 6.0 by Ken Whistler. +# Extended for Unicode 3.2, 4.0, 4.1, 5.0, 5.1, 5.2, and 6.0 by Ken Whistler, +# and for Unicode 6.1 by Ken Whistler and Laurentiu Iancu. # # ############################################################ +# +# Property: Bidi_Mirroring_Glyph +# 0028; 0029 # LEFT PARENTHESIS 0029; 0028 # RIGHT PARENTHESIS @@ -209,6 +221,8 @@ 27C6; 27C5 # RIGHT S-SHAPED BAG DELIMITER 27C8; 27C9 # REVERSE SOLIDUS PRECEDING SUBSET 27C9; 27C8 # SUPERSET PRECEDING SOLIDUS +27CB; 27CD # MATHEMATICAL RISING DIAGONAL +27CD; 27CB # MATHEMATICAL FALLING DIAGONAL 27D5; 27D6 # LEFT OUTER JOIN 27D6; 27D5 # RIGHT OUTER JOIN 27DD; 27DE # LONG RIGHT TACK diff --git a/lib/unicore/Blocks.txt b/lib/unicore/Blocks.txt index 50df2e1d31..f9a384e3ff 100644 --- a/lib/unicore/Blocks.txt +++ b/lib/unicore/Blocks.txt @@ -1,8 +1,8 @@ -# Blocks-6.0.0.txt -# Date: 2010-06-04, 11:12:00 PDT [KW] +# Blocks-6.1.0.txt +# Date: 2011-06-14, 18:26:00 GMT [KW, LI] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. 
# For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ # @@ -46,6 +46,7 @@ 07C0..07FF; NKo 0800..083F; Samaritan 0840..085F; Mandaic +08A0..08FF; Arabic Extended-A 0900..097F; Devanagari 0980..09FF; Bengali 0A00..0A7F; Gurmukhi @@ -86,6 +87,7 @@ 1BC0..1BFF; Batak 1C00..1C4F; Lepcha 1C50..1C7F; Ol Chiki +1CC0..1CCF; Sundanese Supplement 1CD0..1CFF; Vedic Extensions 1D00..1D7F; Phonetic Extensions 1D80..1DBF; Phonetic Extensions Supplement @@ -161,6 +163,7 @@ A980..A9DF; Javanese AA00..AA5F; Cham AA60..AA7F; Myanmar Extended-A AA80..AADF; Tai Viet +AAE0..AAFF; Meetei Mayek Extensions AB00..AB2F; Ethiopic Extended-A ABC0..ABFF; Meetei Mayek AC00..D7AF; Hangul Syllables @@ -199,6 +202,8 @@ FFF0..FFFF; Specials 10840..1085F; Imperial Aramaic 10900..1091F; Phoenician 10920..1093F; Lydian +10980..1099F; Meroitic Hieroglyphs +109A0..109FF; Meroitic Cursive 10A00..10A5F; Kharoshthi 10A60..10A7F; Old South Arabian 10B00..10B3F; Avestan @@ -208,10 +213,15 @@ FFF0..FFFF; Specials 10E60..10E7F; Rumi Numeral Symbols 11000..1107F; Brahmi 11080..110CF; Kaithi +110D0..110FF; Sora Sompeng +11100..1114F; Chakma +11180..111DF; Sharada +11680..116CF; Takri 12000..123FF; Cuneiform 12400..1247F; Cuneiform Numbers and Punctuation 13000..1342F; Egyptian Hieroglyphs 16800..16A3F; Bamum Supplement +16F00..16F9F; Miao 1B000..1B0FF; Kana Supplement 1D000..1D0FF; Byzantine Musical Symbols 1D100..1D1FF; Musical Symbols @@ -219,6 +229,7 @@ FFF0..FFFF; Specials 1D300..1D35F; Tai Xuan Jing Symbols 1D360..1D37F; Counting Rod Numerals 1D400..1D7FF; Mathematical Alphanumeric Symbols +1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols 1F000..1F02F; Mahjong Tiles 1F030..1F09F; Domino Tiles 1F0A0..1F0FF; Playing Cards diff --git a/lib/unicore/CJKRadicals.txt b/lib/unicore/CJKRadicals.txt index 32a765330d..a7debb6e1e 100644 --- a/lib/unicore/CJKRadicals.txt +++ b/lib/unicore/CJKRadicals.txt @@ -1,8 +1,8 @@ -# CJKRadicals-6.0.0.txt 
-# Date: 2010-01-22, 10:53:25 PDT [RC] +# CJKRadicals-6.1.0.txt +# Date: 2011-08-30, 23:14:00 GMT [RC, KW] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr38/ # @@ -24,6 +24,7 @@ # # This file was created for Unicode 5.2 by Richard Cook. # Updated for Unicode 6.0 by Richard Cook. +# Updated for Unicode 6.1 by Ken Whistler. # # #################################################### diff --git a/lib/unicore/CaseFolding.txt b/lib/unicore/CaseFolding.txt index ffe6173d75..0d9a4090cd 100644 --- a/lib/unicore/CaseFolding.txt +++ b/lib/unicore/CaseFolding.txt @@ -1,8 +1,8 @@ -# CaseFolding-6.0.0.txt -# Date: 2010-05-18, 00:48:57 GMT [MD] +# CaseFolding-6.1.0.txt +# Date: 2011-07-25, 21:21:56 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ # @@ -52,7 +52,15 @@ # behavior. (The default option is to exclude them.) # # ================================================================= -# @missing 0000..10FFFF; <codepoint> + +# Property: Case_Folding + +# All code points not explicitly listed for Case_Folding +# have the value C for the status field, and the code point itself for the mapping field. 
+ +# @missing: 0000..10FFFF; C; <code point> + +# ================================================================= 0041; C; 0061; # LATIN CAPITAL LETTER A 0042; C; 0062; # LATIN CAPITAL LETTER B 0043; C; 0063; # LATIN CAPITAL LETTER C @@ -574,6 +582,8 @@ 10C3; C; 2D23; # GEORGIAN CAPITAL LETTER WE 10C4; C; 2D24; # GEORGIAN CAPITAL LETTER HAR 10C5; C; 2D25; # GEORGIAN CAPITAL LETTER HOE +10C7; C; 2D27; # GEORGIAN CAPITAL LETTER YN +10CD; C; 2D2D; # GEORGIAN CAPITAL LETTER AEN 1E00; C; 1E01; # LATIN CAPITAL LETTER A WITH RING BELOW 1E02; C; 1E03; # LATIN CAPITAL LETTER B WITH DOT ABOVE 1E04; C; 1E05; # LATIN CAPITAL LETTER B WITH DOT BELOW @@ -1042,6 +1052,7 @@ 2CE2; C; 2CE3; # COPTIC CAPITAL LETTER OLD NUBIAN WAU 2CEB; C; 2CEC; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI 2CED; C; 2CEE; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA +2CF2; C; 2CF3; # COPTIC CAPITAL LETTER BOHAIRIC KHEI A640; C; A641; # CYRILLIC CAPITAL LETTER ZEMLYA A642; C; A643; # CYRILLIC CAPITAL LETTER DZELO A644; C; A645; # CYRILLIC CAPITAL LETTER REVERSED DZE @@ -1126,11 +1137,13 @@ A786; C; A787; # LATIN CAPITAL LETTER INSULAR T A78B; C; A78C; # LATIN CAPITAL LETTER SALTILLO A78D; C; 0265; # LATIN CAPITAL LETTER TURNED H A790; C; A791; # LATIN CAPITAL LETTER N WITH DESCENDER +A792; C; A793; # LATIN CAPITAL LETTER C WITH BAR A7A0; C; A7A1; # LATIN CAPITAL LETTER G WITH OBLIQUE STROKE A7A2; C; A7A3; # LATIN CAPITAL LETTER K WITH OBLIQUE STROKE A7A4; C; A7A5; # LATIN CAPITAL LETTER N WITH OBLIQUE STROKE A7A6; C; A7A7; # LATIN CAPITAL LETTER R WITH OBLIQUE STROKE A7A8; C; A7A9; # LATIN CAPITAL LETTER S WITH OBLIQUE STROKE +A7AA; C; 0266; # LATIN CAPITAL LETTER H WITH HOOK FB00; F; 0066 0066; # LATIN SMALL LIGATURE FF FB01; F; 0066 0069; # LATIN SMALL LIGATURE FI FB02; F; 0066 006C; # LATIN SMALL LIGATURE FL diff --git a/lib/unicore/CompositionExclusions.txt b/lib/unicore/CompositionExclusions.txt index e39c651811..f12f7d61bf 100644 --- a/lib/unicore/CompositionExclusions.txt +++ 
b/lib/unicore/CompositionExclusions.txt @@ -1,5 +1,5 @@ -# CompositionExclusions-6.0.0.txt -# Date: 2010-06-25, 14:34:00 PDT [KW] +# CompositionExclusions-6.1.0.txt +# Date: 2011-07-12, 00:13:00 GMT [KW, LI] # # This file lists the characters for the Composition Exclusion Table # defined in UAX #15, Unicode Normalization Forms. @@ -7,7 +7,7 @@ # This file is a normative contributory data file in the # Unicode Character Database. # -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # # For more information, see @@ -169,17 +169,16 @@ FB4E # HEBREW LETTER PE WITH RAFE # FA20 CJK COMPATIBILITY IDEOGRAPH-FA20 # FA22 CJK COMPATIBILITY IDEOGRAPH-FA22 # FA25..FA26 [2] CJK COMPATIBILITY IDEOGRAPH-FA25..CJK COMPATIBILITY IDEOGRAPH-FA26 -# FA2A..FA2D [4] CJK COMPATIBILITY IDEOGRAPH-FA2A..CJK COMPATIBILITY IDEOGRAPH-FA2D -# FA30..FA6D [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D +# FA2A..FA6D [68] CJK COMPATIBILITY IDEOGRAPH-FA2A..CJK COMPATIBILITY IDEOGRAPH-FA6D # FA70..FAD9 [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 # 2F800..2FA1D [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D -# Total code points: 1033 +# Total code points: 1035 # ================================================ # (4) Non-Starter Decompositions # -# These characters can be derived from the UnicodeData file +# These characters can be derived from the UnicodeData.txt file # by including each expanding canonical decomposition # (i.e., those which canonically decompose to a sequence # of characters instead of a single character), such that: diff --git a/lib/unicore/DAge.txt b/lib/unicore/DAge.txt index 4293229cef..6ff0206b4c 100644 --- a/lib/unicore/DAge.txt +++ b/lib/unicore/DAge.txt @@ -1,8 +1,8 @@ -# DerivedAge-6.0.0.txt -# Date: 2010-10-05, 00:47:58 GMT [MD, KW] +# DerivedAge-6.1.0.txt +# Date: 2012-01-20, 21:47:00 GMT [MD, KW] # 
# Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2012 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ # @@ -42,12 +42,14 @@ # For more information, see [http://www.unicode.org/reports/tr18/]. # All code points not explicitly listed for Age -# have the value unassigned. +# have the value Unassigned (NA). -# @missing: 0000..10FFFF; unassigned +# @missing: 0000..10FFFF; Unassigned # ================================================ +# Age=V1_1 + # Assigned as of Unicode 1.1.0 (June, 1993) # [excluding removed Hangul Syllables] @@ -357,6 +359,8 @@ FFFE..FFFF ; 1.1 # [2] <noncharacter-FFFE>..<noncharacter-FFFF> # ================================================ +# Age=V2_0 + # Newly assigned in Unicode 2.0.0 (July, 1996) 0591..05A1 ; 2.0 # [17] HEBREW ACCENT ETNAHTA..HEBREW ACCENT PAZER @@ -397,6 +401,8 @@ FFFFE..FFFFF ; 2.0 # [2] <noncharacter-FFFFE>..<noncharacter-FFFFF> # ================================================ +# Age=V2_1 + # Newly assigned in Unicode 2.1.2 (May, 1998) 20AC ; 2.1 # EURO SIGN @@ -406,6 +412,8 @@ FFFC ; 2.1 # OBJECT REPLACEMENT CHARACTER # ================================================ +# Age=V3_0 + # Newly assigned in Unicode 3.0.0 (September, 1999) 01F6..01F9 ; 3.0 # [4] LATIN CAPITAL LETTER HWAIR..LATIN SMALL LETTER N WITH GRAVE @@ -493,9 +501,7 @@ FFFC ; 2.1 # OBJECT REPLACEMENT CHARACTER 1401..1676 ; 3.0 # [630] CANADIAN SYLLABICS E..CANADIAN SYLLABICS NNGAA 1680..169C ; 3.0 # [29] OGHAM SPACE MARK..OGHAM REVERSED FEATHER MARK 16A0..16F0 ; 3.0 # [81] RUNIC LETTER FEHU FEOH FE F..RUNIC BELGTHOR SYMBOL -1780..17B3 ; 3.0 # [52] KHMER LETTER KA..KHMER INDEPENDENT VOWEL QAU -17B4..17B5 ; 3.0 # [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA -17B6..17DC ; 3.0 # [39] KHMER VOWEL SIGN AA..KHMER SIGN AVAKRAHASANYA +1780..17DC ; 3.0 # [93] KHMER LETTER KA..KHMER SIGN AVAKRAHASANYA 17E0..17E9 ; 3.0 # [10] 
KHMER DIGIT ZERO..KHMER DIGIT NINE 1800..180E ; 3.0 # [15] MONGOLIAN BIRGA..MONGOLIAN VOWEL SEPARATOR 1810..1819 ; 3.0 # [10] MONGOLIAN DIGIT ZERO..MONGOLIAN DIGIT NINE @@ -537,6 +543,8 @@ FFF9..FFFB ; 3.0 # [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATIO # ================================================ +# Age=V3_1 + # Newly assigned in Unicode 3.1.0 (March, 2001) 03F4..03F5 ; 3.1 # [2] GREEK CAPITAL THETA SYMBOL..GREEK LUNATE EPSILON SYMBOL @@ -582,6 +590,8 @@ E0020..E007F ; 3.1 # [96] TAG SPACE..CANCEL TAG # ================================================ +# Age=V3_2 + # Newly assigned in Unicode 3.2.0 (March, 2002) 0220 ; 3.2 # LATIN CAPITAL LETTER N WITH LONG RIGHT LEG @@ -649,6 +659,8 @@ FF5F..FF60 ; 3.2 # [2] FULLWIDTH LEFT WHITE PARENTHESIS..FULLWIDTH RIGHT WH # ================================================ +# Age=V4_0 + # Newly assigned in Unicode 4.0.0 (April, 2003) 0221 ; 4.0 # LATIN SMALL LETTER D WITH CURL @@ -733,6 +745,8 @@ E0100..E01EF ; 4.0 # [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 # ================================================ +# Age=V4_1 + # Newly assigned in Unicode 4.1.0 (March, 2005) 0237..0241 ; 4.1 # [11] LATIN SMALL LETTER DOTLESS J..LATIN CAPITAL LETTER GLOTTAL STOP @@ -826,6 +840,8 @@ FE10..FE19 ; 4.1 # [10] PRESENTATION FORM FOR VERTICAL COMMA..PRESENTATION F # ================================================ +# Age=V5_0 + # Newly assigned in Unicode 5.0.0 (July, 2006) 0242..024F ; 5.0 # [14] LATIN SMALL LETTER GLOTTAL STOP..LATIN SMALL LETTER Y WITH STROKE @@ -868,6 +884,8 @@ A840..A877 ; 5.0 # [56] PHAGS-PA LETTER KA..PHAGS-PA MARK DOUBLE SHAD # ================================================ +# Age=V5_1 + # Newly assigned in Unicode 5.1.0 (March, 2008) 0370..0373 ; 5.1 # [4] GREEK CAPITAL LETTER HETA..GREEK SMALL LETTER ARCHAIC SAMPI @@ -964,6 +982,8 @@ FE24..FE26 ; 5.1 # [3] COMBINING MACRON LEFT HALF..COMBINING CONJOINING MAC # ================================================ +# Age=V5_2 + # Newly 
assigned in Unicode 5.2.0 (October, 2009) 0524..0525 ; 5.2 # [2] CYRILLIC CAPITAL LETTER PE WITH DESCENDER..CYRILLIC SMALL LETTER PE WITH DESCENDER @@ -1063,6 +1083,8 @@ FA6B..FA6D ; 5.2 # [3] CJK COMPATIBILITY IDEOGRAPH-FA6B..CJK COMPATIBILITY # ================================================ +# Age=V6_0 + # Newly assigned in Unicode 6.0.0 (October, 2010) 0526..0527 ; 6.0 # [2] CYRILLIC CAPITAL LETTER SHHA WITH DESCENDER..CYRILLIC SMALL LETTER SHHA WITH DESCENDER @@ -1174,4 +1196,102 @@ FBB2..FBC1 ; 6.0 # [16] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL SMALL TAH BEL # Total code points: 2088 +# ================================================ + +# Age=V6_1 + +# Newly assigned in Unicode 6.1.0 (January, 2012) + +058F ; 6.1 # ARMENIAN DRAM SIGN +0604 ; 6.1 # ARABIC SIGN SAMVAT +08A0 ; 6.1 # ARABIC LETTER BEH WITH SMALL V BELOW +08A2..08AC ; 6.1 # [11] ARABIC LETTER JEEM WITH TWO DOTS ABOVE..ARABIC LETTER ROHINGYA YEH +08E4..08FE ; 6.1 # [27] ARABIC CURLY FATHA..ARABIC DAMMA WITH DOT +0AF0 ; 6.1 # GUJARATI ABBREVIATION SIGN +0EDE..0EDF ; 6.1 # [2] LAO LETTER KHMU GO..LAO LETTER KHMU NYO +10C7 ; 6.1 # GEORGIAN CAPITAL LETTER YN +10CD ; 6.1 # GEORGIAN CAPITAL LETTER AEN +10FD..10FF ; 6.1 # [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN +1BAB..1BAD ; 6.1 # [3] SUNDANESE SIGN VIRAMA..SUNDANESE CONSONANT SIGN PASANGAN WA +1BBA..1BBF ; 6.1 # [6] SUNDANESE AVAGRAHA..SUNDANESE LETTER FINAL M +1CC0..1CC7 ; 6.1 # [8] SUNDANESE PUNCTUATION BINDU SURYA..SUNDANESE PUNCTUATION BINDU BA SATANGA +1CF3..1CF6 ; 6.1 # [4] VEDIC SIGN ROTATED ARDHAVISARGA..VEDIC SIGN UPADHMANIYA +27CB ; 6.1 # MATHEMATICAL RISING DIAGONAL +27CD ; 6.1 # MATHEMATICAL FALLING DIAGONAL +2CF2..2CF3 ; 6.1 # [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI +2D27 ; 6.1 # GEORGIAN SMALL LETTER YN +2D2D ; 6.1 # GEORGIAN SMALL LETTER AEN +2D66..2D67 ; 6.1 # [2] TIFINAGH LETTER YE..TIFINAGH LETTER YO +2E32..2E3B ; 6.1 # [10] TURNED COMMA..THREE-EM DASH +9FCC ; 6.1 # CJK UNIFIED 
IDEOGRAPH-9FCC +A674..A67B ; 6.1 # [8] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC LETTER OMEGA +A69F ; 6.1 # COMBINING CYRILLIC LETTER IOTIFIED E +A792..A793 ; 6.1 # [2] LATIN CAPITAL LETTER C WITH BAR..LATIN SMALL LETTER C WITH BAR +A7AA ; 6.1 # LATIN CAPITAL LETTER H WITH HOOK +A7F8..A7F9 ; 6.1 # [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE +AAE0..AAF6 ; 6.1 # [23] MEETEI MAYEK LETTER E..MEETEI MAYEK VIRAMA +FA2E..FA2F ; 6.1 # [2] CJK COMPATIBILITY IDEOGRAPH-FA2E..CJK COMPATIBILITY IDEOGRAPH-FA2F +10980..109B7 ; 6.1 # [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA +109BE..109BF ; 6.1 # [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN +110D0..110E8 ; 6.1 # [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE +110F0..110F9 ; 6.1 # [10] SORA SOMPENG DIGIT ZERO..SORA SOMPENG DIGIT NINE +11100..11134 ; 6.1 # [53] CHAKMA SIGN CANDRABINDU..CHAKMA MAAYYAA +11136..11143 ; 6.1 # [14] CHAKMA DIGIT ZERO..CHAKMA QUESTION MARK +11180..111C8 ; 6.1 # [73] SHARADA SIGN CANDRABINDU..SHARADA SEPARATOR +111D0..111D9 ; 6.1 # [10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE +11680..116B7 ; 6.1 # [56] TAKRI LETTER A..TAKRI SIGN NUKTA +116C0..116C9 ; 6.1 # [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE +16F00..16F44 ; 6.1 # [69] MIAO LETTER PA..MIAO LETTER HHA +16F50..16F7E ; 6.1 # [47] MIAO LETTER NASALIZATION..MIAO VOWEL SIGN NG +16F8F..16F9F ; 6.1 # [17] MIAO TONE RIGHT..MIAO LETTER REFORMED TONE-8 +1EE00..1EE03 ; 6.1 # [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; 6.1 # [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; 6.1 # [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; 6.1 # ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; 6.1 # ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; 6.1 # [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; 6.1 # [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL 
KHAH +1EE39 ; 6.1 # ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; 6.1 # ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; 6.1 # ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; 6.1 # ARABIC MATHEMATICAL TAILED HAH +1EE49 ; 6.1 # ARABIC MATHEMATICAL TAILED YEH +1EE4B ; 6.1 # ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; 6.1 # [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; 6.1 # [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; 6.1 # ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; 6.1 # ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; 6.1 # ARABIC MATHEMATICAL TAILED DAD +1EE5B ; 6.1 # ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; 6.1 # ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; 6.1 # ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; 6.1 # [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; 6.1 # ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; 6.1 # [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; 6.1 # [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; 6.1 # [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; 6.1 # [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; 6.1 # ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; 6.1 # [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; 6.1 # [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; 6.1 # [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; 6.1 # [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; 6.1 # [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN +1EEF0..1EEF1 ; 6.1 # [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL +1F16A..1F16B ; 6.1 # 
[2] RAISED MC SIGN..RAISED MD SIGN +1F540..1F543 ; 6.1 # [4] CIRCLED CROSS POMMEE..NOTCHED LEFT SEMICIRCLE WITH THREE DOTS +1F600 ; 6.1 # GRINNING FACE +1F611 ; 6.1 # EXPRESSIONLESS FACE +1F615 ; 6.1 # CONFUSED FACE +1F617 ; 6.1 # KISSING FACE +1F619 ; 6.1 # KISSING FACE WITH SMILING EYES +1F61B ; 6.1 # FACE WITH STUCK-OUT TONGUE +1F61F ; 6.1 # WORRIED FACE +1F626..1F627 ; 6.1 # [2] FROWNING FACE WITH OPEN MOUTH..ANGUISHED FACE +1F62C ; 6.1 # GRIMACING FACE +1F62E..1F62F ; 6.1 # [2] FACE WITH OPEN MOUTH..HUSHED FACE +1F634 ; 6.1 # SLEEPING FACE + +# Total code points: 732 + # EOF diff --git a/lib/unicore/DCoreProperties.txt b/lib/unicore/DCoreProperties.txt index 7c7a784942..abdcd2201e 100644 --- a/lib/unicore/DCoreProperties.txt +++ b/lib/unicore/DCoreProperties.txt @@ -1,8 +1,8 @@ -# DerivedCoreProperties-6.0.0.txt -# Date: 2010-08-19, 00:48:05 GMT [MD] +# DerivedCoreProperties-6.1.0.txt +# Date: 2011-12-11, 18:26:55 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. 
# For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ @@ -113,9 +113,7 @@ 27C0..27C4 ; Math # Sm [5] THREE DIMENSIONAL ANGLE..OPEN SUPERSET 27C5 ; Math # Ps LEFT S-SHAPED BAG DELIMITER 27C6 ; Math # Pe RIGHT S-SHAPED BAG DELIMITER -27C7..27CA ; Math # Sm [4] OR WITH DOT INSIDE..VERTICAL BAR WITH HORIZONTAL STROKE -27CC ; Math # Sm LONG DIVISION -27CE..27E5 ; Math # Sm [24] SQUARED LOGICAL AND..WHITE SQUARE WITH RIGHTWARDS TICK +27C7..27E5 ; Math # Sm [31] OR WITH DOT INSIDE..WHITE SQUARE WITH RIGHTWARDS TICK 27E6 ; Math # Ps MATHEMATICAL LEFT WHITE SQUARE BRACKET 27E7 ; Math # Pe MATHEMATICAL RIGHT WHITE SQUARE BRACKET 27E8 ; Math # Ps MATHEMATICAL LEFT ANGLE BRACKET @@ -216,8 +214,42 @@ FFE9..FFEC ; Math # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS A 1D7C3 ; Math # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL 1D7C4..1D7CB ; Math # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA 1D7CE..1D7FF ; Math # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE +1EE00..1EE03 ; Math # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; Math # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; Math # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; Math # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; Math # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; Math # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; Math # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; Math # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; Math # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; Math # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; Math # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; Math # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; Math # Lo ARABIC MATHEMATICAL TAILED LAM 
+1EE4D..1EE4F ; Math # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; Math # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; Math # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; Math # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; Math # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; Math # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; Math # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; Math # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; Math # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; Math # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; Math # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; Math # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; Math # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; Math # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; Math # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; Math # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; Math # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; Math # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; Math # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; Math # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN +1EEF0..1EEF1 ; Math # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL -# Total code points: 2165 +# Total code points: 2310 # ================================================ @@ -226,9 +258,9 @@ FFE9..FFEC ; Math # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS A 0041..005A ; Alphabetic 
# L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z 0061..007A ; Alphabetic # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z -00AA ; Alphabetic # L& FEMININE ORDINAL INDICATOR +00AA ; Alphabetic # Lo FEMININE ORDINAL INDICATOR 00B5 ; Alphabetic # L& MICRO SIGN -00BA ; Alphabetic # L& MASCULINE ORDINAL INDICATOR +00BA ; Alphabetic # Lo MASCULINE ORDINAL INDICATOR 00C0..00D6 ; Alphabetic # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS 00D8..00F6 ; Alphabetic # L& [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS 00F8..01BA ; Alphabetic # L& [195] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL @@ -303,6 +335,10 @@ FFE9..FFEC ; Math # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS A 0828 ; Alphabetic # Lm SAMARITAN MODIFIER LETTER I 0829..082C ; Alphabetic # Mn [4] SAMARITAN VOWEL SIGN LONG I..SAMARITAN VOWEL SIGN SUKUN 0840..0858 ; Alphabetic # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN +08A0 ; Alphabetic # Lo ARABIC LETTER BEH WITH SMALL V BELOW +08A2..08AC ; Alphabetic # Lo [11] ARABIC LETTER JEEM WITH TWO DOTS ABOVE..ARABIC LETTER ROHINGYA YEH +08E4..08E9 ; Alphabetic # Mn [6] ARABIC CURLY FATHA..ARABIC CURLY KASRATAN +08F0..08FE ; Alphabetic # Mn [15] ARABIC OPEN FATHATAN..ARABIC DAMMA WITH DOT 0900..0902 ; Alphabetic # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA 0903 ; Alphabetic # Mc DEVANAGARI SIGN VISARGA 0904..0939 ; Alphabetic # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA @@ -500,7 +536,7 @@ FFE9..FFEC ; Math # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS A 0EC0..0EC4 ; Alphabetic # Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI 0EC6 ; Alphabetic # Lm LAO KO LA 0ECD ; Alphabetic # Mn LAO NIGGAHITA -0EDC..0EDD ; Alphabetic # Lo [2] LAO HO NO..LAO HO MO +0EDC..0EDF ; Alphabetic # Lo [4] LAO HO NO..LAO LETTER KHMU NYO 0F00 ; Alphabetic # Lo TIBETAN SYLLABLE OM 0F40..0F47 ; Alphabetic # Lo [8] TIBETAN LETTER 
KA..TIBETAN LETTER JA 0F49..0F6C ; Alphabetic # Lo [36] TIBETAN LETTER NYA..TIBETAN LETTER RRA @@ -538,9 +574,11 @@ FFE9..FFEC ; Math # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS A 109C ; Alphabetic # Mc MYANMAR VOWEL SIGN AITON A 109D ; Alphabetic # Mn MYANMAR VOWEL SIGN AITON AI 10A0..10C5 ; Alphabetic # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; Alphabetic # L& GEORGIAN CAPITAL LETTER YN +10CD ; Alphabetic # L& GEORGIAN CAPITAL LETTER AEN 10D0..10FA ; Alphabetic # Lo [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN 10FC ; Alphabetic # Lm MODIFIER LETTER GEORGIAN NAR -1100..1248 ; Alphabetic # Lo [329] HANGUL CHOSEONG KIYEOK..ETHIOPIC SYLLABLE QWA +10FD..1248 ; Alphabetic # Lo [332] GEORGIAN LETTER AEN..ETHIOPIC SYLLABLE QWA 124A..124D ; Alphabetic # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE 1250..1256 ; Alphabetic # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO 1258 ; Alphabetic # Lo ETHIOPIC SYLLABLE QHWA @@ -636,8 +674,9 @@ FFE9..FFEC ; Math # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS A 1BA2..1BA5 ; Alphabetic # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU 1BA6..1BA7 ; Alphabetic # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG 1BA8..1BA9 ; Alphabetic # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG +1BAC..1BAD ; Alphabetic # Mc [2] SUNDANESE CONSONANT SIGN PASANGAN MA..SUNDANESE CONSONANT SIGN PASANGAN WA 1BAE..1BAF ; Alphabetic # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA -1BC0..1BE5 ; Alphabetic # Lo [38] BATAK LETTER A..BATAK LETTER U +1BBA..1BE5 ; Alphabetic # Lo [44] SUNDANESE AVAGRAHA..BATAK LETTER U 1BE7 ; Alphabetic # Mc BATAK VOWEL SIGN E 1BE8..1BE9 ; Alphabetic # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE 1BEA..1BEC ; Alphabetic # Mc [3] BATAK VOWEL SIGN I..BATAK VOWEL SIGN O @@ -653,10 +692,11 @@ FFE9..FFEC ; Math # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS A 1C78..1C7D ; Alphabetic # 
Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD 1CE9..1CEC ; Alphabetic # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL 1CEE..1CF1 ; Alphabetic # Lo [4] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ANUSVARA UBHAYATO MUKHA -1CF2 ; Alphabetic # Mc VEDIC SIGN ARDHAVISARGA +1CF2..1CF3 ; Alphabetic # Mc [2] VEDIC SIGN ARDHAVISARGA..VEDIC SIGN ROTATED ARDHAVISARGA +1CF5..1CF6 ; Alphabetic # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA 1D00..1D2B ; Alphabetic # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL -1D2C..1D61 ; Alphabetic # Lm [54] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI -1D62..1D77 ; Alphabetic # L& [22] LATIN SUBSCRIPT SMALL LETTER I..LATIN SMALL LETTER TURNED G +1D2C..1D6A ; Alphabetic # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1D6B..1D77 ; Alphabetic # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G 1D78 ; Alphabetic # Lm MODIFIER LETTER CYRILLIC EN 1D79..1D9A ; Alphabetic # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK 1D9B..1DBF ; Alphabetic # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA @@ -703,12 +743,15 @@ FFE9..FFEC ; Math # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS A 24B6..24E9 ; Alphabetic # So [52] CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN SMALL LETTER Z 2C00..2C2E ; Alphabetic # L& [47] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE 2C30..2C5E ; Alphabetic # L& [47] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER LATINATE MYSLITE -2C60..2C7C ; Alphabetic # L& [29] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN SUBSCRIPT SMALL LETTER J -2C7D ; Alphabetic # Lm MODIFIER LETTER CAPITAL V +2C60..2C7B ; Alphabetic # L& [28] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN LETTER SMALL CAPITAL TURNED E +2C7C..2C7D ; Alphabetic # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V 2C7E..2CE4 ; Alphabetic # L& [103] LATIN 
CAPITAL LETTER S WITH SWASH TAIL..COPTIC SYMBOL KAI 2CEB..2CEE ; Alphabetic # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CF2..2CF3 ; Alphabetic # L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI 2D00..2D25 ; Alphabetic # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE -2D30..2D65 ; Alphabetic # Lo [54] TIFINAGH LETTER YA..TIFINAGH LETTER YAZZ +2D27 ; Alphabetic # L& GEORGIAN SMALL LETTER YN +2D2D ; Alphabetic # L& GEORGIAN SMALL LETTER AEN +2D30..2D67 ; Alphabetic # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO 2D6F ; Alphabetic # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK 2D80..2D96 ; Alphabetic # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE 2DA0..2DA6 ; Alphabetic # Lo [7] ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO @@ -740,7 +783,7 @@ FFE9..FFEC ; Math # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS A 31A0..31BA ; Alphabetic # Lo [27] BOPOMOFO LETTER BU..BOPOMOFO LETTER ZY 31F0..31FF ; Alphabetic # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO 3400..4DB5 ; Alphabetic # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5 -4E00..9FCB ; Alphabetic # Lo [20940] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCB +4E00..9FCC ; Alphabetic # Lo [20941] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCC A000..A014 ; Alphabetic # Lo [21] YI SYLLABLE IT..YI SYLLABLE E A015 ; Alphabetic # Lm YI SYLLABLE WU A016..A48C ; Alphabetic # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR @@ -752,8 +795,10 @@ A610..A61F ; Alphabetic # Lo [16] VAI SYLLABLE NDOLE FA..VAI SYMBOL JONG A62A..A62B ; Alphabetic # Lo [2] VAI SYLLABLE NDOLE MA..VAI SYLLABLE NDOLE DO A640..A66D ; Alphabetic # L& [46] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O A66E ; Alphabetic # Lo CYRILLIC LETTER MULTIOCULAR O +A674..A67B ; Alphabetic # Mn [8] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC LETTER OMEGA A67F ; Alphabetic # Lm 
CYRILLIC PAYEROK A680..A697 ; Alphabetic # L& [24] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER SHWE +A69F ; Alphabetic # Mn COMBINING CYRILLIC LETTER IOTIFIED E A6A0..A6E5 ; Alphabetic # Lo [70] BAMUM LETTER A..BAMUM LETTER KI A6E6..A6EF ; Alphabetic # Nl [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM A717..A71F ; Alphabetic # Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK @@ -762,8 +807,9 @@ A770 ; Alphabetic # Lm MODIFIER LETTER US A771..A787 ; Alphabetic # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T A788 ; Alphabetic # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A78B..A78E ; Alphabetic # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT -A790..A791 ; Alphabetic # L& [2] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER N WITH DESCENDER -A7A0..A7A9 ; Alphabetic # L& [10] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN SMALL LETTER S WITH OBLIQUE STROKE +A790..A793 ; Alphabetic # L& [4] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER C WITH BAR +A7A0..A7AA ; Alphabetic # L& [11] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN CAPITAL LETTER H WITH HOOK +A7F8..A7F9 ; Alphabetic # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE A7FA ; Alphabetic # L& LATIN LETTER SMALL CAPITAL TURNED M A7FB..A801 ; Alphabetic # Lo [7] LATIN EPIGRAPHIC LETTER REVERSED F..SYLOTI NAGRI LETTER I A803..A805 ; Alphabetic # Lo [3] SYLOTI NAGRI LETTER U..SYLOTI NAGRI LETTER O @@ -820,6 +866,13 @@ AAC0 ; Alphabetic # Lo TAI VIET TONE MAI NUENG AAC2 ; Alphabetic # Lo TAI VIET TONE MAI SONG AADB..AADC ; Alphabetic # Lo [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG AADD ; Alphabetic # Lm TAI VIET SYMBOL SAM +AAE0..AAEA ; Alphabetic # Lo [11] MEETEI MAYEK LETTER E..MEETEI MAYEK LETTER SSA +AAEB ; Alphabetic # Mc MEETEI MAYEK VOWEL SIGN II +AAEC..AAED ; Alphabetic # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI +AAEE..AAEF ; 
Alphabetic # Mc [2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU +AAF2 ; Alphabetic # Lo MEETEI MAYEK ANJI +AAF3..AAF4 ; Alphabetic # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK +AAF5 ; Alphabetic # Mc MEETEI MAYEK VOWEL SIGN VISARGA AB01..AB06 ; Alphabetic # Lo [6] ETHIOPIC SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO AB09..AB0E ; Alphabetic # Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO AB11..AB16 ; Alphabetic # Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO @@ -834,8 +887,7 @@ ABE9..ABEA ; Alphabetic # Mc [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEETEI MA AC00..D7A3 ; Alphabetic # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH D7B0..D7C6 ; Alphabetic # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E D7CB..D7FB ; Alphabetic # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH -F900..FA2D ; Alphabetic # Lo [302] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA2D -FA30..FA6D ; Alphabetic # Lo [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D +F900..FA6D ; Alphabetic # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D FA70..FAD9 ; Alphabetic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 FB00..FB06 ; Alphabetic # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST FB13..FB17 ; Alphabetic # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH @@ -894,6 +946,8 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 1083F..10855 ; Alphabetic # Lo [23] CYPRIOT SYLLABLE ZO..IMPERIAL ARAMAIC LETTER TAW 10900..10915 ; Alphabetic # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU 10920..10939 ; Alphabetic # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C +10980..109B7 ; Alphabetic # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA +109BE..109BF ; Alphabetic # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN 10A00 ; 
Alphabetic # Lo KHAROSHTHI LETTER A 10A01..10A03 ; Alphabetic # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R 10A05..10A06 ; Alphabetic # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O @@ -916,10 +970,33 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 110B0..110B2 ; Alphabetic # Mc [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II 110B3..110B6 ; Alphabetic # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI 110B7..110B8 ; Alphabetic # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU +110D0..110E8 ; Alphabetic # Lo [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE +11100..11102 ; Alphabetic # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11103..11126 ; Alphabetic # Lo [36] CHAKMA LETTER AA..CHAKMA LETTER HAA +11127..1112B ; Alphabetic # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112C ; Alphabetic # Mc CHAKMA VOWEL SIGN E +1112D..11132 ; Alphabetic # Mn [6] CHAKMA VOWEL SIGN AI..CHAKMA AU MARK +11180..11181 ; Alphabetic # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +11182 ; Alphabetic # Mc SHARADA SIGN VISARGA +11183..111B2 ; Alphabetic # Lo [48] SHARADA LETTER A..SHARADA LETTER HA +111B3..111B5 ; Alphabetic # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II +111B6..111BE ; Alphabetic # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +111BF ; Alphabetic # Mc SHARADA VOWEL SIGN AU +111C1..111C4 ; Alphabetic # Lo [4] SHARADA SIGN AVAGRAHA..SHARADA OM +11680..116AA ; Alphabetic # Lo [43] TAKRI LETTER A..TAKRI LETTER RRA +116AB ; Alphabetic # Mn TAKRI SIGN ANUSVARA +116AC ; Alphabetic # Mc TAKRI SIGN VISARGA +116AD ; Alphabetic # Mn TAKRI VOWEL SIGN AA +116AE..116AF ; Alphabetic # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II +116B0..116B5 ; Alphabetic # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU 12000..1236E ; Alphabetic # Lo [879] CUNEIFORM SIGN A..CUNEIFORM SIGN ZUM 12400..12462 ; Alphabetic # Nl [99] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN OLD ASSYRIAN ONE 
QUARTER 13000..1342E ; Alphabetic # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032 16800..16A38 ; Alphabetic # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ +16F00..16F44 ; Alphabetic # Lo [69] MIAO LETTER PA..MIAO LETTER HHA +16F50 ; Alphabetic # Lo MIAO LETTER NASALIZATION +16F51..16F7E ; Alphabetic # Mc [46] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN NG +16F93..16F9F ; Alphabetic # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 1B000..1B001 ; Alphabetic # Lo [2] KATAKANA LETTER ARCHAIC E..HIRAGANA LETTER ARCHAIC YE 1D400..1D454 ; Alphabetic # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G 1D456..1D49C ; Alphabetic # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A @@ -951,12 +1028,45 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 1D78A..1D7A8 ; Alphabetic # L& [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA 1D7AA..1D7C2 ; Alphabetic # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA 1D7C4..1D7CB ; Alphabetic # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA +1EE00..1EE03 ; Alphabetic # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; Alphabetic # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; Alphabetic # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; Alphabetic # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; Alphabetic # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; Alphabetic # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; Alphabetic # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; Alphabetic # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; Alphabetic # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; Alphabetic # Lo ARABIC MATHEMATICAL 
TAILED JEEM +1EE47 ; Alphabetic # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; Alphabetic # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; Alphabetic # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; Alphabetic # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; Alphabetic # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; Alphabetic # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; Alphabetic # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; Alphabetic # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; Alphabetic # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; Alphabetic # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; Alphabetic # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; Alphabetic # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; Alphabetic # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; Alphabetic # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; Alphabetic # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; Alphabetic # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; Alphabetic # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; Alphabetic # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; Alphabetic # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; Alphabetic # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; Alphabetic # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; Alphabetic # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; Alphabetic # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN 20000..2A6D6 ; Alphabetic # Lo [42711] CJK UNIFIED 
IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6 2A700..2B734 ; Alphabetic # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734 2B740..2B81D ; Alphabetic # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D 2F800..2FA1D ; Alphabetic # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D -# Total code points: 101539 +# Total code points: 102159 # ================================================ @@ -964,9 +1074,9 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG # Generated from: Ll + Other_Lowercase 0061..007A ; Lowercase # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z -00AA ; Lowercase # L& FEMININE ORDINAL INDICATOR +00AA ; Lowercase # Lo FEMININE ORDINAL INDICATOR 00B5 ; Lowercase # L& MICRO SIGN -00BA ; Lowercase # L& MASCULINE ORDINAL INDICATOR +00BA ; Lowercase # Lo MASCULINE ORDINAL INDICATOR 00DF..00F6 ; Lowercase # L& [24] LATIN SMALL LETTER SHARP S..LATIN SMALL LETTER O WITH DIAERESIS 00F8..00FF ; Lowercase # L& [8] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER Y WITH DIAERESIS 0101 ; Lowercase # L& LATIN SMALL LETTER A WITH MACRON @@ -1237,8 +1347,8 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 0527 ; Lowercase # L& CYRILLIC SMALL LETTER SHHA WITH DESCENDER 0561..0587 ; Lowercase # L& [39] ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LIGATURE ECH YIWN 1D00..1D2B ; Lowercase # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL -1D2C..1D61 ; Lowercase # Lm [54] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI -1D62..1D77 ; Lowercase # L& [22] LATIN SUBSCRIPT SMALL LETTER I..LATIN SMALL LETTER TURNED G +1D2C..1D6A ; Lowercase # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1D6B..1D77 ; Lowercase # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G 1D78 ; Lowercase # Lm MODIFIER LETTER CYRILLIC EN 1D79..1D9A ; Lowercase # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH 
WITH RETROFLEX HOOK 1D9B..1DBF ; Lowercase # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA @@ -1386,7 +1496,9 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 1FE0..1FE7 ; Lowercase # L& [8] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI 1FF2..1FF4 ; Lowercase # L& [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI 1FF6..1FF7 ; Lowercase # L& [2] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI -2090..2094 ; Lowercase # Lm [5] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER SCHWA +2071 ; Lowercase # Lm SUPERSCRIPT LATIN SMALL LETTER I +207F ; Lowercase # Lm SUPERSCRIPT LATIN SMALL LETTER N +2090..209C ; Lowercase # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T 210A ; Lowercase # L& SCRIPT SMALL G 210E..210F ; Lowercase # L& [2] PLANCK CONSTANT..PLANCK CONSTANT OVER TWO PI 2113 ; Lowercase # L& SCRIPT SMALL L @@ -1407,8 +1519,8 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 2C6C ; Lowercase # L& LATIN SMALL LETTER Z WITH DESCENDER 2C71 ; Lowercase # L& LATIN SMALL LETTER V WITH RIGHT HOOK 2C73..2C74 ; Lowercase # L& [2] LATIN SMALL LETTER W WITH HOOK..LATIN SMALL LETTER V WITH CURL -2C76..2C7C ; Lowercase # L& [7] LATIN SMALL LETTER HALF H..LATIN SUBSCRIPT SMALL LETTER J -2C7D ; Lowercase # Lm MODIFIER LETTER CAPITAL V +2C76..2C7B ; Lowercase # L& [6] LATIN SMALL LETTER HALF H..LATIN LETTER SMALL CAPITAL TURNED E +2C7C..2C7D ; Lowercase # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V 2C81 ; Lowercase # L& COPTIC SMALL LETTER ALFA 2C83 ; Lowercase # L& COPTIC SMALL LETTER VIDA 2C85 ; Lowercase # L& COPTIC SMALL LETTER GAMMA @@ -1461,7 +1573,10 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 2CE3..2CE4 ; Lowercase # L& [2] COPTIC 
SMALL LETTER OLD NUBIAN WAU..COPTIC SYMBOL KAI 2CEC ; Lowercase # L& COPTIC SMALL LETTER CRYPTOGRAMMIC SHEI 2CEE ; Lowercase # L& COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CF3 ; Lowercase # L& COPTIC SMALL LETTER BOHAIRIC KHEI 2D00..2D25 ; Lowercase # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE +2D27 ; Lowercase # L& GEORGIAN SMALL LETTER YN +2D2D ; Lowercase # L& GEORGIAN SMALL LETTER AEN A641 ; Lowercase # L& CYRILLIC SMALL LETTER ZEMLYA A643 ; Lowercase # L& CYRILLIC SMALL LETTER DZELO A645 ; Lowercase # L& CYRILLIC SMALL LETTER REVERSED DZE @@ -1547,11 +1662,13 @@ A787 ; Lowercase # L& LATIN SMALL LETTER INSULAR T A78C ; Lowercase # L& LATIN SMALL LETTER SALTILLO A78E ; Lowercase # L& LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A791 ; Lowercase # L& LATIN SMALL LETTER N WITH DESCENDER +A793 ; Lowercase # L& LATIN SMALL LETTER C WITH BAR A7A1 ; Lowercase # L& LATIN SMALL LETTER G WITH OBLIQUE STROKE A7A3 ; Lowercase # L& LATIN SMALL LETTER K WITH OBLIQUE STROKE A7A5 ; Lowercase # L& LATIN SMALL LETTER N WITH OBLIQUE STROKE A7A7 ; Lowercase # L& LATIN SMALL LETTER R WITH OBLIQUE STROKE A7A9 ; Lowercase # L& LATIN SMALL LETTER S WITH OBLIQUE STROKE +A7F8..A7F9 ; Lowercase # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE A7FA ; Lowercase # L& LATIN LETTER SMALL CAPITAL TURNED M FB00..FB06 ; Lowercase # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST FB13..FB17 ; Lowercase # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH @@ -1586,7 +1703,7 @@ FF41..FF5A ; Lowercase # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L 1D7C4..1D7C9 ; Lowercase # L& [6] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC PI SYMBOL 1D7CB ; Lowercase # L& MATHEMATICAL BOLD SMALL DIGAMMA -# Total code points: 1918 +# Total code points: 1934 # ================================================ @@ -1861,6 +1978,8 @@ FF41..FF5A ; Lowercase # L& [26] FULLWIDTH LATIN 
SMALL LETTER A..FULLWIDTH L 0526 ; Uppercase # L& CYRILLIC CAPITAL LETTER SHHA WITH DESCENDER 0531..0556 ; Uppercase # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH 10A0..10C5 ; Uppercase # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; Uppercase # L& GEORGIAN CAPITAL LETTER YN +10CD ; Uppercase # L& GEORGIAN CAPITAL LETTER AEN 1E00 ; Uppercase # L& LATIN CAPITAL LETTER A WITH RING BELOW 1E02 ; Uppercase # L& LATIN CAPITAL LETTER B WITH DOT ABOVE 1E04 ; Uppercase # L& LATIN CAPITAL LETTER B WITH DOT BELOW @@ -2077,6 +2196,7 @@ FF41..FF5A ; Lowercase # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L 2CE2 ; Uppercase # L& COPTIC CAPITAL LETTER OLD NUBIAN WAU 2CEB ; Uppercase # L& COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI 2CED ; Uppercase # L& COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA +2CF2 ; Uppercase # L& COPTIC CAPITAL LETTER BOHAIRIC KHEI A640 ; Uppercase # L& CYRILLIC CAPITAL LETTER ZEMLYA A642 ; Uppercase # L& CYRILLIC CAPITAL LETTER DZELO A644 ; Uppercase # L& CYRILLIC CAPITAL LETTER REVERSED DZE @@ -2160,11 +2280,13 @@ A786 ; Uppercase # L& LATIN CAPITAL LETTER INSULAR T A78B ; Uppercase # L& LATIN CAPITAL LETTER SALTILLO A78D ; Uppercase # L& LATIN CAPITAL LETTER TURNED H A790 ; Uppercase # L& LATIN CAPITAL LETTER N WITH DESCENDER +A792 ; Uppercase # L& LATIN CAPITAL LETTER C WITH BAR A7A0 ; Uppercase # L& LATIN CAPITAL LETTER G WITH OBLIQUE STROKE A7A2 ; Uppercase # L& LATIN CAPITAL LETTER K WITH OBLIQUE STROKE A7A4 ; Uppercase # L& LATIN CAPITAL LETTER N WITH OBLIQUE STROKE A7A6 ; Uppercase # L& LATIN CAPITAL LETTER R WITH OBLIQUE STROKE A7A8 ; Uppercase # L& LATIN CAPITAL LETTER S WITH OBLIQUE STROKE +A7AA ; Uppercase # L& LATIN CAPITAL LETTER H WITH HOOK FF21..FF3A ; Uppercase # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z 10400..10427 ; Uppercase # L& [40] DESERET CAPITAL LETTER LONG I..DESERET CAPITAL LETTER EW 1D400..1D419 ; Uppercase # L& [26] MATHEMATICAL BOLD CAPITAL 
A..MATHEMATICAL BOLD CAPITAL Z @@ -2199,7 +2321,7 @@ FF21..FF3A ; Uppercase # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH 1D790..1D7A8 ; Uppercase # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA 1D7CA ; Uppercase # L& MATHEMATICAL BOLD CAPITAL DIGAMMA -# Total code points: 1478 +# Total code points: 1483 # ================================================ @@ -2209,9 +2331,9 @@ FF21..FF3A ; Uppercase # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH 0041..005A ; Cased # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z 0061..007A ; Cased # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z -00AA ; Cased # L& FEMININE ORDINAL INDICATOR +00AA ; Cased # Lo FEMININE ORDINAL INDICATOR 00B5 ; Cased # L& MICRO SIGN -00BA ; Cased # L& MASCULINE ORDINAL INDICATOR +00BA ; Cased # Lo MASCULINE ORDINAL INDICATOR 00C0..00D6 ; Cased # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS 00D8..00F6 ; Cased # L& [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS 00F8..01BA ; Cased # L& [195] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL @@ -2236,9 +2358,11 @@ FF21..FF3A ; Uppercase # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH 0531..0556 ; Cased # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH 0561..0587 ; Cased # L& [39] ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LIGATURE ECH YIWN 10A0..10C5 ; Cased # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; Cased # L& GEORGIAN CAPITAL LETTER YN +10CD ; Cased # L& GEORGIAN CAPITAL LETTER AEN 1D00..1D2B ; Cased # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL -1D2C..1D61 ; Cased # Lm [54] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI -1D62..1D77 ; Cased # L& [22] LATIN SUBSCRIPT SMALL LETTER I..LATIN SMALL LETTER TURNED G +1D2C..1D6A ; Cased # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI 
+1D6B..1D77 ; Cased # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G 1D78 ; Cased # Lm MODIFIER LETTER CYRILLIC EN 1D79..1D9A ; Cased # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK 1D9B..1DBF ; Cased # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA @@ -2261,7 +2385,9 @@ FF21..FF3A ; Uppercase # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH 1FE0..1FEC ; Cased # L& [13] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA 1FF2..1FF4 ; Cased # L& [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI 1FF6..1FFC ; Cased # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI -2090..2094 ; Cased # Lm [5] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER SCHWA +2071 ; Cased # Lm SUPERSCRIPT LATIN SMALL LETTER I +207F ; Cased # Lm SUPERSCRIPT LATIN SMALL LETTER N +2090..209C ; Cased # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T 2102 ; Cased # L& DOUBLE-STRUCK CAPITAL C 2107 ; Cased # L& EULER CONSTANT 210A..2113 ; Cased # L& [10] SCRIPT SMALL G..SCRIPT SMALL L @@ -2281,19 +2407,23 @@ FF21..FF3A ; Uppercase # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH 24B6..24E9 ; Cased # So [52] CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN SMALL LETTER Z 2C00..2C2E ; Cased # L& [47] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE 2C30..2C5E ; Cased # L& [47] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER LATINATE MYSLITE -2C60..2C7C ; Cased # L& [29] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN SUBSCRIPT SMALL LETTER J -2C7D ; Cased # Lm MODIFIER LETTER CAPITAL V +2C60..2C7B ; Cased # L& [28] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN LETTER SMALL CAPITAL TURNED E +2C7C..2C7D ; Cased # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V 2C7E..2CE4 ; Cased # L& [103] LATIN CAPITAL LETTER S WITH 
SWASH TAIL..COPTIC SYMBOL KAI 2CEB..2CEE ; Cased # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CF2..2CF3 ; Cased # L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI 2D00..2D25 ; Cased # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE +2D27 ; Cased # L& GEORGIAN SMALL LETTER YN +2D2D ; Cased # L& GEORGIAN SMALL LETTER AEN A640..A66D ; Cased # L& [46] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O A680..A697 ; Cased # L& [24] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER SHWE A722..A76F ; Cased # L& [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN SMALL LETTER CON A770 ; Cased # Lm MODIFIER LETTER US A771..A787 ; Cased # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T A78B..A78E ; Cased # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT -A790..A791 ; Cased # L& [2] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER N WITH DESCENDER -A7A0..A7A9 ; Cased # L& [10] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN SMALL LETTER S WITH OBLIQUE STROKE +A790..A793 ; Cased # L& [4] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER C WITH BAR +A7A0..A7AA ; Cased # L& [11] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN CAPITAL LETTER H WITH HOOK +A7F8..A7F9 ; Cased # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE A7FA ; Cased # L& LATIN LETTER SMALL CAPITAL TURNED M FB00..FB06 ; Cased # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST FB13..FB17 ; Cased # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH @@ -2331,7 +2461,7 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1D7AA..1D7C2 ; Cased # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA 1D7C4..1D7CB ; Cased # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD 
SMALL DIGAMMA -# Total code points: 3427 +# Total code points: 3448 # ================================================ @@ -2377,7 +2507,7 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 05C4..05C5 ; Case_Ignorable # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT 05C7 ; Case_Ignorable # Mn HEBREW POINT QAMATS QATAN 05F4 ; Case_Ignorable # Po HEBREW PUNCTUATION GERSHAYIM -0600..0603 ; Case_Ignorable # Cf [4] ARABIC NUMBER SIGN..ARABIC SIGN SAFHA +0600..0604 ; Case_Ignorable # Cf [5] ARABIC NUMBER SIGN..ARABIC SIGN SAMVAT 0610..061A ; Case_Ignorable # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA 0640 ; Case_Ignorable # Lm ARABIC TATWEEL 064B..065F ; Case_Ignorable # Mn [21] ARABIC FATHATAN..ARABIC WAVY HAMZA BELOW @@ -2403,6 +2533,7 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 0828 ; Case_Ignorable # Lm SAMARITAN MODIFIER LETTER I 0829..082D ; Case_Ignorable # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA 0859..085B ; Case_Ignorable # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK +08E4..08FE ; Case_Ignorable # Mn [27] ARABIC CURLY FATHA..ARABIC DAMMA WITH DOT 0900..0902 ; Case_Ignorable # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA 093A ; Case_Ignorable # Mn DEVANAGARI VOWEL SIGN OE 093C ; Case_Ignorable # Mn DEVANAGARI SIGN NUKTA @@ -2492,7 +2623,7 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1732..1734 ; Case_Ignorable # Mn [3] HANUNOO VOWEL SIGN I..HANUNOO SIGN PAMUDPOD 1752..1753 ; Case_Ignorable # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U 1772..1773 ; Case_Ignorable # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U -17B4..17B5 ; Case_Ignorable # Cf [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA +17B4..17B5 ; Case_Ignorable # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA 17B7..17BD ; Case_Ignorable # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA 17C6 ; 
Case_Ignorable # Mn KHMER SIGN NIKAHIT 17C9..17D3 ; Case_Ignorable # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT @@ -2523,6 +2654,7 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1B80..1B81 ; Case_Ignorable # Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR 1BA2..1BA5 ; Case_Ignorable # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU 1BA8..1BA9 ; Case_Ignorable # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG +1BAB ; Case_Ignorable # Mn SUNDANESE SIGN VIRAMA 1BE6 ; Case_Ignorable # Mn BATAK SIGN TOMPI 1BE8..1BE9 ; Case_Ignorable # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE 1BED ; Case_Ignorable # Mn BATAK VOWEL SIGN KARO O @@ -2534,7 +2666,8 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1CD4..1CE0 ; Case_Ignorable # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA 1CE2..1CE8 ; Case_Ignorable # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL 1CED ; Case_Ignorable # Mn VEDIC SIGN TIRYAK -1D2C..1D61 ; Case_Ignorable # Lm [54] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI +1CF4 ; Case_Ignorable # Mn VEDIC TONE CANDRA ABOVE +1D2C..1D6A ; Case_Ignorable # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI 1D78 ; Case_Ignorable # Lm MODIFIER LETTER CYRILLIC EN 1D9B..1DBF ; Case_Ignorable # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA 1DC0..1DE6 ; Case_Ignorable # Mn [39] COMBINING DOTTED GRAVE ACCENT..COMBINING LATIN SMALL LETTER Z @@ -2561,14 +2694,14 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 20E1 ; Case_Ignorable # Mn COMBINING LEFT RIGHT ARROW ABOVE 20E2..20E4 ; Case_Ignorable # Me [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE 20E5..20F0 ; Case_Ignorable # Mn [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE -2C7D ; 
Case_Ignorable # Lm MODIFIER LETTER CAPITAL V +2C7C..2C7D ; Case_Ignorable # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V 2CEF..2CF1 ; Case_Ignorable # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS 2D6F ; Case_Ignorable # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK 2D7F ; Case_Ignorable # Mn TIFINAGH CONSONANT JOINER 2DE0..2DFF ; Case_Ignorable # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS 2E2F ; Case_Ignorable # Lm VERTICAL TILDE 3005 ; Case_Ignorable # Lm IDEOGRAPHIC ITERATION MARK -302A..302F ; Case_Ignorable # Mn [6] IDEOGRAPHIC LEVEL TONE MARK..HANGUL DOUBLE DOT TONE MARK +302A..302D ; Case_Ignorable # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK 3031..3035 ; Case_Ignorable # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF 303B ; Case_Ignorable # Lm VERTICAL IDEOGRAPHIC ITERATION MARK 3099..309A ; Case_Ignorable # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK @@ -2580,8 +2713,9 @@ A4F8..A4FD ; Case_Ignorable # Lm [6] LISU LETTER TONE MYA TI..LISU LETTER T A60C ; Case_Ignorable # Lm VAI SYLLABLE LENGTHENER A66F ; Case_Ignorable # Mn COMBINING CYRILLIC VZMET A670..A672 ; Case_Ignorable # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN -A67C..A67D ; Case_Ignorable # Mn [2] COMBINING CYRILLIC KAVYKA..COMBINING CYRILLIC PAYEROK +A674..A67D ; Case_Ignorable # Mn [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK A67F ; Case_Ignorable # Lm CYRILLIC PAYEROK +A69F ; Case_Ignorable # Mn COMBINING CYRILLIC LETTER IOTIFIED E A6F0..A6F1 ; Case_Ignorable # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS A700..A716 ; Case_Ignorable # Sk [23] MODIFIER LETTER CHINESE TONE YIN PING..MODIFIER LETTER EXTRA-LOW LEFT-STEM TONE BAR A717..A71F ; Case_Ignorable # Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW 
INVERTED EXCLAMATION MARK @@ -2589,6 +2723,7 @@ A720..A721 ; Case_Ignorable # Sk [2] MODIFIER LETTER STRESS AND HIGH TONE.. A770 ; Case_Ignorable # Lm MODIFIER LETTER US A788 ; Case_Ignorable # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A789..A78A ; Case_Ignorable # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN +A7F8..A7F9 ; Case_Ignorable # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE A802 ; Case_Ignorable # Mn SYLOTI NAGRI SIGN DVISVARA A806 ; Case_Ignorable # Mn SYLOTI NAGRI SIGN HASANTA A80B ; Case_Ignorable # Mn SYLOTI NAGRI SIGN ANUSVARA @@ -2614,6 +2749,9 @@ AAB7..AAB8 ; Case_Ignorable # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA AABE..AABF ; Case_Ignorable # Mn [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK AAC1 ; Case_Ignorable # Mn TAI VIET TONE MAI THO AADD ; Case_Ignorable # Lm TAI VIET SYMBOL SAM +AAEC..AAED ; Case_Ignorable # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI +AAF3..AAF4 ; Case_Ignorable # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK +AAF6 ; Case_Ignorable # Mn MEETEI MAYEK VIRAMA ABE5 ; Case_Ignorable # Mn MEETEI MAYEK VOWEL SIGN ANAP ABE8 ; Case_Ignorable # Mn MEETEI MAYEK VOWEL SIGN UNAP ABED ; Case_Ignorable # Mn MEETEI MAYEK APUN IYEK @@ -2646,6 +2784,17 @@ FFF9..FFFB ; Case_Ignorable # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLI 110B3..110B6 ; Case_Ignorable # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI 110B9..110BA ; Case_Ignorable # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA 110BD ; Case_Ignorable # Cf KAITHI NUMBER SIGN +11100..11102 ; Case_Ignorable # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11127..1112B ; Case_Ignorable # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112D..11134 ; Case_Ignorable # Mn [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA +11180..11181 ; Case_Ignorable # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +111B6..111BE ; Case_Ignorable # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL 
SIGN O +116AB ; Case_Ignorable # Mn TAKRI SIGN ANUSVARA +116AD ; Case_Ignorable # Mn TAKRI VOWEL SIGN AA +116B0..116B5 ; Case_Ignorable # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +116B7 ; Case_Ignorable # Mn TAKRI SIGN NUKTA +16F8F..16F92 ; Case_Ignorable # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW +16F93..16F9F ; Case_Ignorable # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 1D167..1D169 ; Case_Ignorable # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 1D173..1D17A ; Case_Ignorable # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE 1D17B..1D182 ; Case_Ignorable # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE @@ -2656,7 +2805,7 @@ E0001 ; Case_Ignorable # Cf LANGUAGE TAG E0020..E007F ; Case_Ignorable # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Case_Ignorable # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 1692 +# Total code points: 1799 # ================================================ @@ -2932,6 +3081,8 @@ E0100..E01EF ; Case_Ignorable # Mn [240] VARIATION SELECTOR-17..VARIATION SELEC 0526 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER SHHA WITH DESCENDER 0531..0556 ; Changes_When_Lowercased # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH 10A0..10C5 ; Changes_When_Lowercased # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; Changes_When_Lowercased # L& GEORGIAN CAPITAL LETTER YN +10CD ; Changes_When_Lowercased # L& GEORGIAN CAPITAL LETTER AEN 1E00 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER A WITH RING BELOW 1E02 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER B WITH DOT ABOVE 1E04 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER B WITH DOT BELOW @@ -3141,6 +3292,7 @@ E0100..E01EF ; Case_Ignorable # Mn [240] VARIATION SELECTOR-17..VARIATION SELEC 2CE2 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER OLD NUBIAN WAU 2CEB ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER 
CRYPTOGRAMMIC SHEI 2CED ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA +2CF2 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER BOHAIRIC KHEI A640 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER ZEMLYA A642 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER DZELO A644 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER REVERSED DZE @@ -3224,15 +3376,17 @@ A786 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER INSULAR A78B ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER SALTILLO A78D ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER TURNED H A790 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER N WITH DESCENDER +A792 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER C WITH BAR A7A0 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER G WITH OBLIQUE STROKE A7A2 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER K WITH OBLIQUE STROKE A7A4 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER N WITH OBLIQUE STROKE A7A6 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER R WITH OBLIQUE STROKE A7A8 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER S WITH OBLIQUE STROKE +A7AA ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER H WITH HOOK FF21..FF3A ; Changes_When_Lowercased # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z 10400..10427 ; Changes_When_Lowercased # L& [40] DESERET CAPITAL LETTER LONG I..DESERET CAPITAL LETTER EW -# Total code points: 1038 +# Total code points: 1043 # ================================================ @@ -3390,7 +3544,7 @@ FF21..FF3A ; Changes_When_Lowercased # L& [26] FULLWIDTH LATIN CAPITAL LETTE 025B ; Changes_When_Uppercased # L& LATIN SMALL LETTER OPEN E 0260 ; Changes_When_Uppercased # L& LATIN SMALL LETTER G WITH HOOK 0263 ; Changes_When_Uppercased # L& LATIN SMALL LETTER GAMMA -0265 ; Changes_When_Uppercased # L& LATIN SMALL LETTER TURNED H +0265..0266 ; Changes_When_Uppercased # L& [2] LATIN SMALL LETTER TURNED H..LATIN SMALL LETTER H WITH HOOK 
0268..0269 ; Changes_When_Uppercased # L& [2] LATIN SMALL LETTER I WITH STROKE..LATIN SMALL LETTER IOTA 026B ; Changes_When_Uppercased # L& LATIN SMALL LETTER L WITH MIDDLE TILDE 026F ; Changes_When_Uppercased # L& LATIN SMALL LETTER TURNED M @@ -3731,7 +3885,10 @@ FF21..FF3A ; Changes_When_Lowercased # L& [26] FULLWIDTH LATIN CAPITAL LETTE 2CE3 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER OLD NUBIAN WAU 2CEC ; Changes_When_Uppercased # L& COPTIC SMALL LETTER CRYPTOGRAMMIC SHEI 2CEE ; Changes_When_Uppercased # L& COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CF3 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER BOHAIRIC KHEI 2D00..2D25 ; Changes_When_Uppercased # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE +2D27 ; Changes_When_Uppercased # L& GEORGIAN SMALL LETTER YN +2D2D ; Changes_When_Uppercased # L& GEORGIAN SMALL LETTER AEN A641 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER ZEMLYA A643 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER DZELO A645 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER REVERSED DZE @@ -3814,6 +3971,7 @@ A785 ; Changes_When_Uppercased # L& LATIN SMALL LETTER INSULAR S A787 ; Changes_When_Uppercased # L& LATIN SMALL LETTER INSULAR T A78C ; Changes_When_Uppercased # L& LATIN SMALL LETTER SALTILLO A791 ; Changes_When_Uppercased # L& LATIN SMALL LETTER N WITH DESCENDER +A793 ; Changes_When_Uppercased # L& LATIN SMALL LETTER C WITH BAR A7A1 ; Changes_When_Uppercased # L& LATIN SMALL LETTER G WITH OBLIQUE STROKE A7A3 ; Changes_When_Uppercased # L& LATIN SMALL LETTER K WITH OBLIQUE STROKE A7A5 ; Changes_When_Uppercased # L& LATIN SMALL LETTER N WITH OBLIQUE STROKE @@ -3824,7 +3982,7 @@ FB13..FB17 ; Changes_When_Uppercased # L& [5] ARMENIAN SMALL LIGATURE MEN N FF41..FF5A ; Changes_When_Uppercased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z 10428..1044F ; Changes_When_Uppercased # L& [40] DESERET SMALL LETTER LONG I..DESERET SMALL LETTER EW -# Total code points: 1121 +# Total code 
points: 1126 # ================================================ @@ -3983,7 +4141,7 @@ FF41..FF5A ; Changes_When_Uppercased # L& [26] FULLWIDTH LATIN SMALL LETTER 025B ; Changes_When_Titlecased # L& LATIN SMALL LETTER OPEN E 0260 ; Changes_When_Titlecased # L& LATIN SMALL LETTER G WITH HOOK 0263 ; Changes_When_Titlecased # L& LATIN SMALL LETTER GAMMA -0265 ; Changes_When_Titlecased # L& LATIN SMALL LETTER TURNED H +0265..0266 ; Changes_When_Titlecased # L& [2] LATIN SMALL LETTER TURNED H..LATIN SMALL LETTER H WITH HOOK 0268..0269 ; Changes_When_Titlecased # L& [2] LATIN SMALL LETTER I WITH STROKE..LATIN SMALL LETTER IOTA 026B ; Changes_When_Titlecased # L& LATIN SMALL LETTER L WITH MIDDLE TILDE 026F ; Changes_When_Titlecased # L& LATIN SMALL LETTER TURNED M @@ -4324,7 +4482,10 @@ FF41..FF5A ; Changes_When_Uppercased # L& [26] FULLWIDTH LATIN SMALL LETTER 2CE3 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER OLD NUBIAN WAU 2CEC ; Changes_When_Titlecased # L& COPTIC SMALL LETTER CRYPTOGRAMMIC SHEI 2CEE ; Changes_When_Titlecased # L& COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CF3 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER BOHAIRIC KHEI 2D00..2D25 ; Changes_When_Titlecased # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE +2D27 ; Changes_When_Titlecased # L& GEORGIAN SMALL LETTER YN +2D2D ; Changes_When_Titlecased # L& GEORGIAN SMALL LETTER AEN A641 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER ZEMLYA A643 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER DZELO A645 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER REVERSED DZE @@ -4407,6 +4568,7 @@ A785 ; Changes_When_Titlecased # L& LATIN SMALL LETTER INSULAR S A787 ; Changes_When_Titlecased # L& LATIN SMALL LETTER INSULAR T A78C ; Changes_When_Titlecased # L& LATIN SMALL LETTER SALTILLO A791 ; Changes_When_Titlecased # L& LATIN SMALL LETTER N WITH DESCENDER +A793 ; Changes_When_Titlecased # L& LATIN SMALL LETTER C WITH BAR A7A1 ; Changes_When_Titlecased # L& LATIN SMALL LETTER G 
WITH OBLIQUE STROKE A7A3 ; Changes_When_Titlecased # L& LATIN SMALL LETTER K WITH OBLIQUE STROKE A7A5 ; Changes_When_Titlecased # L& LATIN SMALL LETTER N WITH OBLIQUE STROKE @@ -4417,7 +4579,7 @@ FB13..FB17 ; Changes_When_Titlecased # L& [5] ARMENIAN SMALL LIGATURE MEN N FF41..FF5A ; Changes_When_Titlecased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z 10428..1044F ; Changes_When_Titlecased # L& [40] DESERET SMALL LETTER LONG I..DESERET SMALL LETTER EW -# Total code points: 1094 +# Total code points: 1099 # ================================================ @@ -4700,6 +4862,8 @@ FF41..FF5A ; Changes_When_Titlecased # L& [26] FULLWIDTH LATIN SMALL LETTER 0531..0556 ; Changes_When_Casefolded # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH 0587 ; Changes_When_Casefolded # L& ARMENIAN SMALL LIGATURE ECH YIWN 10A0..10C5 ; Changes_When_Casefolded # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; Changes_When_Casefolded # L& GEORGIAN CAPITAL LETTER YN +10CD ; Changes_When_Casefolded # L& GEORGIAN CAPITAL LETTER AEN 1E00 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER A WITH RING BELOW 1E02 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER B WITH DOT ABOVE 1E04 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER B WITH DOT BELOW @@ -4911,6 +5075,7 @@ FF41..FF5A ; Changes_When_Titlecased # L& [26] FULLWIDTH LATIN SMALL LETTER 2CE2 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER OLD NUBIAN WAU 2CEB ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI 2CED ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA +2CF2 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER BOHAIRIC KHEI A640 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER ZEMLYA A642 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER DZELO A644 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER REVERSED DZE @@ -4994,17 +5159,19 @@ A786 ; Changes_When_Casefolded # L& LATIN CAPITAL 
LETTER INSULAR A78B ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER SALTILLO A78D ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER TURNED H A790 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER N WITH DESCENDER +A792 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER C WITH BAR A7A0 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER G WITH OBLIQUE STROKE A7A2 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER K WITH OBLIQUE STROKE A7A4 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER N WITH OBLIQUE STROKE A7A6 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER R WITH OBLIQUE STROKE A7A8 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER S WITH OBLIQUE STROKE +A7AA ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER H WITH HOOK FB00..FB06 ; Changes_When_Casefolded # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST FB13..FB17 ; Changes_When_Casefolded # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH FF21..FF3A ; Changes_When_Casefolded # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z 10400..10427 ; Changes_When_Casefolded # L& [40] DESERET CAPITAL LETTER LONG I..DESERET CAPITAL LETTER EW -# Total code points: 1102 +# Total code points: 1107 # ================================================ @@ -5033,7 +5200,7 @@ FF21..FF3A ; Changes_When_Casefolded # L& [26] FULLWIDTH LATIN CAPITAL LETTE 025B ; Changes_When_Casemapped # L& LATIN SMALL LETTER OPEN E 0260 ; Changes_When_Casemapped # L& LATIN SMALL LETTER G WITH HOOK 0263 ; Changes_When_Casemapped # L& LATIN SMALL LETTER GAMMA -0265 ; Changes_When_Casemapped # L& LATIN SMALL LETTER TURNED H +0265..0266 ; Changes_When_Casemapped # L& [2] LATIN SMALL LETTER TURNED H..LATIN SMALL LETTER H WITH HOOK 0268..0269 ; Changes_When_Casemapped # L& [2] LATIN SMALL LETTER I WITH STROKE..LATIN SMALL LETTER IOTA 026B ; Changes_When_Casemapped # L& LATIN SMALL LETTER L WITH MIDDLE TILDE 026F ; Changes_When_Casemapped # L& LATIN SMALL LETTER TURNED M @@ 
-5061,6 +5228,8 @@ FF21..FF3A ; Changes_When_Casefolded # L& [26] FULLWIDTH LATIN CAPITAL LETTE 0531..0556 ; Changes_When_Casemapped # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH 0561..0587 ; Changes_When_Casemapped # L& [39] ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LIGATURE ECH YIWN 10A0..10C5 ; Changes_When_Casemapped # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; Changes_When_Casemapped # L& GEORGIAN CAPITAL LETTER YN +10CD ; Changes_When_Casemapped # L& GEORGIAN CAPITAL LETTER AEN 1D79 ; Changes_When_Casemapped # L& LATIN SMALL LETTER INSULAR G 1D7D ; Changes_When_Casemapped # L& LATIN SMALL LETTER P WITH STROKE 1E00..1E9B ; Changes_When_Casemapped # L& [156] LATIN CAPITAL LETTER A WITH RING BELOW..LATIN SMALL LETTER LONG S WITH DOT ABOVE @@ -5098,22 +5267,25 @@ FF21..FF3A ; Changes_When_Casefolded # L& [26] FULLWIDTH LATIN CAPITAL LETTE 2C75..2C76 ; Changes_When_Casemapped # L& [2] LATIN CAPITAL LETTER HALF H..LATIN SMALL LETTER HALF H 2C7E..2CE3 ; Changes_When_Casemapped # L& [102] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SMALL LETTER OLD NUBIAN WAU 2CEB..2CEE ; Changes_When_Casemapped # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CF2..2CF3 ; Changes_When_Casemapped # L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI 2D00..2D25 ; Changes_When_Casemapped # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE +2D27 ; Changes_When_Casemapped # L& GEORGIAN SMALL LETTER YN +2D2D ; Changes_When_Casemapped # L& GEORGIAN SMALL LETTER AEN A640..A66D ; Changes_When_Casemapped # L& [46] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O A680..A697 ; Changes_When_Casemapped # L& [24] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER SHWE A722..A72F ; Changes_When_Casemapped # L& [14] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN SMALL LETTER CUATRILLO WITH COMMA A732..A76F ; Changes_When_Casemapped # L& [62] 
LATIN CAPITAL LETTER AA..LATIN SMALL LETTER CON A779..A787 ; Changes_When_Casemapped # L& [15] LATIN CAPITAL LETTER INSULAR D..LATIN SMALL LETTER INSULAR T A78B..A78D ; Changes_When_Casemapped # L& [3] LATIN CAPITAL LETTER SALTILLO..LATIN CAPITAL LETTER TURNED H -A790..A791 ; Changes_When_Casemapped # L& [2] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER N WITH DESCENDER -A7A0..A7A9 ; Changes_When_Casemapped # L& [10] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN SMALL LETTER S WITH OBLIQUE STROKE +A790..A793 ; Changes_When_Casemapped # L& [4] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER C WITH BAR +A7A0..A7AA ; Changes_When_Casemapped # L& [11] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN CAPITAL LETTER H WITH HOOK FB00..FB06 ; Changes_When_Casemapped # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST FB13..FB17 ; Changes_When_Casemapped # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH FF21..FF3A ; Changes_When_Casemapped # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z FF41..FF5A ; Changes_When_Casemapped # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z 10400..1044F ; Changes_When_Casemapped # L& [80] DESERET CAPITAL LETTER LONG I..DESERET SMALL LETTER EW -# Total code points: 2128 +# Total code points: 2138 # ================================================ @@ -5128,9 +5300,9 @@ FF41..FF5A ; Changes_When_Casemapped # L& [26] FULLWIDTH LATIN SMALL LETTER 0041..005A ; ID_Start # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z 0061..007A ; ID_Start # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z -00AA ; ID_Start # L& FEMININE ORDINAL INDICATOR +00AA ; ID_Start # Lo FEMININE ORDINAL INDICATOR 00B5 ; ID_Start # L& MICRO SIGN -00BA ; ID_Start # L& MASCULINE ORDINAL INDICATOR +00BA ; ID_Start # Lo MASCULINE ORDINAL INDICATOR 00C0..00D6 ; ID_Start # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS 00D8..00F6 ; 
ID_Start # L& [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS 00F8..01BA ; ID_Start # L& [195] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL @@ -5184,6 +5356,8 @@ FF41..FF5A ; Changes_When_Casemapped # L& [26] FULLWIDTH LATIN SMALL LETTER 0824 ; ID_Start # Lm SAMARITAN MODIFIER LETTER SHORT A 0828 ; ID_Start # Lm SAMARITAN MODIFIER LETTER I 0840..0858 ; ID_Start # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN +08A0 ; ID_Start # Lo ARABIC LETTER BEH WITH SMALL V BELOW +08A2..08AC ; ID_Start # Lo [11] ARABIC LETTER JEEM WITH TWO DOTS ABOVE..ARABIC LETTER ROHINGYA YEH 0904..0939 ; ID_Start # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA 093D ; ID_Start # Lo DEVANAGARI SIGN AVAGRAHA 0950 ; ID_Start # Lo DEVANAGARI OM @@ -5291,7 +5465,7 @@ FF41..FF5A ; Changes_When_Casemapped # L& [26] FULLWIDTH LATIN SMALL LETTER 0EBD ; ID_Start # Lo LAO SEMIVOWEL SIGN NYO 0EC0..0EC4 ; ID_Start # Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI 0EC6 ; ID_Start # Lm LAO KO LA -0EDC..0EDD ; ID_Start # Lo [2] LAO HO NO..LAO HO MO +0EDC..0EDF ; ID_Start # Lo [4] LAO HO NO..LAO LETTER KHMU NYO 0F00 ; ID_Start # Lo TIBETAN SYLLABLE OM 0F40..0F47 ; ID_Start # Lo [8] TIBETAN LETTER KA..TIBETAN LETTER JA 0F49..0F6C ; ID_Start # Lo [36] TIBETAN LETTER NYA..TIBETAN LETTER RRA @@ -5306,9 +5480,11 @@ FF41..FF5A ; Changes_When_Casemapped # L& [26] FULLWIDTH LATIN SMALL LETTER 1075..1081 ; ID_Start # Lo [13] MYANMAR LETTER SHAN KA..MYANMAR LETTER SHAN HA 108E ; ID_Start # Lo MYANMAR LETTER RUMAI PALAUNG FA 10A0..10C5 ; ID_Start # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; ID_Start # L& GEORGIAN CAPITAL LETTER YN +10CD ; ID_Start # L& GEORGIAN CAPITAL LETTER AEN 10D0..10FA ; ID_Start # Lo [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN 10FC ; ID_Start # Lm MODIFIER LETTER GEORGIAN NAR -1100..1248 ; ID_Start # Lo [329] HANGUL CHOSEONG KIYEOK..ETHIOPIC SYLLABLE QWA +10FD..1248 ; ID_Start # Lo [332] GEORGIAN LETTER 
AEN..ETHIOPIC SYLLABLE QWA 124A..124D ; ID_Start # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE 1250..1256 ; ID_Start # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO 1258 ; ID_Start # Lo ETHIOPIC SYLLABLE QHWA @@ -5358,16 +5534,17 @@ FF41..FF5A ; Changes_When_Casemapped # L& [26] FULLWIDTH LATIN SMALL LETTER 1B45..1B4B ; ID_Start # Lo [7] BALINESE LETTER KAF SASAK..BALINESE LETTER ASYURA SASAK 1B83..1BA0 ; ID_Start # Lo [30] SUNDANESE LETTER A..SUNDANESE LETTER HA 1BAE..1BAF ; ID_Start # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA -1BC0..1BE5 ; ID_Start # Lo [38] BATAK LETTER A..BATAK LETTER U +1BBA..1BE5 ; ID_Start # Lo [44] SUNDANESE AVAGRAHA..BATAK LETTER U 1C00..1C23 ; ID_Start # Lo [36] LEPCHA LETTER KA..LEPCHA LETTER A 1C4D..1C4F ; ID_Start # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA 1C5A..1C77 ; ID_Start # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH 1C78..1C7D ; ID_Start # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD 1CE9..1CEC ; ID_Start # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL 1CEE..1CF1 ; ID_Start # Lo [4] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ANUSVARA UBHAYATO MUKHA +1CF5..1CF6 ; ID_Start # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA 1D00..1D2B ; ID_Start # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL -1D2C..1D61 ; ID_Start # Lm [54] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI -1D62..1D77 ; ID_Start # L& [22] LATIN SUBSCRIPT SMALL LETTER I..LATIN SMALL LETTER TURNED G +1D2C..1D6A ; ID_Start # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1D6B..1D77 ; ID_Start # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G 1D78 ; ID_Start # Lm MODIFIER LETTER CYRILLIC EN 1D79..1D9A ; ID_Start # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK 1D9B..1DBF ; ID_Start # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA @@ -5415,12 +5592,15 @@ FF41..FF5A ; 
Changes_When_Casemapped # L& [26] FULLWIDTH LATIN SMALL LETTER 2185..2188 ; ID_Start # Nl [4] ROMAN NUMERAL SIX LATE FORM..ROMAN NUMERAL ONE HUNDRED THOUSAND 2C00..2C2E ; ID_Start # L& [47] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE 2C30..2C5E ; ID_Start # L& [47] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER LATINATE MYSLITE -2C60..2C7C ; ID_Start # L& [29] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN SUBSCRIPT SMALL LETTER J -2C7D ; ID_Start # Lm MODIFIER LETTER CAPITAL V +2C60..2C7B ; ID_Start # L& [28] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN LETTER SMALL CAPITAL TURNED E +2C7C..2C7D ; ID_Start # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V 2C7E..2CE4 ; ID_Start # L& [103] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SYMBOL KAI 2CEB..2CEE ; ID_Start # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CF2..2CF3 ; ID_Start # L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI 2D00..2D25 ; ID_Start # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE -2D30..2D65 ; ID_Start # Lo [54] TIFINAGH LETTER YA..TIFINAGH LETTER YAZZ +2D27 ; ID_Start # L& GEORGIAN SMALL LETTER YN +2D2D ; ID_Start # L& GEORGIAN SMALL LETTER AEN +2D30..2D67 ; ID_Start # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO 2D6F ; ID_Start # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK 2D80..2D96 ; ID_Start # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE 2DA0..2DA6 ; ID_Start # Lo [7] ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO @@ -5451,7 +5631,7 @@ FF41..FF5A ; Changes_When_Casemapped # L& [26] FULLWIDTH LATIN SMALL LETTER 31A0..31BA ; ID_Start # Lo [27] BOPOMOFO LETTER BU..BOPOMOFO LETTER ZY 31F0..31FF ; ID_Start # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO 3400..4DB5 ; ID_Start # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5 -4E00..9FCB ; ID_Start # Lo [20940] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED 
IDEOGRAPH-9FCB +4E00..9FCC ; ID_Start # Lo [20941] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCC A000..A014 ; ID_Start # Lo [21] YI SYLLABLE IT..YI SYLLABLE E A015 ; ID_Start # Lm YI SYLLABLE WU A016..A48C ; ID_Start # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR @@ -5473,8 +5653,9 @@ A770 ; ID_Start # Lm MODIFIER LETTER US A771..A787 ; ID_Start # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T A788 ; ID_Start # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A78B..A78E ; ID_Start # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT -A790..A791 ; ID_Start # L& [2] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER N WITH DESCENDER -A7A0..A7A9 ; ID_Start # L& [10] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN SMALL LETTER S WITH OBLIQUE STROKE +A790..A793 ; ID_Start # L& [4] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER C WITH BAR +A7A0..A7AA ; ID_Start # L& [11] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN CAPITAL LETTER H WITH HOOK +A7F8..A7F9 ; ID_Start # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE A7FA ; ID_Start # L& LATIN LETTER SMALL CAPITAL TURNED M A7FB..A801 ; ID_Start # Lo [7] LATIN EPIGRAPHIC LETTER REVERSED F..SYLOTI NAGRI LETTER I A803..A805 ; ID_Start # Lo [3] SYLOTI NAGRI LETTER U..SYLOTI NAGRI LETTER O @@ -5504,6 +5685,9 @@ AAC0 ; ID_Start # Lo TAI VIET TONE MAI NUENG AAC2 ; ID_Start # Lo TAI VIET TONE MAI SONG AADB..AADC ; ID_Start # Lo [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG AADD ; ID_Start # Lm TAI VIET SYMBOL SAM +AAE0..AAEA ; ID_Start # Lo [11] MEETEI MAYEK LETTER E..MEETEI MAYEK LETTER SSA +AAF2 ; ID_Start # Lo MEETEI MAYEK ANJI +AAF3..AAF4 ; ID_Start # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK AB01..AB06 ; ID_Start # Lo [6] ETHIOPIC SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO AB09..AB0E ; ID_Start # Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO AB11..AB16 ; ID_Start # Lo [6] ETHIOPIC 
SYLLABLE DZU..ETHIOPIC SYLLABLE DZO @@ -5513,8 +5697,7 @@ ABC0..ABE2 ; ID_Start # Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER AC00..D7A3 ; ID_Start # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH D7B0..D7C6 ; ID_Start # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E D7CB..D7FB ; ID_Start # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH -F900..FA2D ; ID_Start # Lo [302] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA2D -FA30..FA6D ; ID_Start # Lo [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D +F900..FA6D ; ID_Start # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D FA70..FAD9 ; ID_Start # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 FB00..FB06 ; ID_Start # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST FB13..FB17 ; ID_Start # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH @@ -5572,6 +5755,8 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1083F..10855 ; ID_Start # Lo [23] CYPRIOT SYLLABLE ZO..IMPERIAL ARAMAIC LETTER TAW 10900..10915 ; ID_Start # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU 10920..10939 ; ID_Start # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C +10980..109B7 ; ID_Start # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA +109BE..109BF ; ID_Start # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN 10A00 ; ID_Start # Lo KHAROSHTHI LETTER A 10A10..10A13 ; ID_Start # Lo [4] KHAROSHTHI LETTER KA..KHAROSHTHI LETTER GHA 10A15..10A17 ; ID_Start # Lo [3] KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA @@ -5583,10 +5768,18 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 10C00..10C48 ; ID_Start # Lo [73] OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH 11003..11037 ; ID_Start # Lo [53] BRAHMI SIGN JIHVAMULIYA..BRAHMI LETTER OLD TAMIL NNNA 11083..110AF ; ID_Start # Lo [45] KAITHI 
LETTER A..KAITHI LETTER HA +110D0..110E8 ; ID_Start # Lo [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE +11103..11126 ; ID_Start # Lo [36] CHAKMA LETTER AA..CHAKMA LETTER HAA +11183..111B2 ; ID_Start # Lo [48] SHARADA LETTER A..SHARADA LETTER HA +111C1..111C4 ; ID_Start # Lo [4] SHARADA SIGN AVAGRAHA..SHARADA OM +11680..116AA ; ID_Start # Lo [43] TAKRI LETTER A..TAKRI LETTER RRA 12000..1236E ; ID_Start # Lo [879] CUNEIFORM SIGN A..CUNEIFORM SIGN ZUM 12400..12462 ; ID_Start # Nl [99] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN OLD ASSYRIAN ONE QUARTER 13000..1342E ; ID_Start # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032 16800..16A38 ; ID_Start # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ +16F00..16F44 ; ID_Start # Lo [69] MIAO LETTER PA..MIAO LETTER HHA +16F50 ; ID_Start # Lo MIAO LETTER NASALIZATION +16F93..16F9F ; ID_Start # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 1B000..1B001 ; ID_Start # Lo [2] KATAKANA LETTER ARCHAIC E..HIRAGANA LETTER ARCHAIC YE 1D400..1D454 ; ID_Start # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G 1D456..1D49C ; ID_Start # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A @@ -5618,12 +5811,45 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1D78A..1D7A8 ; ID_Start # L& [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA 1D7AA..1D7C2 ; ID_Start # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA 1D7C4..1D7CB ; ID_Start # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA +1EE00..1EE03 ; ID_Start # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; ID_Start # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; ID_Start # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; ID_Start # 
Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; ID_Start # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; ID_Start # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; ID_Start # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; ID_Start # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; ID_Start # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; ID_Start # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; ID_Start # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; ID_Start # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; ID_Start # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; ID_Start # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; ID_Start # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; ID_Start # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; ID_Start # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; ID_Start # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; ID_Start # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; ID_Start # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; ID_Start # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; ID_Start # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; ID_Start # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; ID_Start # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; ID_Start # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; ID_Start # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; ID_Start # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; ID_Start # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; ID_Start # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; ID_Start # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL 
LOOPED GHAIN +1EEA1..1EEA3 ; ID_Start # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; ID_Start # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; ID_Start # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN 20000..2A6D6 ; ID_Start # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6 2A700..2B734 ; ID_Start # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734 2B740..2B81D ; ID_Start # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D 2F800..2FA1D ; ID_Start # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D -# Total code points: 100747 +# Total code points: 101240 # ================================================ @@ -5641,10 +5867,10 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 0041..005A ; ID_Continue # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z 005F ; ID_Continue # Pc LOW LINE 0061..007A ; ID_Continue # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z -00AA ; ID_Continue # L& FEMININE ORDINAL INDICATOR +00AA ; ID_Continue # Lo FEMININE ORDINAL INDICATOR 00B5 ; ID_Continue # L& MICRO SIGN 00B7 ; ID_Continue # Po MIDDLE DOT -00BA ; ID_Continue # L& MASCULINE ORDINAL INDICATOR +00BA ; ID_Continue # Lo MASCULINE ORDINAL INDICATOR 00C0..00D6 ; ID_Continue # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS 00D8..00F6 ; ID_Continue # L& [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS 00F8..01BA ; ID_Continue # L& [195] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL @@ -5725,6 +5951,9 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 0829..082D ; ID_Continue # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA 0840..0858 ; ID_Continue # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN 
0859..085B ; ID_Continue # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK +08A0 ; ID_Continue # Lo ARABIC LETTER BEH WITH SMALL V BELOW +08A2..08AC ; ID_Continue # Lo [11] ARABIC LETTER JEEM WITH TWO DOTS ABOVE..ARABIC LETTER ROHINGYA YEH +08E4..08FE ; ID_Continue # Mn [27] ARABIC CURLY FATHA..ARABIC DAMMA WITH DOT 0900..0902 ; ID_Continue # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA 0903 ; ID_Continue # Mc DEVANAGARI SIGN VISARGA 0904..0939 ; ID_Continue # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA @@ -5946,7 +6175,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 0EC6 ; ID_Continue # Lm LAO KO LA 0EC8..0ECD ; ID_Continue # Mn [6] LAO TONE MAI EK..LAO NIGGAHITA 0ED0..0ED9 ; ID_Continue # Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE -0EDC..0EDD ; ID_Continue # Lo [2] LAO HO NO..LAO HO MO +0EDC..0EDF ; ID_Continue # Lo [4] LAO HO NO..LAO LETTER KHMU NYO 0F00 ; ID_Continue # Lo TIBETAN SYLLABLE OM 0F18..0F19 ; ID_Continue # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS 0F20..0F29 ; ID_Continue # Nd [10] TIBETAN DIGIT ZERO..TIBETAN DIGIT NINE @@ -5998,9 +6227,11 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 109A..109C ; ID_Continue # Mc [3] MYANMAR SIGN KHAMTI TONE-1..MYANMAR VOWEL SIGN AITON A 109D ; ID_Continue # Mn MYANMAR VOWEL SIGN AITON AI 10A0..10C5 ; ID_Continue # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; ID_Continue # L& GEORGIAN CAPITAL LETTER YN +10CD ; ID_Continue # L& GEORGIAN CAPITAL LETTER AEN 10D0..10FA ; ID_Continue # Lo [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN 10FC ; ID_Continue # Lm MODIFIER LETTER GEORGIAN NAR -1100..1248 ; ID_Continue # Lo [329] HANGUL CHOSEONG KIYEOK..ETHIOPIC SYLLABLE QWA +10FD..1248 ; ID_Continue # Lo [332] GEORGIAN LETTER AEN..ETHIOPIC SYLLABLE QWA 124A..124D ; ID_Continue # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE 1250..1256 ; ID_Continue 
# Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO 1258 ; ID_Continue # Lo ETHIOPIC SYLLABLE QHWA @@ -6036,6 +6267,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 176E..1770 ; ID_Continue # Lo [3] TAGBANWA LETTER LA..TAGBANWA LETTER SA 1772..1773 ; ID_Continue # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U 1780..17B3 ; ID_Continue # Lo [52] KHMER LETTER KA..KHMER INDEPENDENT VOWEL QAU +17B4..17B5 ; ID_Continue # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA 17B6 ; ID_Continue # Mc KHMER VOWEL SIGN AA 17B7..17BD ; ID_Continue # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA 17BE..17C5 ; ID_Continue # Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU @@ -6114,9 +6346,11 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1BA6..1BA7 ; ID_Continue # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG 1BA8..1BA9 ; ID_Continue # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG 1BAA ; ID_Continue # Mc SUNDANESE SIGN PAMAAEH +1BAB ; ID_Continue # Mn SUNDANESE SIGN VIRAMA +1BAC..1BAD ; ID_Continue # Mc [2] SUNDANESE CONSONANT SIGN PASANGAN MA..SUNDANESE CONSONANT SIGN PASANGAN WA 1BAE..1BAF ; ID_Continue # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA 1BB0..1BB9 ; ID_Continue # Nd [10] SUNDANESE DIGIT ZERO..SUNDANESE DIGIT NINE -1BC0..1BE5 ; ID_Continue # Lo [38] BATAK LETTER A..BATAK LETTER U +1BBA..1BE5 ; ID_Continue # Lo [44] SUNDANESE AVAGRAHA..BATAK LETTER U 1BE6 ; ID_Continue # Mn BATAK SIGN TOMPI 1BE7 ; ID_Continue # Mc BATAK VOWEL SIGN E 1BE8..1BE9 ; ID_Continue # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE @@ -6142,10 +6376,12 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1CE9..1CEC ; ID_Continue # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL 1CED ; ID_Continue # Mn VEDIC SIGN TIRYAK 1CEE..1CF1 ; ID_Continue # Lo [4] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN 
ANUSVARA UBHAYATO MUKHA -1CF2 ; ID_Continue # Mc VEDIC SIGN ARDHAVISARGA +1CF2..1CF3 ; ID_Continue # Mc [2] VEDIC SIGN ARDHAVISARGA..VEDIC SIGN ROTATED ARDHAVISARGA +1CF4 ; ID_Continue # Mn VEDIC TONE CANDRA ABOVE +1CF5..1CF6 ; ID_Continue # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA 1D00..1D2B ; ID_Continue # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL -1D2C..1D61 ; ID_Continue # Lm [54] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI -1D62..1D77 ; ID_Continue # L& [22] LATIN SUBSCRIPT SMALL LETTER I..LATIN SMALL LETTER TURNED G +1D2C..1D6A ; ID_Continue # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1D6B..1D77 ; ID_Continue # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G 1D78 ; ID_Continue # Lm MODIFIER LETTER CYRILLIC EN 1D79..1D9A ; ID_Continue # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK 1D9B..1DBF ; ID_Continue # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA @@ -6200,13 +6436,16 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 2185..2188 ; ID_Continue # Nl [4] ROMAN NUMERAL SIX LATE FORM..ROMAN NUMERAL ONE HUNDRED THOUSAND 2C00..2C2E ; ID_Continue # L& [47] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE 2C30..2C5E ; ID_Continue # L& [47] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER LATINATE MYSLITE -2C60..2C7C ; ID_Continue # L& [29] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN SUBSCRIPT SMALL LETTER J -2C7D ; ID_Continue # Lm MODIFIER LETTER CAPITAL V +2C60..2C7B ; ID_Continue # L& [28] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN LETTER SMALL CAPITAL TURNED E +2C7C..2C7D ; ID_Continue # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V 2C7E..2CE4 ; ID_Continue # L& [103] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SYMBOL KAI 2CEB..2CEE ; ID_Continue # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC 
GANGIA 2CEF..2CF1 ; ID_Continue # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS +2CF2..2CF3 ; ID_Continue # L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI 2D00..2D25 ; ID_Continue # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE -2D30..2D65 ; ID_Continue # Lo [54] TIFINAGH LETTER YA..TIFINAGH LETTER YAZZ +2D27 ; ID_Continue # L& GEORGIAN SMALL LETTER YN +2D2D ; ID_Continue # L& GEORGIAN SMALL LETTER AEN +2D30..2D67 ; ID_Continue # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO 2D6F ; ID_Continue # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK 2D7F ; ID_Continue # Mn TIFINAGH CONSONANT JOINER 2D80..2D96 ; ID_Continue # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE @@ -6223,7 +6462,8 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 3006 ; ID_Continue # Lo IDEOGRAPHIC CLOSING MARK 3007 ; ID_Continue # Nl IDEOGRAPHIC NUMBER ZERO 3021..3029 ; ID_Continue # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE -302A..302F ; ID_Continue # Mn [6] IDEOGRAPHIC LEVEL TONE MARK..HANGUL DOUBLE DOT TONE MARK +302A..302D ; ID_Continue # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK +302E..302F ; ID_Continue # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK 3031..3035 ; ID_Continue # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF 3038..303A ; ID_Continue # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY 303B ; ID_Continue # Lm VERTICAL IDEOGRAPHIC ITERATION MARK @@ -6241,7 +6481,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 31A0..31BA ; ID_Continue # Lo [27] BOPOMOFO LETTER BU..BOPOMOFO LETTER ZY 31F0..31FF ; ID_Continue # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO 3400..4DB5 ; ID_Continue # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5 -4E00..9FCB ; ID_Continue # Lo [20940] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCB +4E00..9FCC ; 
ID_Continue # Lo [20941] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCC A000..A014 ; ID_Continue # Lo [21] YI SYLLABLE IT..YI SYLLABLE E A015 ; ID_Continue # Lm YI SYLLABLE WU A016..A48C ; ID_Continue # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR @@ -6255,9 +6495,10 @@ A62A..A62B ; ID_Continue # Lo [2] VAI SYLLABLE NDOLE MA..VAI SYLLABLE NDOLE A640..A66D ; ID_Continue # L& [46] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O A66E ; ID_Continue # Lo CYRILLIC LETTER MULTIOCULAR O A66F ; ID_Continue # Mn COMBINING CYRILLIC VZMET -A67C..A67D ; ID_Continue # Mn [2] COMBINING CYRILLIC KAVYKA..COMBINING CYRILLIC PAYEROK +A674..A67D ; ID_Continue # Mn [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK A67F ; ID_Continue # Lm CYRILLIC PAYEROK A680..A697 ; ID_Continue # L& [24] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER SHWE +A69F ; ID_Continue # Mn COMBINING CYRILLIC LETTER IOTIFIED E A6A0..A6E5 ; ID_Continue # Lo [70] BAMUM LETTER A..BAMUM LETTER KI A6E6..A6EF ; ID_Continue # Nl [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM A6F0..A6F1 ; ID_Continue # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS @@ -6267,8 +6508,9 @@ A770 ; ID_Continue # Lm MODIFIER LETTER US A771..A787 ; ID_Continue # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T A788 ; ID_Continue # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A78B..A78E ; ID_Continue # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT -A790..A791 ; ID_Continue # L& [2] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER N WITH DESCENDER -A7A0..A7A9 ; ID_Continue # L& [10] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN SMALL LETTER S WITH OBLIQUE STROKE +A790..A793 ; ID_Continue # L& [4] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER C WITH BAR +A7A0..A7AA ; ID_Continue # L& [11] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN CAPITAL LETTER H WITH HOOK +A7F8..A7F9 ; ID_Continue # Lm [2] 
MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE A7FA ; ID_Continue # L& LATIN LETTER SMALL CAPITAL TURNED M A7FB..A801 ; ID_Continue # Lo [7] LATIN EPIGRAPHIC LETTER REVERSED F..SYLOTI NAGRI LETTER I A802 ; ID_Continue # Mn SYLOTI NAGRI SIGN DVISVARA @@ -6337,6 +6579,14 @@ AAC1 ; ID_Continue # Mn TAI VIET TONE MAI THO AAC2 ; ID_Continue # Lo TAI VIET TONE MAI SONG AADB..AADC ; ID_Continue # Lo [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG AADD ; ID_Continue # Lm TAI VIET SYMBOL SAM +AAE0..AAEA ; ID_Continue # Lo [11] MEETEI MAYEK LETTER E..MEETEI MAYEK LETTER SSA +AAEB ; ID_Continue # Mc MEETEI MAYEK VOWEL SIGN II +AAEC..AAED ; ID_Continue # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI +AAEE..AAEF ; ID_Continue # Mc [2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU +AAF2 ; ID_Continue # Lo MEETEI MAYEK ANJI +AAF3..AAF4 ; ID_Continue # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK +AAF5 ; ID_Continue # Mc MEETEI MAYEK VOWEL SIGN VISARGA +AAF6 ; ID_Continue # Mn MEETEI MAYEK VIRAMA AB01..AB06 ; ID_Continue # Lo [6] ETHIOPIC SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO AB09..AB0E ; ID_Continue # Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO AB11..AB16 ; ID_Continue # Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO @@ -6354,8 +6604,7 @@ ABF0..ABF9 ; ID_Continue # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIG AC00..D7A3 ; ID_Continue # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH D7B0..D7C6 ; ID_Continue # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E D7CB..D7FB ; ID_Continue # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH -F900..FA2D ; ID_Continue # Lo [302] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA2D -FA30..FA6D ; ID_Continue # Lo [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D +F900..FA6D ; ID_Continue # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D 
FA70..FAD9 ; ID_Continue # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 FB00..FB06 ; ID_Continue # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST FB13..FB17 ; ID_Continue # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH @@ -6422,6 +6671,8 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 1083F..10855 ; ID_Continue # Lo [23] CYPRIOT SYLLABLE ZO..IMPERIAL ARAMAIC LETTER TAW 10900..10915 ; ID_Continue # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU 10920..10939 ; ID_Continue # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C +10980..109B7 ; ID_Continue # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA +109BE..109BF ; ID_Continue # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN 10A00 ; ID_Continue # Lo KHAROSHTHI LETTER A 10A01..10A03 ; ID_Continue # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R 10A05..10A06 ; ID_Continue # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O @@ -6449,10 +6700,40 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 110B3..110B6 ; ID_Continue # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI 110B7..110B8 ; ID_Continue # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU 110B9..110BA ; ID_Continue # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA +110D0..110E8 ; ID_Continue # Lo [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE +110F0..110F9 ; ID_Continue # Nd [10] SORA SOMPENG DIGIT ZERO..SORA SOMPENG DIGIT NINE +11100..11102 ; ID_Continue # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11103..11126 ; ID_Continue # Lo [36] CHAKMA LETTER AA..CHAKMA LETTER HAA +11127..1112B ; ID_Continue # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112C ; ID_Continue # Mc CHAKMA VOWEL SIGN E +1112D..11134 ; ID_Continue # Mn [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA +11136..1113F ; ID_Continue # Nd [10] CHAKMA DIGIT ZERO..CHAKMA DIGIT NINE +11180..11181 ; 
ID_Continue # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +11182 ; ID_Continue # Mc SHARADA SIGN VISARGA +11183..111B2 ; ID_Continue # Lo [48] SHARADA LETTER A..SHARADA LETTER HA +111B3..111B5 ; ID_Continue # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II +111B6..111BE ; ID_Continue # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +111BF..111C0 ; ID_Continue # Mc [2] SHARADA VOWEL SIGN AU..SHARADA SIGN VIRAMA +111C1..111C4 ; ID_Continue # Lo [4] SHARADA SIGN AVAGRAHA..SHARADA OM +111D0..111D9 ; ID_Continue # Nd [10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE +11680..116AA ; ID_Continue # Lo [43] TAKRI LETTER A..TAKRI LETTER RRA +116AB ; ID_Continue # Mn TAKRI SIGN ANUSVARA +116AC ; ID_Continue # Mc TAKRI SIGN VISARGA +116AD ; ID_Continue # Mn TAKRI VOWEL SIGN AA +116AE..116AF ; ID_Continue # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II +116B0..116B5 ; ID_Continue # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +116B6 ; ID_Continue # Mc TAKRI SIGN VIRAMA +116B7 ; ID_Continue # Mn TAKRI SIGN NUKTA +116C0..116C9 ; ID_Continue # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE 12000..1236E ; ID_Continue # Lo [879] CUNEIFORM SIGN A..CUNEIFORM SIGN ZUM 12400..12462 ; ID_Continue # Nl [99] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN OLD ASSYRIAN ONE QUARTER 13000..1342E ; ID_Continue # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032 16800..16A38 ; ID_Continue # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ +16F00..16F44 ; ID_Continue # Lo [69] MIAO LETTER PA..MIAO LETTER HHA +16F50 ; ID_Continue # Lo MIAO LETTER NASALIZATION +16F51..16F7E ; ID_Continue # Mc [46] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN NG +16F8F..16F92 ; ID_Continue # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW +16F93..16F9F ; ID_Continue # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 1B000..1B001 ; ID_Continue # Lo [2] KATAKANA LETTER ARCHAIC E..HIRAGANA LETTER ARCHAIC YE 1D165..1D166 ; ID_Continue # Mc [2] MUSICAL SYMBOL COMBINING 
STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM 1D167..1D169 ; ID_Continue # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 @@ -6492,13 +6773,46 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 1D7AA..1D7C2 ; ID_Continue # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA 1D7C4..1D7CB ; ID_Continue # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA 1D7CE..1D7FF ; ID_Continue # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE +1EE00..1EE03 ; ID_Continue # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; ID_Continue # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; ID_Continue # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; ID_Continue # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; ID_Continue # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; ID_Continue # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; ID_Continue # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; ID_Continue # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; ID_Continue # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; ID_Continue # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; ID_Continue # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; ID_Continue # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; ID_Continue # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; ID_Continue # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; ID_Continue # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; ID_Continue # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; ID_Continue # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; ID_Continue # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; ID_Continue # Lo ARABIC MATHEMATICAL TAILED 
GHAIN +1EE5D ; ID_Continue # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; ID_Continue # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; ID_Continue # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; ID_Continue # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; ID_Continue # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; ID_Continue # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; ID_Continue # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; ID_Continue # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; ID_Continue # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; ID_Continue # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; ID_Continue # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; ID_Continue # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; ID_Continue # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; ID_Continue # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN 20000..2A6D6 ; ID_Continue # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6 2A700..2B734 ; ID_Continue # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734 2B740..2B81D ; ID_Continue # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D 2F800..2FA1D ; ID_Continue # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 102675 +# Total code points: 103355 # ================================================ @@ -6511,9 +6825,9 @@ E0100..E01EF ; ID_Continue # Mn 
[240] VARIATION SELECTOR-17..VARIATION SELECTOR 0041..005A ; XID_Start # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z 0061..007A ; XID_Start # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z -00AA ; XID_Start # L& FEMININE ORDINAL INDICATOR +00AA ; XID_Start # Lo FEMININE ORDINAL INDICATOR 00B5 ; XID_Start # L& MICRO SIGN -00BA ; XID_Start # L& MASCULINE ORDINAL INDICATOR +00BA ; XID_Start # Lo MASCULINE ORDINAL INDICATOR 00C0..00D6 ; XID_Start # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS 00D8..00F6 ; XID_Start # L& [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS 00F8..01BA ; XID_Start # L& [195] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL @@ -6566,6 +6880,8 @@ E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR 0824 ; XID_Start # Lm SAMARITAN MODIFIER LETTER SHORT A 0828 ; XID_Start # Lm SAMARITAN MODIFIER LETTER I 0840..0858 ; XID_Start # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN +08A0 ; XID_Start # Lo ARABIC LETTER BEH WITH SMALL V BELOW +08A2..08AC ; XID_Start # Lo [11] ARABIC LETTER JEEM WITH TWO DOTS ABOVE..ARABIC LETTER ROHINGYA YEH 0904..0939 ; XID_Start # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA 093D ; XID_Start # Lo DEVANAGARI SIGN AVAGRAHA 0950 ; XID_Start # Lo DEVANAGARI OM @@ -6673,7 +6989,7 @@ E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR 0EBD ; XID_Start # Lo LAO SEMIVOWEL SIGN NYO 0EC0..0EC4 ; XID_Start # Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI 0EC6 ; XID_Start # Lm LAO KO LA -0EDC..0EDD ; XID_Start # Lo [2] LAO HO NO..LAO HO MO +0EDC..0EDF ; XID_Start # Lo [4] LAO HO NO..LAO LETTER KHMU NYO 0F00 ; XID_Start # Lo TIBETAN SYLLABLE OM 0F40..0F47 ; XID_Start # Lo [8] TIBETAN LETTER KA..TIBETAN LETTER JA 0F49..0F6C ; XID_Start # Lo [36] TIBETAN LETTER NYA..TIBETAN LETTER RRA @@ -6688,9 +7004,11 @@ E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION 
SELECTOR 1075..1081 ; XID_Start # Lo [13] MYANMAR LETTER SHAN KA..MYANMAR LETTER SHAN HA 108E ; XID_Start # Lo MYANMAR LETTER RUMAI PALAUNG FA 10A0..10C5 ; XID_Start # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; XID_Start # L& GEORGIAN CAPITAL LETTER YN +10CD ; XID_Start # L& GEORGIAN CAPITAL LETTER AEN 10D0..10FA ; XID_Start # Lo [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN 10FC ; XID_Start # Lm MODIFIER LETTER GEORGIAN NAR -1100..1248 ; XID_Start # Lo [329] HANGUL CHOSEONG KIYEOK..ETHIOPIC SYLLABLE QWA +10FD..1248 ; XID_Start # Lo [332] GEORGIAN LETTER AEN..ETHIOPIC SYLLABLE QWA 124A..124D ; XID_Start # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE 1250..1256 ; XID_Start # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO 1258 ; XID_Start # Lo ETHIOPIC SYLLABLE QHWA @@ -6740,16 +7058,17 @@ E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR 1B45..1B4B ; XID_Start # Lo [7] BALINESE LETTER KAF SASAK..BALINESE LETTER ASYURA SASAK 1B83..1BA0 ; XID_Start # Lo [30] SUNDANESE LETTER A..SUNDANESE LETTER HA 1BAE..1BAF ; XID_Start # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA -1BC0..1BE5 ; XID_Start # Lo [38] BATAK LETTER A..BATAK LETTER U +1BBA..1BE5 ; XID_Start # Lo [44] SUNDANESE AVAGRAHA..BATAK LETTER U 1C00..1C23 ; XID_Start # Lo [36] LEPCHA LETTER KA..LEPCHA LETTER A 1C4D..1C4F ; XID_Start # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA 1C5A..1C77 ; XID_Start # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH 1C78..1C7D ; XID_Start # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD 1CE9..1CEC ; XID_Start # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL 1CEE..1CF1 ; XID_Start # Lo [4] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ANUSVARA UBHAYATO MUKHA +1CF5..1CF6 ; XID_Start # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA 1D00..1D2B ; XID_Start # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL -1D2C..1D61 ; XID_Start # Lm [54] MODIFIER LETTER 
CAPITAL A..MODIFIER LETTER SMALL CHI -1D62..1D77 ; XID_Start # L& [22] LATIN SUBSCRIPT SMALL LETTER I..LATIN SMALL LETTER TURNED G +1D2C..1D6A ; XID_Start # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1D6B..1D77 ; XID_Start # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G 1D78 ; XID_Start # Lm MODIFIER LETTER CYRILLIC EN 1D79..1D9A ; XID_Start # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK 1D9B..1DBF ; XID_Start # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA @@ -6797,12 +7116,15 @@ E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR 2185..2188 ; XID_Start # Nl [4] ROMAN NUMERAL SIX LATE FORM..ROMAN NUMERAL ONE HUNDRED THOUSAND 2C00..2C2E ; XID_Start # L& [47] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE 2C30..2C5E ; XID_Start # L& [47] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER LATINATE MYSLITE -2C60..2C7C ; XID_Start # L& [29] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN SUBSCRIPT SMALL LETTER J -2C7D ; XID_Start # Lm MODIFIER LETTER CAPITAL V +2C60..2C7B ; XID_Start # L& [28] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN LETTER SMALL CAPITAL TURNED E +2C7C..2C7D ; XID_Start # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V 2C7E..2CE4 ; XID_Start # L& [103] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SYMBOL KAI 2CEB..2CEE ; XID_Start # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CF2..2CF3 ; XID_Start # L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI 2D00..2D25 ; XID_Start # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE -2D30..2D65 ; XID_Start # Lo [54] TIFINAGH LETTER YA..TIFINAGH LETTER YAZZ +2D27 ; XID_Start # L& GEORGIAN SMALL LETTER YN +2D2D ; XID_Start # L& GEORGIAN SMALL LETTER AEN +2D30..2D67 ; XID_Start # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO 2D6F ; XID_Start # Lm TIFINAGH 
MODIFIER LETTER LABIALIZATION MARK 2D80..2D96 ; XID_Start # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE 2DA0..2DA6 ; XID_Start # Lo [7] ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO @@ -6832,7 +7154,7 @@ E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR 31A0..31BA ; XID_Start # Lo [27] BOPOMOFO LETTER BU..BOPOMOFO LETTER ZY 31F0..31FF ; XID_Start # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO 3400..4DB5 ; XID_Start # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5 -4E00..9FCB ; XID_Start # Lo [20940] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCB +4E00..9FCC ; XID_Start # Lo [20941] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCC A000..A014 ; XID_Start # Lo [21] YI SYLLABLE IT..YI SYLLABLE E A015 ; XID_Start # Lm YI SYLLABLE WU A016..A48C ; XID_Start # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR @@ -6854,8 +7176,9 @@ A770 ; XID_Start # Lm MODIFIER LETTER US A771..A787 ; XID_Start # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T A788 ; XID_Start # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A78B..A78E ; XID_Start # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT -A790..A791 ; XID_Start # L& [2] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER N WITH DESCENDER -A7A0..A7A9 ; XID_Start # L& [10] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN SMALL LETTER S WITH OBLIQUE STROKE +A790..A793 ; XID_Start # L& [4] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER C WITH BAR +A7A0..A7AA ; XID_Start # L& [11] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN CAPITAL LETTER H WITH HOOK +A7F8..A7F9 ; XID_Start # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE A7FA ; XID_Start # L& LATIN LETTER SMALL CAPITAL TURNED M A7FB..A801 ; XID_Start # Lo [7] LATIN EPIGRAPHIC LETTER REVERSED F..SYLOTI NAGRI LETTER I A803..A805 ; XID_Start # Lo [3] SYLOTI NAGRI LETTER U..SYLOTI NAGRI LETTER O @@ -6885,6 
+7208,9 @@ AAC0 ; XID_Start # Lo TAI VIET TONE MAI NUENG AAC2 ; XID_Start # Lo TAI VIET TONE MAI SONG AADB..AADC ; XID_Start # Lo [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG AADD ; XID_Start # Lm TAI VIET SYMBOL SAM +AAE0..AAEA ; XID_Start # Lo [11] MEETEI MAYEK LETTER E..MEETEI MAYEK LETTER SSA +AAF2 ; XID_Start # Lo MEETEI MAYEK ANJI +AAF3..AAF4 ; XID_Start # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK AB01..AB06 ; XID_Start # Lo [6] ETHIOPIC SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO AB09..AB0E ; XID_Start # Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO AB11..AB16 ; XID_Start # Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO @@ -6894,8 +7220,7 @@ ABC0..ABE2 ; XID_Start # Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTE AC00..D7A3 ; XID_Start # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH D7B0..D7C6 ; XID_Start # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E D7CB..D7FB ; XID_Start # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH -F900..FA2D ; XID_Start # Lo [302] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA2D -FA30..FA6D ; XID_Start # Lo [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D +F900..FA6D ; XID_Start # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D FA70..FAD9 ; XID_Start # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 FB00..FB06 ; XID_Start # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST FB13..FB17 ; XID_Start # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH @@ -6958,6 +7283,8 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 1083F..10855 ; XID_Start # Lo [23] CYPRIOT SYLLABLE ZO..IMPERIAL ARAMAIC LETTER TAW 10900..10915 ; XID_Start # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU 10920..10939 ; XID_Start # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C +10980..109B7 ; XID_Start # Lo [56] MEROITIC 
HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA +109BE..109BF ; XID_Start # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN 10A00 ; XID_Start # Lo KHAROSHTHI LETTER A 10A10..10A13 ; XID_Start # Lo [4] KHAROSHTHI LETTER KA..KHAROSHTHI LETTER GHA 10A15..10A17 ; XID_Start # Lo [3] KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA @@ -6969,10 +7296,18 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 10C00..10C48 ; XID_Start # Lo [73] OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH 11003..11037 ; XID_Start # Lo [53] BRAHMI SIGN JIHVAMULIYA..BRAHMI LETTER OLD TAMIL NNNA 11083..110AF ; XID_Start # Lo [45] KAITHI LETTER A..KAITHI LETTER HA +110D0..110E8 ; XID_Start # Lo [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE +11103..11126 ; XID_Start # Lo [36] CHAKMA LETTER AA..CHAKMA LETTER HAA +11183..111B2 ; XID_Start # Lo [48] SHARADA LETTER A..SHARADA LETTER HA +111C1..111C4 ; XID_Start # Lo [4] SHARADA SIGN AVAGRAHA..SHARADA OM +11680..116AA ; XID_Start # Lo [43] TAKRI LETTER A..TAKRI LETTER RRA 12000..1236E ; XID_Start # Lo [879] CUNEIFORM SIGN A..CUNEIFORM SIGN ZUM 12400..12462 ; XID_Start # Nl [99] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN OLD ASSYRIAN ONE QUARTER 13000..1342E ; XID_Start # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032 16800..16A38 ; XID_Start # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ +16F00..16F44 ; XID_Start # Lo [69] MIAO LETTER PA..MIAO LETTER HHA +16F50 ; XID_Start # Lo MIAO LETTER NASALIZATION +16F93..16F9F ; XID_Start # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 1B000..1B001 ; XID_Start # Lo [2] KATAKANA LETTER ARCHAIC E..HIRAGANA LETTER ARCHAIC YE 1D400..1D454 ; XID_Start # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G 1D456..1D49C ; XID_Start # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A @@ -7004,19 +7339,51 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER 
EU..HALFWIDTH HANGU 1D78A..1D7A8 ; XID_Start # L& [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA 1D7AA..1D7C2 ; XID_Start # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA 1D7C4..1D7CB ; XID_Start # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA +1EE00..1EE03 ; XID_Start # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; XID_Start # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; XID_Start # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; XID_Start # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; XID_Start # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; XID_Start # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; XID_Start # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; XID_Start # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; XID_Start # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; XID_Start # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; XID_Start # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; XID_Start # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; XID_Start # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; XID_Start # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; XID_Start # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; XID_Start # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; XID_Start # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; XID_Start # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; XID_Start # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; XID_Start # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; XID_Start # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; XID_Start # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; 
XID_Start # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; XID_Start # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; XID_Start # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; XID_Start # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; XID_Start # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; XID_Start # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; XID_Start # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; XID_Start # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; XID_Start # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; XID_Start # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; XID_Start # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN 20000..2A6D6 ; XID_Start # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6 2A700..2B734 ; XID_Start # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734 2B740..2B81D ; XID_Start # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D 2F800..2FA1D ; XID_Start # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D -# Total code points: 100724 +# Total code points: 101217 # ================================================ # Derived Property: XID_Continue # Mod_ID_Continue modified for closure under NFKx # Modified as described in UAX #15 -# NOTE: Cf characters should be filtered out. # NOTE: Does NOT remove the non-NFKx characters. 
# Merely ensures that if isIdentifer(string) then isIdentifier(NFKx(string)) # NOTE: See UAX #31 for more information @@ -7025,10 +7392,10 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 0041..005A ; XID_Continue # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z 005F ; XID_Continue # Pc LOW LINE 0061..007A ; XID_Continue # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z -00AA ; XID_Continue # L& FEMININE ORDINAL INDICATOR +00AA ; XID_Continue # Lo FEMININE ORDINAL INDICATOR 00B5 ; XID_Continue # L& MICRO SIGN 00B7 ; XID_Continue # Po MIDDLE DOT -00BA ; XID_Continue # L& MASCULINE ORDINAL INDICATOR +00BA ; XID_Continue # Lo MASCULINE ORDINAL INDICATOR 00C0..00D6 ; XID_Continue # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS 00D8..00F6 ; XID_Continue # L& [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS 00F8..01BA ; XID_Continue # L& [195] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL @@ -7108,6 +7475,9 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 0829..082D ; XID_Continue # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA 0840..0858 ; XID_Continue # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN 0859..085B ; XID_Continue # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK +08A0 ; XID_Continue # Lo ARABIC LETTER BEH WITH SMALL V BELOW +08A2..08AC ; XID_Continue # Lo [11] ARABIC LETTER JEEM WITH TWO DOTS ABOVE..ARABIC LETTER ROHINGYA YEH +08E4..08FE ; XID_Continue # Mn [27] ARABIC CURLY FATHA..ARABIC DAMMA WITH DOT 0900..0902 ; XID_Continue # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA 0903 ; XID_Continue # Mc DEVANAGARI SIGN VISARGA 0904..0939 ; XID_Continue # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA @@ -7329,7 +7699,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 0EC6 ; XID_Continue # Lm LAO KO LA 0EC8..0ECD ; 
XID_Continue # Mn [6] LAO TONE MAI EK..LAO NIGGAHITA 0ED0..0ED9 ; XID_Continue # Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE -0EDC..0EDD ; XID_Continue # Lo [2] LAO HO NO..LAO HO MO +0EDC..0EDF ; XID_Continue # Lo [4] LAO HO NO..LAO LETTER KHMU NYO 0F00 ; XID_Continue # Lo TIBETAN SYLLABLE OM 0F18..0F19 ; XID_Continue # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS 0F20..0F29 ; XID_Continue # Nd [10] TIBETAN DIGIT ZERO..TIBETAN DIGIT NINE @@ -7381,9 +7751,11 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 109A..109C ; XID_Continue # Mc [3] MYANMAR SIGN KHAMTI TONE-1..MYANMAR VOWEL SIGN AITON A 109D ; XID_Continue # Mn MYANMAR VOWEL SIGN AITON AI 10A0..10C5 ; XID_Continue # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; XID_Continue # L& GEORGIAN CAPITAL LETTER YN +10CD ; XID_Continue # L& GEORGIAN CAPITAL LETTER AEN 10D0..10FA ; XID_Continue # Lo [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN 10FC ; XID_Continue # Lm MODIFIER LETTER GEORGIAN NAR -1100..1248 ; XID_Continue # Lo [329] HANGUL CHOSEONG KIYEOK..ETHIOPIC SYLLABLE QWA +10FD..1248 ; XID_Continue # Lo [332] GEORGIAN LETTER AEN..ETHIOPIC SYLLABLE QWA 124A..124D ; XID_Continue # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE 1250..1256 ; XID_Continue # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO 1258 ; XID_Continue # Lo ETHIOPIC SYLLABLE QHWA @@ -7419,6 +7791,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 176E..1770 ; XID_Continue # Lo [3] TAGBANWA LETTER LA..TAGBANWA LETTER SA 1772..1773 ; XID_Continue # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U 1780..17B3 ; XID_Continue # Lo [52] KHMER LETTER KA..KHMER INDEPENDENT VOWEL QAU +17B4..17B5 ; XID_Continue # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA 17B6 ; XID_Continue # Mc KHMER VOWEL SIGN AA 17B7..17BD ; XID_Continue # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA 17BE..17C5 ; XID_Continue # Mc [8] KHMER 
VOWEL SIGN OE..KHMER VOWEL SIGN AU @@ -7497,9 +7870,11 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 1BA6..1BA7 ; XID_Continue # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG 1BA8..1BA9 ; XID_Continue # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG 1BAA ; XID_Continue # Mc SUNDANESE SIGN PAMAAEH +1BAB ; XID_Continue # Mn SUNDANESE SIGN VIRAMA +1BAC..1BAD ; XID_Continue # Mc [2] SUNDANESE CONSONANT SIGN PASANGAN MA..SUNDANESE CONSONANT SIGN PASANGAN WA 1BAE..1BAF ; XID_Continue # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA 1BB0..1BB9 ; XID_Continue # Nd [10] SUNDANESE DIGIT ZERO..SUNDANESE DIGIT NINE -1BC0..1BE5 ; XID_Continue # Lo [38] BATAK LETTER A..BATAK LETTER U +1BBA..1BE5 ; XID_Continue # Lo [44] SUNDANESE AVAGRAHA..BATAK LETTER U 1BE6 ; XID_Continue # Mn BATAK SIGN TOMPI 1BE7 ; XID_Continue # Mc BATAK VOWEL SIGN E 1BE8..1BE9 ; XID_Continue # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE @@ -7525,10 +7900,12 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 1CE9..1CEC ; XID_Continue # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL 1CED ; XID_Continue # Mn VEDIC SIGN TIRYAK 1CEE..1CF1 ; XID_Continue # Lo [4] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ANUSVARA UBHAYATO MUKHA -1CF2 ; XID_Continue # Mc VEDIC SIGN ARDHAVISARGA +1CF2..1CF3 ; XID_Continue # Mc [2] VEDIC SIGN ARDHAVISARGA..VEDIC SIGN ROTATED ARDHAVISARGA +1CF4 ; XID_Continue # Mn VEDIC TONE CANDRA ABOVE +1CF5..1CF6 ; XID_Continue # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA 1D00..1D2B ; XID_Continue # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL -1D2C..1D61 ; XID_Continue # Lm [54] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI -1D62..1D77 ; XID_Continue # L& [22] LATIN SUBSCRIPT SMALL LETTER I..LATIN SMALL LETTER TURNED G +1D2C..1D6A ; XID_Continue # Lm [63] MODIFIER LETTER CAPITAL A..GREEK 
SUBSCRIPT SMALL LETTER CHI +1D6B..1D77 ; XID_Continue # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G 1D78 ; XID_Continue # Lm MODIFIER LETTER CYRILLIC EN 1D79..1D9A ; XID_Continue # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK 1D9B..1DBF ; XID_Continue # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA @@ -7583,13 +7960,16 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 2185..2188 ; XID_Continue # Nl [4] ROMAN NUMERAL SIX LATE FORM..ROMAN NUMERAL ONE HUNDRED THOUSAND 2C00..2C2E ; XID_Continue # L& [47] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE 2C30..2C5E ; XID_Continue # L& [47] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER LATINATE MYSLITE -2C60..2C7C ; XID_Continue # L& [29] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN SUBSCRIPT SMALL LETTER J -2C7D ; XID_Continue # Lm MODIFIER LETTER CAPITAL V +2C60..2C7B ; XID_Continue # L& [28] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN LETTER SMALL CAPITAL TURNED E +2C7C..2C7D ; XID_Continue # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V 2C7E..2CE4 ; XID_Continue # L& [103] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SYMBOL KAI 2CEB..2CEE ; XID_Continue # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA 2CEF..2CF1 ; XID_Continue # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS +2CF2..2CF3 ; XID_Continue # L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI 2D00..2D25 ; XID_Continue # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE -2D30..2D65 ; XID_Continue # Lo [54] TIFINAGH LETTER YA..TIFINAGH LETTER YAZZ +2D27 ; XID_Continue # L& GEORGIAN SMALL LETTER YN +2D2D ; XID_Continue # L& GEORGIAN SMALL LETTER AEN +2D30..2D67 ; XID_Continue # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO 2D6F ; XID_Continue # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK 2D7F ; 
XID_Continue # Mn TIFINAGH CONSONANT JOINER 2D80..2D96 ; XID_Continue # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE @@ -7606,7 +7986,8 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 3006 ; XID_Continue # Lo IDEOGRAPHIC CLOSING MARK 3007 ; XID_Continue # Nl IDEOGRAPHIC NUMBER ZERO 3021..3029 ; XID_Continue # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE -302A..302F ; XID_Continue # Mn [6] IDEOGRAPHIC LEVEL TONE MARK..HANGUL DOUBLE DOT TONE MARK +302A..302D ; XID_Continue # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK +302E..302F ; XID_Continue # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK 3031..3035 ; XID_Continue # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF 3038..303A ; XID_Continue # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY 303B ; XID_Continue # Lm VERTICAL IDEOGRAPHIC ITERATION MARK @@ -7623,7 +8004,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 31A0..31BA ; XID_Continue # Lo [27] BOPOMOFO LETTER BU..BOPOMOFO LETTER ZY 31F0..31FF ; XID_Continue # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO 3400..4DB5 ; XID_Continue # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5 -4E00..9FCB ; XID_Continue # Lo [20940] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCB +4E00..9FCC ; XID_Continue # Lo [20941] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCC A000..A014 ; XID_Continue # Lo [21] YI SYLLABLE IT..YI SYLLABLE E A015 ; XID_Continue # Lm YI SYLLABLE WU A016..A48C ; XID_Continue # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR @@ -7637,9 +8018,10 @@ A62A..A62B ; XID_Continue # Lo [2] VAI SYLLABLE NDOLE MA..VAI SYLLABLE NDOL A640..A66D ; XID_Continue # L& [46] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O A66E ; XID_Continue # Lo CYRILLIC LETTER MULTIOCULAR O A66F ; XID_Continue # Mn COMBINING CYRILLIC VZMET -A67C..A67D ; XID_Continue # Mn [2] 
COMBINING CYRILLIC KAVYKA..COMBINING CYRILLIC PAYEROK +A674..A67D ; XID_Continue # Mn [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK A67F ; XID_Continue # Lm CYRILLIC PAYEROK A680..A697 ; XID_Continue # L& [24] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER SHWE +A69F ; XID_Continue # Mn COMBINING CYRILLIC LETTER IOTIFIED E A6A0..A6E5 ; XID_Continue # Lo [70] BAMUM LETTER A..BAMUM LETTER KI A6E6..A6EF ; XID_Continue # Nl [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM A6F0..A6F1 ; XID_Continue # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS @@ -7649,8 +8031,9 @@ A770 ; XID_Continue # Lm MODIFIER LETTER US A771..A787 ; XID_Continue # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T A788 ; XID_Continue # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A78B..A78E ; XID_Continue # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT -A790..A791 ; XID_Continue # L& [2] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER N WITH DESCENDER -A7A0..A7A9 ; XID_Continue # L& [10] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN SMALL LETTER S WITH OBLIQUE STROKE +A790..A793 ; XID_Continue # L& [4] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER C WITH BAR +A7A0..A7AA ; XID_Continue # L& [11] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN CAPITAL LETTER H WITH HOOK +A7F8..A7F9 ; XID_Continue # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE A7FA ; XID_Continue # L& LATIN LETTER SMALL CAPITAL TURNED M A7FB..A801 ; XID_Continue # Lo [7] LATIN EPIGRAPHIC LETTER REVERSED F..SYLOTI NAGRI LETTER I A802 ; XID_Continue # Mn SYLOTI NAGRI SIGN DVISVARA @@ -7719,6 +8102,14 @@ AAC1 ; XID_Continue # Mn TAI VIET TONE MAI THO AAC2 ; XID_Continue # Lo TAI VIET TONE MAI SONG AADB..AADC ; XID_Continue # Lo [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG AADD ; XID_Continue # Lm TAI VIET SYMBOL SAM +AAE0..AAEA ; XID_Continue # Lo [11] MEETEI MAYEK LETTER 
E..MEETEI MAYEK LETTER SSA +AAEB ; XID_Continue # Mc MEETEI MAYEK VOWEL SIGN II +AAEC..AAED ; XID_Continue # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI +AAEE..AAEF ; XID_Continue # Mc [2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU +AAF2 ; XID_Continue # Lo MEETEI MAYEK ANJI +AAF3..AAF4 ; XID_Continue # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK +AAF5 ; XID_Continue # Mc MEETEI MAYEK VOWEL SIGN VISARGA +AAF6 ; XID_Continue # Mn MEETEI MAYEK VIRAMA AB01..AB06 ; XID_Continue # Lo [6] ETHIOPIC SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO AB09..AB0E ; XID_Continue # Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO AB11..AB16 ; XID_Continue # Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO @@ -7736,8 +8127,7 @@ ABF0..ABF9 ; XID_Continue # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI AC00..D7A3 ; XID_Continue # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH D7B0..D7C6 ; XID_Continue # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E D7CB..D7FB ; XID_Continue # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH -F900..FA2D ; XID_Continue # Lo [302] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA2D -FA30..FA6D ; XID_Continue # Lo [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D +F900..FA6D ; XID_Continue # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D FA70..FAD9 ; XID_Continue # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 FB00..FB06 ; XID_Continue # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST FB13..FB17 ; XID_Continue # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH @@ -7810,6 +8200,8 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 1083F..10855 ; XID_Continue # Lo [23] CYPRIOT SYLLABLE ZO..IMPERIAL ARAMAIC LETTER TAW 10900..10915 ; XID_Continue # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU 
10920..10939 ; XID_Continue # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C +10980..109B7 ; XID_Continue # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA +109BE..109BF ; XID_Continue # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN 10A00 ; XID_Continue # Lo KHAROSHTHI LETTER A 10A01..10A03 ; XID_Continue # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R 10A05..10A06 ; XID_Continue # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O @@ -7837,10 +8229,40 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 110B3..110B6 ; XID_Continue # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI 110B7..110B8 ; XID_Continue # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU 110B9..110BA ; XID_Continue # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA +110D0..110E8 ; XID_Continue # Lo [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE +110F0..110F9 ; XID_Continue # Nd [10] SORA SOMPENG DIGIT ZERO..SORA SOMPENG DIGIT NINE +11100..11102 ; XID_Continue # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11103..11126 ; XID_Continue # Lo [36] CHAKMA LETTER AA..CHAKMA LETTER HAA +11127..1112B ; XID_Continue # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112C ; XID_Continue # Mc CHAKMA VOWEL SIGN E +1112D..11134 ; XID_Continue # Mn [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA +11136..1113F ; XID_Continue # Nd [10] CHAKMA DIGIT ZERO..CHAKMA DIGIT NINE +11180..11181 ; XID_Continue # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +11182 ; XID_Continue # Mc SHARADA SIGN VISARGA +11183..111B2 ; XID_Continue # Lo [48] SHARADA LETTER A..SHARADA LETTER HA +111B3..111B5 ; XID_Continue # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II +111B6..111BE ; XID_Continue # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +111BF..111C0 ; XID_Continue # Mc [2] SHARADA VOWEL SIGN AU..SHARADA SIGN VIRAMA +111C1..111C4 ; XID_Continue # Lo [4] SHARADA SIGN AVAGRAHA..SHARADA OM +111D0..111D9 ; XID_Continue # Nd 
[10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE +11680..116AA ; XID_Continue # Lo [43] TAKRI LETTER A..TAKRI LETTER RRA +116AB ; XID_Continue # Mn TAKRI SIGN ANUSVARA +116AC ; XID_Continue # Mc TAKRI SIGN VISARGA +116AD ; XID_Continue # Mn TAKRI VOWEL SIGN AA +116AE..116AF ; XID_Continue # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II +116B0..116B5 ; XID_Continue # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +116B6 ; XID_Continue # Mc TAKRI SIGN VIRAMA +116B7 ; XID_Continue # Mn TAKRI SIGN NUKTA +116C0..116C9 ; XID_Continue # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE 12000..1236E ; XID_Continue # Lo [879] CUNEIFORM SIGN A..CUNEIFORM SIGN ZUM 12400..12462 ; XID_Continue # Nl [99] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN OLD ASSYRIAN ONE QUARTER 13000..1342E ; XID_Continue # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032 16800..16A38 ; XID_Continue # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ +16F00..16F44 ; XID_Continue # Lo [69] MIAO LETTER PA..MIAO LETTER HHA +16F50 ; XID_Continue # Lo MIAO LETTER NASALIZATION +16F51..16F7E ; XID_Continue # Mc [46] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN NG +16F8F..16F92 ; XID_Continue # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW +16F93..16F9F ; XID_Continue # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 1B000..1B001 ; XID_Continue # Lo [2] KATAKANA LETTER ARCHAIC E..HIRAGANA LETTER ARCHAIC YE 1D165..1D166 ; XID_Continue # Mc [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM 1D167..1D169 ; XID_Continue # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 @@ -7880,13 +8302,46 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 1D7AA..1D7C2 ; XID_Continue # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA 1D7C4..1D7CB ; XID_Continue # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA 1D7CE..1D7FF 
; XID_Continue # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE +1EE00..1EE03 ; XID_Continue # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; XID_Continue # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; XID_Continue # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; XID_Continue # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; XID_Continue # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; XID_Continue # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; XID_Continue # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; XID_Continue # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; XID_Continue # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; XID_Continue # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; XID_Continue # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; XID_Continue # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; XID_Continue # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; XID_Continue # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; XID_Continue # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; XID_Continue # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; XID_Continue # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; XID_Continue # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; XID_Continue # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; XID_Continue # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; XID_Continue # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; XID_Continue # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; XID_Continue # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; XID_Continue # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; XID_Continue # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC 
MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; XID_Continue # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; XID_Continue # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; XID_Continue # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; XID_Continue # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; XID_Continue # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; XID_Continue # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; XID_Continue # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; XID_Continue # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN 20000..2A6D6 ; XID_Continue # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6 2A700..2B734 ; XID_Continue # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734 2B740..2B81D ; XID_Continue # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D 2F800..2FA1D ; XID_Continue # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 102656 +# Total code points: 103336 # ================================================ @@ -7897,12 +8352,12 @@ E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTO # + Variation_Selector # - White_Space # - FFF9..FFFB (Annotation Characters) -# - 0600..0603, 06DD, 070F, 110BD (exceptional Cf characters that should be visible) +# - 0600..0604, 06DD, 070F, 110BD (exceptional Cf characters that should be visible) 00AD ; Default_Ignorable_Code_Point # Cf SOFT HYPHEN 034F ; Default_Ignorable_Code_Point # Mn COMBINING GRAPHEME JOINER 115F..1160 ; Default_Ignorable_Code_Point # Lo [2] 
HANGUL CHOSEONG FILLER..HANGUL JUNGSEONG FILLER -17B4..17B5 ; Default_Ignorable_Code_Point # Cf [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA +17B4..17B5 ; Default_Ignorable_Code_Point # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA 180B..180D ; Default_Ignorable_Code_Point # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE 200B..200F ; Default_Ignorable_Code_Point # Cf [5] ZERO WIDTH SPACE..RIGHT-TO-LEFT MARK 202A..202E ; Default_Ignorable_Code_Point # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE @@ -7956,6 +8411,7 @@ E01F0..E0FFF ; Default_Ignorable_Code_Point # Cn [3600] <reserved-E01F0>..<rese 0825..0827 ; Grapheme_Extend # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U 0829..082D ; Grapheme_Extend # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA 0859..085B ; Grapheme_Extend # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK +08E4..08FE ; Grapheme_Extend # Mn [27] ARABIC CURLY FATHA..ARABIC DAMMA WITH DOT 0900..0902 ; Grapheme_Extend # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA 093A ; Grapheme_Extend # Mn DEVANAGARI VOWEL SIGN OE 093C ; Grapheme_Extend # Mn DEVANAGARI SIGN NUKTA @@ -8053,6 +8509,7 @@ E01F0..E0FFF ; Default_Ignorable_Code_Point # Cn [3600] <reserved-E01F0>..<rese 1732..1734 ; Grapheme_Extend # Mn [3] HANUNOO VOWEL SIGN I..HANUNOO SIGN PAMUDPOD 1752..1753 ; Grapheme_Extend # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U 1772..1773 ; Grapheme_Extend # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U +17B4..17B5 ; Grapheme_Extend # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA 17B7..17BD ; Grapheme_Extend # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA 17C6 ; Grapheme_Extend # Mn KHMER SIGN NIKAHIT 17C9..17D3 ; Grapheme_Extend # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT @@ -8080,6 +8537,7 @@ E01F0..E0FFF ; Default_Ignorable_Code_Point # Cn [3600] <reserved-E01F0>..<rese 1B80..1B81 ; Grapheme_Extend # 
Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR 1BA2..1BA5 ; Grapheme_Extend # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU 1BA8..1BA9 ; Grapheme_Extend # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG +1BAB ; Grapheme_Extend # Mn SUNDANESE SIGN VIRAMA 1BE6 ; Grapheme_Extend # Mn BATAK SIGN TOMPI 1BE8..1BE9 ; Grapheme_Extend # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE 1BED ; Grapheme_Extend # Mn BATAK VOWEL SIGN KARO O @@ -8090,6 +8548,7 @@ E01F0..E0FFF ; Default_Ignorable_Code_Point # Cn [3600] <reserved-E01F0>..<rese 1CD4..1CE0 ; Grapheme_Extend # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA 1CE2..1CE8 ; Grapheme_Extend # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL 1CED ; Grapheme_Extend # Mn VEDIC SIGN TIRYAK +1CF4 ; Grapheme_Extend # Mn VEDIC TONE CANDRA ABOVE 1DC0..1DE6 ; Grapheme_Extend # Mn [39] COMBINING DOTTED GRAVE ACCENT..COMBINING LATIN SMALL LETTER Z 1DFC..1DFF ; Grapheme_Extend # Mn [4] COMBINING DOUBLE INVERTED BREVE BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW 200C..200D ; Grapheme_Extend # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER @@ -8101,11 +8560,13 @@ E01F0..E0FFF ; Default_Ignorable_Code_Point # Cn [3600] <reserved-E01F0>..<rese 2CEF..2CF1 ; Grapheme_Extend # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS 2D7F ; Grapheme_Extend # Mn TIFINAGH CONSONANT JOINER 2DE0..2DFF ; Grapheme_Extend # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS -302A..302F ; Grapheme_Extend # Mn [6] IDEOGRAPHIC LEVEL TONE MARK..HANGUL DOUBLE DOT TONE MARK +302A..302D ; Grapheme_Extend # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK +302E..302F ; Grapheme_Extend # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK 3099..309A ; Grapheme_Extend # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING 
KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK A66F ; Grapheme_Extend # Mn COMBINING CYRILLIC VZMET A670..A672 ; Grapheme_Extend # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN -A67C..A67D ; Grapheme_Extend # Mn [2] COMBINING CYRILLIC KAVYKA..COMBINING CYRILLIC PAYEROK +A674..A67D ; Grapheme_Extend # Mn [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK +A69F ; Grapheme_Extend # Mn COMBINING CYRILLIC LETTER IOTIFIED E A6F0..A6F1 ; Grapheme_Extend # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS A802 ; Grapheme_Extend # Mn SYLOTI NAGRI SIGN DVISVARA A806 ; Grapheme_Extend # Mn SYLOTI NAGRI SIGN HASANTA @@ -8129,6 +8590,8 @@ AAB2..AAB4 ; Grapheme_Extend # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U AAB7..AAB8 ; Grapheme_Extend # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA AABE..AABF ; Grapheme_Extend # Mn [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK AAC1 ; Grapheme_Extend # Mn TAI VIET TONE MAI THO +AAEC..AAED ; Grapheme_Extend # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI +AAF6 ; Grapheme_Extend # Mn MEETEI MAYEK VIRAMA ABE5 ; Grapheme_Extend # Mn MEETEI MAYEK VOWEL SIGN ANAP ABE8 ; Grapheme_Extend # Mn MEETEI MAYEK VOWEL SIGN UNAP ABED ; Grapheme_Extend # Mn MEETEI MAYEK APUN IYEK @@ -8147,6 +8610,16 @@ FF9E..FF9F ; Grapheme_Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK. 
11080..11081 ; Grapheme_Extend # Mn [2] KAITHI SIGN CANDRABINDU..KAITHI SIGN ANUSVARA 110B3..110B6 ; Grapheme_Extend # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI 110B9..110BA ; Grapheme_Extend # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA +11100..11102 ; Grapheme_Extend # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11127..1112B ; Grapheme_Extend # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112D..11134 ; Grapheme_Extend # Mn [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA +11180..11181 ; Grapheme_Extend # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +111B6..111BE ; Grapheme_Extend # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +116AB ; Grapheme_Extend # Mn TAKRI SIGN ANUSVARA +116AD ; Grapheme_Extend # Mn TAKRI VOWEL SIGN AA +116B0..116B5 ; Grapheme_Extend # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +116B7 ; Grapheme_Extend # Mn TAKRI SIGN NUKTA +16F8F..16F92 ; Grapheme_Extend # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW 1D165 ; Grapheme_Extend # Mc MUSICAL SYMBOL COMBINING STEM 1D167..1D169 ; Grapheme_Extend # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 1D16E..1D172 ; Grapheme_Extend # Mc [5] MUSICAL SYMBOL COMBINING FLAG-1..MUSICAL SYMBOL COMBINING FLAG-5 @@ -8156,7 +8629,7 @@ FF9E..FF9F ; Grapheme_Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK. 
1D242..1D244 ; Grapheme_Extend # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 1234 +# Total code points: 1317 # ================================================ @@ -8195,10 +8668,11 @@ E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELE 00A0 ; Grapheme_Base # Zs NO-BREAK SPACE 00A1 ; Grapheme_Base # Po INVERTED EXCLAMATION MARK 00A2..00A5 ; Grapheme_Base # Sc [4] CENT SIGN..YEN SIGN -00A6..00A7 ; Grapheme_Base # So [2] BROKEN BAR..SECTION SIGN +00A6 ; Grapheme_Base # So BROKEN BAR +00A7 ; Grapheme_Base # Po SECTION SIGN 00A8 ; Grapheme_Base # Sk DIAERESIS 00A9 ; Grapheme_Base # So COPYRIGHT SIGN -00AA ; Grapheme_Base # L& FEMININE ORDINAL INDICATOR +00AA ; Grapheme_Base # Lo FEMININE ORDINAL INDICATOR 00AB ; Grapheme_Base # Pi LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 00AC ; Grapheme_Base # Sm NOT SIGN 00AE ; Grapheme_Base # So REGISTERED SIGN @@ -8208,11 +8682,10 @@ E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELE 00B2..00B3 ; Grapheme_Base # No [2] SUPERSCRIPT TWO..SUPERSCRIPT THREE 00B4 ; Grapheme_Base # Sk ACUTE ACCENT 00B5 ; Grapheme_Base # L& MICRO SIGN -00B6 ; Grapheme_Base # So PILCROW SIGN -00B7 ; Grapheme_Base # Po MIDDLE DOT +00B6..00B7 ; Grapheme_Base # Po [2] PILCROW SIGN..MIDDLE DOT 00B8 ; Grapheme_Base # Sk CEDILLA 00B9 ; Grapheme_Base # No SUPERSCRIPT ONE -00BA ; Grapheme_Base # L& MASCULINE ORDINAL INDICATOR +00BA ; Grapheme_Base # Lo MASCULINE ORDINAL INDICATOR 00BB ; Grapheme_Base # Pf RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK 00BC..00BE ; Grapheme_Base # No [3] VULGAR FRACTION ONE QUARTER..VULGAR FRACTION THREE QUARTERS 00BF ; Grapheme_Base # Po INVERTED QUESTION MARK @@ -8261,6 +8734,7 @@ E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELE 0561..0587 ; Grapheme_Base # L& [39] ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL 
LIGATURE ECH YIWN 0589 ; Grapheme_Base # Po ARMENIAN FULL STOP 058A ; Grapheme_Base # Pd ARMENIAN HYPHEN +058F ; Grapheme_Base # Sc ARMENIAN DRAM SIGN 05BE ; Grapheme_Base # Pd HEBREW PUNCTUATION MAQAF 05C0 ; Grapheme_Base # Po HEBREW PUNCTUATION PASEQ 05C3 ; Grapheme_Base # Po HEBREW PUNCTUATION SOF PASUQ @@ -8310,6 +8784,8 @@ E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELE 0830..083E ; Grapheme_Base # Po [15] SAMARITAN PUNCTUATION NEQUDAA..SAMARITAN PUNCTUATION ANNAAU 0840..0858 ; Grapheme_Base # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN 085E ; Grapheme_Base # Po MANDAIC PUNCTUATION +08A0 ; Grapheme_Base # Lo ARABIC LETTER BEH WITH SMALL V BELOW +08A2..08AC ; Grapheme_Base # Lo [11] ARABIC LETTER JEEM WITH TWO DOTS ABOVE..ARABIC LETTER ROHINGYA YEH 0903 ; Grapheme_Base # Mc DEVANAGARI SIGN VISARGA 0904..0939 ; Grapheme_Base # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA 093B ; Grapheme_Base # Mc DEVANAGARI VOWEL SIGN OOE @@ -8372,6 +8848,7 @@ E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELE 0AD0 ; Grapheme_Base # Lo GUJARATI OM 0AE0..0AE1 ; Grapheme_Base # Lo [2] GUJARATI LETTER VOCALIC RR..GUJARATI LETTER VOCALIC LL 0AE6..0AEF ; Grapheme_Base # Nd [10] GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE +0AF0 ; Grapheme_Base # Po GUJARATI ABBREVIATION SIGN 0AF1 ; Grapheme_Base # Sc GUJARATI RUPEE SIGN 0B02..0B03 ; Grapheme_Base # Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA 0B05..0B0C ; Grapheme_Base # Lo [8] ORIYA LETTER A..ORIYA LETTER VOCALIC L @@ -8488,11 +8965,13 @@ E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELE 0EC0..0EC4 ; Grapheme_Base # Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI 0EC6 ; Grapheme_Base # Lm LAO KO LA 0ED0..0ED9 ; Grapheme_Base # Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE -0EDC..0EDD ; Grapheme_Base # Lo [2] LAO HO NO..LAO HO MO +0EDC..0EDF ; Grapheme_Base # Lo [4] LAO HO NO..LAO LETTER KHMU NYO 0F00 ; Grapheme_Base # Lo TIBETAN SYLLABLE OM 
0F01..0F03 ; Grapheme_Base # So [3] TIBETAN MARK GTER YIG MGO TRUNCATED A..TIBETAN MARK GTER YIG MGO -UM GTER TSHEG MA 0F04..0F12 ; Grapheme_Base # Po [15] TIBETAN MARK INITIAL YIG MGO MDUN MA..TIBETAN MARK RGYA GRAM SHAD -0F13..0F17 ; Grapheme_Base # So [5] TIBETAN MARK CARET -DZUD RTAGS ME LONG CAN..TIBETAN ASTROLOGICAL SIGN SGRA GCAN -CHAR RTAGS +0F13 ; Grapheme_Base # So TIBETAN MARK CARET -DZUD RTAGS ME LONG CAN +0F14 ; Grapheme_Base # Po TIBETAN MARK GTER TSHEG +0F15..0F17 ; Grapheme_Base # So [3] TIBETAN LOGOTYPE SIGN CHAD RTAGS..TIBETAN ASTROLOGICAL SIGN SGRA GCAN -CHAR RTAGS 0F1A..0F1F ; Grapheme_Base # So [6] TIBETAN SIGN RDEL DKAR GCIG..TIBETAN SIGN RDEL DKAR RDEL NAG 0F20..0F29 ; Grapheme_Base # Nd [10] TIBETAN DIGIT ZERO..TIBETAN DIGIT NINE 0F2A..0F33 ; Grapheme_Base # No [10] TIBETAN DIGIT HALF ONE..TIBETAN DIGIT HALF ZERO @@ -8540,10 +9019,12 @@ E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELE 109A..109C ; Grapheme_Base # Mc [3] MYANMAR SIGN KHAMTI TONE-1..MYANMAR VOWEL SIGN AITON A 109E..109F ; Grapheme_Base # So [2] MYANMAR SYMBOL SHAN ONE..MYANMAR SYMBOL SHAN EXCLAMATION 10A0..10C5 ; Grapheme_Base # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; Grapheme_Base # L& GEORGIAN CAPITAL LETTER YN +10CD ; Grapheme_Base # L& GEORGIAN CAPITAL LETTER AEN 10D0..10FA ; Grapheme_Base # Lo [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN 10FB ; Grapheme_Base # Po GEORGIAN PARAGRAPH SEPARATOR 10FC ; Grapheme_Base # Lm MODIFIER LETTER GEORGIAN NAR -1100..1248 ; Grapheme_Base # Lo [329] HANGUL CHOSEONG KIYEOK..ETHIOPIC SYLLABLE QWA +10FD..1248 ; Grapheme_Base # Lo [332] GEORGIAN LETTER AEN..ETHIOPIC SYLLABLE QWA 124A..124D ; Grapheme_Base # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE 1250..1256 ; Grapheme_Base # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO 1258 ; Grapheme_Base # Lo ETHIOPIC SYLLABLE QHWA @@ -8559,8 +9040,7 @@ E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION 
SELECTOR-17..VARIATION SELE 12D8..1310 ; Grapheme_Base # Lo [57] ETHIOPIC SYLLABLE ZA..ETHIOPIC SYLLABLE GWA 1312..1315 ; Grapheme_Base # Lo [4] ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE 1318..135A ; Grapheme_Base # Lo [67] ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA -1360 ; Grapheme_Base # So ETHIOPIC SECTION MARK -1361..1368 ; Grapheme_Base # Po [8] ETHIOPIC WORDSPACE..ETHIOPIC PARAGRAPH SEPARATOR +1360..1368 ; Grapheme_Base # Po [9] ETHIOPIC SECTION MARK..ETHIOPIC PARAGRAPH SEPARATOR 1369..137C ; Grapheme_Base # No [20] ETHIOPIC DIGIT ONE..ETHIOPIC NUMBER TEN THOUSAND 1380..138F ; Grapheme_Base # Lo [16] ETHIOPIC SYLLABLE SEBATBEIT MWA..ETHIOPIC SYLLABLE PWE 1390..1399 ; Grapheme_Base # So [10] ETHIOPIC TONAL MARK YIZET..ETHIOPIC TONAL MARK KURT @@ -8652,9 +9132,10 @@ E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELE 1BA1 ; Grapheme_Base # Mc SUNDANESE CONSONANT SIGN PAMINGKAL 1BA6..1BA7 ; Grapheme_Base # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG 1BAA ; Grapheme_Base # Mc SUNDANESE SIGN PAMAAEH +1BAC..1BAD ; Grapheme_Base # Mc [2] SUNDANESE CONSONANT SIGN PASANGAN MA..SUNDANESE CONSONANT SIGN PASANGAN WA 1BAE..1BAF ; Grapheme_Base # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA 1BB0..1BB9 ; Grapheme_Base # Nd [10] SUNDANESE DIGIT ZERO..SUNDANESE DIGIT NINE -1BC0..1BE5 ; Grapheme_Base # Lo [38] BATAK LETTER A..BATAK LETTER U +1BBA..1BE5 ; Grapheme_Base # Lo [44] SUNDANESE AVAGRAHA..BATAK LETTER U 1BE7 ; Grapheme_Base # Mc BATAK VOWEL SIGN E 1BEA..1BEC ; Grapheme_Base # Mc [3] BATAK VOWEL SIGN I..BATAK VOWEL SIGN O 1BEE ; Grapheme_Base # Mc BATAK VOWEL SIGN U @@ -8670,14 +9151,16 @@ E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELE 1C5A..1C77 ; Grapheme_Base # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH 1C78..1C7D ; Grapheme_Base # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD 1C7E..1C7F ; Grapheme_Base # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE 
MUCAAD +1CC0..1CC7 ; Grapheme_Base # Po [8] SUNDANESE PUNCTUATION BINDU SURYA..SUNDANESE PUNCTUATION BINDU BA SATANGA 1CD3 ; Grapheme_Base # Po VEDIC SIGN NIHSHVASA 1CE1 ; Grapheme_Base # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA 1CE9..1CEC ; Grapheme_Base # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL 1CEE..1CF1 ; Grapheme_Base # Lo [4] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ANUSVARA UBHAYATO MUKHA -1CF2 ; Grapheme_Base # Mc VEDIC SIGN ARDHAVISARGA +1CF2..1CF3 ; Grapheme_Base # Mc [2] VEDIC SIGN ARDHAVISARGA..VEDIC SIGN ROTATED ARDHAVISARGA +1CF5..1CF6 ; Grapheme_Base # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA 1D00..1D2B ; Grapheme_Base # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL -1D2C..1D61 ; Grapheme_Base # Lm [54] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI -1D62..1D77 ; Grapheme_Base # L& [22] LATIN SUBSCRIPT SMALL LETTER I..LATIN SMALL LETTER TURNED G +1D2C..1D6A ; Grapheme_Base # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1D6B..1D77 ; Grapheme_Base # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G 1D78 ; Grapheme_Base # Lm MODIFIER LETTER CYRILLIC EN 1D79..1D9A ; Grapheme_Base # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK 1D9B..1DBF ; Grapheme_Base # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA @@ -8850,9 +9333,7 @@ E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELE 27C0..27C4 ; Grapheme_Base # Sm [5] THREE DIMENSIONAL ANGLE..OPEN SUPERSET 27C5 ; Grapheme_Base # Ps LEFT S-SHAPED BAG DELIMITER 27C6 ; Grapheme_Base # Pe RIGHT S-SHAPED BAG DELIMITER -27C7..27CA ; Grapheme_Base # Sm [4] OR WITH DOT INSIDE..VERTICAL BAR WITH HORIZONTAL STROKE -27CC ; Grapheme_Base # Sm LONG DIVISION -27CE..27E5 ; Grapheme_Base # Sm [24] SQUARED LOGICAL AND..WHITE SQUARE WITH RIGHTWARDS TICK +27C7..27E5 ; Grapheme_Base # Sm [31] OR WITH DOT INSIDE..WHITE 
SQUARE WITH RIGHTWARDS TICK 27E6 ; Grapheme_Base # Ps MATHEMATICAL LEFT WHITE SQUARE BRACKET 27E7 ; Grapheme_Base # Pe MATHEMATICAL RIGHT WHITE SQUARE BRACKET 27E8 ; Grapheme_Base # Ps MATHEMATICAL LEFT ANGLE BRACKET @@ -8904,16 +9385,19 @@ E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELE 2B50..2B59 ; Grapheme_Base # So [10] WHITE MEDIUM STAR..HEAVY CIRCLED SALTIRE 2C00..2C2E ; Grapheme_Base # L& [47] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE 2C30..2C5E ; Grapheme_Base # L& [47] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER LATINATE MYSLITE -2C60..2C7C ; Grapheme_Base # L& [29] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN SUBSCRIPT SMALL LETTER J -2C7D ; Grapheme_Base # Lm MODIFIER LETTER CAPITAL V +2C60..2C7B ; Grapheme_Base # L& [28] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN LETTER SMALL CAPITAL TURNED E +2C7C..2C7D ; Grapheme_Base # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V 2C7E..2CE4 ; Grapheme_Base # L& [103] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SYMBOL KAI 2CE5..2CEA ; Grapheme_Base # So [6] COPTIC SYMBOL MI RO..COPTIC SYMBOL SHIMA SIMA 2CEB..2CEE ; Grapheme_Base # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CF2..2CF3 ; Grapheme_Base # L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI 2CF9..2CFC ; Grapheme_Base # Po [4] COPTIC OLD NUBIAN FULL STOP..COPTIC OLD NUBIAN VERSE DIVIDER 2CFD ; Grapheme_Base # No COPTIC FRACTION ONE HALF 2CFE..2CFF ; Grapheme_Base # Po [2] COPTIC FULL STOP..COPTIC MORPHOLOGICAL DIVIDER 2D00..2D25 ; Grapheme_Base # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE -2D30..2D65 ; Grapheme_Base # Lo [54] TIFINAGH LETTER YA..TIFINAGH LETTER YAZZ +2D27 ; Grapheme_Base # L& GEORGIAN SMALL LETTER YN +2D2D ; Grapheme_Base # L& GEORGIAN SMALL LETTER AEN +2D30..2D67 ; Grapheme_Base # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO 2D6F ; Grapheme_Base # Lm 
TIFINAGH MODIFIER LETTER LABIALIZATION MARK 2D70 ; Grapheme_Base # Po TIFINAGH SEPARATOR MARK 2D80..2D96 ; Grapheme_Base # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE @@ -8956,7 +9440,8 @@ E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELE 2E29 ; Grapheme_Base # Pe RIGHT DOUBLE PARENTHESIS 2E2A..2E2E ; Grapheme_Base # Po [5] TWO DOTS OVER ONE DOT PUNCTUATION..REVERSED QUESTION MARK 2E2F ; Grapheme_Base # Lm VERTICAL TILDE -2E30..2E31 ; Grapheme_Base # Po [2] RING POINT..WORD SEPARATOR MIDDLE DOT +2E30..2E39 ; Grapheme_Base # Po [10] RING POINT..TOP HALF SECTION SIGN +2E3A..2E3B ; Grapheme_Base # Pd [2] TWO-EM DASH..THREE-EM DASH 2E80..2E99 ; Grapheme_Base # So [26] CJK RADICAL REPEAT..CJK RADICAL RAP 2E9B..2EF3 ; Grapheme_Base # So [89] CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED TURTLE 2F00..2FD5 ; Grapheme_Base # So [214] KANGXI RADICAL ONE..KANGXI RADICAL FLUTE @@ -9018,7 +9503,9 @@ E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELE 31F0..31FF ; Grapheme_Base # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO 3200..321E ; Grapheme_Base # So [31] PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED KOREAN CHARACTER O HU 3220..3229 ; Grapheme_Base # No [10] PARENTHESIZED IDEOGRAPH ONE..PARENTHESIZED IDEOGRAPH TEN -322A..3250 ; Grapheme_Base # So [39] PARENTHESIZED IDEOGRAPH MOON..PARTNERSHIP SIGN +322A..3247 ; Grapheme_Base # So [30] PARENTHESIZED IDEOGRAPH MOON..CIRCLED IDEOGRAPH KOTO +3248..324F ; Grapheme_Base # No [8] CIRCLED NUMBER TEN ON BLACK SQUARE..CIRCLED NUMBER EIGHTY ON BLACK SQUARE +3250 ; Grapheme_Base # So PARTNERSHIP SIGN 3251..325F ; Grapheme_Base # No [15] CIRCLED NUMBER TWENTY ONE..CIRCLED NUMBER THIRTY FIVE 3260..327F ; Grapheme_Base # So [32] CIRCLED HANGUL KIYEOK..KOREAN STANDARD SYMBOL 3280..3289 ; Grapheme_Base # No [10] CIRCLED IDEOGRAPH ONE..CIRCLED IDEOGRAPH TEN @@ -9028,7 +9515,7 @@ E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELE 
3300..33FF ; Grapheme_Base # So [256] SQUARE APAATO..SQUARE GAL 3400..4DB5 ; Grapheme_Base # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5 4DC0..4DFF ; Grapheme_Base # So [64] HEXAGRAM FOR THE CREATIVE HEAVEN..HEXAGRAM FOR BEFORE COMPLETION -4E00..9FCB ; Grapheme_Base # Lo [20940] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCB +4E00..9FCC ; Grapheme_Base # Lo [20941] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCC A000..A014 ; Grapheme_Base # Lo [21] YI SYLLABLE IT..YI SYLLABLE E A015 ; Grapheme_Base # Lm YI SYLLABLE WU A016..A48C ; Grapheme_Base # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR @@ -9060,8 +9547,9 @@ A771..A787 ; Grapheme_Base # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LET A788 ; Grapheme_Base # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A789..A78A ; Grapheme_Base # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN A78B..A78E ; Grapheme_Base # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT -A790..A791 ; Grapheme_Base # L& [2] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER N WITH DESCENDER -A7A0..A7A9 ; Grapheme_Base # L& [10] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN SMALL LETTER S WITH OBLIQUE STROKE +A790..A793 ; Grapheme_Base # L& [4] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER C WITH BAR +A7A0..A7AA ; Grapheme_Base # L& [11] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN CAPITAL LETTER H WITH HOOK +A7F8..A7F9 ; Grapheme_Base # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE A7FA ; Grapheme_Base # L& LATIN LETTER SMALL CAPITAL TURNED M A7FB..A801 ; Grapheme_Base # Lo [7] LATIN EPIGRAPHIC LETTER REVERSED F..SYLOTI NAGRI LETTER I A803..A805 ; Grapheme_Base # Lo [3] SYLOTI NAGRI LETTER U..SYLOTI NAGRI LETTER O @@ -9123,6 +9611,13 @@ AAC2 ; Grapheme_Base # Lo TAI VIET TONE MAI SONG AADB..AADC ; Grapheme_Base # Lo [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG AADD ; Grapheme_Base # Lm TAI VIET 
SYMBOL SAM AADE..AADF ; Grapheme_Base # Po [2] TAI VIET SYMBOL HO HOI..TAI VIET SYMBOL KOI KOI +AAE0..AAEA ; Grapheme_Base # Lo [11] MEETEI MAYEK LETTER E..MEETEI MAYEK LETTER SSA +AAEB ; Grapheme_Base # Mc MEETEI MAYEK VOWEL SIGN II +AAEE..AAEF ; Grapheme_Base # Mc [2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU +AAF0..AAF1 ; Grapheme_Base # Po [2] MEETEI MAYEK CHEIKHAN..MEETEI MAYEK AHANG KHUDAM +AAF2 ; Grapheme_Base # Lo MEETEI MAYEK ANJI +AAF3..AAF4 ; Grapheme_Base # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK +AAF5 ; Grapheme_Base # Mc MEETEI MAYEK VOWEL SIGN VISARGA AB01..AB06 ; Grapheme_Base # Lo [6] ETHIOPIC SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO AB09..AB0E ; Grapheme_Base # Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO AB11..AB16 ; Grapheme_Base # Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO @@ -9138,8 +9633,7 @@ ABF0..ABF9 ; Grapheme_Base # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK D AC00..D7A3 ; Grapheme_Base # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH D7B0..D7C6 ; Grapheme_Base # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E D7CB..D7FB ; Grapheme_Base # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH -F900..FA2D ; Grapheme_Base # Lo [302] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA2D -FA30..FA6D ; Grapheme_Base # Lo [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D +F900..FA6D ; Grapheme_Base # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D FA70..FAD9 ; Grapheme_Base # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 FB00..FB06 ; Grapheme_Base # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST FB13..FB17 ; Grapheme_Base # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH @@ -9263,8 +9757,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 1003F..1004D ; Grapheme_Base # Lo [15] LINEAR B 
SYLLABLE B020 ZO..LINEAR B SYLLABLE B091 TWO 10050..1005D ; Grapheme_Base # Lo [14] LINEAR B SYMBOL B018..LINEAR B SYMBOL B089 10080..100FA ; Grapheme_Base # Lo [123] LINEAR B IDEOGRAM B100 MAN..LINEAR B IDEOGRAM VESSEL B305 -10100..10101 ; Grapheme_Base # Po [2] AEGEAN WORD SEPARATOR LINE..AEGEAN WORD SEPARATOR DOT -10102 ; Grapheme_Base # So AEGEAN CHECK MARK +10100..10102 ; Grapheme_Base # Po [3] AEGEAN WORD SEPARATOR LINE..AEGEAN CHECK MARK 10107..10133 ; Grapheme_Base # No [45] AEGEAN NUMBER ONE..AEGEAN NUMBER NINETY THOUSAND 10137..1013F ; Grapheme_Base # So [9] AEGEAN WEIGHT BASE UNIT..AEGEAN MEASURE THIRD SUBUNIT 10140..10174 ; Grapheme_Base # Nl [53] GREEK ACROPHONIC ATTIC ONE QUARTER..GREEK ACROPHONIC STRATIAN FIFTY MNAS @@ -9303,6 +9796,8 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 1091F ; Grapheme_Base # Po PHOENICIAN WORD SEPARATOR 10920..10939 ; Grapheme_Base # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C 1093F ; Grapheme_Base # Po LYDIAN TRIANGULAR MARK +10980..109B7 ; Grapheme_Base # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA +109BE..109BF ; Grapheme_Base # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN 10A00 ; Grapheme_Base # Lo KHAROSHTHI LETTER A 10A10..10A13 ; Grapheme_Base # Lo [4] KHAROSHTHI LETTER KA..KHAROSHTHI LETTER GHA 10A15..10A17 ; Grapheme_Base # Lo [3] KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA @@ -9332,11 +9827,33 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 110B7..110B8 ; Grapheme_Base # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU 110BB..110BC ; Grapheme_Base # Po [2] KAITHI ABBREVIATION SIGN..KAITHI ENUMERATION SIGN 110BE..110C1 ; Grapheme_Base # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA +110D0..110E8 ; Grapheme_Base # Lo [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE +110F0..110F9 ; Grapheme_Base # Nd [10] SORA SOMPENG DIGIT ZERO..SORA SOMPENG DIGIT NINE +11103..11126 ; Grapheme_Base # Lo [36] CHAKMA 
LETTER AA..CHAKMA LETTER HAA +1112C ; Grapheme_Base # Mc CHAKMA VOWEL SIGN E +11136..1113F ; Grapheme_Base # Nd [10] CHAKMA DIGIT ZERO..CHAKMA DIGIT NINE +11140..11143 ; Grapheme_Base # Po [4] CHAKMA SECTION MARK..CHAKMA QUESTION MARK +11182 ; Grapheme_Base # Mc SHARADA SIGN VISARGA +11183..111B2 ; Grapheme_Base # Lo [48] SHARADA LETTER A..SHARADA LETTER HA +111B3..111B5 ; Grapheme_Base # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II +111BF..111C0 ; Grapheme_Base # Mc [2] SHARADA VOWEL SIGN AU..SHARADA SIGN VIRAMA +111C1..111C4 ; Grapheme_Base # Lo [4] SHARADA SIGN AVAGRAHA..SHARADA OM +111C5..111C8 ; Grapheme_Base # Po [4] SHARADA DANDA..SHARADA SEPARATOR +111D0..111D9 ; Grapheme_Base # Nd [10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE +11680..116AA ; Grapheme_Base # Lo [43] TAKRI LETTER A..TAKRI LETTER RRA +116AC ; Grapheme_Base # Mc TAKRI SIGN VISARGA +116AE..116AF ; Grapheme_Base # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II +116B6 ; Grapheme_Base # Mc TAKRI SIGN VIRAMA +116C0..116C9 ; Grapheme_Base # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE 12000..1236E ; Grapheme_Base # Lo [879] CUNEIFORM SIGN A..CUNEIFORM SIGN ZUM 12400..12462 ; Grapheme_Base # Nl [99] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN OLD ASSYRIAN ONE QUARTER 12470..12473 ; Grapheme_Base # Po [4] CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER..CUNEIFORM PUNCTUATION SIGN DIAGONAL TRICOLON 13000..1342E ; Grapheme_Base # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032 16800..16A38 ; Grapheme_Base # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ +16F00..16F44 ; Grapheme_Base # Lo [69] MIAO LETTER PA..MIAO LETTER HHA +16F50 ; Grapheme_Base # Lo MIAO LETTER NASALIZATION +16F51..16F7E ; Grapheme_Base # Mc [46] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN NG +16F93..16F9F ; Grapheme_Base # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 1B000..1B001 ; Grapheme_Base # Lo [2] KATAKANA LETTER ARCHAIC E..HIRAGANA LETTER ARCHAIC YE 1D000..1D0F5 ; 
Grapheme_Base # So [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO 1D100..1D126 ; Grapheme_Base # So [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2 @@ -9392,6 +9909,40 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 1D7C3 ; Grapheme_Base # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL 1D7C4..1D7CB ; Grapheme_Base # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA 1D7CE..1D7FF ; Grapheme_Base # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE +1EE00..1EE03 ; Grapheme_Base # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; Grapheme_Base # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; Grapheme_Base # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; Grapheme_Base # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; Grapheme_Base # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; Grapheme_Base # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; Grapheme_Base # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; Grapheme_Base # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; Grapheme_Base # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; Grapheme_Base # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; Grapheme_Base # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; Grapheme_Base # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; Grapheme_Base # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; Grapheme_Base # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; Grapheme_Base # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; Grapheme_Base # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; Grapheme_Base # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; Grapheme_Base # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; Grapheme_Base # Lo 
ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; Grapheme_Base # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; Grapheme_Base # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; Grapheme_Base # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; Grapheme_Base # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; Grapheme_Base # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; Grapheme_Base # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; Grapheme_Base # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; Grapheme_Base # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; Grapheme_Base # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; Grapheme_Base # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; Grapheme_Base # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; Grapheme_Base # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; Grapheme_Base # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; Grapheme_Base # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN +1EEF0..1EEF1 ; Grapheme_Base # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL 1F000..1F02B ; Grapheme_Base # So [44] MAHJONG TILE EAST WIND..MAHJONG TILE BACK 1F030..1F093 ; Grapheme_Base # So [100] DOMINO TILE HORIZONTAL BACK..DOMINO TILE VERTICAL-06-06 1F0A0..1F0AE ; Grapheme_Base # So [15] PLAYING CARD BACK..PLAYING CARD KING OF SPADES @@ -9400,7 +9951,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 1F0D1..1F0DF ; Grapheme_Base # So [15] PLAYING CARD ACE OF CLUBS..PLAYING CARD WHITE 
JOKER 1F100..1F10A ; Grapheme_Base # No [11] DIGIT ZERO FULL STOP..DIGIT NINE COMMA 1F110..1F12E ; Grapheme_Base # So [31] PARENTHESIZED LATIN CAPITAL LETTER A..CIRCLED WZ -1F130..1F169 ; Grapheme_Base # So [58] SQUARED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z +1F130..1F16B ; Grapheme_Base # So [60] SQUARED LATIN CAPITAL LETTER A..RAISED MD SIGN 1F170..1F19A ; Grapheme_Base # So [43] NEGATIVE SQUARED LATIN CAPITAL LETTER A..SQUARED VS 1F1E6..1F202 ; Grapheme_Base # So [29] REGIONAL INDICATOR SYMBOL LETTER A..SQUARED KATAKANA SA 1F210..1F23A ; Grapheme_Base # So [43] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-55B6 @@ -9418,19 +9969,9 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 1F442..1F4F7 ; Grapheme_Base # So [182] EAR..CAMERA 1F4F9..1F4FC ; Grapheme_Base # So [4] VIDEO CAMERA..VIDEOCASSETTE 1F500..1F53D ; Grapheme_Base # So [62] TWISTED RIGHTWARDS ARROWS..DOWN-POINTING SMALL RED TRIANGLE +1F540..1F543 ; Grapheme_Base # So [4] CIRCLED CROSS POMMEE..NOTCHED LEFT SEMICIRCLE WITH THREE DOTS 1F550..1F567 ; Grapheme_Base # So [24] CLOCK FACE ONE OCLOCK..CLOCK FACE TWELVE-THIRTY -1F5FB..1F5FF ; Grapheme_Base # So [5] MOUNT FUJI..MOYAI -1F601..1F610 ; Grapheme_Base # So [16] GRINNING FACE WITH SMILING EYES..NEUTRAL FACE -1F612..1F614 ; Grapheme_Base # So [3] UNAMUSED FACE..PENSIVE FACE -1F616 ; Grapheme_Base # So CONFOUNDED FACE -1F618 ; Grapheme_Base # So FACE THROWING A KISS -1F61A ; Grapheme_Base # So KISSING FACE WITH CLOSED EYES -1F61C..1F61E ; Grapheme_Base # So [3] FACE WITH STUCK-OUT TONGUE AND WINKING EYE..DISAPPOINTED FACE -1F620..1F625 ; Grapheme_Base # So [6] ANGRY FACE..DISAPPOINTED BUT RELIEVED FACE -1F628..1F62B ; Grapheme_Base # So [4] FEARFUL FACE..TIRED FACE -1F62D ; Grapheme_Base # So LOUDLY CRYING FACE -1F630..1F633 ; Grapheme_Base # So [4] FACE WITH OPEN MOUTH AND COLD SWEAT..FLUSHED FACE -1F635..1F640 ; Grapheme_Base # So [12] DIZZY FACE..WEARY CAT FACE +1F5FB..1F640 ; 
Grapheme_Base # So [70] MOUNT FUJI..WEARY CAT FACE 1F645..1F64F ; Grapheme_Base # So [11] FACE WITH NO GOOD GESTURE..PERSON WITH FOLDED HANDS 1F680..1F6C5 ; Grapheme_Base # So [70] ROCKET..LEFT LUGGAGE 1F700..1F773 ; Grapheme_Base # So [116] ALCHEMICAL SYMBOL FOR QUINTESSENCE..ALCHEMICAL SYMBOL FOR HALF OUNCE @@ -9439,7 +9980,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 2B740..2B81D ; Grapheme_Base # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D 2F800..2FA1D ; Grapheme_Base # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D -# Total code points: 108010 +# Total code points: 108660 # ================================================ @@ -9466,17 +10007,22 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 1A60 ; Grapheme_Link # Mn TAI THAM SIGN SAKOT 1B44 ; Grapheme_Link # Mc BALINESE ADEG ADEG 1BAA ; Grapheme_Link # Mc SUNDANESE SIGN PAMAAEH +1BAB ; Grapheme_Link # Mn SUNDANESE SIGN VIRAMA 1BF2..1BF3 ; Grapheme_Link # Mc [2] BATAK PANGOLAT..BATAK PANONGONAN 2D7F ; Grapheme_Link # Mn TIFINAGH CONSONANT JOINER A806 ; Grapheme_Link # Mn SYLOTI NAGRI SIGN HASANTA A8C4 ; Grapheme_Link # Mn SAURASHTRA SIGN VIRAMA A953 ; Grapheme_Link # Mc REJANG VIRAMA A9C0 ; Grapheme_Link # Mc JAVANESE PANGKON +AAF6 ; Grapheme_Link # Mn MEETEI MAYEK VIRAMA ABED ; Grapheme_Link # Mn MEETEI MAYEK APUN IYEK 10A3F ; Grapheme_Link # Mn KHAROSHTHI VIRAMA 11046 ; Grapheme_Link # Mn BRAHMI VIRAMA 110B9 ; Grapheme_Link # Mn KAITHI SIGN VIRAMA +11133..11134 ; Grapheme_Link # Mn [2] CHAKMA VIRAMA..CHAKMA MAAYYAA +111C0 ; Grapheme_Link # Mc SHARADA SIGN VIRAMA +116B6 ; Grapheme_Link # Mc TAKRI SIGN VIRAMA -# Total code points: 31 +# Total code points: 37 # EOF diff --git a/lib/unicore/DNormalizationProps.txt b/lib/unicore/DNormalizationProps.txt index e67276d090..2d71747767 100644 --- a/lib/unicore/DNormalizationProps.txt +++ b/lib/unicore/DNormalizationProps.txt @@ -1,8 +1,8 @@ -# 
DerivedNormalizationProps-6.0.0.txt -# Date: 2010-05-20, 15:14:12 GMT [MD] +# DerivedNormalizationProps-6.1.0.txt +# Date: 2011-07-26, 04:18:07 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ @@ -131,6 +131,7 @@ 33DD ; FC_NFKC; 0077 0062 # So SQUARE WB 33DE ; FC_NFKC; 0076 2215 006D # So SQUARE V OVER M 33DF ; FC_NFKC; 0061 2215 006D # So SQUARE A OVER M +A7F8 ; FC_NFKC; 0127 # Lm MODIFIER LETTER CAPITAL H WITH STROKE 1D400 ; FC_NFKC; 0061 # L& MATHEMATICAL BOLD CAPITAL A 1D401 ; FC_NFKC; 0062 # L& MATHEMATICAL BOLD CAPITAL B 1D402 ; FC_NFKC; 0063 # L& MATHEMATICAL BOLD CAPITAL C @@ -643,9 +644,11 @@ 1F14D ; FC_NFKC; 0073 0073 # So SQUARED SS 1F14E ; FC_NFKC; 0070 0070 0076 # So SQUARED PPV 1F14F ; FC_NFKC; 0077 0063 # So SQUARED WC +1F16A ; FC_NFKC; 006D 0063 # So RAISED MC SIGN +1F16B ; FC_NFKC; 006D 0064 # So RAISED MD SIGN 1F190 ; FC_NFKC; 0064 006A # So SQUARE DJ -# Total code points: 630 +# Total code points: 633 # ================================================ @@ -713,8 +716,7 @@ FA15..FA1E ; Full_Composition_Exclusion # Lo [10] CJK COMPATIBILITY IDEOGRAP FA20 ; Full_Composition_Exclusion # Lo CJK COMPATIBILITY IDEOGRAPH-FA20 FA22 ; Full_Composition_Exclusion # Lo CJK COMPATIBILITY IDEOGRAPH-FA22 FA25..FA26 ; Full_Composition_Exclusion # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA25..CJK COMPATIBILITY IDEOGRAPH-FA26 -FA2A..FA2D ; Full_Composition_Exclusion # Lo [4] CJK COMPATIBILITY IDEOGRAPH-FA2A..CJK COMPATIBILITY IDEOGRAPH-FA2D -FA30..FA6D ; Full_Composition_Exclusion # Lo [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D +FA2A..FA6D ; Full_Composition_Exclusion # Lo [68] CJK COMPATIBILITY IDEOGRAPH-FA2A..CJK COMPATIBILITY IDEOGRAPH-FA6D FA70..FAD9 ; Full_Composition_Exclusion # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY 
IDEOGRAPH-FAD9 FB1D ; Full_Composition_Exclusion # Lo HEBREW LETTER YOD WITH HIRIQ FB1F ; Full_Composition_Exclusion # Lo HEBREW LIGATURE YIDDISH YOD YOD PATAH @@ -728,7 +730,7 @@ FB46..FB4E ; Full_Composition_Exclusion # Lo [9] HEBREW LETTER TSADI WITH D 1D1BB..1D1C0 ; Full_Composition_Exclusion # So [6] MUSICAL SYMBOL MINIMA..MUSICAL SYMBOL FUSA BLACK 2F800..2FA1D ; Full_Composition_Exclusion # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D -# Total code points: 1118 +# Total code points: 1120 # ================================================ @@ -964,8 +966,7 @@ FA15..FA1E ; NFD_QC; N # Lo [10] CJK COMPATIBILITY IDEOGRAPH-FA15..CJK COMPA FA20 ; NFD_QC; N # Lo CJK COMPATIBILITY IDEOGRAPH-FA20 FA22 ; NFD_QC; N # Lo CJK COMPATIBILITY IDEOGRAPH-FA22 FA25..FA26 ; NFD_QC; N # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA25..CJK COMPATIBILITY IDEOGRAPH-FA26 -FA2A..FA2D ; NFD_QC; N # Lo [4] CJK COMPATIBILITY IDEOGRAPH-FA2A..CJK COMPATIBILITY IDEOGRAPH-FA2D -FA30..FA6D ; NFD_QC; N # Lo [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D +FA2A..FA6D ; NFD_QC; N # Lo [68] CJK COMPATIBILITY IDEOGRAPH-FA2A..CJK COMPATIBILITY IDEOGRAPH-FA6D FA70..FAD9 ; NFD_QC; N # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 FB1D ; NFD_QC; N # Lo HEBREW LETTER YOD WITH HIRIQ FB1F ; NFD_QC; N # Lo HEBREW LIGATURE YIDDISH YOD YOD PATAH @@ -978,11 +979,12 @@ FB46..FB4E ; NFD_QC; N # Lo [9] HEBREW LETTER TSADI WITH DAGESH..HEBREW LET 1109A ; NFD_QC; N # Lo KAITHI LETTER DDDHA 1109C ; NFD_QC; N # Lo KAITHI LETTER RHA 110AB ; NFD_QC; N # Lo KAITHI LETTER VA +1112E..1112F ; NFD_QC; N # Mn [2] CHAKMA VOWEL SIGN O..CHAKMA VOWEL SIGN AU 1D15E..1D164 ; NFD_QC; N # So [7] MUSICAL SYMBOL HALF NOTE..MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE 1D1BB..1D1C0 ; NFD_QC; N # So [6] MUSICAL SYMBOL MINIMA..MUSICAL SYMBOL FUSA BLACK 2F800..2FA1D ; NFD_QC; N # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D -# 
Total code points: 13221 +# Total code points: 13225 # ================================================ @@ -1058,8 +1060,7 @@ FA15..FA1E ; NFC_QC; N # Lo [10] CJK COMPATIBILITY IDEOGRAPH-FA15..CJK COMPA FA20 ; NFC_QC; N # Lo CJK COMPATIBILITY IDEOGRAPH-FA20 FA22 ; NFC_QC; N # Lo CJK COMPATIBILITY IDEOGRAPH-FA22 FA25..FA26 ; NFC_QC; N # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA25..CJK COMPATIBILITY IDEOGRAPH-FA26 -FA2A..FA2D ; NFC_QC; N # Lo [4] CJK COMPATIBILITY IDEOGRAPH-FA2A..CJK COMPATIBILITY IDEOGRAPH-FA2D -FA30..FA6D ; NFC_QC; N # Lo [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D +FA2A..FA6D ; NFC_QC; N # Lo [68] CJK COMPATIBILITY IDEOGRAPH-FA2A..CJK COMPATIBILITY IDEOGRAPH-FA6D FA70..FAD9 ; NFC_QC; N # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 FB1D ; NFC_QC; N # Lo HEBREW LETTER YOD WITH HIRIQ FB1F ; NFC_QC; N # Lo HEBREW LIGATURE YIDDISH YOD YOD PATAH @@ -1073,7 +1074,7 @@ FB46..FB4E ; NFC_QC; N # Lo [9] HEBREW LETTER TSADI WITH DAGESH..HEBREW LET 1D1BB..1D1C0 ; NFC_QC; N # So [6] MUSICAL SYMBOL MINIMA..MUSICAL SYMBOL FUSA BLACK 2F800..2FA1D ; NFC_QC; N # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D -# Total code points: 1118 +# Total code points: 1120 # ================================================ @@ -1114,8 +1115,9 @@ FB46..FB4E ; NFC_QC; N # Lo [9] HEBREW LETTER TSADI WITH DAGESH..HEBREW LET 1B35 ; NFC_QC; M # Mc BALINESE VOWEL SIGN TEDUNG 3099..309A ; NFC_QC; M # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK 110BA ; NFC_QC; M # Mn KAITHI SIGN NUKTA +11127 ; NFC_QC; M # Mn CHAKMA VOWEL SIGN A -# Total code points: 103 +# Total code points: 104 # ================================================ @@ -1132,14 +1134,14 @@ FB46..FB4E ; NFC_QC; N # Lo [9] HEBREW LETTER TSADI WITH DAGESH..HEBREW LET 00A0 ; NFKD_QC; N # Zs NO-BREAK SPACE 00A8 ; NFKD_QC; N # Sk DIAERESIS -00AA ; NFKD_QC; N # L& FEMININE ORDINAL 
INDICATOR +00AA ; NFKD_QC; N # Lo FEMININE ORDINAL INDICATOR 00AF ; NFKD_QC; N # Sk MACRON 00B2..00B3 ; NFKD_QC; N # No [2] SUPERSCRIPT TWO..SUPERSCRIPT THREE 00B4 ; NFKD_QC; N # Sk ACUTE ACCENT 00B5 ; NFKD_QC; N # L& MICRO SIGN 00B8 ; NFKD_QC; N # Sk CEDILLA 00B9 ; NFKD_QC; N # No SUPERSCRIPT ONE -00BA ; NFKD_QC; N # L& MASCULINE ORDINAL INDICATOR +00BA ; NFKD_QC; N # Lo MASCULINE ORDINAL INDICATOR 00BC..00BE ; NFKD_QC; N # No [3] VULGAR FRACTION ONE QUARTER..VULGAR FRACTION THREE QUARTERS 00C0..00C5 ; NFKD_QC; N # L& [6] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER A WITH RING ABOVE 00C7..00CF ; NFKD_QC; N # L& [9] LATIN CAPITAL LETTER C WITH CEDILLA..LATIN CAPITAL LETTER I WITH DIAERESIS @@ -1267,8 +1269,7 @@ FB46..FB4E ; NFC_QC; N # Lo [9] HEBREW LETTER TSADI WITH DAGESH..HEBREW LET 1D2C..1D2E ; NFKD_QC; N # Lm [3] MODIFIER LETTER CAPITAL A..MODIFIER LETTER CAPITAL B 1D30..1D3A ; NFKD_QC; N # Lm [11] MODIFIER LETTER CAPITAL D..MODIFIER LETTER CAPITAL N 1D3C..1D4D ; NFKD_QC; N # Lm [18] MODIFIER LETTER CAPITAL O..MODIFIER LETTER SMALL G -1D4F..1D61 ; NFKD_QC; N # Lm [19] MODIFIER LETTER SMALL K..MODIFIER LETTER SMALL CHI -1D62..1D6A ; NFKD_QC; N # L& [9] LATIN SUBSCRIPT SMALL LETTER I..GREEK SUBSCRIPT SMALL LETTER CHI +1D4F..1D6A ; NFKD_QC; N # Lm [28] MODIFIER LETTER SMALL K..GREEK SUBSCRIPT SMALL LETTER CHI 1D78 ; NFKD_QC; N # Lm MODIFIER LETTER CYRILLIC EN 1D9B..1DBF ; NFKD_QC; N # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA 1E00..1E9B ; NFKD_QC; N # L& [156] LATIN CAPITAL LETTER A WITH RING BELOW..LATIN SMALL LETTER LONG S WITH DOT ABOVE @@ -1383,8 +1384,7 @@ FB46..FB4E ; NFC_QC; N # Lo [9] HEBREW LETTER TSADI WITH DAGESH..HEBREW LET 2A0C ; NFKD_QC; N # Sm QUADRUPLE INTEGRAL OPERATOR 2A74..2A76 ; NFKD_QC; N # Sm [3] DOUBLE COLON EQUAL..THREE CONSECUTIVE EQUALS SIGNS 2ADC ; NFKD_QC; N # Sm FORKING -2C7C ; NFKD_QC; N # L& LATIN SUBSCRIPT SMALL LETTER J -2C7D ; NFKD_QC; N # Lm MODIFIER LETTER CAPITAL V +2C7C..2C7D ; 
NFKD_QC; N # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V 2D6F ; NFKD_QC; N # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK 2E9F ; NFKD_QC; N # So CJK RADICAL MOTHER 2EF3 ; NFKD_QC; N # So CJK RADICAL C-SIMPLIFIED TURTLE @@ -1455,6 +1455,7 @@ FB46..FB4E ; NFC_QC; N # Lo [9] HEBREW LETTER TSADI WITH DAGESH..HEBREW LET 32C0..32FE ; NFKD_QC; N # So [63] IDEOGRAPHIC TELEGRAPH SYMBOL FOR JANUARY..CIRCLED KATAKANA WO 3300..33FF ; NFKD_QC; N # So [256] SQUARE APAATO..SQUARE GAL A770 ; NFKD_QC; N # Lm MODIFIER LETTER US +A7F8..A7F9 ; NFKD_QC; N # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE AC00..D7A3 ; NFKD_QC; N # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH F900..FA0D ; NFKD_QC; N # Lo [270] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA0D FA10 ; NFKD_QC; N # Lo CJK COMPATIBILITY IDEOGRAPH-FA10 @@ -1463,8 +1464,7 @@ FA15..FA1E ; NFKD_QC; N # Lo [10] CJK COMPATIBILITY IDEOGRAPH-FA15..CJK COMP FA20 ; NFKD_QC; N # Lo CJK COMPATIBILITY IDEOGRAPH-FA20 FA22 ; NFKD_QC; N # Lo CJK COMPATIBILITY IDEOGRAPH-FA22 FA25..FA26 ; NFKD_QC; N # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA25..CJK COMPATIBILITY IDEOGRAPH-FA26 -FA2A..FA2D ; NFKD_QC; N # Lo [4] CJK COMPATIBILITY IDEOGRAPH-FA2A..CJK COMPATIBILITY IDEOGRAPH-FA2D -FA30..FA6D ; NFKD_QC; N # Lo [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D +FA2A..FA6D ; NFKD_QC; N # Lo [68] CJK COMPATIBILITY IDEOGRAPH-FA2A..CJK COMPATIBILITY IDEOGRAPH-FA6D FA70..FAD9 ; NFKD_QC; N # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 FB00..FB06 ; NFKD_QC; N # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST FB13..FB17 ; NFKD_QC; N # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH @@ -1580,6 +1580,7 @@ FFED..FFEE ; NFKD_QC; N # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CI 1109A ; NFKD_QC; N # Lo KAITHI LETTER DDDHA 1109C ; NFKD_QC; N # Lo KAITHI LETTER RHA 110AB ; NFKD_QC; N # Lo KAITHI 
LETTER VA +1112E..1112F ; NFKD_QC; N # Mn [2] CHAKMA VOWEL SIGN O..CHAKMA VOWEL SIGN AU 1D15E..1D164 ; NFKD_QC; N # So [7] MUSICAL SYMBOL HALF NOTE..MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE 1D1BB..1D1C0 ; NFKD_QC; N # So [6] MUSICAL SYMBOL MINIMA..MUSICAL SYMBOL FUSA BLACK 1D400..1D454 ; NFKD_QC; N # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G @@ -1623,9 +1624,43 @@ FFED..FFEE ; NFKD_QC; N # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CI 1D7C3 ; NFKD_QC; N # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL 1D7C4..1D7CB ; NFKD_QC; N # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA 1D7CE..1D7FF ; NFKD_QC; N # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE +1EE00..1EE03 ; NFKD_QC; N # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; NFKD_QC; N # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; NFKD_QC; N # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; NFKD_QC; N # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; NFKD_QC; N # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; NFKD_QC; N # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; NFKD_QC; N # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; NFKD_QC; N # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; NFKD_QC; N # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; NFKD_QC; N # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; NFKD_QC; N # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; NFKD_QC; N # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; NFKD_QC; N # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; NFKD_QC; N # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; NFKD_QC; N # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; NFKD_QC; N # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; NFKD_QC; N # Lo 
ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; NFKD_QC; N # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; NFKD_QC; N # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; NFKD_QC; N # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; NFKD_QC; N # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; NFKD_QC; N # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; NFKD_QC; N # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; NFKD_QC; N # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; NFKD_QC; N # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; NFKD_QC; N # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; NFKD_QC; N # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; NFKD_QC; N # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; NFKD_QC; N # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; NFKD_QC; N # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; NFKD_QC; N # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; NFKD_QC; N # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; NFKD_QC; N # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN 1F100..1F10A ; NFKD_QC; N # No [11] DIGIT ZERO FULL STOP..DIGIT NINE COMMA 1F110..1F12E ; NFKD_QC; N # So [31] PARENTHESIZED LATIN CAPITAL LETTER A..CIRCLED WZ 1F130..1F14F ; NFKD_QC; N # So [32] SQUARED LATIN CAPITAL LETTER A..SQUARED WC +1F16A..1F16B ; NFKD_QC; N # So [2] RAISED MC SIGN..RAISED MD SIGN 1F190 ; NFKD_QC; N # So SQUARE DJ 1F200..1F202 ; NFKD_QC; N # So [3] SQUARE HIRAGANA HOKA..SQUARED KATAKANA SA 1F210..1F23A ; NFKD_QC; N # So [43] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED 
IDEOGRAPH-55B6 @@ -1633,7 +1668,7 @@ FFED..FFEE ; NFKD_QC; N # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CI 1F250..1F251 ; NFKD_QC; N # So [2] CIRCLED IDEOGRAPH ADVANTAGE..CIRCLED IDEOGRAPH ACCEPT 2F800..2FA1D ; NFKD_QC; N # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D -# Total code points: 16731 +# Total code points: 16880 # ================================================ @@ -1650,14 +1685,14 @@ FFED..FFEE ; NFKD_QC; N # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CI 00A0 ; NFKC_QC; N # Zs NO-BREAK SPACE 00A8 ; NFKC_QC; N # Sk DIAERESIS -00AA ; NFKC_QC; N # L& FEMININE ORDINAL INDICATOR +00AA ; NFKC_QC; N # Lo FEMININE ORDINAL INDICATOR 00AF ; NFKC_QC; N # Sk MACRON 00B2..00B3 ; NFKC_QC; N # No [2] SUPERSCRIPT TWO..SUPERSCRIPT THREE 00B4 ; NFKC_QC; N # Sk ACUTE ACCENT 00B5 ; NFKC_QC; N # L& MICRO SIGN 00B8 ; NFKC_QC; N # Sk CEDILLA 00B9 ; NFKC_QC; N # No SUPERSCRIPT ONE -00BA ; NFKC_QC; N # L& MASCULINE ORDINAL INDICATOR +00BA ; NFKC_QC; N # Lo MASCULINE ORDINAL INDICATOR 00BC..00BE ; NFKC_QC; N # No [3] VULGAR FRACTION ONE QUARTER..VULGAR FRACTION THREE QUARTERS 0132..0133 ; NFKC_QC; N # L& [2] LATIN CAPITAL LIGATURE IJ..LATIN SMALL LIGATURE IJ 013F..0140 ; NFKC_QC; N # L& [2] LATIN CAPITAL LETTER L WITH MIDDLE DOT..LATIN SMALL LETTER L WITH MIDDLE DOT @@ -1712,8 +1747,7 @@ FFED..FFEE ; NFKD_QC; N # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CI 1D2C..1D2E ; NFKC_QC; N # Lm [3] MODIFIER LETTER CAPITAL A..MODIFIER LETTER CAPITAL B 1D30..1D3A ; NFKC_QC; N # Lm [11] MODIFIER LETTER CAPITAL D..MODIFIER LETTER CAPITAL N 1D3C..1D4D ; NFKC_QC; N # Lm [18] MODIFIER LETTER CAPITAL O..MODIFIER LETTER SMALL G -1D4F..1D61 ; NFKC_QC; N # Lm [19] MODIFIER LETTER SMALL K..MODIFIER LETTER SMALL CHI -1D62..1D6A ; NFKC_QC; N # L& [9] LATIN SUBSCRIPT SMALL LETTER I..GREEK SUBSCRIPT SMALL LETTER CHI +1D4F..1D6A ; NFKC_QC; N # Lm [28] MODIFIER LETTER SMALL K..GREEK SUBSCRIPT SMALL LETTER CHI 1D78 ; NFKC_QC; N # Lm MODIFIER LETTER CYRILLIC 
EN 1D9B..1DBF ; NFKC_QC; N # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA 1E9A..1E9B ; NFKC_QC; N # L& [2] LATIN SMALL LETTER A WITH RIGHT HALF RING..LATIN SMALL LETTER LONG S WITH DOT ABOVE @@ -1801,8 +1835,7 @@ FFED..FFEE ; NFKD_QC; N # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CI 2A0C ; NFKC_QC; N # Sm QUADRUPLE INTEGRAL OPERATOR 2A74..2A76 ; NFKC_QC; N # Sm [3] DOUBLE COLON EQUAL..THREE CONSECUTIVE EQUALS SIGNS 2ADC ; NFKC_QC; N # Sm FORKING -2C7C ; NFKC_QC; N # L& LATIN SUBSCRIPT SMALL LETTER J -2C7D ; NFKC_QC; N # Lm MODIFIER LETTER CAPITAL V +2C7C..2C7D ; NFKC_QC; N # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V 2D6F ; NFKC_QC; N # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK 2E9F ; NFKC_QC; N # So CJK RADICAL MOTHER 2EF3 ; NFKC_QC; N # So CJK RADICAL C-SIMPLIFIED TURTLE @@ -1828,6 +1861,7 @@ FFED..FFEE ; NFKD_QC; N # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CI 32C0..32FE ; NFKC_QC; N # So [63] IDEOGRAPHIC TELEGRAPH SYMBOL FOR JANUARY..CIRCLED KATAKANA WO 3300..33FF ; NFKC_QC; N # So [256] SQUARE APAATO..SQUARE GAL A770 ; NFKC_QC; N # Lm MODIFIER LETTER US +A7F8..A7F9 ; NFKC_QC; N # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE F900..FA0D ; NFKC_QC; N # Lo [270] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA0D FA10 ; NFKC_QC; N # Lo CJK COMPATIBILITY IDEOGRAPH-FA10 FA12 ; NFKC_QC; N # Lo CJK COMPATIBILITY IDEOGRAPH-FA12 @@ -1835,8 +1869,7 @@ FA15..FA1E ; NFKC_QC; N # Lo [10] CJK COMPATIBILITY IDEOGRAPH-FA15..CJK COMP FA20 ; NFKC_QC; N # Lo CJK COMPATIBILITY IDEOGRAPH-FA20 FA22 ; NFKC_QC; N # Lo CJK COMPATIBILITY IDEOGRAPH-FA22 FA25..FA26 ; NFKC_QC; N # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA25..CJK COMPATIBILITY IDEOGRAPH-FA26 -FA2A..FA2D ; NFKC_QC; N # Lo [4] CJK COMPATIBILITY IDEOGRAPH-FA2A..CJK COMPATIBILITY IDEOGRAPH-FA2D -FA30..FA6D ; NFKC_QC; N # Lo [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D +FA2A..FA6D ; NFKC_QC; N 
# Lo [68] CJK COMPATIBILITY IDEOGRAPH-FA2A..CJK COMPATIBILITY IDEOGRAPH-FA6D FA70..FAD9 ; NFKC_QC; N # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 FB00..FB06 ; NFKC_QC; N # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST FB13..FB17 ; NFKC_QC; N # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH @@ -1992,9 +2025,43 @@ FFED..FFEE ; NFKC_QC; N # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CI 1D7C3 ; NFKC_QC; N # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL 1D7C4..1D7CB ; NFKC_QC; N # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA 1D7CE..1D7FF ; NFKC_QC; N # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE +1EE00..1EE03 ; NFKC_QC; N # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; NFKC_QC; N # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; NFKC_QC; N # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; NFKC_QC; N # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; NFKC_QC; N # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; NFKC_QC; N # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; NFKC_QC; N # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; NFKC_QC; N # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; NFKC_QC; N # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; NFKC_QC; N # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; NFKC_QC; N # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; NFKC_QC; N # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; NFKC_QC; N # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; NFKC_QC; N # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; NFKC_QC; N # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; NFKC_QC; N # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; NFKC_QC; N # Lo ARABIC 
MATHEMATICAL TAILED KHAH +1EE59 ; NFKC_QC; N # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; NFKC_QC; N # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; NFKC_QC; N # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; NFKC_QC; N # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; NFKC_QC; N # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; NFKC_QC; N # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; NFKC_QC; N # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; NFKC_QC; N # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; NFKC_QC; N # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; NFKC_QC; N # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; NFKC_QC; N # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; NFKC_QC; N # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; NFKC_QC; N # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; NFKC_QC; N # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; NFKC_QC; N # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; NFKC_QC; N # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN 1F100..1F10A ; NFKC_QC; N # No [11] DIGIT ZERO FULL STOP..DIGIT NINE COMMA 1F110..1F12E ; NFKC_QC; N # So [31] PARENTHESIZED LATIN CAPITAL LETTER A..CIRCLED WZ 1F130..1F14F ; NFKC_QC; N # So [32] SQUARED LATIN CAPITAL LETTER A..SQUARED WC +1F16A..1F16B ; NFKC_QC; N # So [2] RAISED MC SIGN..RAISED MD SIGN 1F190 ; NFKC_QC; N # So SQUARE DJ 1F200..1F202 ; NFKC_QC; N # So [3] SQUARE HIRAGANA HOKA..SQUARED KATAKANA SA 1F210..1F23A ; NFKC_QC; N # So [43] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED 
IDEOGRAPH-55B6 @@ -2002,7 +2069,7 @@ FFED..FFEE ; NFKC_QC; N # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CI 1F250..1F251 ; NFKC_QC; N # So [2] CIRCLED IDEOGRAPH ADVANTAGE..CIRCLED IDEOGRAPH ACCEPT 2F800..2FA1D ; NFKC_QC; N # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D -# Total code points: 4640 +# Total code points: 4787 # ================================================ @@ -2043,8 +2110,9 @@ FFED..FFEE ; NFKC_QC; N # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CI 1B35 ; NFKC_QC; M # Mc BALINESE VOWEL SIGN TEDUNG 3099..309A ; NFKC_QC; M # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK 110BA ; NFKC_QC; M # Mn KAITHI SIGN NUKTA +11127 ; NFKC_QC; M # Mn CHAKMA VOWEL SIGN A -# Total code points: 103 +# Total code points: 104 # ================================================ @@ -2269,10 +2337,11 @@ FB46..FB4E ; Expands_On_NFD # Lo [9] HEBREW LETTER TSADI WITH DAGESH..HEBRE 1109A ; Expands_On_NFD # Lo KAITHI LETTER DDDHA 1109C ; Expands_On_NFD # Lo KAITHI LETTER RHA 110AB ; Expands_On_NFD # Lo KAITHI LETTER VA +1112E..1112F ; Expands_On_NFD # Mn [2] CHAKMA VOWEL SIGN O..CHAKMA VOWEL SIGN AU 1D15E..1D164 ; Expands_On_NFD # So [7] MUSICAL SYMBOL HALF NOTE..MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE 1D1BB..1D1C0 ; Expands_On_NFD # So [6] MUSICAL SYMBOL MINIMA..MUSICAL SYMBOL FUSA BLACK -# Total code points: 12206 +# Total code points: 12208 # ================================================ @@ -2617,18 +2686,20 @@ FFE3 ; Expands_On_NFKD # Sk FULLWIDTH MACRON 1109A ; Expands_On_NFKD # Lo KAITHI LETTER DDDHA 1109C ; Expands_On_NFKD # Lo KAITHI LETTER RHA 110AB ; Expands_On_NFKD # Lo KAITHI LETTER VA +1112E..1112F ; Expands_On_NFKD # Mn [2] CHAKMA VOWEL SIGN O..CHAKMA VOWEL SIGN AU 1D15E..1D164 ; Expands_On_NFKD # So [7] MUSICAL SYMBOL HALF NOTE..MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE 1D1BB..1D1C0 ; Expands_On_NFKD # So [6] MUSICAL SYMBOL MINIMA..MUSICAL SYMBOL FUSA 
BLACK 1F100..1F10A ; Expands_On_NFKD # No [11] DIGIT ZERO FULL STOP..DIGIT NINE COMMA 1F110..1F12A ; Expands_On_NFKD # So [27] PARENTHESIZED LATIN CAPITAL LETTER A..TORTOISE SHELL BRACKETED LATIN CAPITAL LETTER S 1F12D..1F12E ; Expands_On_NFKD # So [2] CIRCLED CD..CIRCLED WZ 1F14A..1F14F ; Expands_On_NFKD # So [6] SQUARED HV..SQUARED WC +1F16A..1F16B ; Expands_On_NFKD # So [2] RAISED MC SIGN..RAISED MD SIGN 1F190 ; Expands_On_NFKD # So SQUARE DJ 1F200..1F201 ; Expands_On_NFKD # So [2] SQUARE HIRAGANA HOKA..SQUARED KATAKANA KOKO 1F213 ; Expands_On_NFKD # So SQUARED KATAKANA DE 1F240..1F248 ; Expands_On_NFKD # So [9] TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557 -# Total code points: 13376 +# Total code points: 13380 # ================================================ @@ -2761,11 +2832,12 @@ FFE3 ; Expands_On_NFKC # Sk FULLWIDTH MACRON 1F110..1F12A ; Expands_On_NFKC # So [27] PARENTHESIZED LATIN CAPITAL LETTER A..TORTOISE SHELL BRACKETED LATIN CAPITAL LETTER S 1F12D..1F12E ; Expands_On_NFKC # So [2] CIRCLED CD..CIRCLED WZ 1F14A..1F14F ; Expands_On_NFKC # So [6] SQUARED HV..SQUARED WC +1F16A..1F16B ; Expands_On_NFKC # So [2] RAISED MC SIGN..RAISED MD SIGN 1F190 ; Expands_On_NFKC # So SQUARE DJ 1F200..1F201 ; Expands_On_NFKC # So [2] SQUARE HIRAGANA HOKA..SQUARED KATAKANA KOKO 1F240..1F248 ; Expands_On_NFKC # So [9] TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557 -# Total code points: 1233 +# Total code points: 1235 # ================================================ @@ -2810,7 +2882,7 @@ FFE3 ; Expands_On_NFKC # Sk FULLWIDTH MACRON 005A ; NFKC_CF; 007A # L& LATIN CAPITAL LETTER Z 00A0 ; NFKC_CF; 0020 # Zs NO-BREAK SPACE 00A8 ; NFKC_CF; 0020 0308 # Sk DIAERESIS -00AA ; NFKC_CF; 0061 # L& FEMININE ORDINAL INDICATOR +00AA ; NFKC_CF; 0061 # Lo FEMININE ORDINAL INDICATOR 00AD ; NFKC_CF; # Cf SOFT HYPHEN 00AF ; NFKC_CF; 0020 0304 # Sk MACRON 00B2 ; NFKC_CF; 0032 
# No SUPERSCRIPT TWO @@ -2819,7 +2891,7 @@ FFE3 ; Expands_On_NFKC # Sk FULLWIDTH MACRON 00B5 ; NFKC_CF; 03BC # L& MICRO SIGN 00B8 ; NFKC_CF; 0020 0327 # Sk CEDILLA 00B9 ; NFKC_CF; 0031 # No SUPERSCRIPT ONE -00BA ; NFKC_CF; 006F # L& MASCULINE ORDINAL INDICATOR +00BA ; NFKC_CF; 006F # Lo MASCULINE ORDINAL INDICATOR 00BC ; NFKC_CF; 0031 2044 0034 # No VULGAR FRACTION ONE QUARTER 00BD ; NFKC_CF; 0031 2044 0032 # No VULGAR FRACTION ONE HALF 00BE ; NFKC_CF; 0033 2044 0034 # No VULGAR FRACTION THREE QUARTERS @@ -3390,9 +3462,11 @@ FFE3 ; Expands_On_NFKC # Sk FULLWIDTH MACRON 10C3 ; NFKC_CF; 2D23 # L& GEORGIAN CAPITAL LETTER WE 10C4 ; NFKC_CF; 2D24 # L& GEORGIAN CAPITAL LETTER HAR 10C5 ; NFKC_CF; 2D25 # L& GEORGIAN CAPITAL LETTER HOE +10C7 ; NFKC_CF; 2D27 # L& GEORGIAN CAPITAL LETTER YN +10CD ; NFKC_CF; 2D2D # L& GEORGIAN CAPITAL LETTER AEN 10FC ; NFKC_CF; 10DC # Lm MODIFIER LETTER GEORGIAN NAR 115F..1160 ; NFKC_CF; # Lo [2] HANGUL CHOSEONG FILLER..HANGUL JUNGSEONG FILLER -17B4..17B5 ; NFKC_CF; # Cf [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA +17B4..17B5 ; NFKC_CF; # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA 180B..180D ; NFKC_CF; # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE 1D2C ; NFKC_CF; 0061 # Lm MODIFIER LETTER CAPITAL A 1D2D ; NFKC_CF; 00E6 # Lm MODIFIER LETTER CAPITAL AE @@ -3445,15 +3519,15 @@ FFE3 ; Expands_On_NFKC # Sk FULLWIDTH MACRON 1D5F ; NFKC_CF; 03B4 # Lm MODIFIER LETTER SMALL DELTA 1D60 ; NFKC_CF; 03C6 # Lm MODIFIER LETTER SMALL GREEK PHI 1D61 ; NFKC_CF; 03C7 # Lm MODIFIER LETTER SMALL CHI -1D62 ; NFKC_CF; 0069 # L& LATIN SUBSCRIPT SMALL LETTER I -1D63 ; NFKC_CF; 0072 # L& LATIN SUBSCRIPT SMALL LETTER R -1D64 ; NFKC_CF; 0075 # L& LATIN SUBSCRIPT SMALL LETTER U -1D65 ; NFKC_CF; 0076 # L& LATIN SUBSCRIPT SMALL LETTER V -1D66 ; NFKC_CF; 03B2 # L& GREEK SUBSCRIPT SMALL LETTER BETA -1D67 ; NFKC_CF; 03B3 # L& GREEK SUBSCRIPT SMALL LETTER GAMMA -1D68 ; NFKC_CF; 03C1 # L& GREEK SUBSCRIPT SMALL 
LETTER RHO -1D69 ; NFKC_CF; 03C6 # L& GREEK SUBSCRIPT SMALL LETTER PHI -1D6A ; NFKC_CF; 03C7 # L& GREEK SUBSCRIPT SMALL LETTER CHI +1D62 ; NFKC_CF; 0069 # Lm LATIN SUBSCRIPT SMALL LETTER I +1D63 ; NFKC_CF; 0072 # Lm LATIN SUBSCRIPT SMALL LETTER R +1D64 ; NFKC_CF; 0075 # Lm LATIN SUBSCRIPT SMALL LETTER U +1D65 ; NFKC_CF; 0076 # Lm LATIN SUBSCRIPT SMALL LETTER V +1D66 ; NFKC_CF; 03B2 # Lm GREEK SUBSCRIPT SMALL LETTER BETA +1D67 ; NFKC_CF; 03B3 # Lm GREEK SUBSCRIPT SMALL LETTER GAMMA +1D68 ; NFKC_CF; 03C1 # Lm GREEK SUBSCRIPT SMALL LETTER RHO +1D69 ; NFKC_CF; 03C6 # Lm GREEK SUBSCRIPT SMALL LETTER PHI +1D6A ; NFKC_CF; 03C7 # Lm GREEK SUBSCRIPT SMALL LETTER CHI 1D78 ; NFKC_CF; 043D # Lm MODIFIER LETTER CYRILLIC EN 1D9B ; NFKC_CF; 0252 # Lm MODIFIER LETTER SMALL TURNED ALPHA 1D9C ; NFKC_CF; 0063 # Lm MODIFIER LETTER SMALL C @@ -4148,7 +4222,7 @@ FFE3 ; Expands_On_NFKC # Sk FULLWIDTH MACRON 2C70 ; NFKC_CF; 0252 # L& LATIN CAPITAL LETTER TURNED ALPHA 2C72 ; NFKC_CF; 2C73 # L& LATIN CAPITAL LETTER W WITH HOOK 2C75 ; NFKC_CF; 2C76 # L& LATIN CAPITAL LETTER HALF H -2C7C ; NFKC_CF; 006A # L& LATIN SUBSCRIPT SMALL LETTER J +2C7C ; NFKC_CF; 006A # Lm LATIN SUBSCRIPT SMALL LETTER J 2C7D ; NFKC_CF; 0076 # Lm MODIFIER LETTER CAPITAL V 2C7E ; NFKC_CF; 023F # L& LATIN CAPITAL LETTER S WITH SWASH TAIL 2C7F ; NFKC_CF; 0240 # L& LATIN CAPITAL LETTER Z WITH SWASH TAIL @@ -4204,6 +4278,7 @@ FFE3 ; Expands_On_NFKC # Sk FULLWIDTH MACRON 2CE2 ; NFKC_CF; 2CE3 # L& COPTIC CAPITAL LETTER OLD NUBIAN WAU 2CEB ; NFKC_CF; 2CEC # L& COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI 2CED ; NFKC_CF; 2CEE # L& COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA +2CF2 ; NFKC_CF; 2CF3 # L& COPTIC CAPITAL LETTER BOHAIRIC KHEI 2D6F ; NFKC_CF; 2D61 # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK 2E9F ; NFKC_CF; 6BCD # So CJK RADICAL MOTHER 2EF3 ; NFKC_CF; 9F9F # So CJK RADICAL C-SIMPLIFIED TURTLE @@ -5124,11 +5199,15 @@ A786 ; NFKC_CF; A787 # L& LATIN CAPITAL LETTER INSULAR A78B ; NFKC_CF; A78C # L& LATIN CAPITAL LETTER 
SALTILLO A78D ; NFKC_CF; 0265 # L& LATIN CAPITAL LETTER TURNED H A790 ; NFKC_CF; A791 # L& LATIN CAPITAL LETTER N WITH DESCENDER +A792 ; NFKC_CF; A793 # L& LATIN CAPITAL LETTER C WITH BAR A7A0 ; NFKC_CF; A7A1 # L& LATIN CAPITAL LETTER G WITH OBLIQUE STROKE A7A2 ; NFKC_CF; A7A3 # L& LATIN CAPITAL LETTER K WITH OBLIQUE STROKE A7A4 ; NFKC_CF; A7A5 # L& LATIN CAPITAL LETTER N WITH OBLIQUE STROKE A7A6 ; NFKC_CF; A7A7 # L& LATIN CAPITAL LETTER R WITH OBLIQUE STROKE A7A8 ; NFKC_CF; A7A9 # L& LATIN CAPITAL LETTER S WITH OBLIQUE STROKE +A7AA ; NFKC_CF; 0266 # L& LATIN CAPITAL LETTER H WITH HOOK +A7F8 ; NFKC_CF; 0127 # Lm MODIFIER LETTER CAPITAL H WITH STROKE +A7F9 ; NFKC_CF; 0153 # Lm MODIFIER LETTER SMALL LIGATURE OE F900 ; NFKC_CF; 8C48 # Lo CJK COMPATIBILITY IDEOGRAPH-F900 F901 ; NFKC_CF; 66F4 # Lo CJK COMPATIBILITY IDEOGRAPH-F901 F902 ; NFKC_CF; 8ECA # Lo CJK COMPATIBILITY IDEOGRAPH-F902 @@ -5418,6 +5497,8 @@ FA2A ; NFKC_CF; 98EF # Lo CJK COMPATIBILITY IDEOGRAPH-F FA2B ; NFKC_CF; 98FC # Lo CJK COMPATIBILITY IDEOGRAPH-FA2B FA2C ; NFKC_CF; 9928 # Lo CJK COMPATIBILITY IDEOGRAPH-FA2C FA2D ; NFKC_CF; 9DB4 # Lo CJK COMPATIBILITY IDEOGRAPH-FA2D +FA2E ; NFKC_CF; 90DE # Lo CJK COMPATIBILITY IDEOGRAPH-FA2E +FA2F ; NFKC_CF; 96B7 # Lo CJK COMPATIBILITY IDEOGRAPH-FA2F FA30 ; NFKC_CF; 4FAE # Lo CJK COMPATIBILITY IDEOGRAPH-FA30 FA31 ; NFKC_CF; 50E7 # Lo CJK COMPATIBILITY IDEOGRAPH-FA31 FA32 ; NFKC_CF; 514D # Lo CJK COMPATIBILITY IDEOGRAPH-FA32 @@ -7507,6 +7588,147 @@ FFF0..FFF8 ; NFKC_CF; # Cn [9] <reserved-FFF0>..<reserved-FF 1D7FD ; NFKC_CF; 0037 # Nd MATHEMATICAL MONOSPACE DIGIT SEVEN 1D7FE ; NFKC_CF; 0038 # Nd MATHEMATICAL MONOSPACE DIGIT EIGHT 1D7FF ; NFKC_CF; 0039 # Nd MATHEMATICAL MONOSPACE DIGIT NINE +1EE00 ; NFKC_CF; 0627 # Lo ARABIC MATHEMATICAL ALEF +1EE01 ; NFKC_CF; 0628 # Lo ARABIC MATHEMATICAL BEH +1EE02 ; NFKC_CF; 062C # Lo ARABIC MATHEMATICAL JEEM +1EE03 ; NFKC_CF; 062F # Lo ARABIC MATHEMATICAL DAL +1EE05 ; NFKC_CF; 0648 # Lo ARABIC MATHEMATICAL WAW +1EE06 ; NFKC_CF; 
0632 # Lo ARABIC MATHEMATICAL ZAIN +1EE07 ; NFKC_CF; 062D # Lo ARABIC MATHEMATICAL HAH +1EE08 ; NFKC_CF; 0637 # Lo ARABIC MATHEMATICAL TAH +1EE09 ; NFKC_CF; 064A # Lo ARABIC MATHEMATICAL YEH +1EE0A ; NFKC_CF; 0643 # Lo ARABIC MATHEMATICAL KAF +1EE0B ; NFKC_CF; 0644 # Lo ARABIC MATHEMATICAL LAM +1EE0C ; NFKC_CF; 0645 # Lo ARABIC MATHEMATICAL MEEM +1EE0D ; NFKC_CF; 0646 # Lo ARABIC MATHEMATICAL NOON +1EE0E ; NFKC_CF; 0633 # Lo ARABIC MATHEMATICAL SEEN +1EE0F ; NFKC_CF; 0639 # Lo ARABIC MATHEMATICAL AIN +1EE10 ; NFKC_CF; 0641 # Lo ARABIC MATHEMATICAL FEH +1EE11 ; NFKC_CF; 0635 # Lo ARABIC MATHEMATICAL SAD +1EE12 ; NFKC_CF; 0642 # Lo ARABIC MATHEMATICAL QAF +1EE13 ; NFKC_CF; 0631 # Lo ARABIC MATHEMATICAL REH +1EE14 ; NFKC_CF; 0634 # Lo ARABIC MATHEMATICAL SHEEN +1EE15 ; NFKC_CF; 062A # Lo ARABIC MATHEMATICAL TEH +1EE16 ; NFKC_CF; 062B # Lo ARABIC MATHEMATICAL THEH +1EE17 ; NFKC_CF; 062E # Lo ARABIC MATHEMATICAL KHAH +1EE18 ; NFKC_CF; 0630 # Lo ARABIC MATHEMATICAL THAL +1EE19 ; NFKC_CF; 0636 # Lo ARABIC MATHEMATICAL DAD +1EE1A ; NFKC_CF; 0638 # Lo ARABIC MATHEMATICAL ZAH +1EE1B ; NFKC_CF; 063A # Lo ARABIC MATHEMATICAL GHAIN +1EE1C ; NFKC_CF; 066E # Lo ARABIC MATHEMATICAL DOTLESS BEH +1EE1D ; NFKC_CF; 06BA # Lo ARABIC MATHEMATICAL DOTLESS NOON +1EE1E ; NFKC_CF; 06A1 # Lo ARABIC MATHEMATICAL DOTLESS FEH +1EE1F ; NFKC_CF; 066F # Lo ARABIC MATHEMATICAL DOTLESS QAF +1EE21 ; NFKC_CF; 0628 # Lo ARABIC MATHEMATICAL INITIAL BEH +1EE22 ; NFKC_CF; 062C # Lo ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; NFKC_CF; 0647 # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; NFKC_CF; 062D # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29 ; NFKC_CF; 064A # Lo ARABIC MATHEMATICAL INITIAL YEH +1EE2A ; NFKC_CF; 0643 # Lo ARABIC MATHEMATICAL INITIAL KAF +1EE2B ; NFKC_CF; 0644 # Lo ARABIC MATHEMATICAL INITIAL LAM +1EE2C ; NFKC_CF; 0645 # Lo ARABIC MATHEMATICAL INITIAL MEEM +1EE2D ; NFKC_CF; 0646 # Lo ARABIC MATHEMATICAL INITIAL NOON +1EE2E ; NFKC_CF; 0633 # Lo ARABIC MATHEMATICAL INITIAL SEEN +1EE2F ; NFKC_CF; 
0639 # Lo ARABIC MATHEMATICAL INITIAL AIN +1EE30 ; NFKC_CF; 0641 # Lo ARABIC MATHEMATICAL INITIAL FEH +1EE31 ; NFKC_CF; 0635 # Lo ARABIC MATHEMATICAL INITIAL SAD +1EE32 ; NFKC_CF; 0642 # Lo ARABIC MATHEMATICAL INITIAL QAF +1EE34 ; NFKC_CF; 0634 # Lo ARABIC MATHEMATICAL INITIAL SHEEN +1EE35 ; NFKC_CF; 062A # Lo ARABIC MATHEMATICAL INITIAL TEH +1EE36 ; NFKC_CF; 062B # Lo ARABIC MATHEMATICAL INITIAL THEH +1EE37 ; NFKC_CF; 062E # Lo ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; NFKC_CF; 0636 # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; NFKC_CF; 063A # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; NFKC_CF; 062C # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; NFKC_CF; 062D # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; NFKC_CF; 064A # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; NFKC_CF; 0644 # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D ; NFKC_CF; 0646 # Lo ARABIC MATHEMATICAL TAILED NOON +1EE4E ; NFKC_CF; 0633 # Lo ARABIC MATHEMATICAL TAILED SEEN +1EE4F ; NFKC_CF; 0639 # Lo ARABIC MATHEMATICAL TAILED AIN +1EE51 ; NFKC_CF; 0635 # Lo ARABIC MATHEMATICAL TAILED SAD +1EE52 ; NFKC_CF; 0642 # Lo ARABIC MATHEMATICAL TAILED QAF +1EE54 ; NFKC_CF; 0634 # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; NFKC_CF; 062E # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; NFKC_CF; 0636 # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; NFKC_CF; 063A # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; NFKC_CF; 06BA # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; NFKC_CF; 066F # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61 ; NFKC_CF; 0628 # Lo ARABIC MATHEMATICAL STRETCHED BEH +1EE62 ; NFKC_CF; 062C # Lo ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; NFKC_CF; 0647 # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67 ; NFKC_CF; 062D # Lo ARABIC MATHEMATICAL STRETCHED HAH +1EE68 ; NFKC_CF; 0637 # Lo ARABIC MATHEMATICAL STRETCHED TAH +1EE69 ; NFKC_CF; 064A # Lo ARABIC MATHEMATICAL STRETCHED YEH +1EE6A ; NFKC_CF; 0643 # Lo ARABIC MATHEMATICAL STRETCHED KAF +1EE6C ; NFKC_CF; 0645 # Lo ARABIC MATHEMATICAL STRETCHED MEEM 
+1EE6D ; NFKC_CF; 0646 # Lo ARABIC MATHEMATICAL STRETCHED NOON +1EE6E ; NFKC_CF; 0633 # Lo ARABIC MATHEMATICAL STRETCHED SEEN +1EE6F ; NFKC_CF; 0639 # Lo ARABIC MATHEMATICAL STRETCHED AIN +1EE70 ; NFKC_CF; 0641 # Lo ARABIC MATHEMATICAL STRETCHED FEH +1EE71 ; NFKC_CF; 0635 # Lo ARABIC MATHEMATICAL STRETCHED SAD +1EE72 ; NFKC_CF; 0642 # Lo ARABIC MATHEMATICAL STRETCHED QAF +1EE74 ; NFKC_CF; 0634 # Lo ARABIC MATHEMATICAL STRETCHED SHEEN +1EE75 ; NFKC_CF; 062A # Lo ARABIC MATHEMATICAL STRETCHED TEH +1EE76 ; NFKC_CF; 062B # Lo ARABIC MATHEMATICAL STRETCHED THEH +1EE77 ; NFKC_CF; 062E # Lo ARABIC MATHEMATICAL STRETCHED KHAH +1EE79 ; NFKC_CF; 0636 # Lo ARABIC MATHEMATICAL STRETCHED DAD +1EE7A ; NFKC_CF; 0638 # Lo ARABIC MATHEMATICAL STRETCHED ZAH +1EE7B ; NFKC_CF; 063A # Lo ARABIC MATHEMATICAL STRETCHED GHAIN +1EE7C ; NFKC_CF; 066E # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; NFKC_CF; 06A1 # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80 ; NFKC_CF; 0627 # Lo ARABIC MATHEMATICAL LOOPED ALEF +1EE81 ; NFKC_CF; 0628 # Lo ARABIC MATHEMATICAL LOOPED BEH +1EE82 ; NFKC_CF; 062C # Lo ARABIC MATHEMATICAL LOOPED JEEM +1EE83 ; NFKC_CF; 062F # Lo ARABIC MATHEMATICAL LOOPED DAL +1EE84 ; NFKC_CF; 0647 # Lo ARABIC MATHEMATICAL LOOPED HEH +1EE85 ; NFKC_CF; 0648 # Lo ARABIC MATHEMATICAL LOOPED WAW +1EE86 ; NFKC_CF; 0632 # Lo ARABIC MATHEMATICAL LOOPED ZAIN +1EE87 ; NFKC_CF; 062D # Lo ARABIC MATHEMATICAL LOOPED HAH +1EE88 ; NFKC_CF; 0637 # Lo ARABIC MATHEMATICAL LOOPED TAH +1EE89 ; NFKC_CF; 064A # Lo ARABIC MATHEMATICAL LOOPED YEH +1EE8B ; NFKC_CF; 0644 # Lo ARABIC MATHEMATICAL LOOPED LAM +1EE8C ; NFKC_CF; 0645 # Lo ARABIC MATHEMATICAL LOOPED MEEM +1EE8D ; NFKC_CF; 0646 # Lo ARABIC MATHEMATICAL LOOPED NOON +1EE8E ; NFKC_CF; 0633 # Lo ARABIC MATHEMATICAL LOOPED SEEN +1EE8F ; NFKC_CF; 0639 # Lo ARABIC MATHEMATICAL LOOPED AIN +1EE90 ; NFKC_CF; 0641 # Lo ARABIC MATHEMATICAL LOOPED FEH +1EE91 ; NFKC_CF; 0635 # Lo ARABIC MATHEMATICAL LOOPED SAD +1EE92 ; NFKC_CF; 0642 # Lo ARABIC 
MATHEMATICAL LOOPED QAF +1EE93 ; NFKC_CF; 0631 # Lo ARABIC MATHEMATICAL LOOPED REH +1EE94 ; NFKC_CF; 0634 # Lo ARABIC MATHEMATICAL LOOPED SHEEN +1EE95 ; NFKC_CF; 062A # Lo ARABIC MATHEMATICAL LOOPED TEH +1EE96 ; NFKC_CF; 062B # Lo ARABIC MATHEMATICAL LOOPED THEH +1EE97 ; NFKC_CF; 062E # Lo ARABIC MATHEMATICAL LOOPED KHAH +1EE98 ; NFKC_CF; 0630 # Lo ARABIC MATHEMATICAL LOOPED THAL +1EE99 ; NFKC_CF; 0636 # Lo ARABIC MATHEMATICAL LOOPED DAD +1EE9A ; NFKC_CF; 0638 # Lo ARABIC MATHEMATICAL LOOPED ZAH +1EE9B ; NFKC_CF; 063A # Lo ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1 ; NFKC_CF; 0628 # Lo ARABIC MATHEMATICAL DOUBLE-STRUCK BEH +1EEA2 ; NFKC_CF; 062C # Lo ARABIC MATHEMATICAL DOUBLE-STRUCK JEEM +1EEA3 ; NFKC_CF; 062F # Lo ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5 ; NFKC_CF; 0648 # Lo ARABIC MATHEMATICAL DOUBLE-STRUCK WAW +1EEA6 ; NFKC_CF; 0632 # Lo ARABIC MATHEMATICAL DOUBLE-STRUCK ZAIN +1EEA7 ; NFKC_CF; 062D # Lo ARABIC MATHEMATICAL DOUBLE-STRUCK HAH +1EEA8 ; NFKC_CF; 0637 # Lo ARABIC MATHEMATICAL DOUBLE-STRUCK TAH +1EEA9 ; NFKC_CF; 064A # Lo ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB ; NFKC_CF; 0644 # Lo ARABIC MATHEMATICAL DOUBLE-STRUCK LAM +1EEAC ; NFKC_CF; 0645 # Lo ARABIC MATHEMATICAL DOUBLE-STRUCK MEEM +1EEAD ; NFKC_CF; 0646 # Lo ARABIC MATHEMATICAL DOUBLE-STRUCK NOON +1EEAE ; NFKC_CF; 0633 # Lo ARABIC MATHEMATICAL DOUBLE-STRUCK SEEN +1EEAF ; NFKC_CF; 0639 # Lo ARABIC MATHEMATICAL DOUBLE-STRUCK AIN +1EEB0 ; NFKC_CF; 0641 # Lo ARABIC MATHEMATICAL DOUBLE-STRUCK FEH +1EEB1 ; NFKC_CF; 0635 # Lo ARABIC MATHEMATICAL DOUBLE-STRUCK SAD +1EEB2 ; NFKC_CF; 0642 # Lo ARABIC MATHEMATICAL DOUBLE-STRUCK QAF +1EEB3 ; NFKC_CF; 0631 # Lo ARABIC MATHEMATICAL DOUBLE-STRUCK REH +1EEB4 ; NFKC_CF; 0634 # Lo ARABIC MATHEMATICAL DOUBLE-STRUCK SHEEN +1EEB5 ; NFKC_CF; 062A # Lo ARABIC MATHEMATICAL DOUBLE-STRUCK TEH +1EEB6 ; NFKC_CF; 062B # Lo ARABIC MATHEMATICAL DOUBLE-STRUCK THEH +1EEB7 ; NFKC_CF; 062E # Lo ARABIC MATHEMATICAL DOUBLE-STRUCK KHAH +1EEB8 ; NFKC_CF; 0630 # Lo ARABIC 
MATHEMATICAL DOUBLE-STRUCK THAL +1EEB9 ; NFKC_CF; 0636 # Lo ARABIC MATHEMATICAL DOUBLE-STRUCK DAD +1EEBA ; NFKC_CF; 0638 # Lo ARABIC MATHEMATICAL DOUBLE-STRUCK ZAH +1EEBB ; NFKC_CF; 063A # Lo ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN 1F100 ; NFKC_CF; 0030 002E # No DIGIT ZERO FULL STOP 1F101 ; NFKC_CF; 0030 002C # No DIGIT ZERO COMMA 1F102 ; NFKC_CF; 0031 002C # No DIGIT ONE COMMA @@ -7581,6 +7803,8 @@ FFF0..FFF8 ; NFKC_CF; # Cn [9] <reserved-FFF0>..<reserved-FF 1F14D ; NFKC_CF; 0073 0073 # So SQUARED SS 1F14E ; NFKC_CF; 0070 0070 0076 # So SQUARED PPV 1F14F ; NFKC_CF; 0077 0063 # So SQUARED WC +1F16A ; NFKC_CF; 006D 0063 # So RAISED MC SIGN +1F16B ; NFKC_CF; 006D 0064 # So RAISED MD SIGN 1F190 ; NFKC_CF; 0064 006A # So SQUARE DJ 1F200 ; NFKC_CF; 307B 304B # So SQUARE HIRAGANA HOKA 1F201 ; NFKC_CF; 30B3 30B3 # So SQUARED KATAKANA KOKO @@ -8179,7 +8403,7 @@ E0080..E00FF ; NFKC_CF; # Cn [128] <reserved-E0080>..<reserved-E E0100..E01EF ; NFKC_CF; # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 E01F0..E0FFF ; NFKC_CF; # Cn [3600] <reserved-E01F0>..<reserved-E0FFF> -# Total code points: 9792 +# Total code points: 9944 # ================================================ @@ -8190,7 +8414,7 @@ E01F0..E0FFF ; NFKC_CF; # Cn [3600] <reserved-E01F0>..<reserved- 0041..005A ; Changes_When_NFKC_Casefolded # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z 00A0 ; Changes_When_NFKC_Casefolded # Zs NO-BREAK SPACE 00A8 ; Changes_When_NFKC_Casefolded # Sk DIAERESIS -00AA ; Changes_When_NFKC_Casefolded # L& FEMININE ORDINAL INDICATOR +00AA ; Changes_When_NFKC_Casefolded # Lo FEMININE ORDINAL INDICATOR 00AD ; Changes_When_NFKC_Casefolded # Cf SOFT HYPHEN 00AF ; Changes_When_NFKC_Casefolded # Sk MACRON 00B2..00B3 ; Changes_When_NFKC_Casefolded # No [2] SUPERSCRIPT TWO..SUPERSCRIPT THREE @@ -8198,7 +8422,7 @@ E01F0..E0FFF ; NFKC_CF; # Cn [3600] <reserved-E01F0>..<reserved- 00B5 ; Changes_When_NFKC_Casefolded # L& MICRO SIGN 00B8 ; Changes_When_NFKC_Casefolded # Sk CEDILLA 00B9 
; Changes_When_NFKC_Casefolded # No SUPERSCRIPT ONE -00BA ; Changes_When_NFKC_Casefolded # L& MASCULINE ORDINAL INDICATOR +00BA ; Changes_When_NFKC_Casefolded # Lo MASCULINE ORDINAL INDICATOR 00BC..00BE ; Changes_When_NFKC_Casefolded # No [3] VULGAR FRACTION ONE QUARTER..VULGAR FRACTION THREE QUARTERS 00C0..00D6 ; Changes_When_NFKC_Casefolded # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS 00D8..00DF ; Changes_When_NFKC_Casefolded # L& [8] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER SHARP S @@ -8503,15 +8727,16 @@ E01F0..E0FFF ; NFKC_CF; # Cn [3600] <reserved-E01F0>..<reserved- 0FAC ; Changes_When_NFKC_Casefolded # Mn TIBETAN SUBJOINED LETTER DZHA 0FB9 ; Changes_When_NFKC_Casefolded # Mn TIBETAN SUBJOINED LETTER KSSA 10A0..10C5 ; Changes_When_NFKC_Casefolded # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; Changes_When_NFKC_Casefolded # L& GEORGIAN CAPITAL LETTER YN +10CD ; Changes_When_NFKC_Casefolded # L& GEORGIAN CAPITAL LETTER AEN 10FC ; Changes_When_NFKC_Casefolded # Lm MODIFIER LETTER GEORGIAN NAR 115F..1160 ; Changes_When_NFKC_Casefolded # Lo [2] HANGUL CHOSEONG FILLER..HANGUL JUNGSEONG FILLER -17B4..17B5 ; Changes_When_NFKC_Casefolded # Cf [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA +17B4..17B5 ; Changes_When_NFKC_Casefolded # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA 180B..180D ; Changes_When_NFKC_Casefolded # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE 1D2C..1D2E ; Changes_When_NFKC_Casefolded # Lm [3] MODIFIER LETTER CAPITAL A..MODIFIER LETTER CAPITAL B 1D30..1D3A ; Changes_When_NFKC_Casefolded # Lm [11] MODIFIER LETTER CAPITAL D..MODIFIER LETTER CAPITAL N 1D3C..1D4D ; Changes_When_NFKC_Casefolded # Lm [18] MODIFIER LETTER CAPITAL O..MODIFIER LETTER SMALL G -1D4F..1D61 ; Changes_When_NFKC_Casefolded # Lm [19] MODIFIER LETTER SMALL K..MODIFIER LETTER SMALL CHI -1D62..1D6A ; Changes_When_NFKC_Casefolded # L& [9] LATIN 
SUBSCRIPT SMALL LETTER I..GREEK SUBSCRIPT SMALL LETTER CHI +1D4F..1D6A ; Changes_When_NFKC_Casefolded # Lm [28] MODIFIER LETTER SMALL K..GREEK SUBSCRIPT SMALL LETTER CHI 1D78 ; Changes_When_NFKC_Casefolded # Lm MODIFIER LETTER CYRILLIC EN 1D9B..1DBF ; Changes_When_NFKC_Casefolded # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA 1E00 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER A WITH RING BELOW @@ -8749,8 +8974,7 @@ E01F0..E0FFF ; NFKC_CF; # Cn [3600] <reserved-E01F0>..<reserved- 2C6D..2C70 ; Changes_When_NFKC_Casefolded # L& [4] LATIN CAPITAL LETTER ALPHA..LATIN CAPITAL LETTER TURNED ALPHA 2C72 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER W WITH HOOK 2C75 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER HALF H -2C7C ; Changes_When_NFKC_Casefolded # L& LATIN SUBSCRIPT SMALL LETTER J -2C7D ; Changes_When_NFKC_Casefolded # Lm MODIFIER LETTER CAPITAL V +2C7C..2C7D ; Changes_When_NFKC_Casefolded # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V 2C7E..2C80 ; Changes_When_NFKC_Casefolded # L& [3] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC CAPITAL LETTER ALFA 2C82 ; Changes_When_NFKC_Casefolded # L& COPTIC CAPITAL LETTER VIDA 2C84 ; Changes_When_NFKC_Casefolded # L& COPTIC CAPITAL LETTER GAMMA @@ -8803,6 +9027,7 @@ E01F0..E0FFF ; NFKC_CF; # Cn [3600] <reserved-E01F0>..<reserved- 2CE2 ; Changes_When_NFKC_Casefolded # L& COPTIC CAPITAL LETTER OLD NUBIAN WAU 2CEB ; Changes_When_NFKC_Casefolded # L& COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI 2CED ; Changes_When_NFKC_Casefolded # L& COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA +2CF2 ; Changes_When_NFKC_Casefolded # L& COPTIC CAPITAL LETTER BOHAIRIC KHEI 2D6F ; Changes_When_NFKC_Casefolded # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK 2E9F ; Changes_When_NFKC_Casefolded # So CJK RADICAL MOTHER 2EF3 ; Changes_When_NFKC_Casefolded # So CJK RADICAL C-SIMPLIFIED TURTLE @@ -8911,11 +9136,14 @@ A786 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER 
INS A78B ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER SALTILLO A78D ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER TURNED H A790 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER N WITH DESCENDER +A792 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER C WITH BAR A7A0 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER G WITH OBLIQUE STROKE A7A2 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER K WITH OBLIQUE STROKE A7A4 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER N WITH OBLIQUE STROKE A7A6 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER R WITH OBLIQUE STROKE A7A8 ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER S WITH OBLIQUE STROKE +A7AA ; Changes_When_NFKC_Casefolded # L& LATIN CAPITAL LETTER H WITH HOOK +A7F8..A7F9 ; Changes_When_NFKC_Casefolded # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE F900..FA0D ; Changes_When_NFKC_Casefolded # Lo [270] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA0D FA10 ; Changes_When_NFKC_Casefolded # Lo CJK COMPATIBILITY IDEOGRAPH-FA10 FA12 ; Changes_When_NFKC_Casefolded # Lo CJK COMPATIBILITY IDEOGRAPH-FA12 @@ -8923,8 +9151,7 @@ FA15..FA1E ; Changes_When_NFKC_Casefolded # Lo [10] CJK COMPATIBILITY IDEOGR FA20 ; Changes_When_NFKC_Casefolded # Lo CJK COMPATIBILITY IDEOGRAPH-FA20 FA22 ; Changes_When_NFKC_Casefolded # Lo CJK COMPATIBILITY IDEOGRAPH-FA22 FA25..FA26 ; Changes_When_NFKC_Casefolded # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA25..CJK COMPATIBILITY IDEOGRAPH-FA26 -FA2A..FA2D ; Changes_When_NFKC_Casefolded # Lo [4] CJK COMPATIBILITY IDEOGRAPH-FA2A..CJK COMPATIBILITY IDEOGRAPH-FA2D -FA30..FA6D ; Changes_When_NFKC_Casefolded # Lo [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D +FA2A..FA6D ; Changes_When_NFKC_Casefolded # Lo [68] CJK COMPATIBILITY IDEOGRAPH-FA2A..CJK COMPATIBILITY IDEOGRAPH-FA6D FA70..FAD9 ; Changes_When_NFKC_Casefolded # Lo [106] CJK COMPATIBILITY 
IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 FB00..FB06 ; Changes_When_NFKC_Casefolded # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST FB13..FB17 ; Changes_When_NFKC_Casefolded # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH @@ -9085,9 +9312,43 @@ FFF0..FFF8 ; Changes_When_NFKC_Casefolded # Cn [9] <reserved-FFF0>..<reserv 1D7C3 ; Changes_When_NFKC_Casefolded # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL 1D7C4..1D7CB ; Changes_When_NFKC_Casefolded # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA 1D7CE..1D7FF ; Changes_When_NFKC_Casefolded # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE +1EE00..1EE03 ; Changes_When_NFKC_Casefolded # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; Changes_When_NFKC_Casefolded # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; Changes_When_NFKC_Casefolded # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; Changes_When_NFKC_Casefolded # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; Changes_When_NFKC_Casefolded # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; Changes_When_NFKC_Casefolded # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; Changes_When_NFKC_Casefolded # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; Changes_When_NFKC_Casefolded # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; Changes_When_NFKC_Casefolded # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; Changes_When_NFKC_Casefolded # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; Changes_When_NFKC_Casefolded # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; Changes_When_NFKC_Casefolded # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; Changes_When_NFKC_Casefolded # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; Changes_When_NFKC_Casefolded # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC 
MATHEMATICAL TAILED AIN +1EE51..1EE52 ; Changes_When_NFKC_Casefolded # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; Changes_When_NFKC_Casefolded # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; Changes_When_NFKC_Casefolded # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; Changes_When_NFKC_Casefolded # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; Changes_When_NFKC_Casefolded # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; Changes_When_NFKC_Casefolded # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; Changes_When_NFKC_Casefolded # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; Changes_When_NFKC_Casefolded # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; Changes_When_NFKC_Casefolded # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; Changes_When_NFKC_Casefolded # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; Changes_When_NFKC_Casefolded # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; Changes_When_NFKC_Casefolded # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; Changes_When_NFKC_Casefolded # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; Changes_When_NFKC_Casefolded # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; Changes_When_NFKC_Casefolded # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; Changes_When_NFKC_Casefolded # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; Changes_When_NFKC_Casefolded # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; Changes_When_NFKC_Casefolded # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; Changes_When_NFKC_Casefolded # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC 
MATHEMATICAL DOUBLE-STRUCK GHAIN 1F100..1F10A ; Changes_When_NFKC_Casefolded # No [11] DIGIT ZERO FULL STOP..DIGIT NINE COMMA 1F110..1F12E ; Changes_When_NFKC_Casefolded # So [31] PARENTHESIZED LATIN CAPITAL LETTER A..CIRCLED WZ 1F130..1F14F ; Changes_When_NFKC_Casefolded # So [32] SQUARED LATIN CAPITAL LETTER A..SQUARED WC +1F16A..1F16B ; Changes_When_NFKC_Casefolded # So [2] RAISED MC SIGN..RAISED MD SIGN 1F190 ; Changes_When_NFKC_Casefolded # So SQUARE DJ 1F200..1F202 ; Changes_When_NFKC_Casefolded # So [3] SQUARE HIRAGANA HOKA..SQUARED KATAKANA SA 1F210..1F23A ; Changes_When_NFKC_Casefolded # So [43] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-55B6 @@ -9102,6 +9363,6 @@ E0080..E00FF ; Changes_When_NFKC_Casefolded # Cn [128] <reserved-E0080>..<reser E0100..E01EF ; Changes_When_NFKC_Casefolded # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 E01F0..E0FFF ; Changes_When_NFKC_Casefolded # Cn [3600] <reserved-E01F0>..<reserved-E0FFF> -# Total code points: 9792 +# Total code points: 9944 # EOF diff --git a/lib/unicore/EastAsianWidth.txt b/lib/unicore/EastAsianWidth.txt index d271d90d56..ea38eef618 100644 --- a/lib/unicore/EastAsianWidth.txt +++ b/lib/unicore/EastAsianWidth.txt @@ -1,12 +1,12 @@ -# EastAsianWidth-6.0.0.txt -# Date: 2010-08-17, 12:17:00 PDT [KW] +# EastAsianWidth-6.1.0.txt +# Date: 2011-09-19, 18:46:00 GMT [KW] # # East Asian Width Properties # # This file is an informative contributory data file in the # Unicode Character Database. # -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # # The format is two fields separated by a semicolon. 
@@ -1432,6 +1432,7 @@ 0587;N # ARMENIAN SMALL LIGATURE ECH YIWN 0589;N # ARMENIAN FULL STOP 058A;N # ARMENIAN HYPHEN +058F;N # ARMENIAN DRAM SIGN 0591;N # HEBREW ACCENT ETNAHTA 0592;N # HEBREW ACCENT SEGOL 0593;N # HEBREW ACCENT SHALSHELET @@ -1523,6 +1524,7 @@ 0601;N # ARABIC SIGN SANAH 0602;N # ARABIC FOOTNOTE MARKER 0603;N # ARABIC SIGN SAFHA +0604;N # ARABIC SIGN SAMVAT 0606;N # ARABIC-INDIC CUBE ROOT 0607;N # ARABIC-INDIC FOURTH ROOT 0608;N # ARABIC RAY @@ -2095,6 +2097,45 @@ 085A;N # MANDAIC VOCALIZATION MARK 085B;N # MANDAIC GEMINATION MARK 085E;N # MANDAIC PUNCTUATION +08A0;N # ARABIC LETTER BEH WITH SMALL V BELOW +08A2;N # ARABIC LETTER JEEM WITH TWO DOTS ABOVE +08A3;N # ARABIC LETTER TAH WITH TWO DOTS ABOVE +08A4;N # ARABIC LETTER FEH WITH DOT BELOW AND THREE DOTS ABOVE +08A5;N # ARABIC LETTER QAF WITH DOT BELOW +08A6;N # ARABIC LETTER LAM WITH DOUBLE BAR +08A7;N # ARABIC LETTER MEEM WITH THREE DOTS ABOVE +08A8;N # ARABIC LETTER YEH WITH TWO DOTS BELOW AND HAMZA ABOVE +08A9;N # ARABIC LETTER YEH WITH TWO DOTS BELOW AND DOT ABOVE +08AA;N # ARABIC LETTER REH WITH LOOP +08AB;N # ARABIC LETTER WAW WITH DOT WITHIN +08AC;N # ARABIC LETTER ROHINGYA YEH +08E4;N # ARABIC CURLY FATHA +08E5;N # ARABIC CURLY DAMMA +08E6;N # ARABIC CURLY KASRA +08E7;N # ARABIC CURLY FATHATAN +08E8;N # ARABIC CURLY DAMMATAN +08E9;N # ARABIC CURLY KASRATAN +08EA;N # ARABIC TONE ONE DOT ABOVE +08EB;N # ARABIC TONE TWO DOTS ABOVE +08EC;N # ARABIC TONE LOOP ABOVE +08ED;N # ARABIC TONE ONE DOT BELOW +08EE;N # ARABIC TONE TWO DOTS BELOW +08EF;N # ARABIC TONE LOOP BELOW +08F0;N # ARABIC OPEN FATHATAN +08F1;N # ARABIC OPEN DAMMATAN +08F2;N # ARABIC OPEN KASRATAN +08F3;N # ARABIC SMALL HIGH WAW +08F4;N # ARABIC FATHA WITH RING +08F5;N # ARABIC FATHA WITH DOT ABOVE +08F6;N # ARABIC KASRA WITH DOT BELOW +08F7;N # ARABIC LEFT ARROWHEAD ABOVE +08F8;N # ARABIC RIGHT ARROWHEAD ABOVE +08F9;N # ARABIC LEFT ARROWHEAD BELOW +08FA;N # ARABIC RIGHT ARROWHEAD BELOW +08FB;N # ARABIC DOUBLE RIGHT ARROWHEAD 
ABOVE +08FC;N # ARABIC DOUBLE RIGHT ARROWHEAD ABOVE WITH DOT +08FD;N # ARABIC RIGHT ARROWHEAD ABOVE WITH DOT +08FE;N # ARABIC DAMMA WITH DOT 0900;N # DEVANAGARI SIGN INVERTED CANDRABINDU 0901;N # DEVANAGARI SIGN CANDRABINDU 0902;N # DEVANAGARI SIGN ANUSVARA @@ -2475,6 +2516,7 @@ 0AED;N # GUJARATI DIGIT SEVEN 0AEE;N # GUJARATI DIGIT EIGHT 0AEF;N # GUJARATI DIGIT NINE +0AF0;N # GUJARATI ABBREVIATION SIGN 0AF1;N # GUJARATI RUPEE SIGN 0B01;N # ORIYA SIGN CANDRABINDU 0B02;N # ORIYA SIGN ANUSVARA @@ -3147,6 +3189,8 @@ 0ED9;N # LAO DIGIT NINE 0EDC;N # LAO HO NO 0EDD;N # LAO HO MO +0EDE;N # LAO LETTER KHMU GO +0EDF;N # LAO LETTER KHMU NYO 0F00;N # TIBETAN SYLLABLE OM 0F01;N # TIBETAN MARK GTER YIG MGO TRUNCATED A 0F02;N # TIBETAN MARK GTER YIG MGO -UM RNAM BCAD MA @@ -3556,6 +3600,8 @@ 10C3;N # GEORGIAN CAPITAL LETTER WE 10C4;N # GEORGIAN CAPITAL LETTER HAR 10C5;N # GEORGIAN CAPITAL LETTER HOE +10C7;N # GEORGIAN CAPITAL LETTER YN +10CD;N # GEORGIAN CAPITAL LETTER AEN 10D0;N # GEORGIAN LETTER AN 10D1;N # GEORGIAN LETTER BAN 10D2;N # GEORGIAN LETTER GAN @@ -3601,6 +3647,9 @@ 10FA;N # GEORGIAN LETTER AIN 10FB;N # GEORGIAN PARAGRAPH SEPARATOR 10FC;N # MODIFIER LETTER GEORGIAN NAR +10FD;N # GEORGIAN LETTER AEN +10FE;N # GEORGIAN LETTER HARD SIGN +10FF;N # GEORGIAN LETTER LABIAL SIGN 1100;W # HANGUL CHOSEONG KIYEOK 1101;W # HANGUL CHOSEONG SSANGKIYEOK 1102;W # HANGUL CHOSEONG NIEUN @@ -6034,6 +6083,9 @@ 1BA8;N # SUNDANESE VOWEL SIGN PAMEPET 1BA9;N # SUNDANESE VOWEL SIGN PANEULEUNG 1BAA;N # SUNDANESE SIGN PAMAAEH +1BAB;N # SUNDANESE SIGN VIRAMA +1BAC;N # SUNDANESE CONSONANT SIGN PASANGAN MA +1BAD;N # SUNDANESE CONSONANT SIGN PASANGAN WA 1BAE;N # SUNDANESE LETTER KHA 1BAF;N # SUNDANESE LETTER SYA 1BB0;N # SUNDANESE DIGIT ZERO @@ -6046,6 +6098,12 @@ 1BB7;N # SUNDANESE DIGIT SEVEN 1BB8;N # SUNDANESE DIGIT EIGHT 1BB9;N # SUNDANESE DIGIT NINE +1BBA;N # SUNDANESE AVAGRAHA +1BBB;N # SUNDANESE LETTER REU +1BBC;N # SUNDANESE LETTER LEU +1BBD;N # SUNDANESE LETTER BHA +1BBE;N # SUNDANESE 
LETTER FINAL K +1BBF;N # SUNDANESE LETTER FINAL M 1BC0;N # BATAK LETTER A 1BC1;N # BATAK LETTER SIMALUNGUN A 1BC2;N # BATAK LETTER HA @@ -6224,6 +6282,14 @@ 1C7D;N # OL CHIKI AHAD 1C7E;N # OL CHIKI PUNCTUATION MUCAAD 1C7F;N # OL CHIKI PUNCTUATION DOUBLE MUCAAD +1CC0;N # SUNDANESE PUNCTUATION BINDU SURYA +1CC1;N # SUNDANESE PUNCTUATION BINDU PANGLONG +1CC2;N # SUNDANESE PUNCTUATION BINDU PURNAMA +1CC3;N # SUNDANESE PUNCTUATION BINDU CAKRA +1CC4;N # SUNDANESE PUNCTUATION BINDU LEU SATANGA +1CC5;N # SUNDANESE PUNCTUATION BINDU KA SATANGA +1CC6;N # SUNDANESE PUNCTUATION BINDU DA SATANGA +1CC7;N # SUNDANESE PUNCTUATION BINDU BA SATANGA 1CD0;N # VEDIC TONE KARSHANA 1CD1;N # VEDIC TONE SHARA 1CD2;N # VEDIC TONE PRENKHA @@ -6259,6 +6325,10 @@ 1CF0;N # VEDIC SIGN RTHANG LONG ANUSVARA 1CF1;N # VEDIC SIGN ANUSVARA UBHAYATO MUKHA 1CF2;N # VEDIC SIGN ARDHAVISARGA +1CF3;N # VEDIC SIGN ROTATED ARDHAVISARGA +1CF4;N # VEDIC TONE CANDRA ABOVE +1CF5;N # VEDIC SIGN JIHVAMULIYA +1CF6;N # VEDIC SIGN UPADHMANIYA 1D00;N # LATIN LETTER SMALL CAPITAL A 1D01;N # LATIN LETTER SMALL CAPITAL AE 1D02;N # LATIN SMALL LETTER TURNED AE @@ -8865,7 +8935,9 @@ 27C8;N # REVERSE SOLIDUS PRECEDING SUBSET 27C9;N # SUPERSET PRECEDING SOLIDUS 27CA;N # VERTICAL BAR WITH HORIZONTAL STROKE +27CB;N # MATHEMATICAL RISING DIAGONAL 27CC;N # LONG DIVISION +27CD;N # MATHEMATICAL FALLING DIAGONAL 27CE;N # SQUARED LOGICAL AND 27CF;N # SQUARED LOGICAL OR 27D0;N # WHITE DIAMOND WITH CENTRED DOT @@ -10011,6 +10083,8 @@ 2CEF;N # COPTIC COMBINING NI ABOVE 2CF0;N # COPTIC COMBINING SPIRITUS ASPER 2CF1;N # COPTIC COMBINING SPIRITUS LENIS +2CF2;N # COPTIC CAPITAL LETTER BOHAIRIC KHEI +2CF3;N # COPTIC SMALL LETTER BOHAIRIC KHEI 2CF9;N # COPTIC OLD NUBIAN FULL STOP 2CFA;N # COPTIC OLD NUBIAN DIRECT QUESTION MARK 2CFB;N # COPTIC OLD NUBIAN INDIRECT QUESTION MARK @@ -10056,6 +10130,8 @@ 2D23;N # GEORGIAN SMALL LETTER WE 2D24;N # GEORGIAN SMALL LETTER HAR 2D25;N # GEORGIAN SMALL LETTER HOE +2D27;N # GEORGIAN SMALL LETTER YN 
+2D2D;N # GEORGIAN SMALL LETTER AEN 2D30;N # TIFINAGH LETTER YA 2D31;N # TIFINAGH LETTER YAB 2D32;N # TIFINAGH LETTER YABH @@ -10110,6 +10186,8 @@ 2D63;N # TIFINAGH LETTER YAZ 2D64;N # TIFINAGH LETTER TAWELLEMET YAZ 2D65;N # TIFINAGH LETTER YAZZ +2D66;N # TIFINAGH LETTER YE +2D67;N # TIFINAGH LETTER YO 2D6F;N # TIFINAGH MODIFIER LETTER LABIALIZATION MARK 2D70;N # TIFINAGH SEPARATOR MARK 2D7F;N # TIFINAGH CONSONANT JOINER @@ -10274,6 +10352,16 @@ 2E2F;N # VERTICAL TILDE 2E30;N # RING POINT 2E31;N # WORD SEPARATOR MIDDLE DOT +2E32;N # TURNED COMMA +2E33;N # RAISED DOT +2E34;N # RAISED COMMA +2E35;N # TURNED SEMICOLON +2E36;N # DAGGER WITH LEFT GUARD +2E37;N # DAGGER WITH RIGHT GUARD +2E38;N # TURNED DAGGER +2E39;N # TOP HALF SECTION SIGN +2E3A;N # TWO-EM DASH +2E3B;N # THREE-EM DASH 2E80;W # CJK RADICAL REPEAT 2E81;W # CJK RADICAL CLIFF 2E82;W # CJK RADICAL SECOND ONE @@ -11674,8 +11762,8 @@ 4DFD;N # HEXAGRAM FOR SMALL PREPONDERANCE 4DFE;N # HEXAGRAM FOR AFTER COMPLETION 4DFF;N # HEXAGRAM FOR BEFORE COMPLETION -4E00..9FCB;W # <CJK Ideograph, First>..<CJK Ideograph, Last> -9FCC..9FFF;W # <reserved-9FCC>..<reserved-9FFF> +4E00..9FCC;W # <CJK Ideograph, First>..<CJK Ideograph, Last> +9FCD..9FFF;W # <reserved-9FCD>..<reserved-9FFF> A000;W # YI SYLLABLE IT A001;W # YI SYLLABLE IX A002;W # YI SYLLABLE I @@ -13296,6 +13384,14 @@ A670;N # COMBINING CYRILLIC TEN MILLIONS SIGN A671;N # COMBINING CYRILLIC HUNDRED MILLIONS SIGN A672;N # COMBINING CYRILLIC THOUSAND MILLIONS SIGN A673;N # SLAVONIC ASTERISK +A674;N # COMBINING CYRILLIC LETTER UKRAINIAN IE +A675;N # COMBINING CYRILLIC LETTER I +A676;N # COMBINING CYRILLIC LETTER YI +A677;N # COMBINING CYRILLIC LETTER U +A678;N # COMBINING CYRILLIC LETTER HARD SIGN +A679;N # COMBINING CYRILLIC LETTER YERU +A67A;N # COMBINING CYRILLIC LETTER SOFT SIGN +A67B;N # COMBINING CYRILLIC LETTER OMEGA A67C;N # COMBINING CYRILLIC KAVYKA A67D;N # COMBINING CYRILLIC PAYEROK A67E;N # CYRILLIC KAVYKA @@ -13324,6 +13420,7 @@ A694;N # CYRILLIC 
CAPITAL LETTER HWE A695;N # CYRILLIC SMALL LETTER HWE A696;N # CYRILLIC CAPITAL LETTER SHWE A697;N # CYRILLIC SMALL LETTER SHWE +A69F;N # COMBINING CYRILLIC LETTER IOTIFIED E A6A0;N # BAMUM LETTER A A6A1;N # BAMUM LETTER KA A6A2;N # BAMUM LETTER U @@ -13557,6 +13654,8 @@ A78D;N # LATIN CAPITAL LETTER TURNED H A78E;N # LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A790;N # LATIN CAPITAL LETTER N WITH DESCENDER A791;N # LATIN SMALL LETTER N WITH DESCENDER +A792;N # LATIN CAPITAL LETTER C WITH BAR +A793;N # LATIN SMALL LETTER C WITH BAR A7A0;N # LATIN CAPITAL LETTER G WITH OBLIQUE STROKE A7A1;N # LATIN SMALL LETTER G WITH OBLIQUE STROKE A7A2;N # LATIN CAPITAL LETTER K WITH OBLIQUE STROKE @@ -13567,6 +13666,9 @@ A7A6;N # LATIN CAPITAL LETTER R WITH OBLIQUE STROKE A7A7;N # LATIN SMALL LETTER R WITH OBLIQUE STROKE A7A8;N # LATIN CAPITAL LETTER S WITH OBLIQUE STROKE A7A9;N # LATIN SMALL LETTER S WITH OBLIQUE STROKE +A7AA;N # LATIN CAPITAL LETTER H WITH HOOK +A7F8;N # MODIFIER LETTER CAPITAL H WITH STROKE +A7F9;N # MODIFIER LETTER SMALL LIGATURE OE A7FA;N # LATIN LETTER SMALL CAPITAL TURNED M A7FB;N # LATIN EPIGRAPHIC LETTER REVERSED F A7FC;N # LATIN EPIGRAPHIC LETTER REVERSED P @@ -14180,6 +14282,29 @@ AADC;N # TAI VIET SYMBOL NUENG AADD;N # TAI VIET SYMBOL SAM AADE;N # TAI VIET SYMBOL HO HOI AADF;N # TAI VIET SYMBOL KOI KOI +AAE0;N # MEETEI MAYEK LETTER E +AAE1;N # MEETEI MAYEK LETTER O +AAE2;N # MEETEI MAYEK LETTER CHA +AAE3;N # MEETEI MAYEK LETTER NYA +AAE4;N # MEETEI MAYEK LETTER TTA +AAE5;N # MEETEI MAYEK LETTER TTHA +AAE6;N # MEETEI MAYEK LETTER DDA +AAE7;N # MEETEI MAYEK LETTER DDHA +AAE8;N # MEETEI MAYEK LETTER NNA +AAE9;N # MEETEI MAYEK LETTER SHA +AAEA;N # MEETEI MAYEK LETTER SSA +AAEB;N # MEETEI MAYEK VOWEL SIGN II +AAEC;N # MEETEI MAYEK VOWEL SIGN UU +AAED;N # MEETEI MAYEK VOWEL SIGN AAI +AAEE;N # MEETEI MAYEK VOWEL SIGN AU +AAEF;N # MEETEI MAYEK VOWEL SIGN AAU +AAF0;N # MEETEI MAYEK CHEIKHAN +AAF1;N # MEETEI MAYEK AHANG KHUDAM +AAF2;N # MEETEI MAYEK ANJI 
+AAF3;N # MEETEI MAYEK SYLLABLE REPETITION MARK +AAF4;N # MEETEI MAYEK WORD REPETITION MARK +AAF5;N # MEETEI MAYEK VOWEL SIGN VISARGA +AAF6;N # MEETEI MAYEK VIRAMA AB01;N # ETHIOPIC SYLLABLE TTHU AB02;N # ETHIOPIC SYLLABLE TTHI AB03;N # ETHIOPIC SYLLABLE TTHAA @@ -14647,7 +14772,8 @@ FA2A;W # CJK COMPATIBILITY IDEOGRAPH-FA2A FA2B;W # CJK COMPATIBILITY IDEOGRAPH-FA2B FA2C;W # CJK COMPATIBILITY IDEOGRAPH-FA2C FA2D;W # CJK COMPATIBILITY IDEOGRAPH-FA2D -FA2E..FA2F;W # <reserved-FA2E>..<reserved-FA2F> +FA2E;W # CJK COMPATIBILITY IDEOGRAPH-FA2E +FA2F;W # CJK COMPATIBILITY IDEOGRAPH-FA2F FA30;W # CJK COMPATIBILITY IDEOGRAPH-FA30 FA31;W # CJK COMPATIBILITY IDEOGRAPH-FA31 FA32;W # CJK COMPATIBILITY IDEOGRAPH-FA32 @@ -16881,6 +17007,64 @@ FFFD;A # REPLACEMENT CHARACTER 10938;N # LYDIAN LETTER NN 10939;N # LYDIAN LETTER C 1093F;N # LYDIAN TRIANGULAR MARK +10980;N # MEROITIC HIEROGLYPHIC LETTER A +10981;N # MEROITIC HIEROGLYPHIC LETTER E +10982;N # MEROITIC HIEROGLYPHIC LETTER I +10983;N # MEROITIC HIEROGLYPHIC LETTER O +10984;N # MEROITIC HIEROGLYPHIC LETTER YA +10985;N # MEROITIC HIEROGLYPHIC LETTER WA +10986;N # MEROITIC HIEROGLYPHIC LETTER BA +10987;N # MEROITIC HIEROGLYPHIC LETTER BA-2 +10988;N # MEROITIC HIEROGLYPHIC LETTER PA +10989;N # MEROITIC HIEROGLYPHIC LETTER MA +1098A;N # MEROITIC HIEROGLYPHIC LETTER NA +1098B;N # MEROITIC HIEROGLYPHIC LETTER NA-2 +1098C;N # MEROITIC HIEROGLYPHIC LETTER NE +1098D;N # MEROITIC HIEROGLYPHIC LETTER NE-2 +1098E;N # MEROITIC HIEROGLYPHIC LETTER RA +1098F;N # MEROITIC HIEROGLYPHIC LETTER RA-2 +10990;N # MEROITIC HIEROGLYPHIC LETTER LA +10991;N # MEROITIC HIEROGLYPHIC LETTER KHA +10992;N # MEROITIC HIEROGLYPHIC LETTER HHA +10993;N # MEROITIC HIEROGLYPHIC LETTER SA +10994;N # MEROITIC HIEROGLYPHIC LETTER SA-2 +10995;N # MEROITIC HIEROGLYPHIC LETTER SE +10996;N # MEROITIC HIEROGLYPHIC LETTER KA +10997;N # MEROITIC HIEROGLYPHIC LETTER QA +10998;N # MEROITIC HIEROGLYPHIC LETTER TA +10999;N # MEROITIC HIEROGLYPHIC LETTER TA-2 +1099A;N # 
MEROITIC HIEROGLYPHIC LETTER TE +1099B;N # MEROITIC HIEROGLYPHIC LETTER TE-2 +1099C;N # MEROITIC HIEROGLYPHIC LETTER TO +1099D;N # MEROITIC HIEROGLYPHIC LETTER DA +1099E;N # MEROITIC HIEROGLYPHIC SYMBOL VIDJ +1099F;N # MEROITIC HIEROGLYPHIC SYMBOL VIDJ-2 +109A0;N # MEROITIC CURSIVE LETTER A +109A1;N # MEROITIC CURSIVE LETTER E +109A2;N # MEROITIC CURSIVE LETTER I +109A3;N # MEROITIC CURSIVE LETTER O +109A4;N # MEROITIC CURSIVE LETTER YA +109A5;N # MEROITIC CURSIVE LETTER WA +109A6;N # MEROITIC CURSIVE LETTER BA +109A7;N # MEROITIC CURSIVE LETTER PA +109A8;N # MEROITIC CURSIVE LETTER MA +109A9;N # MEROITIC CURSIVE LETTER NA +109AA;N # MEROITIC CURSIVE LETTER NE +109AB;N # MEROITIC CURSIVE LETTER RA +109AC;N # MEROITIC CURSIVE LETTER LA +109AD;N # MEROITIC CURSIVE LETTER KHA +109AE;N # MEROITIC CURSIVE LETTER HHA +109AF;N # MEROITIC CURSIVE LETTER SA +109B0;N # MEROITIC CURSIVE LETTER ARCHAIC SA +109B1;N # MEROITIC CURSIVE LETTER SE +109B2;N # MEROITIC CURSIVE LETTER KA +109B3;N # MEROITIC CURSIVE LETTER QA +109B4;N # MEROITIC CURSIVE LETTER TA +109B5;N # MEROITIC CURSIVE LETTER TE +109B6;N # MEROITIC CURSIVE LETTER TO +109B7;N # MEROITIC CURSIVE LETTER DA +109BE;N # MEROITIC CURSIVE LOGOGRAM RMT +109BF;N # MEROITIC CURSIVE LOGOGRAM IMN 10A00;N # KHAROSHTHI LETTER A 10A01;N # KHAROSHTHI VOWEL SIGN I 10A02;N # KHAROSHTHI VOWEL SIGN U @@ -17374,6 +17558,257 @@ FFFD;A # REPLACEMENT CHARACTER 110BF;N # KAITHI DOUBLE SECTION MARK 110C0;N # KAITHI DANDA 110C1;N # KAITHI DOUBLE DANDA +110D0;N # SORA SOMPENG LETTER SAH +110D1;N # SORA SOMPENG LETTER TAH +110D2;N # SORA SOMPENG LETTER BAH +110D3;N # SORA SOMPENG LETTER CAH +110D4;N # SORA SOMPENG LETTER DAH +110D5;N # SORA SOMPENG LETTER GAH +110D6;N # SORA SOMPENG LETTER MAH +110D7;N # SORA SOMPENG LETTER NGAH +110D8;N # SORA SOMPENG LETTER LAH +110D9;N # SORA SOMPENG LETTER NAH +110DA;N # SORA SOMPENG LETTER VAH +110DB;N # SORA SOMPENG LETTER PAH +110DC;N # SORA SOMPENG LETTER YAH +110DD;N # SORA SOMPENG LETTER RAH +110DE;N 
# SORA SOMPENG LETTER HAH +110DF;N # SORA SOMPENG LETTER KAH +110E0;N # SORA SOMPENG LETTER JAH +110E1;N # SORA SOMPENG LETTER NYAH +110E2;N # SORA SOMPENG LETTER AH +110E3;N # SORA SOMPENG LETTER EEH +110E4;N # SORA SOMPENG LETTER IH +110E5;N # SORA SOMPENG LETTER UH +110E6;N # SORA SOMPENG LETTER OH +110E7;N # SORA SOMPENG LETTER EH +110E8;N # SORA SOMPENG LETTER MAE +110F0;N # SORA SOMPENG DIGIT ZERO +110F1;N # SORA SOMPENG DIGIT ONE +110F2;N # SORA SOMPENG DIGIT TWO +110F3;N # SORA SOMPENG DIGIT THREE +110F4;N # SORA SOMPENG DIGIT FOUR +110F5;N # SORA SOMPENG DIGIT FIVE +110F6;N # SORA SOMPENG DIGIT SIX +110F7;N # SORA SOMPENG DIGIT SEVEN +110F8;N # SORA SOMPENG DIGIT EIGHT +110F9;N # SORA SOMPENG DIGIT NINE +11100;N # CHAKMA SIGN CANDRABINDU +11101;N # CHAKMA SIGN ANUSVARA +11102;N # CHAKMA SIGN VISARGA +11103;N # CHAKMA LETTER AA +11104;N # CHAKMA LETTER I +11105;N # CHAKMA LETTER U +11106;N # CHAKMA LETTER E +11107;N # CHAKMA LETTER KAA +11108;N # CHAKMA LETTER KHAA +11109;N # CHAKMA LETTER GAA +1110A;N # CHAKMA LETTER GHAA +1110B;N # CHAKMA LETTER NGAA +1110C;N # CHAKMA LETTER CAA +1110D;N # CHAKMA LETTER CHAA +1110E;N # CHAKMA LETTER JAA +1110F;N # CHAKMA LETTER JHAA +11110;N # CHAKMA LETTER NYAA +11111;N # CHAKMA LETTER TTAA +11112;N # CHAKMA LETTER TTHAA +11113;N # CHAKMA LETTER DDAA +11114;N # CHAKMA LETTER DDHAA +11115;N # CHAKMA LETTER NNAA +11116;N # CHAKMA LETTER TAA +11117;N # CHAKMA LETTER THAA +11118;N # CHAKMA LETTER DAA +11119;N # CHAKMA LETTER DHAA +1111A;N # CHAKMA LETTER NAA +1111B;N # CHAKMA LETTER PAA +1111C;N # CHAKMA LETTER PHAA +1111D;N # CHAKMA LETTER BAA +1111E;N # CHAKMA LETTER BHAA +1111F;N # CHAKMA LETTER MAA +11120;N # CHAKMA LETTER YYAA +11121;N # CHAKMA LETTER YAA +11122;N # CHAKMA LETTER RAA +11123;N # CHAKMA LETTER LAA +11124;N # CHAKMA LETTER WAA +11125;N # CHAKMA LETTER SAA +11126;N # CHAKMA LETTER HAA +11127;N # CHAKMA VOWEL SIGN A +11128;N # CHAKMA VOWEL SIGN I +11129;N # CHAKMA VOWEL SIGN II +1112A;N # CHAKMA VOWEL SIGN U 
+1112B;N # CHAKMA VOWEL SIGN UU +1112C;N # CHAKMA VOWEL SIGN E +1112D;N # CHAKMA VOWEL SIGN AI +1112E;N # CHAKMA VOWEL SIGN O +1112F;N # CHAKMA VOWEL SIGN AU +11130;N # CHAKMA VOWEL SIGN OI +11131;N # CHAKMA O MARK +11132;N # CHAKMA AU MARK +11133;N # CHAKMA VIRAMA +11134;N # CHAKMA MAAYYAA +11136;N # CHAKMA DIGIT ZERO +11137;N # CHAKMA DIGIT ONE +11138;N # CHAKMA DIGIT TWO +11139;N # CHAKMA DIGIT THREE +1113A;N # CHAKMA DIGIT FOUR +1113B;N # CHAKMA DIGIT FIVE +1113C;N # CHAKMA DIGIT SIX +1113D;N # CHAKMA DIGIT SEVEN +1113E;N # CHAKMA DIGIT EIGHT +1113F;N # CHAKMA DIGIT NINE +11140;N # CHAKMA SECTION MARK +11141;N # CHAKMA DANDA +11142;N # CHAKMA DOUBLE DANDA +11143;N # CHAKMA QUESTION MARK +11180;N # SHARADA SIGN CANDRABINDU +11181;N # SHARADA SIGN ANUSVARA +11182;N # SHARADA SIGN VISARGA +11183;N # SHARADA LETTER A +11184;N # SHARADA LETTER AA +11185;N # SHARADA LETTER I +11186;N # SHARADA LETTER II +11187;N # SHARADA LETTER U +11188;N # SHARADA LETTER UU +11189;N # SHARADA LETTER VOCALIC R +1118A;N # SHARADA LETTER VOCALIC RR +1118B;N # SHARADA LETTER VOCALIC L +1118C;N # SHARADA LETTER VOCALIC LL +1118D;N # SHARADA LETTER E +1118E;N # SHARADA LETTER AI +1118F;N # SHARADA LETTER O +11190;N # SHARADA LETTER AU +11191;N # SHARADA LETTER KA +11192;N # SHARADA LETTER KHA +11193;N # SHARADA LETTER GA +11194;N # SHARADA LETTER GHA +11195;N # SHARADA LETTER NGA +11196;N # SHARADA LETTER CA +11197;N # SHARADA LETTER CHA +11198;N # SHARADA LETTER JA +11199;N # SHARADA LETTER JHA +1119A;N # SHARADA LETTER NYA +1119B;N # SHARADA LETTER TTA +1119C;N # SHARADA LETTER TTHA +1119D;N # SHARADA LETTER DDA +1119E;N # SHARADA LETTER DDHA +1119F;N # SHARADA LETTER NNA +111A0;N # SHARADA LETTER TA +111A1;N # SHARADA LETTER THA +111A2;N # SHARADA LETTER DA +111A3;N # SHARADA LETTER DHA +111A4;N # SHARADA LETTER NA +111A5;N # SHARADA LETTER PA +111A6;N # SHARADA LETTER PHA +111A7;N # SHARADA LETTER BA +111A8;N # SHARADA LETTER BHA +111A9;N # SHARADA LETTER MA +111AA;N # SHARADA LETTER 
YA +111AB;N # SHARADA LETTER RA +111AC;N # SHARADA LETTER LA +111AD;N # SHARADA LETTER LLA +111AE;N # SHARADA LETTER VA +111AF;N # SHARADA LETTER SHA +111B0;N # SHARADA LETTER SSA +111B1;N # SHARADA LETTER SA +111B2;N # SHARADA LETTER HA +111B3;N # SHARADA VOWEL SIGN AA +111B4;N # SHARADA VOWEL SIGN I +111B5;N # SHARADA VOWEL SIGN II +111B6;N # SHARADA VOWEL SIGN U +111B7;N # SHARADA VOWEL SIGN UU +111B8;N # SHARADA VOWEL SIGN VOCALIC R +111B9;N # SHARADA VOWEL SIGN VOCALIC RR +111BA;N # SHARADA VOWEL SIGN VOCALIC L +111BB;N # SHARADA VOWEL SIGN VOCALIC LL +111BC;N # SHARADA VOWEL SIGN E +111BD;N # SHARADA VOWEL SIGN AI +111BE;N # SHARADA VOWEL SIGN O +111BF;N # SHARADA VOWEL SIGN AU +111C0;N # SHARADA SIGN VIRAMA +111C1;N # SHARADA SIGN AVAGRAHA +111C2;N # SHARADA SIGN JIHVAMULIYA +111C3;N # SHARADA SIGN UPADHMANIYA +111C4;N # SHARADA OM +111C5;N # SHARADA DANDA +111C6;N # SHARADA DOUBLE DANDA +111C7;N # SHARADA ABBREVIATION SIGN +111C8;N # SHARADA SEPARATOR +111D0;N # SHARADA DIGIT ZERO +111D1;N # SHARADA DIGIT ONE +111D2;N # SHARADA DIGIT TWO +111D3;N # SHARADA DIGIT THREE +111D4;N # SHARADA DIGIT FOUR +111D5;N # SHARADA DIGIT FIVE +111D6;N # SHARADA DIGIT SIX +111D7;N # SHARADA DIGIT SEVEN +111D8;N # SHARADA DIGIT EIGHT +111D9;N # SHARADA DIGIT NINE +11680;N # TAKRI LETTER A +11681;N # TAKRI LETTER AA +11682;N # TAKRI LETTER I +11683;N # TAKRI LETTER II +11684;N # TAKRI LETTER U +11685;N # TAKRI LETTER UU +11686;N # TAKRI LETTER E +11687;N # TAKRI LETTER AI +11688;N # TAKRI LETTER O +11689;N # TAKRI LETTER AU +1168A;N # TAKRI LETTER KA +1168B;N # TAKRI LETTER KHA +1168C;N # TAKRI LETTER GA +1168D;N # TAKRI LETTER GHA +1168E;N # TAKRI LETTER NGA +1168F;N # TAKRI LETTER CA +11690;N # TAKRI LETTER CHA +11691;N # TAKRI LETTER JA +11692;N # TAKRI LETTER JHA +11693;N # TAKRI LETTER NYA +11694;N # TAKRI LETTER TTA +11695;N # TAKRI LETTER TTHA +11696;N # TAKRI LETTER DDA +11697;N # TAKRI LETTER DDHA +11698;N # TAKRI LETTER NNA +11699;N # TAKRI LETTER TA +1169A;N # 
TAKRI LETTER THA +1169B;N # TAKRI LETTER DA +1169C;N # TAKRI LETTER DHA +1169D;N # TAKRI LETTER NA +1169E;N # TAKRI LETTER PA +1169F;N # TAKRI LETTER PHA +116A0;N # TAKRI LETTER BA +116A1;N # TAKRI LETTER BHA +116A2;N # TAKRI LETTER MA +116A3;N # TAKRI LETTER YA +116A4;N # TAKRI LETTER RA +116A5;N # TAKRI LETTER LA +116A6;N # TAKRI LETTER VA +116A7;N # TAKRI LETTER SHA +116A8;N # TAKRI LETTER SA +116A9;N # TAKRI LETTER HA +116AA;N # TAKRI LETTER RRA +116AB;N # TAKRI SIGN ANUSVARA +116AC;N # TAKRI SIGN VISARGA +116AD;N # TAKRI VOWEL SIGN AA +116AE;N # TAKRI VOWEL SIGN I +116AF;N # TAKRI VOWEL SIGN II +116B0;N # TAKRI VOWEL SIGN U +116B1;N # TAKRI VOWEL SIGN UU +116B2;N # TAKRI VOWEL SIGN E +116B3;N # TAKRI VOWEL SIGN AI +116B4;N # TAKRI VOWEL SIGN O +116B5;N # TAKRI VOWEL SIGN AU +116B6;N # TAKRI SIGN VIRAMA +116B7;N # TAKRI SIGN NUKTA +116C0;N # TAKRI DIGIT ZERO +116C1;N # TAKRI DIGIT ONE +116C2;N # TAKRI DIGIT TWO +116C3;N # TAKRI DIGIT THREE +116C4;N # TAKRI DIGIT FOUR +116C5;N # TAKRI DIGIT FIVE +116C6;N # TAKRI DIGIT SIX +116C7;N # TAKRI DIGIT SEVEN +116C8;N # TAKRI DIGIT EIGHT +116C9;N # TAKRI DIGIT NINE 12000;N # CUNEIFORM SIGN A 12001;N # CUNEIFORM SIGN A TIMES A 12002;N # CUNEIFORM SIGN A TIMES BAD @@ -19996,6 +20431,139 @@ FFFD;A # REPLACEMENT CHARACTER 16A36;N # BAMUM LETTER PHASE-F KPA 16A37;N # BAMUM LETTER PHASE-F SAMBA 16A38;N # BAMUM LETTER PHASE-F VUEQ +16F00;N # MIAO LETTER PA +16F01;N # MIAO LETTER BA +16F02;N # MIAO LETTER YI PA +16F03;N # MIAO LETTER PLA +16F04;N # MIAO LETTER MA +16F05;N # MIAO LETTER MHA +16F06;N # MIAO LETTER ARCHAIC MA +16F07;N # MIAO LETTER FA +16F08;N # MIAO LETTER VA +16F09;N # MIAO LETTER VFA +16F0A;N # MIAO LETTER TA +16F0B;N # MIAO LETTER DA +16F0C;N # MIAO LETTER YI TTA +16F0D;N # MIAO LETTER YI TA +16F0E;N # MIAO LETTER TTA +16F0F;N # MIAO LETTER DDA +16F10;N # MIAO LETTER NA +16F11;N # MIAO LETTER NHA +16F12;N # MIAO LETTER YI NNA +16F13;N # MIAO LETTER ARCHAIC NA +16F14;N # MIAO LETTER NNA +16F15;N # MIAO LETTER 
NNHA +16F16;N # MIAO LETTER LA +16F17;N # MIAO LETTER LYA +16F18;N # MIAO LETTER LHA +16F19;N # MIAO LETTER LHYA +16F1A;N # MIAO LETTER TLHA +16F1B;N # MIAO LETTER DLHA +16F1C;N # MIAO LETTER TLHYA +16F1D;N # MIAO LETTER DLHYA +16F1E;N # MIAO LETTER KA +16F1F;N # MIAO LETTER GA +16F20;N # MIAO LETTER YI KA +16F21;N # MIAO LETTER QA +16F22;N # MIAO LETTER QGA +16F23;N # MIAO LETTER NGA +16F24;N # MIAO LETTER NGHA +16F25;N # MIAO LETTER ARCHAIC NGA +16F26;N # MIAO LETTER HA +16F27;N # MIAO LETTER XA +16F28;N # MIAO LETTER GHA +16F29;N # MIAO LETTER GHHA +16F2A;N # MIAO LETTER TSSA +16F2B;N # MIAO LETTER DZZA +16F2C;N # MIAO LETTER NYA +16F2D;N # MIAO LETTER NYHA +16F2E;N # MIAO LETTER TSHA +16F2F;N # MIAO LETTER DZHA +16F30;N # MIAO LETTER YI TSHA +16F31;N # MIAO LETTER YI DZHA +16F32;N # MIAO LETTER REFORMED TSHA +16F33;N # MIAO LETTER SHA +16F34;N # MIAO LETTER SSA +16F35;N # MIAO LETTER ZHA +16F36;N # MIAO LETTER ZSHA +16F37;N # MIAO LETTER TSA +16F38;N # MIAO LETTER DZA +16F39;N # MIAO LETTER YI TSA +16F3A;N # MIAO LETTER SA +16F3B;N # MIAO LETTER ZA +16F3C;N # MIAO LETTER ZSA +16F3D;N # MIAO LETTER ZZA +16F3E;N # MIAO LETTER ZZSA +16F3F;N # MIAO LETTER ARCHAIC ZZA +16F40;N # MIAO LETTER ZZYA +16F41;N # MIAO LETTER ZZSYA +16F42;N # MIAO LETTER WA +16F43;N # MIAO LETTER AH +16F44;N # MIAO LETTER HHA +16F50;N # MIAO LETTER NASALIZATION +16F51;N # MIAO SIGN ASPIRATION +16F52;N # MIAO SIGN REFORMED VOICING +16F53;N # MIAO SIGN REFORMED ASPIRATION +16F54;N # MIAO VOWEL SIGN A +16F55;N # MIAO VOWEL SIGN AA +16F56;N # MIAO VOWEL SIGN AHH +16F57;N # MIAO VOWEL SIGN AN +16F58;N # MIAO VOWEL SIGN ANG +16F59;N # MIAO VOWEL SIGN O +16F5A;N # MIAO VOWEL SIGN OO +16F5B;N # MIAO VOWEL SIGN WO +16F5C;N # MIAO VOWEL SIGN W +16F5D;N # MIAO VOWEL SIGN E +16F5E;N # MIAO VOWEL SIGN EN +16F5F;N # MIAO VOWEL SIGN ENG +16F60;N # MIAO VOWEL SIGN OEY +16F61;N # MIAO VOWEL SIGN I +16F62;N # MIAO VOWEL SIGN IA +16F63;N # MIAO VOWEL SIGN IAN +16F64;N # MIAO VOWEL SIGN IANG +16F65;N # MIAO 
VOWEL SIGN IO +16F66;N # MIAO VOWEL SIGN IE +16F67;N # MIAO VOWEL SIGN II +16F68;N # MIAO VOWEL SIGN IU +16F69;N # MIAO VOWEL SIGN ING +16F6A;N # MIAO VOWEL SIGN U +16F6B;N # MIAO VOWEL SIGN UA +16F6C;N # MIAO VOWEL SIGN UAN +16F6D;N # MIAO VOWEL SIGN UANG +16F6E;N # MIAO VOWEL SIGN UU +16F6F;N # MIAO VOWEL SIGN UEI +16F70;N # MIAO VOWEL SIGN UNG +16F71;N # MIAO VOWEL SIGN Y +16F72;N # MIAO VOWEL SIGN YI +16F73;N # MIAO VOWEL SIGN AE +16F74;N # MIAO VOWEL SIGN AEE +16F75;N # MIAO VOWEL SIGN ERR +16F76;N # MIAO VOWEL SIGN ROUNDED ERR +16F77;N # MIAO VOWEL SIGN ER +16F78;N # MIAO VOWEL SIGN ROUNDED ER +16F79;N # MIAO VOWEL SIGN AI +16F7A;N # MIAO VOWEL SIGN EI +16F7B;N # MIAO VOWEL SIGN AU +16F7C;N # MIAO VOWEL SIGN OU +16F7D;N # MIAO VOWEL SIGN N +16F7E;N # MIAO VOWEL SIGN NG +16F8F;N # MIAO TONE RIGHT +16F90;N # MIAO TONE TOP RIGHT +16F91;N # MIAO TONE ABOVE +16F92;N # MIAO TONE BELOW +16F93;N # MIAO LETTER TONE-2 +16F94;N # MIAO LETTER TONE-3 +16F95;N # MIAO LETTER TONE-4 +16F96;N # MIAO LETTER TONE-5 +16F97;N # MIAO LETTER TONE-6 +16F98;N # MIAO LETTER TONE-7 +16F99;N # MIAO LETTER TONE-8 +16F9A;N # MIAO LETTER REFORMED TONE-1 +16F9B;N # MIAO LETTER REFORMED TONE-2 +16F9C;N # MIAO LETTER REFORMED TONE-4 +16F9D;N # MIAO LETTER REFORMED TONE-5 +16F9E;N # MIAO LETTER REFORMED TONE-6 +16F9F;N # MIAO LETTER REFORMED TONE-8 1B000;W # KATAKANA LETTER ARCHAIC E 1B001;W # HIRAGANA LETTER ARCHAIC YE 1D000;N # BYZANTINE MUSICAL SYMBOL PSILI @@ -21635,6 +22203,149 @@ FFFD;A # REPLACEMENT CHARACTER 1D7FD;N # MATHEMATICAL MONOSPACE DIGIT SEVEN 1D7FE;N # MATHEMATICAL MONOSPACE DIGIT EIGHT 1D7FF;N # MATHEMATICAL MONOSPACE DIGIT NINE +1EE00;N # ARABIC MATHEMATICAL ALEF +1EE01;N # ARABIC MATHEMATICAL BEH +1EE02;N # ARABIC MATHEMATICAL JEEM +1EE03;N # ARABIC MATHEMATICAL DAL +1EE05;N # ARABIC MATHEMATICAL WAW +1EE06;N # ARABIC MATHEMATICAL ZAIN +1EE07;N # ARABIC MATHEMATICAL HAH +1EE08;N # ARABIC MATHEMATICAL TAH +1EE09;N # ARABIC MATHEMATICAL YEH +1EE0A;N # ARABIC MATHEMATICAL KAF 
+1EE0B;N # ARABIC MATHEMATICAL LAM +1EE0C;N # ARABIC MATHEMATICAL MEEM +1EE0D;N # ARABIC MATHEMATICAL NOON +1EE0E;N # ARABIC MATHEMATICAL SEEN +1EE0F;N # ARABIC MATHEMATICAL AIN +1EE10;N # ARABIC MATHEMATICAL FEH +1EE11;N # ARABIC MATHEMATICAL SAD +1EE12;N # ARABIC MATHEMATICAL QAF +1EE13;N # ARABIC MATHEMATICAL REH +1EE14;N # ARABIC MATHEMATICAL SHEEN +1EE15;N # ARABIC MATHEMATICAL TEH +1EE16;N # ARABIC MATHEMATICAL THEH +1EE17;N # ARABIC MATHEMATICAL KHAH +1EE18;N # ARABIC MATHEMATICAL THAL +1EE19;N # ARABIC MATHEMATICAL DAD +1EE1A;N # ARABIC MATHEMATICAL ZAH +1EE1B;N # ARABIC MATHEMATICAL GHAIN +1EE1C;N # ARABIC MATHEMATICAL DOTLESS BEH +1EE1D;N # ARABIC MATHEMATICAL DOTLESS NOON +1EE1E;N # ARABIC MATHEMATICAL DOTLESS FEH +1EE1F;N # ARABIC MATHEMATICAL DOTLESS QAF +1EE21;N # ARABIC MATHEMATICAL INITIAL BEH +1EE22;N # ARABIC MATHEMATICAL INITIAL JEEM +1EE24;N # ARABIC MATHEMATICAL INITIAL HEH +1EE27;N # ARABIC MATHEMATICAL INITIAL HAH +1EE29;N # ARABIC MATHEMATICAL INITIAL YEH +1EE2A;N # ARABIC MATHEMATICAL INITIAL KAF +1EE2B;N # ARABIC MATHEMATICAL INITIAL LAM +1EE2C;N # ARABIC MATHEMATICAL INITIAL MEEM +1EE2D;N # ARABIC MATHEMATICAL INITIAL NOON +1EE2E;N # ARABIC MATHEMATICAL INITIAL SEEN +1EE2F;N # ARABIC MATHEMATICAL INITIAL AIN +1EE30;N # ARABIC MATHEMATICAL INITIAL FEH +1EE31;N # ARABIC MATHEMATICAL INITIAL SAD +1EE32;N # ARABIC MATHEMATICAL INITIAL QAF +1EE34;N # ARABIC MATHEMATICAL INITIAL SHEEN +1EE35;N # ARABIC MATHEMATICAL INITIAL TEH +1EE36;N # ARABIC MATHEMATICAL INITIAL THEH +1EE37;N # ARABIC MATHEMATICAL INITIAL KHAH +1EE39;N # ARABIC MATHEMATICAL INITIAL DAD +1EE3B;N # ARABIC MATHEMATICAL INITIAL GHAIN +1EE42;N # ARABIC MATHEMATICAL TAILED JEEM +1EE47;N # ARABIC MATHEMATICAL TAILED HAH +1EE49;N # ARABIC MATHEMATICAL TAILED YEH +1EE4B;N # ARABIC MATHEMATICAL TAILED LAM +1EE4D;N # ARABIC MATHEMATICAL TAILED NOON +1EE4E;N # ARABIC MATHEMATICAL TAILED SEEN +1EE4F;N # ARABIC MATHEMATICAL TAILED AIN +1EE51;N # ARABIC MATHEMATICAL TAILED SAD +1EE52;N # 
ARABIC MATHEMATICAL TAILED QAF +1EE54;N # ARABIC MATHEMATICAL TAILED SHEEN +1EE57;N # ARABIC MATHEMATICAL TAILED KHAH +1EE59;N # ARABIC MATHEMATICAL TAILED DAD +1EE5B;N # ARABIC MATHEMATICAL TAILED GHAIN +1EE5D;N # ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F;N # ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61;N # ARABIC MATHEMATICAL STRETCHED BEH +1EE62;N # ARABIC MATHEMATICAL STRETCHED JEEM +1EE64;N # ARABIC MATHEMATICAL STRETCHED HEH +1EE67;N # ARABIC MATHEMATICAL STRETCHED HAH +1EE68;N # ARABIC MATHEMATICAL STRETCHED TAH +1EE69;N # ARABIC MATHEMATICAL STRETCHED YEH +1EE6A;N # ARABIC MATHEMATICAL STRETCHED KAF +1EE6C;N # ARABIC MATHEMATICAL STRETCHED MEEM +1EE6D;N # ARABIC MATHEMATICAL STRETCHED NOON +1EE6E;N # ARABIC MATHEMATICAL STRETCHED SEEN +1EE6F;N # ARABIC MATHEMATICAL STRETCHED AIN +1EE70;N # ARABIC MATHEMATICAL STRETCHED FEH +1EE71;N # ARABIC MATHEMATICAL STRETCHED SAD +1EE72;N # ARABIC MATHEMATICAL STRETCHED QAF +1EE74;N # ARABIC MATHEMATICAL STRETCHED SHEEN +1EE75;N # ARABIC MATHEMATICAL STRETCHED TEH +1EE76;N # ARABIC MATHEMATICAL STRETCHED THEH +1EE77;N # ARABIC MATHEMATICAL STRETCHED KHAH +1EE79;N # ARABIC MATHEMATICAL STRETCHED DAD +1EE7A;N # ARABIC MATHEMATICAL STRETCHED ZAH +1EE7B;N # ARABIC MATHEMATICAL STRETCHED GHAIN +1EE7C;N # ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E;N # ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80;N # ARABIC MATHEMATICAL LOOPED ALEF +1EE81;N # ARABIC MATHEMATICAL LOOPED BEH +1EE82;N # ARABIC MATHEMATICAL LOOPED JEEM +1EE83;N # ARABIC MATHEMATICAL LOOPED DAL +1EE84;N # ARABIC MATHEMATICAL LOOPED HEH +1EE85;N # ARABIC MATHEMATICAL LOOPED WAW +1EE86;N # ARABIC MATHEMATICAL LOOPED ZAIN +1EE87;N # ARABIC MATHEMATICAL LOOPED HAH +1EE88;N # ARABIC MATHEMATICAL LOOPED TAH +1EE89;N # ARABIC MATHEMATICAL LOOPED YEH +1EE8B;N # ARABIC MATHEMATICAL LOOPED LAM +1EE8C;N # ARABIC MATHEMATICAL LOOPED MEEM +1EE8D;N # ARABIC MATHEMATICAL LOOPED NOON +1EE8E;N # ARABIC MATHEMATICAL LOOPED SEEN +1EE8F;N # ARABIC MATHEMATICAL LOOPED 
AIN +1EE90;N # ARABIC MATHEMATICAL LOOPED FEH +1EE91;N # ARABIC MATHEMATICAL LOOPED SAD +1EE92;N # ARABIC MATHEMATICAL LOOPED QAF +1EE93;N # ARABIC MATHEMATICAL LOOPED REH +1EE94;N # ARABIC MATHEMATICAL LOOPED SHEEN +1EE95;N # ARABIC MATHEMATICAL LOOPED TEH +1EE96;N # ARABIC MATHEMATICAL LOOPED THEH +1EE97;N # ARABIC MATHEMATICAL LOOPED KHAH +1EE98;N # ARABIC MATHEMATICAL LOOPED THAL +1EE99;N # ARABIC MATHEMATICAL LOOPED DAD +1EE9A;N # ARABIC MATHEMATICAL LOOPED ZAH +1EE9B;N # ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1;N # ARABIC MATHEMATICAL DOUBLE-STRUCK BEH +1EEA2;N # ARABIC MATHEMATICAL DOUBLE-STRUCK JEEM +1EEA3;N # ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5;N # ARABIC MATHEMATICAL DOUBLE-STRUCK WAW +1EEA6;N # ARABIC MATHEMATICAL DOUBLE-STRUCK ZAIN +1EEA7;N # ARABIC MATHEMATICAL DOUBLE-STRUCK HAH +1EEA8;N # ARABIC MATHEMATICAL DOUBLE-STRUCK TAH +1EEA9;N # ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB;N # ARABIC MATHEMATICAL DOUBLE-STRUCK LAM +1EEAC;N # ARABIC MATHEMATICAL DOUBLE-STRUCK MEEM +1EEAD;N # ARABIC MATHEMATICAL DOUBLE-STRUCK NOON +1EEAE;N # ARABIC MATHEMATICAL DOUBLE-STRUCK SEEN +1EEAF;N # ARABIC MATHEMATICAL DOUBLE-STRUCK AIN +1EEB0;N # ARABIC MATHEMATICAL DOUBLE-STRUCK FEH +1EEB1;N # ARABIC MATHEMATICAL DOUBLE-STRUCK SAD +1EEB2;N # ARABIC MATHEMATICAL DOUBLE-STRUCK QAF +1EEB3;N # ARABIC MATHEMATICAL DOUBLE-STRUCK REH +1EEB4;N # ARABIC MATHEMATICAL DOUBLE-STRUCK SHEEN +1EEB5;N # ARABIC MATHEMATICAL DOUBLE-STRUCK TEH +1EEB6;N # ARABIC MATHEMATICAL DOUBLE-STRUCK THEH +1EEB7;N # ARABIC MATHEMATICAL DOUBLE-STRUCK KHAH +1EEB8;N # ARABIC MATHEMATICAL DOUBLE-STRUCK THAL +1EEB9;N # ARABIC MATHEMATICAL DOUBLE-STRUCK DAD +1EEBA;N # ARABIC MATHEMATICAL DOUBLE-STRUCK ZAH +1EEBB;N # ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN +1EEF0;N # ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL +1EEF1;N # ARABIC MATHEMATICAL OPERATOR HAH WITH DAL 1F000;N # MAHJONG TILE EAST WIND 1F001;N # MAHJONG TILE SOUTH WIND 1F002;N # MAHJONG TILE WEST WIND @@ -21938,6 +22649,8 @@ 
FFFD;A # REPLACEMENT CHARACTER 1F167;A # NEGATIVE CIRCLED LATIN CAPITAL LETTER X 1F168;A # NEGATIVE CIRCLED LATIN CAPITAL LETTER Y 1F169;A # NEGATIVE CIRCLED LATIN CAPITAL LETTER Z +1F16A;N # RAISED MC SIGN +1F16B;N # RAISED MD SIGN 1F170;A # NEGATIVE SQUARED LATIN CAPITAL LETTER A 1F171;A # NEGATIVE SQUARED LATIN CAPITAL LETTER B 1F172;A # NEGATIVE SQUARED LATIN CAPITAL LETTER C @@ -22564,6 +23277,10 @@ FFFD;A # REPLACEMENT CHARACTER 1F53B;N # DOWN-POINTING RED TRIANGLE 1F53C;N # UP-POINTING SMALL RED TRIANGLE 1F53D;N # DOWN-POINTING SMALL RED TRIANGLE +1F540;N # CIRCLED CROSS POMMEE +1F541;N # CROSS POMMEE WITH HALF-CIRCLE BELOW +1F542;N # CROSS POMMEE +1F543;N # NOTCHED LEFT SEMICIRCLE WITH THREE DOTS 1F550;N # CLOCK FACE ONE OCLOCK 1F551;N # CLOCK FACE TWO OCLOCK 1F552;N # CLOCK FACE THREE OCLOCK @@ -22593,6 +23310,7 @@ FFFD;A # REPLACEMENT CHARACTER 1F5FD;N # STATUE OF LIBERTY 1F5FE;N # SILHOUETTE OF JAPAN 1F5FF;N # MOYAI +1F600;N # GRINNING FACE 1F601;N # GRINNING FACE WITH SMILING EYES 1F602;N # FACE WITH TEARS OF JOY 1F603;N # SMILING FACE WITH OPEN MOUTH @@ -22609,30 +23327,42 @@ FFFD;A # REPLACEMENT CHARACTER 1F60E;N # SMILING FACE WITH SUNGLASSES 1F60F;N # SMIRKING FACE 1F610;N # NEUTRAL FACE +1F611;N # EXPRESSIONLESS FACE 1F612;N # UNAMUSED FACE 1F613;N # FACE WITH COLD SWEAT 1F614;N # PENSIVE FACE +1F615;N # CONFUSED FACE 1F616;N # CONFOUNDED FACE +1F617;N # KISSING FACE 1F618;N # FACE THROWING A KISS +1F619;N # KISSING FACE WITH SMILING EYES 1F61A;N # KISSING FACE WITH CLOSED EYES +1F61B;N # FACE WITH STUCK-OUT TONGUE 1F61C;N # FACE WITH STUCK-OUT TONGUE AND WINKING EYE 1F61D;N # FACE WITH STUCK-OUT TONGUE AND TIGHTLY-CLOSED EYES 1F61E;N # DISAPPOINTED FACE +1F61F;N # WORRIED FACE 1F620;N # ANGRY FACE 1F621;N # POUTING FACE 1F622;N # CRYING FACE 1F623;N # PERSEVERING FACE 1F624;N # FACE WITH LOOK OF TRIUMPH 1F625;N # DISAPPOINTED BUT RELIEVED FACE +1F626;N # FROWNING FACE WITH OPEN MOUTH +1F627;N # ANGUISHED FACE 1F628;N # FEARFUL FACE 1F629;N # WEARY 
FACE 1F62A;N # SLEEPY FACE 1F62B;N # TIRED FACE +1F62C;N # GRIMACING FACE 1F62D;N # LOUDLY CRYING FACE +1F62E;N # FACE WITH OPEN MOUTH +1F62F;N # HUSHED FACE 1F630;N # FACE WITH OPEN MOUTH AND COLD SWEAT 1F631;N # FACE SCREAMING IN FEAR 1F632;N # ASTONISHED FACE 1F633;N # FLUSHED FACE +1F634;N # SLEEPING FACE 1F635;N # DIZZY FACE 1F636;N # FACE WITHOUT MOUTH 1F637;N # FACE WITH MEDICAL MASK @@ -22845,7 +23575,7 @@ FFFD;A # REPLACEMENT CHARACTER 20000..2A6D6;W # <CJK Ideograph Extension B, First>..<CJK Ideograph Extension B, Last> 2A6D7..2A6FF;W # <reserved-2A6D7>..<reserved-2A6FF> 2A700..2B734;W # <CJK Ideograph Extension C, First>..<CJK Ideograph Extension C, Last> -2B735..2F73F;W # <reserved-2B735>..<reserved-2F73F> +2B735..2B73F;W # <reserved-2B735>..<reserved-2B73F> 2B740..2B81D;W # <CJK Ideograph Extension D, First>..<CJK Ideograph Extension D, Last> 2B81E..2F7FF;W # <reserved-2B735>..<reserved-2F7FF> 2F800;W # CJK COMPATIBILITY IDEOGRAPH-2F800 diff --git a/lib/unicore/EmojiSources.txt b/lib/unicore/EmojiSources.txt index 6f7161e296..c360c5e253 100644 --- a/lib/unicore/EmojiSources.txt +++ b/lib/unicore/EmojiSources.txt @@ -1,8 +1,8 @@ -# EmojiSources-6.0.0.txt -# Date: 2010-04-24, 00:00:00 GMT [MS] +# EmojiSources-6.1.0.txt +# Date: 2011-08-30, 23:30:00 GMT [MS, KW] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ # @@ -15,6 +15,9 @@ # Note: It is possible that future versions of this file will include # additional data columns providing mappings for additional vendors. # +# Created for Unicode 6.0 by Marcus Scherer. +# Updated for Unicode 6.1 by Ken Whistler. +# # Format: Semicolon-delimited file with a fixed number of fields. # The number of fields may increase in the future. 
# diff --git a/lib/unicore/HangulSyllableType.txt b/lib/unicore/HangulSyllableType.txt index eaafd20b79..8b457daaba 100644 --- a/lib/unicore/HangulSyllableType.txt +++ b/lib/unicore/HangulSyllableType.txt @@ -1,8 +1,8 @@ -# HangulSyllableType-6.0.0.txt -# Date: 2010-05-18, 00:49:27 GMT [MD] +# HangulSyllableType-6.1.0.txt +# Date: 2011-08-25, 00:02:18 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ diff --git a/lib/unicore/Index.txt b/lib/unicore/Index.txt index 4dbd2ce602..50e1c9d5b9 100644 --- a/lib/unicore/Index.txt +++ b/lib/unicore/Index.txt @@ -249,6 +249,7 @@ Arabic Contextual Form Glyphs FB50 Arabic Contextual Form Glyphs FE80 ARABIC DATE SEPARATOR 060D ARABIC DECIMAL SEPARATOR 066B +Arabic Extended-A 08A0 Arabic Extensions 0671 ARABIC FULL STOP 06D4 Arabic Harakat 064B @@ -261,6 +262,7 @@ Arabic Letters, Extended 0671 Arabic Letters, Extended 0750 ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM FDFD Arabic Ligatures FBEA +Arabic Mathematical Alphabetic Symbols 1EE00 arabic phrase separator 066C Arabic Points, Glyphs for Spacing Forms of FE70 Arabic Presentation Forms-A FB50 @@ -769,7 +771,7 @@ cartesian product 00D7 CASTLE 26EB CAT 1F408 CAT FACE 1F431 -Cat faces 1F638 +Cat Faces 1F638 CAUTION SIGN 2621 cd 1F4BF CEDI SIGN 20B5 @@ -792,6 +794,7 @@ CENTRELINE LOW LINE FE4E CENTRELINE OVERLINE FE4A cgj 034F CHAINS 26D3 +Chakma 11100 Cham AA00 chandrakkala, malayalam 0D4D CHARACTER INTRODUCER, SINGLE 009A @@ -1224,6 +1227,7 @@ Cross Dingbats 2719 CROSS MARK 274C CROSS OF JERUSALEM 2629 CROSS OF LORRAINE 2628 +CROSS POMMEE 1F542 cross ratio 211E cross, constantine's 2627 CROSS, DOTTED 205C @@ -1339,8 +1343,10 @@ DASH, CIRCLED 229D DASH, EM 2014 DASH, EN 2013 DASH, FIGURE 2012 +dash, omission 2E3A dash, quotation 2015 DASH, SWUNG 2053 +DASH, TWO-EM 2E3A DASH, WAVE 
301C DASH, WAVY 3030 DASHED LOW LINE FE4D @@ -2227,6 +2233,7 @@ Greek, Diacritics for 0342 Greek, Precomposed Polytonic 1F00 GROUND, EARTH 23DA group lock 21F0 +group select (ISO 9995-7) 21E8 group separator 001D GROUP SEPARATOR, SYMBOL FOR 241D GUARANI SIGN 20B2 @@ -2987,6 +2994,7 @@ LESS-THAN, VERY MUCH 22D8 Letterlike Mathematical Symbols, Hebrew 2135 Letterlike Symbols 2100 level 2 lock 21EB +level 2 select (ISO 9995-7) 21E7 level 3 lock 21EF level 3 select 21EE LEZH, LATIN SMALL LETTER 026E @@ -3141,6 +3149,7 @@ MACRON, COMBINING DOUBLE 035E MACRON, MODIFIER LETTER 02C9 MACRON, MODIFIER LETTER LOW 02CD macron, spacing 00AF +Magnetic Ink Character Recognition, MICR 2446 MAGNIFYING GLASS, LEFT-POINTING 1F50D MAGNIFYING GLASS, RIGHT-POINTING 1F50E Mahjong Tiles 1F000 @@ -3181,6 +3190,8 @@ marker, line 2319 Markers, Go 2686 Marks, Combining Diacritical 0300 Marks, Combining Half FE20 +marque de commerce 1F16A +marque deposee 1F16B MARRIAGE SYMBOL 26AD mars 2642 MASCULINE ORDINAL INDICATOR 00BA @@ -3222,6 +3233,8 @@ Mathematical Symbols, Script 1D49C Mathematical Symbols-A, Miscellaneous 27C0 Mathematical Symbols-B, Miscellaneous 2980 MATRIX, HERMITIAN CONJUGATE 22B9 +MC SIGN, RAISED 1F16A +MD SIGN, RAISED 1F16B MEASURED ANGLE 2221 Measured Angles, Angles and 299B MEASURED BY 225E @@ -3238,6 +3251,7 @@ MEDIUM WHITE CIRCLE 26AA MEDIUM, END OF 0019 MEDIUM, SYMBOL FOR END OF 2419 Meetei Mayek ABC0 +Meetei Mayek Extensions AAE0 MEMBER, CONTAINS AS 220B MEMBER, DOES NOT CONTAIN AS 220C MEMBER, SMALL CONTAINS AS 220D @@ -3246,11 +3260,15 @@ MEMBERSHIP, Z NOTATION BAG 22FF MEN HOLDING HANDS, TWO 1F46C MERCURY 263F merge 2A07 +Meroitic Cursive 109A0 +Meroitic Hieroglyphs 10980 merpadi, tamil 0BF8 MESSAGE WAITING 0095 MESSAGE, PRIVACY 009E Metrical Symbols 23D1 mho 2127 +Miao 16F00 +MICR, Magnetic Ink Character Recognition 2446 MICRO SIGN 00B5 mid space 2005 MIDDLE DOT 00B7 @@ -3599,6 +3617,7 @@ OM, DEVANAGARI 0950 OM, TIBETAN SYLLABLE 0F00 omega pi 03D6 OMEGA, LATIN SMALL 
LETTER CLOSED 0277 +omission dash 2E3A ONE DOT LEADER 2024 ONE HALF, VULGAR FRACTION 00BD ONE QUARTER, VULGAR FRACTION 00BC @@ -3622,6 +3641,7 @@ opening curly bracket 007B opening parenthesis 0028 opening square bracket 005B OPERATING SYSTEM COMMAND 009D +operating system key (ISO 9995-7) 2318 Operators Supplement, Mathematical 2A00 Operators, Database Theory 27D5 Operators, Dotted Mathematical 2234 @@ -3728,6 +3748,7 @@ page up 21DE PAGE, NEXT 2398 PAGE, PREVIOUS 2397 Pahlavi 10B60 +Palaeotype Transliteration Symbols 2E32 PALATAL HOOK, LATIN SMALL LETTER T WITH 01AB PALATALIZED HOOK BELOW, COMBINING 0321 PALM BRANCH 2E19 @@ -4498,6 +4519,7 @@ SHADOWED WHITE CIRCLE 274D SHAMROCK 2618 shamrock 2663 Shapes, Geometric 25A0 +Sharada 11180 SHARP S, LATIN SMALL LETTER 00DF SHARP SIGN, MUSIC 266F Shavian 10450 @@ -4640,6 +4662,7 @@ SOLIDUS OVERLAY, COMBINING SHORT 0337 SOLIDUS, BIG 29F8 SOLIDUS, BIG REVERSE 29F9 SOLIDUS, REVERSE 005C +Sora Sompeng 110D0 sound 1F50A SOUND RECORDING COPYRIGHT 2117 SOURCE, INFORMATION 2139 @@ -4805,6 +4828,7 @@ SUN BEHIND CLOUD 26C5 SUN WITH RAYS, BLACK 2600 SUN WITH RAYS, WHITE 263C Sundanese 1B80 +Sundanese Supplement 1CC0 sunna, telugu 0C02 Superscript Digits 2070 Superscript Letter Diacritics, Latin Medieval 1DD3 @@ -4995,6 +5019,7 @@ Tai Xuan Jing Symbols 1D300 Tails, Fish 297C tainome japanese bullet 25C9 TAKE, PRESCRIPTION 211E +Takri 11680 Tamil 0B80 TAMIL AS ABOVE SIGN 0BF8 tamil aytham 0B83 @@ -5275,6 +5300,7 @@ TWO ASTERISKS ALIGNED VERTICALLY 2051 TWO DOT LEADER 2025 TWO DOT PUNCTUATION 205A TWO, SUPERSCRIPT 00B2 +TWO-EM DASH 2E3A U BAR, LATIN CAPITAL LETTER 0244 U BAR, LATIN SMALL LETTER 0289 U WITH ACUTE, LATIN CAPITAL LETTER 00DA @@ -5369,9 +5395,9 @@ URANUS 2645 uranus 26E2 urdu paragraph separator 203B URN, FUNERAL 26B1 -User interface Input Status Symbols 1F520 +User Interface Input Status Symbols 1F520 User Interface Symbols 1F500 -User interface Symbols 1F53A +User Interface Symbols 1F53A v above 030C V WITH DOT BELOW, 
LATIN SMALL LETTER 1E7F V WITH HOOK, LATIN CAPITAL LETTER 01B2 diff --git a/lib/unicore/IndicMatraCategory.txt b/lib/unicore/IndicMatraCategory.txt index c5f2e11e58..68cbd09350 100644 --- a/lib/unicore/IndicMatraCategory.txt +++ b/lib/unicore/IndicMatraCategory.txt @@ -1,8 +1,8 @@ -# IndicMatraCategory-6.0.0.txt -# Date: 2010-07-14, 15:03:00 PDT [KW] +# IndicMatraCategory-6.1.0.txt +# Date: 2011-08-31, 23:50:00 GMT [KW] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see UAX #44. # @@ -63,7 +63,7 @@ # Tagalog, Hanunoo, Buhid, Tagbanwa, Khmer, Limbu, New Tai Lue, # Buginese, Tai Tham, Balinese, Sundanese, Batak, Lepcha, # Syloti Nagri, Saurashtra, Rejang, Javanese, Cham, Tai Viet, -# Meetei Mayek, Karoshthi, Brahmi, Kaithi +# Meetei Mayek, Kharoshthi, Brahmi, Kaithi, Chakma, Sharada, Takri # # All characters for all other scripts not in that list # take the default value for this property. 
@@ -157,12 +157,17 @@ A9B4..A9B5 ; Right # Mc [2] JAVANESE VOWEL SIGN TARUNG..JAVANESE VOWEL SIGN AAB1 ; Right # Lo TAI VIET VOWEL AA AABA ; Right # Lo TAI VIET VOWEL UA AABD ; Right # Lo TAI VIET VOWEL AN +AAEF ; Right # Mc MEETEI MAYEK VOWEL SIGN AAU ABE3..ABE4 ; Right # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP ABE6..ABE7 ; Right # Mc [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP ABE9..ABEA ; Right # Mc [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEETEI MAYEK VOWEL SIGN NUNG 110B0 ; Right # Mc KAITHI VOWEL SIGN AA 110B2 ; Right # Mc KAITHI VOWEL SIGN II 110B7..110B8 ; Right # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU +111B3 ; Right # Mc SHARADA VOWEL SIGN AA +111B5 ; Right # Mc SHARADA VOWEL SIGN II +111C0 ; Right # Mc SHARADA SIGN VIRAMA +116AF ; Right # Mc TAKRI VOWEL SIGN II # Indic_Matra_Category=Left @@ -190,7 +195,12 @@ ABE9..ABEA ; Right # Mc [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEETEI MAYEK V 1C27..1C28 ; Left # Mc [2] LEPCHA VOWEL SIGN I..LEPCHA VOWEL SIGN O A9BA..A9BB ; Left # Mc [2] JAVANESE VOWEL SIGN TALING..JAVANESE VOWEL SIGN DIRGA MURE AA2F..AA30 ; Left # Mc [2] CHAM VOWEL SIGN O..CHAM VOWEL SIGN AI +AAEB ; Left # Mc MEETEI MAYEK VOWEL SIGN II +AAEE ; Left # Mc MEETEI MAYEK VOWEL SIGN AU 110B1 ; Left # Mc KAITHI VOWEL SIGN I +1112C ; Left # Mc CHAKMA VOWEL SIGN E +111B4 ; Left # Mc SHARADA VOWEL SIGN I +116AE ; Left # Mc TAKRI VOWEL SIGN I # Indic_Matra_Category=Visual_Order_Left @@ -289,12 +299,21 @@ AAB0 ; Top # Mn TAI VIET MAI KANG AAB2..AAB3 ; Top # Mn [2] TAI VIET VOWEL I..TAI VIET VOWEL UE AAB7..AAB8 ; Top # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA AABE ; Top # Mn TAI VIET VOWEL AM +AAED ; Top # Mn MEETEI MAYEK VOWEL SIGN AAI ABE5 ; Top # Mn MEETEI MAYEK VOWEL SIGN ANAP 10A05 ; Top # Mn KHAROSHTHI VOWEL SIGN E 11038..1103B ; Top # Mn [4] BRAHMI VOWEL SIGN AA..BRAHMI VOWEL SIGN II 11042..11045 ; Top # Mn [4] BRAHMI VOWEL SIGN E..BRAHMI VOWEL SIGN AU 11046 ; Top # Mn BRAHMI VIRAMA 110B5..110B6 ; 
Top # Mn [2] KAITHI VOWEL SIGN E..KAITHI VOWEL SIGN AI +11127..11129 ; Top # Mn [3] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN II +1112D ; Top # Mn CHAKMA VOWEL SIGN AI +11130 ; Top # Mn CHAKMA VOWEL SIGN OI +11134 ; Top # Mn CHAKMA MAAYYAA +111BC..111BE ; Top # Mn [3] SHARADA VOWEL SIGN E..SHARADA VOWEL SIGN O +116AD ; Top # Mn TAKRI VOWEL SIGN AA +116B2..116B5 ; Top # Mn [4] TAKRI VOWEL SIGN E..TAKRI VOWEL SIGN AU +116B6 ; Top # Mn TAKRI SIGN VIRAMA # Indic_Matra_Category=Bottom @@ -352,6 +371,7 @@ A9B8..A9B9 ; Bottom # Mn [2] JAVANESE VOWEL SIGN SUKU..JAVANESE VOWEL SIGN AA2D ; Bottom # Mn CHAM VOWEL SIGN U AA32 ; Bottom # Mn CHAM VOWEL SIGN UE AAB4 ; Bottom # Mn TAI VIET VOWEL U +AAEC ; Bottom # Mn MEETEI MAYEK VOWEL SIGN UU ABE8 ; Bottom # Mn MEETEI MAYEK VOWEL SIGN UNAP ABED ; Bottom # Mn MEETEI MAYEK APUN IYEK 10A02..10A03 ; Bottom # Mn [2] KHAROSHTHI VOWEL SIGN U..KHAROSHTHI VOWEL SIGN VOCALIC R @@ -359,6 +379,10 @@ ABED ; Bottom # Mn MEETEI MAYEK APUN IYEK 1103C..11041 ; Bottom # Mn [6] BRAHMI VOWEL SIGN U..BRAHMI VOWEL SIGN VOCALIC LL 110B3..110B4 ; Bottom # Mn [2] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN UU 110B9 ; Bottom # Mn KAITHI SIGN VIRAMA +1112A..1112B ; Bottom # Mn [2] CHAKMA VOWEL SIGN U..CHAKMA VOWEL SIGN UU +11131..11132 ; Bottom # Mn [2] CHAKMA O MARK..CHAKMA AU MARK +111B6..111BB ; Bottom # Mn [6] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN VOCALIC LL +116B0..116B1 ; Bottom # Mn [2] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN UU # Indic_Matra_Category=Top_And_Bottom @@ -367,6 +391,7 @@ ABED ; Bottom # Mn MEETEI MAYEK APUN IYEK 0F76..0F79 ; Top_And_Bottom # Mn [4] TIBETAN VOWEL SIGN VOCALIC R..TIBETAN VOWEL SIGN VOCALIC LL 0F81 ; Top_And_Bottom # Mn TIBETAN VOWEL SIGN REVERSED II 1B3C ; Top_And_Bottom # Mn BALINESE VOWEL SIGN LA LENGA +1112E..1112F ; Top_And_Bottom # Mn [2] CHAKMA VOWEL SIGN O..CHAKMA VOWEL SIGN AU # Indic_Matra_Category=Top_And_Right @@ -377,6 +402,7 @@ ABED ; Bottom # Mn MEETEI MAYEK APUN IYEK 0CCA..0CCB ; Top_And_Right # Mc [2] KANNADA 
VOWEL SIGN O..KANNADA VOWEL SIGN OO 1925..1926 ; Top_And_Right # Mc [2] LIMBU VOWEL SIGN OO..LIMBU VOWEL SIGN AU 1B43 ; Top_And_Right # Mc BALINESE VOWEL SIGN PEPET TEDUNG +111BF ; Top_And_Right # Mc SHARADA VOWEL SIGN AU # Indic_Matra_Category=Top_And_Left @@ -413,6 +439,8 @@ A9C0 ; Bottom_And_Right # Mc JAVANESE PANGKON 1039 ; Invisible # Mn MYANMAR SIGN VIRAMA 17D2 ; Invisible # Mn KHMER SIGN COENG 1A60 ; Invisible # Mn TAI THAM SIGN SAKOT +AAF6 ; Invisible # Mn MEETEI MAYEK VIRAMA 10A3F ; Invisible # Mn KHAROSHTHI VIRAMA +11133 ; Invisible # Mn CHAKMA VIRAMA # EOF diff --git a/lib/unicore/IndicSyllabicCategory.txt b/lib/unicore/IndicSyllabicCategory.txt index 674c4def38..9d771bacc0 100644 --- a/lib/unicore/IndicSyllabicCategory.txt +++ b/lib/unicore/IndicSyllabicCategory.txt @@ -1,8 +1,8 @@ -# IndicSyllabicCategory-6.0.0.txt -# Date: 2010-05-25, 11:45:00 PDT [KW] +# IndicSyllabicCategory-6.1.0.txt +# Date: 2011-08-31, 23:54:00 GMT [KW] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see UAX #44. 
# @@ -43,7 +43,7 @@ # Tagalog, Hanunoo, Buhid, Tagbanwa, Khmer, Limbu, Tai Le, New Tai Lue, # Buginese, Tai Tham, Balinese, Sundanese, Batak, Lepcha, # Syloti Nagri, Phags-Pa, Saurashtra, Kayah Li, Rejang, Javanese, Cham, Tai Viet, -# Meetei Mayek, Karoshthi, Brahmi, Kaithi +# Meetei Mayek, Kharoshthi, Brahmi, Kaithi, Chakma, Sharada, Takri # # All characters for all other scripts not in that list # take the default value for this property, unless they @@ -99,12 +99,18 @@ A880 ; Bindu # Mc SAURASHTRA SIGN ANUSVARA 11000 ; Bindu # Mc BRAHMI SIGN CANDRABINDU 11001 ; Bindu # Mn BRAHMI SIGN ANUSVARA 11080..11081 ; Bindu # Mn [2] KAITHI SIGN CANDRABINDU..KAITHI SIGN ANUSVARA +11100..11101 ; Bindu # Mn CHAKMA SIGN CANDRABINDU..CHAKMA SIGN ANUSVARA +11180..11181 ; Bindu # Mn SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +116AB ; Bindu # Mn TAKRI SIGN ANUSVARA # ================================================ # Indic_Syllabic_Category=Visarga # Visarga (-h) +# Includes specialized case for Sanskrit: ardhavisarga +# Excludes letters for jihvamuliya and upadhmaniya, which are +# related, but structured somewhat differently. 
# [Not derivable] @@ -122,11 +128,17 @@ A880 ; Bindu # Mc SAURASHTRA SIGN ANUSVARA 17C7 ; Visarga # Mc KHMER SIGN REAHMUK 1B04 ; Visarga # Mc BALINESE SIGN BISAH 1B82 ; Visarga # Mc SUNDANESE SIGN PANGWISAD +1CF2 ; Visarga # Mc VEDIC SIGN ARDHAVISARGA +1CF3 ; Visarga # Mc VEDIC SIGN ROTATED ARDHAVISARGA A881 ; Visarga # Mc SAURASHTRA SIGN VISARGA A983 ; Visarga # Mc JAVANESE SIGN WIGNYAN +AAF5 ; Visarga # Mc MEETEI MAYEK VOWEL SIGN VISARGA 10A0F ; Visarga # Mn KHAROSHTHI SIGN VISARGA 11002 ; Visarga # Mc BRAHMI SIGN VISARGA 11082 ; Visarga # Mc KAITHI SIGN VISARGA +11102 ; Visarga # Mn CHAKMA SIGN VISARGA +11182 ; Visarga # Mn SHARADA SIGN VISARGA +116AC ; Visarga # Mc TAKRI SIGN VISARGA # ================================================ @@ -145,6 +157,8 @@ A983 ; Visarga # Mc JAVANESE SIGN WIGNYAN 0D3D ; Avagraha # Lo MALAYALAM SIGN AVAGRAHA 0F85 ; Avagraha # Po TIBETAN MARK PALUTA 17DC ; Avagraha # Lo KHMER SIGN AVAKRAHASANYA +1BBA ; Avagraha # Lo SUNDANESE AVAGRAHA +111C1 ; Avagraha # Lo SHARADA SIGN AVAGRAHA # ================================================ @@ -165,6 +179,7 @@ A983 ; Visarga # Mc JAVANESE SIGN WIGNYAN 1C37 ; Nukta # Mn LEPCHA SIGN NUKTA A9B3 ; Nukta # Mn JAVANESE SIGN CECAK TELU 110BA ; Nukta # Mn KAITHI SIGN NUKTA +116B7 ; Nukta # Mn TAKRI SIGN NUKTA # ================================================ @@ -196,15 +211,20 @@ A9B3 ; Nukta # Mn JAVANESE SIGN CECAK TELU 1A60 ; Virama # Mn TAI THAM SIGN SAKOT 1B44 ; Virama # Mc BALINESE ADEG ADEG 1BAA ; Virama # Mc SUNDANESE SIGN PAMAAEH +1BAB ; Virama # Mc SUNDANESE SIGN VIRAMA 1BF2..1BF3 ; Virama # Mc [2] BATAK PANGOLAT..BATAK PANONGONAN A806 ; Virama # Mn SYLOTI NAGRI SIGN HASANTA A8C4 ; Virama # Mn SAURASHTRA SIGN VIRAMA A953 ; Virama # Mc REJANG VIRAMA A9C0 ; Virama # Mc JAVANESE PANGKON +AAF6 ; Virama # Mn MEETEI MAYEK VIRAMA ABED ; Virama # Mn MEETEI MAYEK APUN IYEK 10A3F ; Virama # Mn KHAROSHTHI VIRAMA 11046 ; Virama # Mn BRAHMI VIRAMA 110B9 ; Virama # Mn KAITHI SIGN VIRAMA +11133..11134 ; 
Virama # Mn CHAKMA VIRAMA..CHAKMA MAAYYAA +111C0 ; Virama # Mc SHARADA SIGN VIRAMA +116B6 ; Virama # Mn TAKRI SIGN VIRAMA # ================================================ @@ -265,8 +285,14 @@ A882..A891 ; Vowel_Independent # Lo [16] SAURASHTRA LETTER A..SAURASHTRA LET A984..A988 ; Vowel_Independent # Lo [5] JAVANESE LETTER A..JAVANESE LETTER U A98C..A98E ; Vowel_Independent # Lo [3] JAVANESE LETTER E..JAVANESE LETTER O AA00..AA05 ; Vowel_Independent # Lo [6] CHAM LETTER A..CHAM LETTER O +AAE0..AAE1 ; Vowel_Independent # Lo [2] MEETEI MAYEK LETTER E..MEETEI MAYEK LETTER O +ABCE..ABCF ; Vowel_Independent # Lo [2] MEETEI MAYEK LETTER UN..MEETEI MAYEK LETTER I +ABD1 ; Vowel_Independent # Lo MEETEI MAYEK LETTER ATIYA 11005..11012 ; Vowel_Independent # Lo [14] BRAHMI LETTER A..BRAHMI LETTER AU 11083..1108C ; Vowel_Independent # Lo [10] KAITHI LETTER A..KAITHI LETTER AU +11103..11106 ; Vowel_Independent # Lo [4] CHAKMA LETTER AA..CHAKMA LETTER E +11183..11190 ; Vowel_Independent # Lo [14] SHARADA LETTER A..SHARADA LETTER AU +11680..11689 ; Vowel_Independent # Lo [10] TAKRI LETTER A..TAKRI LETTER AU # ================================================ @@ -426,6 +452,7 @@ AAB5..AAB6 ; Vowel_Dependent # Lo [2] TAI VIET VOWEL E..TAI VIET VOWEL O AAB7..AAB8 ; Vowel_Dependent # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA AAB9..AABD ; Vowel_Dependent # Lo [5] TAI VIET VOWEL UEA..TAI VIET VOWEL AN AABE ; Vowel_Dependent # Mn TAI VIET VOWEL AM +AAEB..AAEF ; Vowel_Dependent # Mc [5] MEETEI MAYEK VOWEL SIGN II..MEETEI MAYEK VOWEL SIGN AAU ABE3..ABE4 ; Vowel_Dependent # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP ABE5 ; Vowel_Dependent # Mn MEETEI MAYEK VOWEL SIGN ANAP ABE6..ABE7 ; Vowel_Dependent # Mc [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP @@ -438,6 +465,9 @@ ABE9..ABEA ; Vowel_Dependent # Mc [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEET 110B0..110B2 ; Vowel_Dependent # Mc [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II 110B3..110B6 ; 
Vowel_Dependent # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI 110B7..110B8 ; Vowel_Dependent # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU +11127..11132 ; Vowel_Dependent # Mn [12] CHAKMA VOWEL SIGN A..CHAKMA AU MARK +111B3..111BF ; Vowel_Dependent # Mn [13] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN AU +116AD..116B5 ; Vowel_Dependent # Mn [9] TAKRI VOWEL SIGN AA..TAKRI VOWEL SIGN AU # ================================================ @@ -568,6 +598,7 @@ A926..A92A ; Vowel # Mn [5] KAYAH LI VOWEL UE..KAYAH LI VOWEL O 1B45..1B4B ; Consonant # Lo [7] BALINESE LETTER KAF SASAK..BALINESE LETTER ASYURA SASAK 1B8A..1BA0 ; Consonant # Lo [23] SUNDANESE LETTER KA..SUNDANESE LETTER HA 1BAE..1BAF ; Consonant # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA +1BBB..1BBD ; Consonant # Lo [3] SUNDANESE LETTER REU..SUNDANESE LETTER BHA 1BC0..1BE3 ; Consonant # Lo [36] BATAK LETTER A..BATAK LETTER MBA 1C00..1C23 ; Consonant # Lo [36] LEPCHA LETTER KA..LEPCHA LETTER A 1C4D..1C4F ; Consonant # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA @@ -587,13 +618,19 @@ AA60..AA6F ; Consonant # Lo [16] MYANMAR LETTER KHAMTI GA..MYANMAR LETTER KH AA71..AA73 ; Consonant # Lo [3] MYANMAR LETTER KHAMTI XA..MYAMNAR LETTER KHAMTI RA AA7A ; Consonant # Lo MYANMAR LETTER AITON RA AA80..AAAF ; Consonant # Lo [48] TAI VIET LETTER LOW KO..TAI VIET LETTER HIGH O -ABC0..ABDA ; Consonant # Lo [27] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER BHAM +AAE2..AAEA ; Consonant # Lo [9] MEETEI MAYEK LETTER CHA..MEETEI MAYEK LETTER SSA +ABC0..ABCD ; Consonant # Lo [14] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER HUK +ABD0 ; Consonant # Lo MEETEI MAYEK LETTER PHAM +ABD2..ABDA ; Consonant # Lo [9] MEETEI MAYEK LETTER GOK..MEETEI MAYEK LETTER BHAM 10A00 ; Consonant # Lo KHAROSHTHI LETTER A 10A10..10A13 ; Consonant # Lo [4] KHAROSHTHI LETTER KA..KHAROSHTHI LETTER GHA 10A15..10A17 ; Consonant # Lo [3] KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA 10A19..10A33 ; Consonant # Lo [27] KHAROSHTHI LETTER 
NYA..KHAROSHTHI LETTER TTTHA 11013..11037 ; Consonant # Lo [37] BRAHMI LETTER KA..BRAHMI LETTER OLD TAMIL NNNA 1108D..110AF ; Consonant # Lo [35] KAITHI LETTER KA..KAITHI LETTER HA +11107..11126 ; Consonant # Lo [32] CHAKMA LETTER KAA..CHAKMA LETTER HAA +11191..111B2 ; Consonant # Lo [34] SHARADA LETTER KA..SHARADA LETTER HA +1168A..116AA ; Consonant # Lo [34] TAKRI LETTER KA..TAKRI LETTER RRA # ================================================ @@ -633,6 +670,7 @@ A982 ; Consonant_Repha # Mn JAVANESE SIGN LAYAR 1929..192B ; Consonant_Subjoined # Mc [3] LIMBU SUBJOINED LETTER YA..LIMBU SUBJOINED LETTER WA 1BA1 ; Consonant_Subjoined # Mc SUNDANESE CONSONANT SIGN PAMINGKAL 1BA2..1BA3 ; Consonant_Subjoined # Mn [2] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE CONSONANT SIGN PANYIKU +1BAC..1BAD ; Consonant_Subjoined # Mc [2] SUNDANESE CONSONANT SIGN PASANGAN MA..SUNDANESE CONSONANT SIGN PASANGAN WA 1C24..1C25 ; Consonant_Subjoined # Mc [2] LEPCHA SUBJOINED LETTER YA..LEPCHA SUBJOINED LETTER RA A867..A868 ; Consonant_Subjoined # Lo [2] PHAGS-PA SUBJOINED LETTER WA..PHAGS-PA SUBJOINED LETTER YA A871 ; Consonant_Subjoined # Lo PHAGS-PA SUBJOINED LETTER RA @@ -672,6 +710,7 @@ AA35..AA36 ; Consonant_Medial # Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONA 19C1..19C7 ; Consonant_Final # Lo [7] NEW TAI LUE LETTER FINAL V..NEW TAI LUE LETTER FINAL B 1A57 ; Consonant_Final # Mc TAI THAM CONSONANT SIGN LA TANG LAI 1A58..1A5E ; Consonant_Final # Mn [7] TAI THAM SIGN MAI KANG LAI..TAI THAM CONSONANT SIGN SA +1BBE..1BBF ; Consonant_Final # Lo [2] SUNDANESE LETTER FINAL K..SUNDANESE LETTER FINAL M 1BF0..1BF1 ; Consonant_Final # Mn [2] BATAK CONSONANT SIGN NG..BATAK CONSONANT SIGN H 1C2D..1C33 ; Consonant_Final # Mn [7] LEPCHA CONSONANT SIGN K..LEPCHA CONSONANT SIGN T A8B4 ; Consonant_Final # Mc SAURASHTRA CONSONANT SIGN HAARU diff --git a/lib/unicore/Jamo.txt b/lib/unicore/Jamo.txt index b5df928191..3f325dee1f 100644 --- a/lib/unicore/Jamo.txt +++ b/lib/unicore/Jamo.txt @@ -1,22 +1,22 @@ 
-# Jamo-6.0.0.txt -# Date: 2010-05-19, 11:19:00 PDT [KW] +# Jamo-6.1.0.txt +# Date: 2011-06-22, 23:07:00 GMT [KW, LI] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ # -# This file defines the Jamo Short Name property. +# This file defines the Jamo_Short_Name property. # -# See Section 3.12 of The Unicode Standard, Version 6.0 +# See Section 3.12 of The Unicode Standard, Version 6.1 # for more information. # # Each line contains two fields, separated by a semicolon. # # The first field gives the code point, in 4-digit hexadecimal -# form, of a combining jamo character that participates in -# the algorithmic determination Hangul syllable character names. -# The second field gives the Jamo Short Name as a one-, two-, +# form, of a conjoining jamo character that participates in the +# algorithmic determination of Hangul syllable character names. +# The second field gives the Jamo_Short_Name as a one-, two-, # or three-character ASCII string (or in one case, for U+110B, # the null string). # diff --git a/lib/unicore/LineBreak.txt b/lib/unicore/LineBreak.txt index 10a6d0e5b2..98e9671f66 100644 --- a/lib/unicore/LineBreak.txt +++ b/lib/unicore/LineBreak.txt @@ -1,5 +1,5 @@ -# LineBreak-6.0.0.txt -# Date: 2010-08-18, 17:25:00 PDT [KW] +# LineBreak-6.1.0.txt +# Date: 2011-11-08, 20:25:00 GMT [KW] # # Line Break Properties # @@ -7,7 +7,7 @@ # Unicode Character Database. # It contains both normative and informative data. # -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # # The format is two fields separated by a semicolon. 
@@ -19,7 +19,7 @@ # Informative: # "XX", "OP", "CL", "CP", "QU", "NS", "EX", "SY", # "IS", "PR", "PO", "NU", "AL", "ID", "IN", "HY", -# "BB", "BA", "SA", "AI", "B2" +# "BB", "BA", "SA", "AI", "B2", "HL", "CJ" # - All code points, assigned and unassigned, that are not listed # explicitly are given the value "XX". # The unassigned code points that default to "ID" include ranges in the @@ -1439,6 +1439,7 @@ 0587;AL # ARMENIAN SMALL LIGATURE ECH YIWN 0589;IS # ARMENIAN FULL STOP 058A;BA # ARMENIAN HYPHEN +058F;PR # ARMENIAN DRAM SIGN 0591;CM # HEBREW ACCENT ETNAHTA 0592;CM # HEBREW ACCENT SEGOL 0593;CM # HEBREW ACCENT SHALSHELET @@ -1494,42 +1495,43 @@ 05C5;CM # HEBREW MARK LOWER DOT 05C6;EX # HEBREW PUNCTUATION NUN HAFUKHA 05C7;CM # HEBREW POINT QAMATS QATAN -05D0;AL # HEBREW LETTER ALEF -05D1;AL # HEBREW LETTER BET -05D2;AL # HEBREW LETTER GIMEL -05D3;AL # HEBREW LETTER DALET -05D4;AL # HEBREW LETTER HE -05D5;AL # HEBREW LETTER VAV -05D6;AL # HEBREW LETTER ZAYIN -05D7;AL # HEBREW LETTER HET -05D8;AL # HEBREW LETTER TET -05D9;AL # HEBREW LETTER YOD -05DA;AL # HEBREW LETTER FINAL KAF -05DB;AL # HEBREW LETTER KAF -05DC;AL # HEBREW LETTER LAMED -05DD;AL # HEBREW LETTER FINAL MEM -05DE;AL # HEBREW LETTER MEM -05DF;AL # HEBREW LETTER FINAL NUN -05E0;AL # HEBREW LETTER NUN -05E1;AL # HEBREW LETTER SAMEKH -05E2;AL # HEBREW LETTER AYIN -05E3;AL # HEBREW LETTER FINAL PE -05E4;AL # HEBREW LETTER PE -05E5;AL # HEBREW LETTER FINAL TSADI -05E6;AL # HEBREW LETTER TSADI -05E7;AL # HEBREW LETTER QOF -05E8;AL # HEBREW LETTER RESH -05E9;AL # HEBREW LETTER SHIN -05EA;AL # HEBREW LETTER TAV -05F0;AL # HEBREW LIGATURE YIDDISH DOUBLE VAV -05F1;AL # HEBREW LIGATURE YIDDISH VAV YOD -05F2;AL # HEBREW LIGATURE YIDDISH DOUBLE YOD +05D0;HL # HEBREW LETTER ALEF +05D1;HL # HEBREW LETTER BET +05D2;HL # HEBREW LETTER GIMEL +05D3;HL # HEBREW LETTER DALET +05D4;HL # HEBREW LETTER HE +05D5;HL # HEBREW LETTER VAV +05D6;HL # HEBREW LETTER ZAYIN +05D7;HL # HEBREW LETTER HET +05D8;HL # HEBREW LETTER TET 
+05D9;HL # HEBREW LETTER YOD +05DA;HL # HEBREW LETTER FINAL KAF +05DB;HL # HEBREW LETTER KAF +05DC;HL # HEBREW LETTER LAMED +05DD;HL # HEBREW LETTER FINAL MEM +05DE;HL # HEBREW LETTER MEM +05DF;HL # HEBREW LETTER FINAL NUN +05E0;HL # HEBREW LETTER NUN +05E1;HL # HEBREW LETTER SAMEKH +05E2;HL # HEBREW LETTER AYIN +05E3;HL # HEBREW LETTER FINAL PE +05E4;HL # HEBREW LETTER PE +05E5;HL # HEBREW LETTER FINAL TSADI +05E6;HL # HEBREW LETTER TSADI +05E7;HL # HEBREW LETTER QOF +05E8;HL # HEBREW LETTER RESH +05E9;HL # HEBREW LETTER SHIN +05EA;HL # HEBREW LETTER TAV +05F0;HL # HEBREW LIGATURE YIDDISH DOUBLE VAV +05F1;HL # HEBREW LIGATURE YIDDISH VAV YOD +05F2;HL # HEBREW LIGATURE YIDDISH DOUBLE YOD 05F3;AL # HEBREW PUNCTUATION GERESH 05F4;AL # HEBREW PUNCTUATION GERSHAYIM 0600;AL # ARABIC NUMBER SIGN 0601;AL # ARABIC SIGN SANAH 0602;AL # ARABIC FOOTNOTE MARKER 0603;AL # ARABIC SIGN SAFHA +0604;AL # ARABIC SIGN SAMVAT 0606;AL # ARABIC-INDIC CUBE ROOT 0607;AL # ARABIC-INDIC FOURTH ROOT 0608;AL # ARABIC RAY @@ -2102,6 +2104,45 @@ 085A;CM # MANDAIC VOCALIZATION MARK 085B;CM # MANDAIC GEMINATION MARK 085E;AL # MANDAIC PUNCTUATION +08A0;AL # ARABIC LETTER BEH WITH SMALL V BELOW +08A2;AL # ARABIC LETTER JEEM WITH TWO DOTS ABOVE +08A3;AL # ARABIC LETTER TAH WITH TWO DOTS ABOVE +08A4;AL # ARABIC LETTER FEH WITH DOT BELOW AND THREE DOTS ABOVE +08A5;AL # ARABIC LETTER QAF WITH DOT BELOW +08A6;AL # ARABIC LETTER LAM WITH DOUBLE BAR +08A7;AL # ARABIC LETTER MEEM WITH THREE DOTS ABOVE +08A8;AL # ARABIC LETTER YEH WITH TWO DOTS BELOW AND HAMZA ABOVE +08A9;AL # ARABIC LETTER YEH WITH TWO DOTS BELOW AND DOT ABOVE +08AA;AL # ARABIC LETTER REH WITH LOOP +08AB;AL # ARABIC LETTER WAW WITH DOT WITHIN +08AC;AL # ARABIC LETTER ROHINGYA YEH +08E4;CM # ARABIC CURLY FATHA +08E5;CM # ARABIC CURLY DAMMA +08E6;CM # ARABIC CURLY KASRA +08E7;CM # ARABIC CURLY FATHATAN +08E8;CM # ARABIC CURLY DAMMATAN +08E9;CM # ARABIC CURLY KASRATAN +08EA;CM # ARABIC TONE ONE DOT ABOVE +08EB;CM # ARABIC TONE TWO DOTS ABOVE 
+08EC;CM # ARABIC TONE LOOP ABOVE +08ED;CM # ARABIC TONE ONE DOT BELOW +08EE;CM # ARABIC TONE TWO DOTS BELOW +08EF;CM # ARABIC TONE LOOP BELOW +08F0;CM # ARABIC OPEN FATHATAN +08F1;CM # ARABIC OPEN DAMMATAN +08F2;CM # ARABIC OPEN KASRATAN +08F3;CM # ARABIC SMALL HIGH WAW +08F4;CM # ARABIC FATHA WITH RING +08F5;CM # ARABIC FATHA WITH DOT ABOVE +08F6;CM # ARABIC KASRA WITH DOT BELOW +08F7;CM # ARABIC LEFT ARROWHEAD ABOVE +08F8;CM # ARABIC RIGHT ARROWHEAD ABOVE +08F9;CM # ARABIC LEFT ARROWHEAD BELOW +08FA;CM # ARABIC RIGHT ARROWHEAD BELOW +08FB;CM # ARABIC DOUBLE RIGHT ARROWHEAD ABOVE +08FC;CM # ARABIC DOUBLE RIGHT ARROWHEAD ABOVE WITH DOT +08FD;CM # ARABIC RIGHT ARROWHEAD ABOVE WITH DOT +08FE;CM # ARABIC DAMMA WITH DOT 0900;CM # DEVANAGARI SIGN INVERTED CANDRABINDU 0901;CM # DEVANAGARI SIGN CANDRABINDU 0902;CM # DEVANAGARI SIGN ANUSVARA @@ -2482,6 +2523,7 @@ 0AED;NU # GUJARATI DIGIT SEVEN 0AEE;NU # GUJARATI DIGIT EIGHT 0AEF;NU # GUJARATI DIGIT NINE +0AF0;AL # GUJARATI ABBREVIATION SIGN 0AF1;PR # GUJARATI RUPEE SIGN 0B01;CM # ORIYA SIGN CANDRABINDU 0B02;CM # ORIYA SIGN ANUSVARA @@ -3154,6 +3196,8 @@ 0ED9;NU # LAO DIGIT NINE 0EDC;SA # LAO HO NO 0EDD;SA # LAO HO MO +0EDE;SA # LAO LETTER KHMU GO +0EDF;SA # LAO LETTER KHMU NYO 0F00;AL # TIBETAN SYLLABLE OM 0F01;BB # TIBETAN MARK GTER YIG MGO TRUNCATED A 0F02;BB # TIBETAN MARK GTER YIG MGO -UM RNAM BCAD MA @@ -3563,6 +3607,8 @@ 10C3;AL # GEORGIAN CAPITAL LETTER WE 10C4;AL # GEORGIAN CAPITAL LETTER HAR 10C5;AL # GEORGIAN CAPITAL LETTER HOE +10C7;AL # GEORGIAN CAPITAL LETTER YN +10CD;AL # GEORGIAN CAPITAL LETTER AEN 10D0;AL # GEORGIAN LETTER AN 10D1;AL # GEORGIAN LETTER BAN 10D2;AL # GEORGIAN LETTER GAN @@ -3608,6 +3654,9 @@ 10FA;AL # GEORGIAN LETTER AIN 10FB;AL # GEORGIAN PARAGRAPH SEPARATOR 10FC;AL # MODIFIER LETTER GEORGIAN NAR +10FD;AL # GEORGIAN LETTER AEN +10FE;AL # GEORGIAN LETTER HARD SIGN +10FF;AL # GEORGIAN LETTER LABIAL SIGN 1100;JL # HANGUL CHOSEONG KIYEOK 1101;JL # HANGUL CHOSEONG SSANGKIYEOK 1102;JL # HANGUL 
CHOSEONG NIEUN @@ -6041,6 +6090,9 @@ 1BA8;CM # SUNDANESE VOWEL SIGN PAMEPET 1BA9;CM # SUNDANESE VOWEL SIGN PANEULEUNG 1BAA;CM # SUNDANESE SIGN PAMAAEH +1BAB;CM # SUNDANESE SIGN VIRAMA +1BAC;CM # SUNDANESE CONSONANT SIGN PASANGAN MA +1BAD;CM # SUNDANESE CONSONANT SIGN PASANGAN WA 1BAE;AL # SUNDANESE LETTER KHA 1BAF;AL # SUNDANESE LETTER SYA 1BB0;NU # SUNDANESE DIGIT ZERO @@ -6053,6 +6105,12 @@ 1BB7;NU # SUNDANESE DIGIT SEVEN 1BB8;NU # SUNDANESE DIGIT EIGHT 1BB9;NU # SUNDANESE DIGIT NINE +1BBA;AL # SUNDANESE AVAGRAHA +1BBB;AL # SUNDANESE LETTER REU +1BBC;AL # SUNDANESE LETTER LEU +1BBD;AL # SUNDANESE LETTER BHA +1BBE;AL # SUNDANESE LETTER FINAL K +1BBF;AL # SUNDANESE LETTER FINAL M 1BC0;AL # BATAK LETTER A 1BC1;AL # BATAK LETTER SIMALUNGUN A 1BC2;AL # BATAK LETTER HA @@ -6231,6 +6289,14 @@ 1C7D;AL # OL CHIKI AHAD 1C7E;BA # OL CHIKI PUNCTUATION MUCAAD 1C7F;BA # OL CHIKI PUNCTUATION DOUBLE MUCAAD +1CC0;AL # SUNDANESE PUNCTUATION BINDU SURYA +1CC1;AL # SUNDANESE PUNCTUATION BINDU PANGLONG +1CC2;AL # SUNDANESE PUNCTUATION BINDU PURNAMA +1CC3;AL # SUNDANESE PUNCTUATION BINDU CAKRA +1CC4;AL # SUNDANESE PUNCTUATION BINDU LEU SATANGA +1CC5;AL # SUNDANESE PUNCTUATION BINDU KA SATANGA +1CC6;AL # SUNDANESE PUNCTUATION BINDU DA SATANGA +1CC7;AL # SUNDANESE PUNCTUATION BINDU BA SATANGA 1CD0;CM # VEDIC TONE KARSHANA 1CD1;CM # VEDIC TONE SHARA 1CD2;CM # VEDIC TONE PRENKHA @@ -6266,6 +6332,10 @@ 1CF0;AL # VEDIC SIGN RTHANG LONG ANUSVARA 1CF1;AL # VEDIC SIGN ANUSVARA UBHAYATO MUKHA 1CF2;CM # VEDIC SIGN ARDHAVISARGA +1CF3;CM # VEDIC SIGN ROTATED ARDHAVISARGA +1CF4;CM # VEDIC TONE CANDRA ABOVE +1CF5;AL # VEDIC SIGN JIHVAMULIYA +1CF6;AL # VEDIC SIGN UPADHMANIYA 1D00;AL # LATIN LETTER SMALL CAPITAL A 1D01;AL # LATIN LETTER SMALL CAPITAL AE 1D02;AL # LATIN SMALL LETTER TURNED AE @@ -8872,7 +8942,9 @@ 27C8;AL # REVERSE SOLIDUS PRECEDING SUBSET 27C9;AL # SUPERSET PRECEDING SOLIDUS 27CA;AL # VERTICAL BAR WITH HORIZONTAL STROKE +27CB;AL # MATHEMATICAL RISING DIAGONAL 27CC;AL # LONG DIVISION 
+27CD;AL # MATHEMATICAL FALLING DIAGONAL 27CE;AL # SQUARED LOGICAL AND 27CF;AL # SQUARED LOGICAL OR 27D0;AL # WHITE DIAMOND WITH CENTRED DOT @@ -10018,6 +10090,8 @@ 2CEF;CM # COPTIC COMBINING NI ABOVE 2CF0;CM # COPTIC COMBINING SPIRITUS ASPER 2CF1;CM # COPTIC COMBINING SPIRITUS LENIS +2CF2;AL # COPTIC CAPITAL LETTER BOHAIRIC KHEI +2CF3;AL # COPTIC SMALL LETTER BOHAIRIC KHEI 2CF9;EX # COPTIC OLD NUBIAN FULL STOP 2CFA;BA # COPTIC OLD NUBIAN DIRECT QUESTION MARK 2CFB;BA # COPTIC OLD NUBIAN INDIRECT QUESTION MARK @@ -10063,6 +10137,8 @@ 2D23;AL # GEORGIAN SMALL LETTER WE 2D24;AL # GEORGIAN SMALL LETTER HAR 2D25;AL # GEORGIAN SMALL LETTER HOE +2D27;AL # GEORGIAN SMALL LETTER YN +2D2D;AL # GEORGIAN SMALL LETTER AEN 2D30;AL # TIFINAGH LETTER YA 2D31;AL # TIFINAGH LETTER YAB 2D32;AL # TIFINAGH LETTER YABH @@ -10117,6 +10193,8 @@ 2D63;AL # TIFINAGH LETTER YAZ 2D64;AL # TIFINAGH LETTER TAWELLEMET YAZ 2D65;AL # TIFINAGH LETTER YAZZ +2D66;AL # TIFINAGH LETTER YE +2D67;AL # TIFINAGH LETTER YO 2D6F;AL # TIFINAGH MODIFIER LETTER LABIALIZATION MARK 2D70;BA # TIFINAGH SEPARATOR MARK 2D7F;CM # TIFINAGH CONSONANT JOINER @@ -10281,6 +10359,16 @@ 2E2F;AL # VERTICAL TILDE 2E30;BA # RING POINT 2E31;BA # WORD SEPARATOR MIDDLE DOT +2E32;AL # TURNED COMMA +2E33;BA # RAISED DOT +2E34;BA # RAISED COMMA +2E35;AL # TURNED SEMICOLON +2E36;AL # DAGGER WITH LEFT GUARD +2E37;AL # DAGGER WITH RIGHT GUARD +2E38;AL # TURNED DAGGER +2E39;AL # TOP HALF SECTION SIGN +2E3A;B2 # TWO-EM DASH +2E3B;B2 # THREE-EM DASH 2E80;ID # CJK RADICAL REPEAT 2E81;ID # CJK RADICAL CLIFF 2E82;ID # CJK RADICAL SECOND ONE @@ -10686,15 +10774,15 @@ 303D;ID # PART ALTERNATION MARK 303E;ID # IDEOGRAPHIC VARIATION INDICATOR 303F;ID # IDEOGRAPHIC HALF FILL SPACE -3041;NS # HIRAGANA LETTER SMALL A +3041;CJ # HIRAGANA LETTER SMALL A 3042;ID # HIRAGANA LETTER A -3043;NS # HIRAGANA LETTER SMALL I +3043;CJ # HIRAGANA LETTER SMALL I 3044;ID # HIRAGANA LETTER I -3045;NS # HIRAGANA LETTER SMALL U +3045;CJ # HIRAGANA LETTER SMALL U 
3046;ID # HIRAGANA LETTER U -3047;NS # HIRAGANA LETTER SMALL E +3047;CJ # HIRAGANA LETTER SMALL E 3048;ID # HIRAGANA LETTER E -3049;NS # HIRAGANA LETTER SMALL O +3049;CJ # HIRAGANA LETTER SMALL O 304A;ID # HIRAGANA LETTER O 304B;ID # HIRAGANA LETTER KA 304C;ID # HIRAGANA LETTER GA @@ -10720,7 +10808,7 @@ 3060;ID # HIRAGANA LETTER DA 3061;ID # HIRAGANA LETTER TI 3062;ID # HIRAGANA LETTER DI -3063;NS # HIRAGANA LETTER SMALL TU +3063;CJ # HIRAGANA LETTER SMALL TU 3064;ID # HIRAGANA LETTER TU 3065;ID # HIRAGANA LETTER DU 3066;ID # HIRAGANA LETTER TE @@ -10752,26 +10840,26 @@ 3080;ID # HIRAGANA LETTER MU 3081;ID # HIRAGANA LETTER ME 3082;ID # HIRAGANA LETTER MO -3083;NS # HIRAGANA LETTER SMALL YA +3083;CJ # HIRAGANA LETTER SMALL YA 3084;ID # HIRAGANA LETTER YA -3085;NS # HIRAGANA LETTER SMALL YU +3085;CJ # HIRAGANA LETTER SMALL YU 3086;ID # HIRAGANA LETTER YU -3087;NS # HIRAGANA LETTER SMALL YO +3087;CJ # HIRAGANA LETTER SMALL YO 3088;ID # HIRAGANA LETTER YO 3089;ID # HIRAGANA LETTER RA 308A;ID # HIRAGANA LETTER RI 308B;ID # HIRAGANA LETTER RU 308C;ID # HIRAGANA LETTER RE 308D;ID # HIRAGANA LETTER RO -308E;NS # HIRAGANA LETTER SMALL WA +308E;CJ # HIRAGANA LETTER SMALL WA 308F;ID # HIRAGANA LETTER WA 3090;ID # HIRAGANA LETTER WI 3091;ID # HIRAGANA LETTER WE 3092;ID # HIRAGANA LETTER WO 3093;ID # HIRAGANA LETTER N 3094;ID # HIRAGANA LETTER VU -3095;NS # HIRAGANA LETTER SMALL KA -3096;NS # HIRAGANA LETTER SMALL KE +3095;CJ # HIRAGANA LETTER SMALL KA +3096;CJ # HIRAGANA LETTER SMALL KE 3099;CM # COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK 309A;CM # COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK 309B;NS # KATAKANA-HIRAGANA VOICED SOUND MARK @@ -10780,15 +10868,15 @@ 309E;NS # HIRAGANA VOICED ITERATION MARK 309F;ID # HIRAGANA DIGRAPH YORI 30A0;NS # KATAKANA-HIRAGANA DOUBLE HYPHEN -30A1;NS # KATAKANA LETTER SMALL A +30A1;CJ # KATAKANA LETTER SMALL A 30A2;ID # KATAKANA LETTER A -30A3;NS # KATAKANA LETTER SMALL I +30A3;CJ # KATAKANA LETTER SMALL I 30A4;ID # KATAKANA LETTER 
I -30A5;NS # KATAKANA LETTER SMALL U +30A5;CJ # KATAKANA LETTER SMALL U 30A6;ID # KATAKANA LETTER U -30A7;NS # KATAKANA LETTER SMALL E +30A7;CJ # KATAKANA LETTER SMALL E 30A8;ID # KATAKANA LETTER E -30A9;NS # KATAKANA LETTER SMALL O +30A9;CJ # KATAKANA LETTER SMALL O 30AA;ID # KATAKANA LETTER O 30AB;ID # KATAKANA LETTER KA 30AC;ID # KATAKANA LETTER GA @@ -10814,7 +10902,7 @@ 30C0;ID # KATAKANA LETTER DA 30C1;ID # KATAKANA LETTER TI 30C2;ID # KATAKANA LETTER DI -30C3;NS # KATAKANA LETTER SMALL TU +30C3;CJ # KATAKANA LETTER SMALL TU 30C4;ID # KATAKANA LETTER TU 30C5;ID # KATAKANA LETTER DU 30C6;ID # KATAKANA LETTER TE @@ -10846,32 +10934,32 @@ 30E0;ID # KATAKANA LETTER MU 30E1;ID # KATAKANA LETTER ME 30E2;ID # KATAKANA LETTER MO -30E3;NS # KATAKANA LETTER SMALL YA +30E3;CJ # KATAKANA LETTER SMALL YA 30E4;ID # KATAKANA LETTER YA -30E5;NS # KATAKANA LETTER SMALL YU +30E5;CJ # KATAKANA LETTER SMALL YU 30E6;ID # KATAKANA LETTER YU -30E7;NS # KATAKANA LETTER SMALL YO +30E7;CJ # KATAKANA LETTER SMALL YO 30E8;ID # KATAKANA LETTER YO 30E9;ID # KATAKANA LETTER RA 30EA;ID # KATAKANA LETTER RI 30EB;ID # KATAKANA LETTER RU 30EC;ID # KATAKANA LETTER RE 30ED;ID # KATAKANA LETTER RO -30EE;NS # KATAKANA LETTER SMALL WA +30EE;CJ # KATAKANA LETTER SMALL WA 30EF;ID # KATAKANA LETTER WA 30F0;ID # KATAKANA LETTER WI 30F1;ID # KATAKANA LETTER WE 30F2;ID # KATAKANA LETTER WO 30F3;ID # KATAKANA LETTER N 30F4;ID # KATAKANA LETTER VU -30F5;NS # KATAKANA LETTER SMALL KA -30F6;NS # KATAKANA LETTER SMALL KE +30F5;CJ # KATAKANA LETTER SMALL KA +30F6;CJ # KATAKANA LETTER SMALL KE 30F7;ID # KATAKANA LETTER VA 30F8;ID # KATAKANA LETTER VI 30F9;ID # KATAKANA LETTER VE 30FA;ID # KATAKANA LETTER VO 30FB;NS # KATAKANA MIDDLE DOT -30FC;NS # KATAKANA-HIRAGANA PROLONGED SOUND MARK +30FC;CJ # KATAKANA-HIRAGANA PROLONGED SOUND MARK 30FD;NS # KATAKANA ITERATION MARK 30FE;NS # KATAKANA VOICED ITERATION MARK 30FF;ID # KATAKANA DIGRAPH KOTO @@ -11089,22 +11177,22 @@ 31E1;ID # CJK STROKE HZZZG 31E2;ID # CJK 
STROKE PG 31E3;ID # CJK STROKE Q -31F0;NS # KATAKANA LETTER SMALL KU -31F1;NS # KATAKANA LETTER SMALL SI -31F2;NS # KATAKANA LETTER SMALL SU -31F3;NS # KATAKANA LETTER SMALL TO -31F4;NS # KATAKANA LETTER SMALL NU -31F5;NS # KATAKANA LETTER SMALL HA -31F6;NS # KATAKANA LETTER SMALL HI -31F7;NS # KATAKANA LETTER SMALL HU -31F8;NS # KATAKANA LETTER SMALL HE -31F9;NS # KATAKANA LETTER SMALL HO -31FA;NS # KATAKANA LETTER SMALL MU -31FB;NS # KATAKANA LETTER SMALL RA -31FC;NS # KATAKANA LETTER SMALL RI -31FD;NS # KATAKANA LETTER SMALL RU -31FE;NS # KATAKANA LETTER SMALL RE -31FF;NS # KATAKANA LETTER SMALL RO +31F0;CJ # KATAKANA LETTER SMALL KU +31F1;CJ # KATAKANA LETTER SMALL SI +31F2;CJ # KATAKANA LETTER SMALL SU +31F3;CJ # KATAKANA LETTER SMALL TO +31F4;CJ # KATAKANA LETTER SMALL NU +31F5;CJ # KATAKANA LETTER SMALL HA +31F6;CJ # KATAKANA LETTER SMALL HI +31F7;CJ # KATAKANA LETTER SMALL HU +31F8;CJ # KATAKANA LETTER SMALL HE +31F9;CJ # KATAKANA LETTER SMALL HO +31FA;CJ # KATAKANA LETTER SMALL MU +31FB;CJ # KATAKANA LETTER SMALL RA +31FC;CJ # KATAKANA LETTER SMALL RI +31FD;CJ # KATAKANA LETTER SMALL RU +31FE;CJ # KATAKANA LETTER SMALL RE +31FF;CJ # KATAKANA LETTER SMALL RO 3200;ID # PARENTHESIZED HANGUL KIYEOK 3201;ID # PARENTHESIZED HANGUL NIEUN 3202;ID # PARENTHESIZED HANGUL TIKEUT @@ -11681,8 +11769,8 @@ 4DFD;AL # HEXAGRAM FOR SMALL PREPONDERANCE 4DFE;AL # HEXAGRAM FOR AFTER COMPLETION 4DFF;AL # HEXAGRAM FOR BEFORE COMPLETION -4E00..9FCB;ID # <CJK Ideograph, First>..<CJK Ideograph, Last> -9FCC..9FFF;ID # <reserved-9FCC>..<reserved-9FFF> +4E00..9FCC;ID # <CJK Ideograph, First>..<CJK Ideograph, Last> +9FCD..9FFF;ID # <reserved-9FCD>..<reserved-9FFF> A000;ID # YI SYLLABLE IT A001;ID # YI SYLLABLE IX A002;ID # YI SYLLABLE I @@ -13303,6 +13391,14 @@ A670;CM # COMBINING CYRILLIC TEN MILLIONS SIGN A671;CM # COMBINING CYRILLIC HUNDRED MILLIONS SIGN A672;CM # COMBINING CYRILLIC THOUSAND MILLIONS SIGN A673;AL # SLAVONIC ASTERISK +A674;CM # COMBINING CYRILLIC LETTER UKRAINIAN IE 
+A675;CM # COMBINING CYRILLIC LETTER I +A676;CM # COMBINING CYRILLIC LETTER YI +A677;CM # COMBINING CYRILLIC LETTER U +A678;CM # COMBINING CYRILLIC LETTER HARD SIGN +A679;CM # COMBINING CYRILLIC LETTER YERU +A67A;CM # COMBINING CYRILLIC LETTER SOFT SIGN +A67B;CM # COMBINING CYRILLIC LETTER OMEGA A67C;CM # COMBINING CYRILLIC KAVYKA A67D;CM # COMBINING CYRILLIC PAYEROK A67E;AL # CYRILLIC KAVYKA @@ -13331,6 +13427,7 @@ A694;AL # CYRILLIC CAPITAL LETTER HWE A695;AL # CYRILLIC SMALL LETTER HWE A696;AL # CYRILLIC CAPITAL LETTER SHWE A697;AL # CYRILLIC SMALL LETTER SHWE +A69F;CM # COMBINING CYRILLIC LETTER IOTIFIED E A6A0;AL # BAMUM LETTER A A6A1;AL # BAMUM LETTER KA A6A2;AL # BAMUM LETTER U @@ -13564,6 +13661,8 @@ A78D;AL # LATIN CAPITAL LETTER TURNED H A78E;AL # LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A790;AL # LATIN CAPITAL LETTER N WITH DESCENDER A791;AL # LATIN SMALL LETTER N WITH DESCENDER +A792;AL # LATIN CAPITAL LETTER C WITH BAR +A793;AL # LATIN SMALL LETTER C WITH BAR A7A0;AL # LATIN CAPITAL LETTER G WITH OBLIQUE STROKE A7A1;AL # LATIN SMALL LETTER G WITH OBLIQUE STROKE A7A2;AL # LATIN CAPITAL LETTER K WITH OBLIQUE STROKE @@ -13574,6 +13673,9 @@ A7A6;AL # LATIN CAPITAL LETTER R WITH OBLIQUE STROKE A7A7;AL # LATIN SMALL LETTER R WITH OBLIQUE STROKE A7A8;AL # LATIN CAPITAL LETTER S WITH OBLIQUE STROKE A7A9;AL # LATIN SMALL LETTER S WITH OBLIQUE STROKE +A7AA;AL # LATIN CAPITAL LETTER H WITH HOOK +A7F8;AL # MODIFIER LETTER CAPITAL H WITH STROKE +A7F9;AL # MODIFIER LETTER SMALL LIGATURE OE A7FA;AL # LATIN LETTER SMALL CAPITAL TURNED M A7FB;AL # LATIN EPIGRAPHIC LETTER REVERSED F A7FC;AL # LATIN EPIGRAPHIC LETTER REVERSED P @@ -14187,6 +14289,29 @@ AADC;SA # TAI VIET SYMBOL NUENG AADD;SA # TAI VIET SYMBOL SAM AADE;SA # TAI VIET SYMBOL HO HOI AADF;SA # TAI VIET SYMBOL KOI KOI +AAE0;AL # MEETEI MAYEK LETTER E +AAE1;AL # MEETEI MAYEK LETTER O +AAE2;AL # MEETEI MAYEK LETTER CHA +AAE3;AL # MEETEI MAYEK LETTER NYA +AAE4;AL # MEETEI MAYEK LETTER TTA +AAE5;AL # 
MEETEI MAYEK LETTER TTHA +AAE6;AL # MEETEI MAYEK LETTER DDA +AAE7;AL # MEETEI MAYEK LETTER DDHA +AAE8;AL # MEETEI MAYEK LETTER NNA +AAE9;AL # MEETEI MAYEK LETTER SHA +AAEA;AL # MEETEI MAYEK LETTER SSA +AAEB;CM # MEETEI MAYEK VOWEL SIGN II +AAEC;CM # MEETEI MAYEK VOWEL SIGN UU +AAED;CM # MEETEI MAYEK VOWEL SIGN AAI +AAEE;CM # MEETEI MAYEK VOWEL SIGN AU +AAEF;CM # MEETEI MAYEK VOWEL SIGN AAU +AAF0;BA # MEETEI MAYEK CHEIKHAN +AAF1;BA # MEETEI MAYEK AHANG KHUDAM +AAF2;AL # MEETEI MAYEK ANJI +AAF3;AL # MEETEI MAYEK SYLLABLE REPETITION MARK +AAF4;AL # MEETEI MAYEK WORD REPETITION MARK +AAF5;CM # MEETEI MAYEK VOWEL SIGN VISARGA +AAF6;CM # MEETEI MAYEK VIRAMA AB01;AL # ETHIOPIC SYLLABLE TTHU AB02;AL # ETHIOPIC SYLLABLE TTHI AB03;AL # ETHIOPIC SYLLABLE TTHAA @@ -15451,7 +15576,8 @@ FA2A;ID # CJK COMPATIBILITY IDEOGRAPH-FA2A FA2B;ID # CJK COMPATIBILITY IDEOGRAPH-FA2B FA2C;ID # CJK COMPATIBILITY IDEOGRAPH-FA2C FA2D;ID # CJK COMPATIBILITY IDEOGRAPH-FA2D -FA2E..FA2F;ID # <reserved-FA2E>..<reserved-FA2F> +FA2E;ID # CJK COMPATIBILITY IDEOGRAPH-FA2E +FA2F;ID # CJK COMPATIBILITY IDEOGRAPH-FA2F FA30;ID # CJK COMPATIBILITY IDEOGRAPH-FA30 FA31;ID # CJK COMPATIBILITY IDEOGRAPH-FA31 FA32;ID # CJK COMPATIBILITY IDEOGRAPH-FA32 @@ -15634,52 +15760,52 @@ FB14;AL # ARMENIAN SMALL LIGATURE MEN ECH FB15;AL # ARMENIAN SMALL LIGATURE MEN INI FB16;AL # ARMENIAN SMALL LIGATURE VEW NOW FB17;AL # ARMENIAN SMALL LIGATURE MEN XEH -FB1D;AL # HEBREW LETTER YOD WITH HIRIQ +FB1D;HL # HEBREW LETTER YOD WITH HIRIQ FB1E;CM # HEBREW POINT JUDEO-SPANISH VARIKA -FB1F;AL # HEBREW LIGATURE YIDDISH YOD YOD PATAH -FB20;AL # HEBREW LETTER ALTERNATIVE AYIN -FB21;AL # HEBREW LETTER WIDE ALEF -FB22;AL # HEBREW LETTER WIDE DALET -FB23;AL # HEBREW LETTER WIDE HE -FB24;AL # HEBREW LETTER WIDE KAF -FB25;AL # HEBREW LETTER WIDE LAMED -FB26;AL # HEBREW LETTER WIDE FINAL MEM -FB27;AL # HEBREW LETTER WIDE RESH -FB28;AL # HEBREW LETTER WIDE TAV +FB1F;HL # HEBREW LIGATURE YIDDISH YOD YOD PATAH +FB20;HL # HEBREW LETTER 
ALTERNATIVE AYIN +FB21;HL # HEBREW LETTER WIDE ALEF +FB22;HL # HEBREW LETTER WIDE DALET +FB23;HL # HEBREW LETTER WIDE HE +FB24;HL # HEBREW LETTER WIDE KAF +FB25;HL # HEBREW LETTER WIDE LAMED +FB26;HL # HEBREW LETTER WIDE FINAL MEM +FB27;HL # HEBREW LETTER WIDE RESH +FB28;HL # HEBREW LETTER WIDE TAV FB29;AL # HEBREW LETTER ALTERNATIVE PLUS SIGN -FB2A;AL # HEBREW LETTER SHIN WITH SHIN DOT -FB2B;AL # HEBREW LETTER SHIN WITH SIN DOT -FB2C;AL # HEBREW LETTER SHIN WITH DAGESH AND SHIN DOT -FB2D;AL # HEBREW LETTER SHIN WITH DAGESH AND SIN DOT -FB2E;AL # HEBREW LETTER ALEF WITH PATAH -FB2F;AL # HEBREW LETTER ALEF WITH QAMATS -FB30;AL # HEBREW LETTER ALEF WITH MAPIQ -FB31;AL # HEBREW LETTER BET WITH DAGESH -FB32;AL # HEBREW LETTER GIMEL WITH DAGESH -FB33;AL # HEBREW LETTER DALET WITH DAGESH -FB34;AL # HEBREW LETTER HE WITH MAPIQ -FB35;AL # HEBREW LETTER VAV WITH DAGESH -FB36;AL # HEBREW LETTER ZAYIN WITH DAGESH -FB38;AL # HEBREW LETTER TET WITH DAGESH -FB39;AL # HEBREW LETTER YOD WITH DAGESH -FB3A;AL # HEBREW LETTER FINAL KAF WITH DAGESH -FB3B;AL # HEBREW LETTER KAF WITH DAGESH -FB3C;AL # HEBREW LETTER LAMED WITH DAGESH -FB3E;AL # HEBREW LETTER MEM WITH DAGESH -FB40;AL # HEBREW LETTER NUN WITH DAGESH -FB41;AL # HEBREW LETTER SAMEKH WITH DAGESH -FB43;AL # HEBREW LETTER FINAL PE WITH DAGESH -FB44;AL # HEBREW LETTER PE WITH DAGESH -FB46;AL # HEBREW LETTER TSADI WITH DAGESH -FB47;AL # HEBREW LETTER QOF WITH DAGESH -FB48;AL # HEBREW LETTER RESH WITH DAGESH -FB49;AL # HEBREW LETTER SHIN WITH DAGESH -FB4A;AL # HEBREW LETTER TAV WITH DAGESH -FB4B;AL # HEBREW LETTER VAV WITH HOLAM -FB4C;AL # HEBREW LETTER BET WITH RAFE -FB4D;AL # HEBREW LETTER KAF WITH RAFE -FB4E;AL # HEBREW LETTER PE WITH RAFE -FB4F;AL # HEBREW LIGATURE ALEF LAMED +FB2A;HL # HEBREW LETTER SHIN WITH SHIN DOT +FB2B;HL # HEBREW LETTER SHIN WITH SIN DOT +FB2C;HL # HEBREW LETTER SHIN WITH DAGESH AND SHIN DOT +FB2D;HL # HEBREW LETTER SHIN WITH DAGESH AND SIN DOT +FB2E;HL # HEBREW LETTER ALEF WITH PATAH +FB2F;HL # HEBREW 
LETTER ALEF WITH QAMATS +FB30;HL # HEBREW LETTER ALEF WITH MAPIQ +FB31;HL # HEBREW LETTER BET WITH DAGESH +FB32;HL # HEBREW LETTER GIMEL WITH DAGESH +FB33;HL # HEBREW LETTER DALET WITH DAGESH +FB34;HL # HEBREW LETTER HE WITH MAPIQ +FB35;HL # HEBREW LETTER VAV WITH DAGESH +FB36;HL # HEBREW LETTER ZAYIN WITH DAGESH +FB38;HL # HEBREW LETTER TET WITH DAGESH +FB39;HL # HEBREW LETTER YOD WITH DAGESH +FB3A;HL # HEBREW LETTER FINAL KAF WITH DAGESH +FB3B;HL # HEBREW LETTER KAF WITH DAGESH +FB3C;HL # HEBREW LETTER LAMED WITH DAGESH +FB3E;HL # HEBREW LETTER MEM WITH DAGESH +FB40;HL # HEBREW LETTER NUN WITH DAGESH +FB41;HL # HEBREW LETTER SAMEKH WITH DAGESH +FB43;HL # HEBREW LETTER FINAL PE WITH DAGESH +FB44;HL # HEBREW LETTER PE WITH DAGESH +FB46;HL # HEBREW LETTER TSADI WITH DAGESH +FB47;HL # HEBREW LETTER QOF WITH DAGESH +FB48;HL # HEBREW LETTER RESH WITH DAGESH +FB49;HL # HEBREW LETTER SHIN WITH DAGESH +FB4A;HL # HEBREW LETTER TAV WITH DAGESH +FB4B;HL # HEBREW LETTER VAV WITH HOLAM +FB4C;HL # HEBREW LETTER BET WITH RAFE +FB4D;HL # HEBREW LETTER KAF WITH RAFE +FB4E;HL # HEBREW LETTER PE WITH RAFE +FB4F;HL # HEBREW LIGATURE ALEF LAMED FB50;AL # ARABIC LETTER ALEF WASLA ISOLATED FORM FB51;AL # ARABIC LETTER ALEF WASLA FINAL FORM FB52;AL # ARABIC LETTER BEEH ISOLATED FORM @@ -16625,16 +16751,16 @@ FF63;CL # HALFWIDTH RIGHT CORNER BRACKET FF64;CL # HALFWIDTH IDEOGRAPHIC COMMA FF65;NS # HALFWIDTH KATAKANA MIDDLE DOT FF66;AL # HALFWIDTH KATAKANA LETTER WO -FF67;NS # HALFWIDTH KATAKANA LETTER SMALL A -FF68;NS # HALFWIDTH KATAKANA LETTER SMALL I -FF69;NS # HALFWIDTH KATAKANA LETTER SMALL U -FF6A;NS # HALFWIDTH KATAKANA LETTER SMALL E -FF6B;NS # HALFWIDTH KATAKANA LETTER SMALL O -FF6C;NS # HALFWIDTH KATAKANA LETTER SMALL YA -FF6D;NS # HALFWIDTH KATAKANA LETTER SMALL YU -FF6E;NS # HALFWIDTH KATAKANA LETTER SMALL YO -FF6F;NS # HALFWIDTH KATAKANA LETTER SMALL TU -FF70;NS # HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK +FF67;CJ # HALFWIDTH KATAKANA LETTER SMALL A +FF68;CJ # HALFWIDTH 
KATAKANA LETTER SMALL I +FF69;CJ # HALFWIDTH KATAKANA LETTER SMALL U +FF6A;CJ # HALFWIDTH KATAKANA LETTER SMALL E +FF6B;CJ # HALFWIDTH KATAKANA LETTER SMALL O +FF6C;CJ # HALFWIDTH KATAKANA LETTER SMALL YA +FF6D;CJ # HALFWIDTH KATAKANA LETTER SMALL YU +FF6E;CJ # HALFWIDTH KATAKANA LETTER SMALL YO +FF6F;CJ # HALFWIDTH KATAKANA LETTER SMALL TU +FF70;CJ # HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK FF71;AL # HALFWIDTH KATAKANA LETTER A FF72;AL # HALFWIDTH KATAKANA LETTER I FF73;AL # HALFWIDTH KATAKANA LETTER U @@ -17685,6 +17811,64 @@ FFFD;AI # REPLACEMENT CHARACTER 10938;AL # LYDIAN LETTER NN 10939;AL # LYDIAN LETTER C 1093F;AL # LYDIAN TRIANGULAR MARK +10980;AL # MEROITIC HIEROGLYPHIC LETTER A +10981;AL # MEROITIC HIEROGLYPHIC LETTER E +10982;AL # MEROITIC HIEROGLYPHIC LETTER I +10983;AL # MEROITIC HIEROGLYPHIC LETTER O +10984;AL # MEROITIC HIEROGLYPHIC LETTER YA +10985;AL # MEROITIC HIEROGLYPHIC LETTER WA +10986;AL # MEROITIC HIEROGLYPHIC LETTER BA +10987;AL # MEROITIC HIEROGLYPHIC LETTER BA-2 +10988;AL # MEROITIC HIEROGLYPHIC LETTER PA +10989;AL # MEROITIC HIEROGLYPHIC LETTER MA +1098A;AL # MEROITIC HIEROGLYPHIC LETTER NA +1098B;AL # MEROITIC HIEROGLYPHIC LETTER NA-2 +1098C;AL # MEROITIC HIEROGLYPHIC LETTER NE +1098D;AL # MEROITIC HIEROGLYPHIC LETTER NE-2 +1098E;AL # MEROITIC HIEROGLYPHIC LETTER RA +1098F;AL # MEROITIC HIEROGLYPHIC LETTER RA-2 +10990;AL # MEROITIC HIEROGLYPHIC LETTER LA +10991;AL # MEROITIC HIEROGLYPHIC LETTER KHA +10992;AL # MEROITIC HIEROGLYPHIC LETTER HHA +10993;AL # MEROITIC HIEROGLYPHIC LETTER SA +10994;AL # MEROITIC HIEROGLYPHIC LETTER SA-2 +10995;AL # MEROITIC HIEROGLYPHIC LETTER SE +10996;AL # MEROITIC HIEROGLYPHIC LETTER KA +10997;AL # MEROITIC HIEROGLYPHIC LETTER QA +10998;AL # MEROITIC HIEROGLYPHIC LETTER TA +10999;AL # MEROITIC HIEROGLYPHIC LETTER TA-2 +1099A;AL # MEROITIC HIEROGLYPHIC LETTER TE +1099B;AL # MEROITIC HIEROGLYPHIC LETTER TE-2 +1099C;AL # MEROITIC HIEROGLYPHIC LETTER TO +1099D;AL # MEROITIC HIEROGLYPHIC LETTER DA 
+1099E;AL # MEROITIC HIEROGLYPHIC SYMBOL VIDJ +1099F;AL # MEROITIC HIEROGLYPHIC SYMBOL VIDJ-2 +109A0;AL # MEROITIC CURSIVE LETTER A +109A1;AL # MEROITIC CURSIVE LETTER E +109A2;AL # MEROITIC CURSIVE LETTER I +109A3;AL # MEROITIC CURSIVE LETTER O +109A4;AL # MEROITIC CURSIVE LETTER YA +109A5;AL # MEROITIC CURSIVE LETTER WA +109A6;AL # MEROITIC CURSIVE LETTER BA +109A7;AL # MEROITIC CURSIVE LETTER PA +109A8;AL # MEROITIC CURSIVE LETTER MA +109A9;AL # MEROITIC CURSIVE LETTER NA +109AA;AL # MEROITIC CURSIVE LETTER NE +109AB;AL # MEROITIC CURSIVE LETTER RA +109AC;AL # MEROITIC CURSIVE LETTER LA +109AD;AL # MEROITIC CURSIVE LETTER KHA +109AE;AL # MEROITIC CURSIVE LETTER HHA +109AF;AL # MEROITIC CURSIVE LETTER SA +109B0;AL # MEROITIC CURSIVE LETTER ARCHAIC SA +109B1;AL # MEROITIC CURSIVE LETTER SE +109B2;AL # MEROITIC CURSIVE LETTER KA +109B3;AL # MEROITIC CURSIVE LETTER QA +109B4;AL # MEROITIC CURSIVE LETTER TA +109B5;AL # MEROITIC CURSIVE LETTER TE +109B6;AL # MEROITIC CURSIVE LETTER TO +109B7;AL # MEROITIC CURSIVE LETTER DA +109BE;AL # MEROITIC CURSIVE LOGOGRAM RMT +109BF;AL # MEROITIC CURSIVE LOGOGRAM IMN 10A00;AL # KHAROSHTHI LETTER A 10A01;CM # KHAROSHTHI VOWEL SIGN I 10A02;CM # KHAROSHTHI VOWEL SIGN U @@ -18178,6 +18362,257 @@ FFFD;AI # REPLACEMENT CHARACTER 110BF;BA # KAITHI DOUBLE SECTION MARK 110C0;BA # KAITHI DANDA 110C1;BA # KAITHI DOUBLE DANDA +110D0;AL # SORA SOMPENG LETTER SAH +110D1;AL # SORA SOMPENG LETTER TAH +110D2;AL # SORA SOMPENG LETTER BAH +110D3;AL # SORA SOMPENG LETTER CAH +110D4;AL # SORA SOMPENG LETTER DAH +110D5;AL # SORA SOMPENG LETTER GAH +110D6;AL # SORA SOMPENG LETTER MAH +110D7;AL # SORA SOMPENG LETTER NGAH +110D8;AL # SORA SOMPENG LETTER LAH +110D9;AL # SORA SOMPENG LETTER NAH +110DA;AL # SORA SOMPENG LETTER VAH +110DB;AL # SORA SOMPENG LETTER PAH +110DC;AL # SORA SOMPENG LETTER YAH +110DD;AL # SORA SOMPENG LETTER RAH +110DE;AL # SORA SOMPENG LETTER HAH +110DF;AL # SORA SOMPENG LETTER KAH +110E0;AL # SORA SOMPENG LETTER JAH +110E1;AL # 
SORA SOMPENG LETTER NYAH +110E2;AL # SORA SOMPENG LETTER AH +110E3;AL # SORA SOMPENG LETTER EEH +110E4;AL # SORA SOMPENG LETTER IH +110E5;AL # SORA SOMPENG LETTER UH +110E6;AL # SORA SOMPENG LETTER OH +110E7;AL # SORA SOMPENG LETTER EH +110E8;AL # SORA SOMPENG LETTER MAE +110F0;NU # SORA SOMPENG DIGIT ZERO +110F1;NU # SORA SOMPENG DIGIT ONE +110F2;NU # SORA SOMPENG DIGIT TWO +110F3;NU # SORA SOMPENG DIGIT THREE +110F4;NU # SORA SOMPENG DIGIT FOUR +110F5;NU # SORA SOMPENG DIGIT FIVE +110F6;NU # SORA SOMPENG DIGIT SIX +110F7;NU # SORA SOMPENG DIGIT SEVEN +110F8;NU # SORA SOMPENG DIGIT EIGHT +110F9;NU # SORA SOMPENG DIGIT NINE +11100;CM # CHAKMA SIGN CANDRABINDU +11101;CM # CHAKMA SIGN ANUSVARA +11102;CM # CHAKMA SIGN VISARGA +11103;AL # CHAKMA LETTER AA +11104;AL # CHAKMA LETTER I +11105;AL # CHAKMA LETTER U +11106;AL # CHAKMA LETTER E +11107;AL # CHAKMA LETTER KAA +11108;AL # CHAKMA LETTER KHAA +11109;AL # CHAKMA LETTER GAA +1110A;AL # CHAKMA LETTER GHAA +1110B;AL # CHAKMA LETTER NGAA +1110C;AL # CHAKMA LETTER CAA +1110D;AL # CHAKMA LETTER CHAA +1110E;AL # CHAKMA LETTER JAA +1110F;AL # CHAKMA LETTER JHAA +11110;AL # CHAKMA LETTER NYAA +11111;AL # CHAKMA LETTER TTAA +11112;AL # CHAKMA LETTER TTHAA +11113;AL # CHAKMA LETTER DDAA +11114;AL # CHAKMA LETTER DDHAA +11115;AL # CHAKMA LETTER NNAA +11116;AL # CHAKMA LETTER TAA +11117;AL # CHAKMA LETTER THAA +11118;AL # CHAKMA LETTER DAA +11119;AL # CHAKMA LETTER DHAA +1111A;AL # CHAKMA LETTER NAA +1111B;AL # CHAKMA LETTER PAA +1111C;AL # CHAKMA LETTER PHAA +1111D;AL # CHAKMA LETTER BAA +1111E;AL # CHAKMA LETTER BHAA +1111F;AL # CHAKMA LETTER MAA +11120;AL # CHAKMA LETTER YYAA +11121;AL # CHAKMA LETTER YAA +11122;AL # CHAKMA LETTER RAA +11123;AL # CHAKMA LETTER LAA +11124;AL # CHAKMA LETTER WAA +11125;AL # CHAKMA LETTER SAA +11126;AL # CHAKMA LETTER HAA +11127;CM # CHAKMA VOWEL SIGN A +11128;CM # CHAKMA VOWEL SIGN I +11129;CM # CHAKMA VOWEL SIGN II +1112A;CM # CHAKMA VOWEL SIGN U +1112B;CM # CHAKMA VOWEL SIGN UU +1112C;CM # 
CHAKMA VOWEL SIGN E +1112D;CM # CHAKMA VOWEL SIGN AI +1112E;CM # CHAKMA VOWEL SIGN O +1112F;CM # CHAKMA VOWEL SIGN AU +11130;CM # CHAKMA VOWEL SIGN OI +11131;CM # CHAKMA O MARK +11132;CM # CHAKMA AU MARK +11133;CM # CHAKMA VIRAMA +11134;CM # CHAKMA MAAYYAA +11136;NU # CHAKMA DIGIT ZERO +11137;NU # CHAKMA DIGIT ONE +11138;NU # CHAKMA DIGIT TWO +11139;NU # CHAKMA DIGIT THREE +1113A;NU # CHAKMA DIGIT FOUR +1113B;NU # CHAKMA DIGIT FIVE +1113C;NU # CHAKMA DIGIT SIX +1113D;NU # CHAKMA DIGIT SEVEN +1113E;NU # CHAKMA DIGIT EIGHT +1113F;NU # CHAKMA DIGIT NINE +11140;BA # CHAKMA SECTION MARK +11141;BA # CHAKMA DANDA +11142;BA # CHAKMA DOUBLE DANDA +11143;BA # CHAKMA QUESTION MARK +11180;CM # SHARADA SIGN CANDRABINDU +11181;CM # SHARADA SIGN ANUSVARA +11182;CM # SHARADA SIGN VISARGA +11183;AL # SHARADA LETTER A +11184;AL # SHARADA LETTER AA +11185;AL # SHARADA LETTER I +11186;AL # SHARADA LETTER II +11187;AL # SHARADA LETTER U +11188;AL # SHARADA LETTER UU +11189;AL # SHARADA LETTER VOCALIC R +1118A;AL # SHARADA LETTER VOCALIC RR +1118B;AL # SHARADA LETTER VOCALIC L +1118C;AL # SHARADA LETTER VOCALIC LL +1118D;AL # SHARADA LETTER E +1118E;AL # SHARADA LETTER AI +1118F;AL # SHARADA LETTER O +11190;AL # SHARADA LETTER AU +11191;AL # SHARADA LETTER KA +11192;AL # SHARADA LETTER KHA +11193;AL # SHARADA LETTER GA +11194;AL # SHARADA LETTER GHA +11195;AL # SHARADA LETTER NGA +11196;AL # SHARADA LETTER CA +11197;AL # SHARADA LETTER CHA +11198;AL # SHARADA LETTER JA +11199;AL # SHARADA LETTER JHA +1119A;AL # SHARADA LETTER NYA +1119B;AL # SHARADA LETTER TTA +1119C;AL # SHARADA LETTER TTHA +1119D;AL # SHARADA LETTER DDA +1119E;AL # SHARADA LETTER DDHA +1119F;AL # SHARADA LETTER NNA +111A0;AL # SHARADA LETTER TA +111A1;AL # SHARADA LETTER THA +111A2;AL # SHARADA LETTER DA +111A3;AL # SHARADA LETTER DHA +111A4;AL # SHARADA LETTER NA +111A5;AL # SHARADA LETTER PA +111A6;AL # SHARADA LETTER PHA +111A7;AL # SHARADA LETTER BA +111A8;AL # SHARADA LETTER BHA +111A9;AL # SHARADA LETTER MA 
+111AA;AL # SHARADA LETTER YA +111AB;AL # SHARADA LETTER RA +111AC;AL # SHARADA LETTER LA +111AD;AL # SHARADA LETTER LLA +111AE;AL # SHARADA LETTER VA +111AF;AL # SHARADA LETTER SHA +111B0;AL # SHARADA LETTER SSA +111B1;AL # SHARADA LETTER SA +111B2;AL # SHARADA LETTER HA +111B3;CM # SHARADA VOWEL SIGN AA +111B4;CM # SHARADA VOWEL SIGN I +111B5;CM # SHARADA VOWEL SIGN II +111B6;CM # SHARADA VOWEL SIGN U +111B7;CM # SHARADA VOWEL SIGN UU +111B8;CM # SHARADA VOWEL SIGN VOCALIC R +111B9;CM # SHARADA VOWEL SIGN VOCALIC RR +111BA;CM # SHARADA VOWEL SIGN VOCALIC L +111BB;CM # SHARADA VOWEL SIGN VOCALIC LL +111BC;CM # SHARADA VOWEL SIGN E +111BD;CM # SHARADA VOWEL SIGN AI +111BE;CM # SHARADA VOWEL SIGN O +111BF;CM # SHARADA VOWEL SIGN AU +111C0;CM # SHARADA SIGN VIRAMA +111C1;AL # SHARADA SIGN AVAGRAHA +111C2;AL # SHARADA SIGN JIHVAMULIYA +111C3;AL # SHARADA SIGN UPADHMANIYA +111C4;AL # SHARADA OM +111C5;BA # SHARADA DANDA +111C6;BA # SHARADA DOUBLE DANDA +111C7;AL # SHARADA ABBREVIATION SIGN +111C8;BA # SHARADA SEPARATOR +111D0;NU # SHARADA DIGIT ZERO +111D1;NU # SHARADA DIGIT ONE +111D2;NU # SHARADA DIGIT TWO +111D3;NU # SHARADA DIGIT THREE +111D4;NU # SHARADA DIGIT FOUR +111D5;NU # SHARADA DIGIT FIVE +111D6;NU # SHARADA DIGIT SIX +111D7;NU # SHARADA DIGIT SEVEN +111D8;NU # SHARADA DIGIT EIGHT +111D9;NU # SHARADA DIGIT NINE +11680;AL # TAKRI LETTER A +11681;AL # TAKRI LETTER AA +11682;AL # TAKRI LETTER I +11683;AL # TAKRI LETTER II +11684;AL # TAKRI LETTER U +11685;AL # TAKRI LETTER UU +11686;AL # TAKRI LETTER E +11687;AL # TAKRI LETTER AI +11688;AL # TAKRI LETTER O +11689;AL # TAKRI LETTER AU +1168A;AL # TAKRI LETTER KA +1168B;AL # TAKRI LETTER KHA +1168C;AL # TAKRI LETTER GA +1168D;AL # TAKRI LETTER GHA +1168E;AL # TAKRI LETTER NGA +1168F;AL # TAKRI LETTER CA +11690;AL # TAKRI LETTER CHA +11691;AL # TAKRI LETTER JA +11692;AL # TAKRI LETTER JHA +11693;AL # TAKRI LETTER NYA +11694;AL # TAKRI LETTER TTA +11695;AL # TAKRI LETTER TTHA +11696;AL # TAKRI LETTER DDA +11697;AL 
# TAKRI LETTER DDHA +11698;AL # TAKRI LETTER NNA +11699;AL # TAKRI LETTER TA +1169A;AL # TAKRI LETTER THA +1169B;AL # TAKRI LETTER DA +1169C;AL # TAKRI LETTER DHA +1169D;AL # TAKRI LETTER NA +1169E;AL # TAKRI LETTER PA +1169F;AL # TAKRI LETTER PHA +116A0;AL # TAKRI LETTER BA +116A1;AL # TAKRI LETTER BHA +116A2;AL # TAKRI LETTER MA +116A3;AL # TAKRI LETTER YA +116A4;AL # TAKRI LETTER RA +116A5;AL # TAKRI LETTER LA +116A6;AL # TAKRI LETTER VA +116A7;AL # TAKRI LETTER SHA +116A8;AL # TAKRI LETTER SA +116A9;AL # TAKRI LETTER HA +116AA;AL # TAKRI LETTER RRA +116AB;CM # TAKRI SIGN ANUSVARA +116AC;CM # TAKRI SIGN VISARGA +116AD;CM # TAKRI VOWEL SIGN AA +116AE;CM # TAKRI VOWEL SIGN I +116AF;CM # TAKRI VOWEL SIGN II +116B0;CM # TAKRI VOWEL SIGN U +116B1;CM # TAKRI VOWEL SIGN UU +116B2;CM # TAKRI VOWEL SIGN E +116B3;CM # TAKRI VOWEL SIGN AI +116B4;CM # TAKRI VOWEL SIGN O +116B5;CM # TAKRI VOWEL SIGN AU +116B6;CM # TAKRI SIGN VIRAMA +116B7;CM # TAKRI SIGN NUKTA +116C0;NU # TAKRI DIGIT ZERO +116C1;NU # TAKRI DIGIT ONE +116C2;NU # TAKRI DIGIT TWO +116C3;NU # TAKRI DIGIT THREE +116C4;NU # TAKRI DIGIT FOUR +116C5;NU # TAKRI DIGIT FIVE +116C6;NU # TAKRI DIGIT SIX +116C7;NU # TAKRI DIGIT SEVEN +116C8;NU # TAKRI DIGIT EIGHT +116C9;NU # TAKRI DIGIT NINE 12000;AL # CUNEIFORM SIGN A 12001;AL # CUNEIFORM SIGN A TIMES A 12002;AL # CUNEIFORM SIGN A TIMES BAD @@ -20800,6 +21235,139 @@ FFFD;AI # REPLACEMENT CHARACTER 16A36;AL # BAMUM LETTER PHASE-F KPA 16A37;AL # BAMUM LETTER PHASE-F SAMBA 16A38;AL # BAMUM LETTER PHASE-F VUEQ +16F00;AL # MIAO LETTER PA +16F01;AL # MIAO LETTER BA +16F02;AL # MIAO LETTER YI PA +16F03;AL # MIAO LETTER PLA +16F04;AL # MIAO LETTER MA +16F05;AL # MIAO LETTER MHA +16F06;AL # MIAO LETTER ARCHAIC MA +16F07;AL # MIAO LETTER FA +16F08;AL # MIAO LETTER VA +16F09;AL # MIAO LETTER VFA +16F0A;AL # MIAO LETTER TA +16F0B;AL # MIAO LETTER DA +16F0C;AL # MIAO LETTER YI TTA +16F0D;AL # MIAO LETTER YI TA +16F0E;AL # MIAO LETTER TTA +16F0F;AL # MIAO LETTER DDA +16F10;AL # MIAO 
LETTER NA +16F11;AL # MIAO LETTER NHA +16F12;AL # MIAO LETTER YI NNA +16F13;AL # MIAO LETTER ARCHAIC NA +16F14;AL # MIAO LETTER NNA +16F15;AL # MIAO LETTER NNHA +16F16;AL # MIAO LETTER LA +16F17;AL # MIAO LETTER LYA +16F18;AL # MIAO LETTER LHA +16F19;AL # MIAO LETTER LHYA +16F1A;AL # MIAO LETTER TLHA +16F1B;AL # MIAO LETTER DLHA +16F1C;AL # MIAO LETTER TLHYA +16F1D;AL # MIAO LETTER DLHYA +16F1E;AL # MIAO LETTER KA +16F1F;AL # MIAO LETTER GA +16F20;AL # MIAO LETTER YI KA +16F21;AL # MIAO LETTER QA +16F22;AL # MIAO LETTER QGA +16F23;AL # MIAO LETTER NGA +16F24;AL # MIAO LETTER NGHA +16F25;AL # MIAO LETTER ARCHAIC NGA +16F26;AL # MIAO LETTER HA +16F27;AL # MIAO LETTER XA +16F28;AL # MIAO LETTER GHA +16F29;AL # MIAO LETTER GHHA +16F2A;AL # MIAO LETTER TSSA +16F2B;AL # MIAO LETTER DZZA +16F2C;AL # MIAO LETTER NYA +16F2D;AL # MIAO LETTER NYHA +16F2E;AL # MIAO LETTER TSHA +16F2F;AL # MIAO LETTER DZHA +16F30;AL # MIAO LETTER YI TSHA +16F31;AL # MIAO LETTER YI DZHA +16F32;AL # MIAO LETTER REFORMED TSHA +16F33;AL # MIAO LETTER SHA +16F34;AL # MIAO LETTER SSA +16F35;AL # MIAO LETTER ZHA +16F36;AL # MIAO LETTER ZSHA +16F37;AL # MIAO LETTER TSA +16F38;AL # MIAO LETTER DZA +16F39;AL # MIAO LETTER YI TSA +16F3A;AL # MIAO LETTER SA +16F3B;AL # MIAO LETTER ZA +16F3C;AL # MIAO LETTER ZSA +16F3D;AL # MIAO LETTER ZZA +16F3E;AL # MIAO LETTER ZZSA +16F3F;AL # MIAO LETTER ARCHAIC ZZA +16F40;AL # MIAO LETTER ZZYA +16F41;AL # MIAO LETTER ZZSYA +16F42;AL # MIAO LETTER WA +16F43;AL # MIAO LETTER AH +16F44;AL # MIAO LETTER HHA +16F50;AL # MIAO LETTER NASALIZATION +16F51;CM # MIAO SIGN ASPIRATION +16F52;CM # MIAO SIGN REFORMED VOICING +16F53;CM # MIAO SIGN REFORMED ASPIRATION +16F54;CM # MIAO VOWEL SIGN A +16F55;CM # MIAO VOWEL SIGN AA +16F56;CM # MIAO VOWEL SIGN AHH +16F57;CM # MIAO VOWEL SIGN AN +16F58;CM # MIAO VOWEL SIGN ANG +16F59;CM # MIAO VOWEL SIGN O +16F5A;CM # MIAO VOWEL SIGN OO +16F5B;CM # MIAO VOWEL SIGN WO +16F5C;CM # MIAO VOWEL SIGN W +16F5D;CM # MIAO VOWEL SIGN E +16F5E;CM # 
MIAO VOWEL SIGN EN +16F5F;CM # MIAO VOWEL SIGN ENG +16F60;CM # MIAO VOWEL SIGN OEY +16F61;CM # MIAO VOWEL SIGN I +16F62;CM # MIAO VOWEL SIGN IA +16F63;CM # MIAO VOWEL SIGN IAN +16F64;CM # MIAO VOWEL SIGN IANG +16F65;CM # MIAO VOWEL SIGN IO +16F66;CM # MIAO VOWEL SIGN IE +16F67;CM # MIAO VOWEL SIGN II +16F68;CM # MIAO VOWEL SIGN IU +16F69;CM # MIAO VOWEL SIGN ING +16F6A;CM # MIAO VOWEL SIGN U +16F6B;CM # MIAO VOWEL SIGN UA +16F6C;CM # MIAO VOWEL SIGN UAN +16F6D;CM # MIAO VOWEL SIGN UANG +16F6E;CM # MIAO VOWEL SIGN UU +16F6F;CM # MIAO VOWEL SIGN UEI +16F70;CM # MIAO VOWEL SIGN UNG +16F71;CM # MIAO VOWEL SIGN Y +16F72;CM # MIAO VOWEL SIGN YI +16F73;CM # MIAO VOWEL SIGN AE +16F74;CM # MIAO VOWEL SIGN AEE +16F75;CM # MIAO VOWEL SIGN ERR +16F76;CM # MIAO VOWEL SIGN ROUNDED ERR +16F77;CM # MIAO VOWEL SIGN ER +16F78;CM # MIAO VOWEL SIGN ROUNDED ER +16F79;CM # MIAO VOWEL SIGN AI +16F7A;CM # MIAO VOWEL SIGN EI +16F7B;CM # MIAO VOWEL SIGN AU +16F7C;CM # MIAO VOWEL SIGN OU +16F7D;CM # MIAO VOWEL SIGN N +16F7E;CM # MIAO VOWEL SIGN NG +16F8F;CM # MIAO TONE RIGHT +16F90;CM # MIAO TONE TOP RIGHT +16F91;CM # MIAO TONE ABOVE +16F92;CM # MIAO TONE BELOW +16F93;AL # MIAO LETTER TONE-2 +16F94;AL # MIAO LETTER TONE-3 +16F95;AL # MIAO LETTER TONE-4 +16F96;AL # MIAO LETTER TONE-5 +16F97;AL # MIAO LETTER TONE-6 +16F98;AL # MIAO LETTER TONE-7 +16F99;AL # MIAO LETTER TONE-8 +16F9A;AL # MIAO LETTER REFORMED TONE-1 +16F9B;AL # MIAO LETTER REFORMED TONE-2 +16F9C;AL # MIAO LETTER REFORMED TONE-4 +16F9D;AL # MIAO LETTER REFORMED TONE-5 +16F9E;AL # MIAO LETTER REFORMED TONE-6 +16F9F;AL # MIAO LETTER REFORMED TONE-8 1B000;ID # KATAKANA LETTER ARCHAIC E 1B001;ID # HIRAGANA LETTER ARCHAIC YE 1D000;AL # BYZANTINE MUSICAL SYMBOL PSILI @@ -22439,6 +23007,149 @@ FFFD;AI # REPLACEMENT CHARACTER 1D7FD;NU # MATHEMATICAL MONOSPACE DIGIT SEVEN 1D7FE;NU # MATHEMATICAL MONOSPACE DIGIT EIGHT 1D7FF;NU # MATHEMATICAL MONOSPACE DIGIT NINE +1EE00;AL # ARABIC MATHEMATICAL ALEF +1EE01;AL # ARABIC MATHEMATICAL BEH 
+1EE02;AL # ARABIC MATHEMATICAL JEEM +1EE03;AL # ARABIC MATHEMATICAL DAL +1EE05;AL # ARABIC MATHEMATICAL WAW +1EE06;AL # ARABIC MATHEMATICAL ZAIN +1EE07;AL # ARABIC MATHEMATICAL HAH +1EE08;AL # ARABIC MATHEMATICAL TAH +1EE09;AL # ARABIC MATHEMATICAL YEH +1EE0A;AL # ARABIC MATHEMATICAL KAF +1EE0B;AL # ARABIC MATHEMATICAL LAM +1EE0C;AL # ARABIC MATHEMATICAL MEEM +1EE0D;AL # ARABIC MATHEMATICAL NOON +1EE0E;AL # ARABIC MATHEMATICAL SEEN +1EE0F;AL # ARABIC MATHEMATICAL AIN +1EE10;AL # ARABIC MATHEMATICAL FEH +1EE11;AL # ARABIC MATHEMATICAL SAD +1EE12;AL # ARABIC MATHEMATICAL QAF +1EE13;AL # ARABIC MATHEMATICAL REH +1EE14;AL # ARABIC MATHEMATICAL SHEEN +1EE15;AL # ARABIC MATHEMATICAL TEH +1EE16;AL # ARABIC MATHEMATICAL THEH +1EE17;AL # ARABIC MATHEMATICAL KHAH +1EE18;AL # ARABIC MATHEMATICAL THAL +1EE19;AL # ARABIC MATHEMATICAL DAD +1EE1A;AL # ARABIC MATHEMATICAL ZAH +1EE1B;AL # ARABIC MATHEMATICAL GHAIN +1EE1C;AL # ARABIC MATHEMATICAL DOTLESS BEH +1EE1D;AL # ARABIC MATHEMATICAL DOTLESS NOON +1EE1E;AL # ARABIC MATHEMATICAL DOTLESS FEH +1EE1F;AL # ARABIC MATHEMATICAL DOTLESS QAF +1EE21;AL # ARABIC MATHEMATICAL INITIAL BEH +1EE22;AL # ARABIC MATHEMATICAL INITIAL JEEM +1EE24;AL # ARABIC MATHEMATICAL INITIAL HEH +1EE27;AL # ARABIC MATHEMATICAL INITIAL HAH +1EE29;AL # ARABIC MATHEMATICAL INITIAL YEH +1EE2A;AL # ARABIC MATHEMATICAL INITIAL KAF +1EE2B;AL # ARABIC MATHEMATICAL INITIAL LAM +1EE2C;AL # ARABIC MATHEMATICAL INITIAL MEEM +1EE2D;AL # ARABIC MATHEMATICAL INITIAL NOON +1EE2E;AL # ARABIC MATHEMATICAL INITIAL SEEN +1EE2F;AL # ARABIC MATHEMATICAL INITIAL AIN +1EE30;AL # ARABIC MATHEMATICAL INITIAL FEH +1EE31;AL # ARABIC MATHEMATICAL INITIAL SAD +1EE32;AL # ARABIC MATHEMATICAL INITIAL QAF +1EE34;AL # ARABIC MATHEMATICAL INITIAL SHEEN +1EE35;AL # ARABIC MATHEMATICAL INITIAL TEH +1EE36;AL # ARABIC MATHEMATICAL INITIAL THEH +1EE37;AL # ARABIC MATHEMATICAL INITIAL KHAH +1EE39;AL # ARABIC MATHEMATICAL INITIAL DAD +1EE3B;AL # ARABIC MATHEMATICAL INITIAL GHAIN +1EE42;AL # ARABIC 
MATHEMATICAL TAILED JEEM +1EE47;AL # ARABIC MATHEMATICAL TAILED HAH +1EE49;AL # ARABIC MATHEMATICAL TAILED YEH +1EE4B;AL # ARABIC MATHEMATICAL TAILED LAM +1EE4D;AL # ARABIC MATHEMATICAL TAILED NOON +1EE4E;AL # ARABIC MATHEMATICAL TAILED SEEN +1EE4F;AL # ARABIC MATHEMATICAL TAILED AIN +1EE51;AL # ARABIC MATHEMATICAL TAILED SAD +1EE52;AL # ARABIC MATHEMATICAL TAILED QAF +1EE54;AL # ARABIC MATHEMATICAL TAILED SHEEN +1EE57;AL # ARABIC MATHEMATICAL TAILED KHAH +1EE59;AL # ARABIC MATHEMATICAL TAILED DAD +1EE5B;AL # ARABIC MATHEMATICAL TAILED GHAIN +1EE5D;AL # ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F;AL # ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61;AL # ARABIC MATHEMATICAL STRETCHED BEH +1EE62;AL # ARABIC MATHEMATICAL STRETCHED JEEM +1EE64;AL # ARABIC MATHEMATICAL STRETCHED HEH +1EE67;AL # ARABIC MATHEMATICAL STRETCHED HAH +1EE68;AL # ARABIC MATHEMATICAL STRETCHED TAH +1EE69;AL # ARABIC MATHEMATICAL STRETCHED YEH +1EE6A;AL # ARABIC MATHEMATICAL STRETCHED KAF +1EE6C;AL # ARABIC MATHEMATICAL STRETCHED MEEM +1EE6D;AL # ARABIC MATHEMATICAL STRETCHED NOON +1EE6E;AL # ARABIC MATHEMATICAL STRETCHED SEEN +1EE6F;AL # ARABIC MATHEMATICAL STRETCHED AIN +1EE70;AL # ARABIC MATHEMATICAL STRETCHED FEH +1EE71;AL # ARABIC MATHEMATICAL STRETCHED SAD +1EE72;AL # ARABIC MATHEMATICAL STRETCHED QAF +1EE74;AL # ARABIC MATHEMATICAL STRETCHED SHEEN +1EE75;AL # ARABIC MATHEMATICAL STRETCHED TEH +1EE76;AL # ARABIC MATHEMATICAL STRETCHED THEH +1EE77;AL # ARABIC MATHEMATICAL STRETCHED KHAH +1EE79;AL # ARABIC MATHEMATICAL STRETCHED DAD +1EE7A;AL # ARABIC MATHEMATICAL STRETCHED ZAH +1EE7B;AL # ARABIC MATHEMATICAL STRETCHED GHAIN +1EE7C;AL # ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E;AL # ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80;AL # ARABIC MATHEMATICAL LOOPED ALEF +1EE81;AL # ARABIC MATHEMATICAL LOOPED BEH +1EE82;AL # ARABIC MATHEMATICAL LOOPED JEEM +1EE83;AL # ARABIC MATHEMATICAL LOOPED DAL +1EE84;AL # ARABIC MATHEMATICAL LOOPED HEH +1EE85;AL # ARABIC MATHEMATICAL LOOPED WAW 
+1EE86;AL # ARABIC MATHEMATICAL LOOPED ZAIN +1EE87;AL # ARABIC MATHEMATICAL LOOPED HAH +1EE88;AL # ARABIC MATHEMATICAL LOOPED TAH +1EE89;AL # ARABIC MATHEMATICAL LOOPED YEH +1EE8B;AL # ARABIC MATHEMATICAL LOOPED LAM +1EE8C;AL # ARABIC MATHEMATICAL LOOPED MEEM +1EE8D;AL # ARABIC MATHEMATICAL LOOPED NOON +1EE8E;AL # ARABIC MATHEMATICAL LOOPED SEEN +1EE8F;AL # ARABIC MATHEMATICAL LOOPED AIN +1EE90;AL # ARABIC MATHEMATICAL LOOPED FEH +1EE91;AL # ARABIC MATHEMATICAL LOOPED SAD +1EE92;AL # ARABIC MATHEMATICAL LOOPED QAF +1EE93;AL # ARABIC MATHEMATICAL LOOPED REH +1EE94;AL # ARABIC MATHEMATICAL LOOPED SHEEN +1EE95;AL # ARABIC MATHEMATICAL LOOPED TEH +1EE96;AL # ARABIC MATHEMATICAL LOOPED THEH +1EE97;AL # ARABIC MATHEMATICAL LOOPED KHAH +1EE98;AL # ARABIC MATHEMATICAL LOOPED THAL +1EE99;AL # ARABIC MATHEMATICAL LOOPED DAD +1EE9A;AL # ARABIC MATHEMATICAL LOOPED ZAH +1EE9B;AL # ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1;AL # ARABIC MATHEMATICAL DOUBLE-STRUCK BEH +1EEA2;AL # ARABIC MATHEMATICAL DOUBLE-STRUCK JEEM +1EEA3;AL # ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5;AL # ARABIC MATHEMATICAL DOUBLE-STRUCK WAW +1EEA6;AL # ARABIC MATHEMATICAL DOUBLE-STRUCK ZAIN +1EEA7;AL # ARABIC MATHEMATICAL DOUBLE-STRUCK HAH +1EEA8;AL # ARABIC MATHEMATICAL DOUBLE-STRUCK TAH +1EEA9;AL # ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB;AL # ARABIC MATHEMATICAL DOUBLE-STRUCK LAM +1EEAC;AL # ARABIC MATHEMATICAL DOUBLE-STRUCK MEEM +1EEAD;AL # ARABIC MATHEMATICAL DOUBLE-STRUCK NOON +1EEAE;AL # ARABIC MATHEMATICAL DOUBLE-STRUCK SEEN +1EEAF;AL # ARABIC MATHEMATICAL DOUBLE-STRUCK AIN +1EEB0;AL # ARABIC MATHEMATICAL DOUBLE-STRUCK FEH +1EEB1;AL # ARABIC MATHEMATICAL DOUBLE-STRUCK SAD +1EEB2;AL # ARABIC MATHEMATICAL DOUBLE-STRUCK QAF +1EEB3;AL # ARABIC MATHEMATICAL DOUBLE-STRUCK REH +1EEB4;AL # ARABIC MATHEMATICAL DOUBLE-STRUCK SHEEN +1EEB5;AL # ARABIC MATHEMATICAL DOUBLE-STRUCK TEH +1EEB6;AL # ARABIC MATHEMATICAL DOUBLE-STRUCK THEH +1EEB7;AL # ARABIC MATHEMATICAL DOUBLE-STRUCK KHAH +1EEB8;AL # ARABIC 
MATHEMATICAL DOUBLE-STRUCK THAL +1EEB9;AL # ARABIC MATHEMATICAL DOUBLE-STRUCK DAD +1EEBA;AL # ARABIC MATHEMATICAL DOUBLE-STRUCK ZAH +1EEBB;AL # ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN +1EEF0;AL # ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL +1EEF1;AL # ARABIC MATHEMATICAL OPERATOR HAH WITH DAL 1F000;AL # MAHJONG TILE EAST WIND 1F001;AL # MAHJONG TILE SOUTH WIND 1F002;AL # MAHJONG TILE WEST WIND @@ -22742,6 +23453,8 @@ FFFD;AI # REPLACEMENT CHARACTER 1F167;AI # NEGATIVE CIRCLED LATIN CAPITAL LETTER X 1F168;AI # NEGATIVE CIRCLED LATIN CAPITAL LETTER Y 1F169;AI # NEGATIVE CIRCLED LATIN CAPITAL LETTER Z +1F16A;AL # RAISED MC SIGN +1F16B;AL # RAISED MD SIGN 1F170;AI # NEGATIVE SQUARED LATIN CAPITAL LETTER A 1F171;AI # NEGATIVE SQUARED LATIN CAPITAL LETTER B 1F172;AI # NEGATIVE SQUARED LATIN CAPITAL LETTER C @@ -23368,6 +24081,10 @@ FFFD;AI # REPLACEMENT CHARACTER 1F53B;AL # DOWN-POINTING RED TRIANGLE 1F53C;AL # UP-POINTING SMALL RED TRIANGLE 1F53D;AL # DOWN-POINTING SMALL RED TRIANGLE +1F540;AL # CIRCLED CROSS POMMEE +1F541;AL # CROSS POMMEE WITH HALF-CIRCLE BELOW +1F542;AL # CROSS POMMEE +1F543;AL # NOTCHED LEFT SEMICIRCLE WITH THREE DOTS 1F550;AL # CLOCK FACE ONE OCLOCK 1F551;AL # CLOCK FACE TWO OCLOCK 1F552;AL # CLOCK FACE THREE OCLOCK @@ -23397,6 +24114,7 @@ FFFD;AI # REPLACEMENT CHARACTER 1F5FD;AL # STATUE OF LIBERTY 1F5FE;AL # SILHOUETTE OF JAPAN 1F5FF;AL # MOYAI +1F600;AL # GRINNING FACE 1F601;AL # GRINNING FACE WITH SMILING EYES 1F602;AL # FACE WITH TEARS OF JOY 1F603;AL # SMILING FACE WITH OPEN MOUTH @@ -23413,30 +24131,42 @@ FFFD;AI # REPLACEMENT CHARACTER 1F60E;AL # SMILING FACE WITH SUNGLASSES 1F60F;AL # SMIRKING FACE 1F610;AL # NEUTRAL FACE +1F611;AL # EXPRESSIONLESS FACE 1F612;AL # UNAMUSED FACE 1F613;AL # FACE WITH COLD SWEAT 1F614;AL # PENSIVE FACE +1F615;AL # CONFUSED FACE 1F616;AL # CONFOUNDED FACE +1F617;AL # KISSING FACE 1F618;AL # FACE THROWING A KISS +1F619;AL # KISSING FACE WITH SMILING EYES 1F61A;AL # KISSING FACE WITH CLOSED EYES 
+1F61B;AL # FACE WITH STUCK-OUT TONGUE 1F61C;AL # FACE WITH STUCK-OUT TONGUE AND WINKING EYE 1F61D;AL # FACE WITH STUCK-OUT TONGUE AND TIGHTLY-CLOSED EYES 1F61E;AL # DISAPPOINTED FACE +1F61F;AL # WORRIED FACE 1F620;AL # ANGRY FACE 1F621;AL # POUTING FACE 1F622;AL # CRYING FACE 1F623;AL # PERSEVERING FACE 1F624;AL # FACE WITH LOOK OF TRIUMPH 1F625;AL # DISAPPOINTED BUT RELIEVED FACE +1F626;AL # FROWNING FACE WITH OPEN MOUTH +1F627;AL # ANGUISHED FACE 1F628;AL # FEARFUL FACE 1F629;AL # WEARY FACE 1F62A;AL # SLEEPY FACE 1F62B;AL # TIRED FACE +1F62C;AL # GRIMACING FACE 1F62D;AL # LOUDLY CRYING FACE +1F62E;AL # FACE WITH OPEN MOUTH +1F62F;AL # HUSHED FACE 1F630;AL # FACE WITH OPEN MOUTH AND COLD SWEAT 1F631;AL # FACE SCREAMING IN FEAR 1F632;AL # ASTONISHED FACE 1F633;AL # FLUSHED FACE +1F634;AL # SLEEPING FACE 1F635;AL # DIZZY FACE 1F636;AL # FACE WITHOUT MOUTH 1F637;AL # FACE WITH MEDICAL MASK diff --git a/lib/unicore/NameAliases.txt b/lib/unicore/NameAliases.txt index caa462f4be..3992620096 100644 --- a/lib/unicore/NameAliases.txt +++ b/lib/unicore/NameAliases.txt @@ -1,40 +1,508 @@ -# NameAliases-6.0.0.txt -# Date: 2010-05-10, 11:58:00 PDT [KW] +# NameAliases-6.1.0.txt +# Date: 2012-01-03, 21:52:00 GMT [KW] # # This file is a normative contributory data file in the # Unicode Character Database. # -# Copyright (c) 2005-2010 Unicode, Inc. +# Copyright (c) 2005-2012 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # # This file defines the formal name aliases for Unicode characters. # # For informative aliases see NamesList.txt # +# The formal name aliases are divided into five types. +# +# 1. Corrections for serious problems in the character names +# 2. ISO 6429 names for C0 and C1 control functions, and other +# commonly occurring names for control codes +# 3. A few widely used alternate names for format characters +# 4. Several documented labels for C1 control code points which +# were never actually approved in any standard +# 5. 
Commonly occurring abbreviations (or acronyms) for control codes, +# format characters, spaces, and variation selectors +# +# The formal name aliases are part of the Unicode character namespace, which +# includes the character names and the names of named character sequences. +# The inclusion of ISO 6429 names and other commonly occurring names and +# abbreviations for control codes and format characters as formal name aliases +# is to help avoid name collisions between Unicode character names and the +# labels which commonly appear in text and/or in implementations such as regex, for +# control codes (which have no Unicode character name) or for format characters. +# # For documentation, see NamesList.html and http://www.unicode.org/reports/tr44/ # # FORMAT # -# Each line has two fields -# First field: Code point +# Each line has three fields, as described here: +# +# First field: Code point # Second field: Alias +# Third field: Type +# +# The Type labels used are: correction, control, alternate, figment, abbreviation +# +# Those Type labels can be mapped to other strings for display, if desired. # # In case multiple aliases are assigned, additional aliases -# would be provided on separate lines +# are provided on separate lines. Parsers of this data file should +# take note that the same code point can (and does) occur more than once. 
# #----------------------------------------------------------------- -01A2;LATIN CAPITAL LETTER GHA -01A3;LATIN SMALL LETTER GHA -0CDE;KANNADA LETTER LLLA -0E9D;LAO LETTER FO FON -0E9F;LAO LETTER FO FAY -0EA3;LAO LETTER RO -0EA5;LAO LETTER LO -0FD0;TIBETAN MARK BKA- SHOG GI MGO RGYAN -A015;YI SYLLABLE ITERATION MARK -FE18;PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRACKET -1D0C5;BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS -# Total code points: 11 +0000;NULL;control +0000;NUL;abbreviation +0001;START OF HEADING;control +0001;SOH;abbreviation +0002;START OF TEXT;control +0002;STX;abbreviation +0003;END OF TEXT;control +0003;ETX;abbreviation +0004;END OF TRANSMISSION;control +0004;EOT;abbreviation +0005;ENQUIRY;control +0005;ENQ;abbreviation +0006;ACKNOWLEDGE;control +0006;ACK;abbreviation + +# Note that no formal name alias for the ISO 6429 "BELL" is +# provided for U+0007, because of the existing name collision +# with U+1F514 BELL. + +0007;ALERT;control +0007;BEL;abbreviation +0008;BACKSPACE;control +0008;BS;abbreviation +0009;CHARACTER TABULATION;control +0009;HORIZONTAL TABULATION;control +0009;HT;abbreviation +0009;TAB;abbreviation +000A;LINE FEED;control +000A;NEW LINE;control +000A;END OF LINE;control +000A;LF;abbreviation +000A;NL;abbreviation +000A;EOL;abbreviation +000B;LINE TABULATION;control +000B;VERTICAL TABULATION;control +000B;VT;abbreviation +000C;FORM FEED;control +000C;FF;abbreviation +000D;CARRIAGE RETURN;control +000D;CR;abbreviation +000E;SHIFT OUT;control +000E;LOCKING-SHIFT ONE;control +000E;SO;abbreviation +000F;SHIFT IN;control +000F;LOCKING-SHIFT ZERO;control +000F;SI;abbreviation +0010;DATA LINK ESCAPE;control +0010;DLE;abbreviation +0011;DEVICE CONTROL ONE;control +0011;DC1;abbreviation +0012;DEVICE CONTROL TWO;control +0012;DC2;abbreviation +0013;DEVICE CONTROL THREE;control +0013;DC3;abbreviation +0014;DEVICE CONTROL FOUR;control +0014;DC4;abbreviation +0015;NEGATIVE ACKNOWLEDGE;control +0015;NAK;abbreviation 
+0016;SYNCHRONOUS IDLE;control +0016;SYN;abbreviation +0017;END OF TRANSMISSION BLOCK;control +0017;ETB;abbreviation +0018;CANCEL;control +0018;CAN;abbreviation +0019;END OF MEDIUM;control +0019;EOM;abbreviation +001A;SUBSTITUTE;control +001A;SUB;abbreviation +001B;ESCAPE;control +001B;ESC;abbreviation +001C;INFORMATION SEPARATOR FOUR;control +001C;FILE SEPARATOR;control +001C;FS;abbreviation +001D;INFORMATION SEPARATOR THREE;control +001D;GROUP SEPARATOR;control +001D;GS;abbreviation +001E;INFORMATION SEPARATOR TWO;control +001E;RECORD SEPARATOR;control +001E;RS;abbreviation +001F;INFORMATION SEPARATOR ONE;control +001F;UNIT SEPARATOR;control +001F;US;abbreviation +0020;SP;abbreviation +007F;DELETE;control +007F;DEL;abbreviation +0080;PADDING CHARACTER;figment +0080;PAD;abbreviation +0081;HIGH OCTET PRESET;figment +0081;HOP;abbreviation +0082;BREAK PERMITTED HERE;control +0082;BPH;abbreviation +0083;NO BREAK HERE;control +0083;NBH;abbreviation +0084;INDEX;control +0084;IND;abbreviation +0085;NEXT LINE;control +0085;NEL;abbreviation +0086;START OF SELECTED AREA;control +0086;SSA;abbreviation +0087;END OF SELECTED AREA;control +0087;ESA;abbreviation +0088;CHARACTER TABULATION SET;control +0088;HORIZONTAL TABULATION SET;control +0088;HTS;abbreviation +0089;CHARACTER TABULATION WITH JUSTIFICATION;control +0089;HORIZONTAL TABULATION WITH JUSTIFICATION;control +0089;HTJ;abbreviation +008A;LINE TABULATION SET;control +008A;VERTICAL TABULATION SET;control +008A;VTS;abbreviation +008B;PARTIAL LINE FORWARD;control +008B;PARTIAL LINE DOWN;control +008B;PLD;abbreviation +008C;PARTIAL LINE BACKWARD;control +008C;PARTIAL LINE UP;control +008C;PLU;abbreviation +008D;REVERSE LINE FEED;control +008D;REVERSE INDEX;control +008D;RI;abbreviation +008E;SINGLE SHIFT TWO;control +008E;SINGLE-SHIFT-2;control +008E;SS2;abbreviation +008F;SINGLE SHIFT THREE;control +008F;SINGLE-SHIFT-3;control +008F;SS3;abbreviation +0090;DEVICE CONTROL STRING;control +0090;DCS;abbreviation +0091;PRIVATE 
USE ONE;control +0091;PRIVATE USE-1;control +0091;PU1;abbreviation +0092;PRIVATE USE TWO;control +0092;PRIVATE USE-2;control +0092;PU2;abbreviation +0093;SET TRANSMIT STATE;control +0093;STS;abbreviation +0094;CANCEL CHARACTER;control +0094;CCH;abbreviation +0095;MESSAGE WAITING;control +0095;MW;abbreviation +0096;START OF GUARDED AREA;control +0096;START OF PROTECTED AREA;control +0096;SPA;abbreviation +0097;END OF GUARDED AREA;control +0097;END OF PROTECTED AREA;control +0097;EPA;abbreviation +0098;START OF STRING;control +0098;SOS;abbreviation +0099;SINGLE GRAPHIC CHARACTER INTRODUCER;figment +0099;SGC;abbreviation +009A;SINGLE CHARACTER INTRODUCER;control +009A;SCI;abbreviation +009B;CONTROL SEQUENCE INTRODUCER;control +009B;CSI;abbreviation +009C;STRING TERMINATOR;control +009C;ST;abbreviation +009D;OPERATING SYSTEM COMMAND;control +009D;OSC;abbreviation +009E;PRIVACY MESSAGE;control +009E;PM;abbreviation +009F;APPLICATION PROGRAM COMMAND;control +009F;APC;abbreviation +00A0;NBSP;abbreviation +00AD;SHY;abbreviation +01A2;LATIN CAPITAL LETTER GHA;correction +01A3;LATIN SMALL LETTER GHA;correction +034F;CGJ;abbreviation +0CDE;KANNADA LETTER LLLA;correction +0E9D;LAO LETTER FO FON;correction +0E9F;LAO LETTER FO FAY;correction +0EA3;LAO LETTER RO;correction +0EA5;LAO LETTER LO;correction +0FD0;TIBETAN MARK BKA- SHOG GI MGO RGYAN;correction +180B;FVS1;abbreviation +180C;FVS2;abbreviation +180D;FVS3;abbreviation +180E;MVS;abbreviation +200B;ZWSP;abbreviation +200C;ZWNJ;abbreviation +200D;ZWJ;abbreviation +200E;LRM;abbreviation +200F;RLM;abbreviation +202A;LRE;abbreviation +202B;RLE;abbreviation +202C;PDF;abbreviation +202D;LRO;abbreviation +202E;RLO;abbreviation +202F;NNBSP;abbreviation +205F;MMSP;abbreviation +2060;WJ;abbreviation +2118;WEIERSTRASS ELLIPTIC FUNCTION;correction +2448;MICR ON US SYMBOL;correction +2449;MICR DASH SYMBOL;correction +A015;YI SYLLABLE ITERATION MARK;correction +FE18;PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRACKET;correction 
+FE00;VS1;abbreviation +FE01;VS2;abbreviation +FE02;VS3;abbreviation +FE03;VS4;abbreviation +FE04;VS5;abbreviation +FE05;VS6;abbreviation +FE06;VS7;abbreviation +FE07;VS8;abbreviation +FE08;VS9;abbreviation +FE09;VS10;abbreviation +FE0A;VS11;abbreviation +FE0B;VS12;abbreviation +FE0C;VS13;abbreviation +FE0D;VS14;abbreviation +FE0E;VS15;abbreviation +FE0F;VS16;abbreviation +FEFF;BYTE ORDER MARK;alternate +FEFF;BOM;abbreviation +FEFF;ZWNBSP;abbreviation +1D0C5;BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS;correction +E0100;VS17;abbreviation +E0101;VS18;abbreviation +E0102;VS19;abbreviation +E0103;VS20;abbreviation +E0104;VS21;abbreviation +E0105;VS22;abbreviation +E0106;VS23;abbreviation +E0107;VS24;abbreviation +E0108;VS25;abbreviation +E0109;VS26;abbreviation +E010A;VS27;abbreviation +E010B;VS28;abbreviation +E010C;VS29;abbreviation +E010D;VS30;abbreviation +E010E;VS31;abbreviation +E010F;VS32;abbreviation +E0110;VS33;abbreviation +E0111;VS34;abbreviation +E0112;VS35;abbreviation +E0113;VS36;abbreviation +E0114;VS37;abbreviation +E0115;VS38;abbreviation +E0116;VS39;abbreviation +E0117;VS40;abbreviation +E0118;VS41;abbreviation +E0119;VS42;abbreviation +E011A;VS43;abbreviation +E011B;VS44;abbreviation +E011C;VS45;abbreviation +E011D;VS46;abbreviation +E011E;VS47;abbreviation +E011F;VS48;abbreviation +E0120;VS49;abbreviation +E0121;VS50;abbreviation +E0122;VS51;abbreviation +E0123;VS52;abbreviation +E0124;VS53;abbreviation +E0125;VS54;abbreviation +E0126;VS55;abbreviation +E0127;VS56;abbreviation +E0128;VS57;abbreviation +E0129;VS58;abbreviation +E012A;VS59;abbreviation +E012B;VS60;abbreviation +E012C;VS61;abbreviation +E012D;VS62;abbreviation +E012E;VS63;abbreviation +E012F;VS64;abbreviation +E0130;VS65;abbreviation +E0131;VS66;abbreviation +E0132;VS67;abbreviation +E0133;VS68;abbreviation +E0134;VS69;abbreviation +E0135;VS70;abbreviation +E0136;VS71;abbreviation +E0137;VS72;abbreviation +E0138;VS73;abbreviation +E0139;VS74;abbreviation 
+E013A;VS75;abbreviation +E013B;VS76;abbreviation +E013C;VS77;abbreviation +E013D;VS78;abbreviation +E013E;VS79;abbreviation +E013F;VS80;abbreviation +E0140;VS81;abbreviation +E0141;VS82;abbreviation +E0142;VS83;abbreviation +E0143;VS84;abbreviation +E0144;VS85;abbreviation +E0145;VS86;abbreviation +E0146;VS87;abbreviation +E0147;VS88;abbreviation +E0148;VS89;abbreviation +E0149;VS90;abbreviation +E014A;VS91;abbreviation +E014B;VS92;abbreviation +E014C;VS93;abbreviation +E014D;VS94;abbreviation +E014E;VS95;abbreviation +E014F;VS96;abbreviation +E0150;VS97;abbreviation +E0151;VS98;abbreviation +E0152;VS99;abbreviation +E0153;VS100;abbreviation +E0154;VS101;abbreviation +E0155;VS102;abbreviation +E0156;VS103;abbreviation +E0157;VS104;abbreviation +E0158;VS105;abbreviation +E0159;VS106;abbreviation +E015A;VS107;abbreviation +E015B;VS108;abbreviation +E015C;VS109;abbreviation +E015D;VS110;abbreviation +E015E;VS111;abbreviation +E015F;VS112;abbreviation +E0160;VS113;abbreviation +E0161;VS114;abbreviation +E0162;VS115;abbreviation +E0163;VS116;abbreviation +E0164;VS117;abbreviation +E0165;VS118;abbreviation +E0166;VS119;abbreviation +E0167;VS120;abbreviation +E0168;VS121;abbreviation +E0169;VS122;abbreviation +E016A;VS123;abbreviation +E016B;VS124;abbreviation +E016C;VS125;abbreviation +E016D;VS126;abbreviation +E016E;VS127;abbreviation +E016F;VS128;abbreviation +E0170;VS129;abbreviation +E0171;VS130;abbreviation +E0172;VS131;abbreviation +E0173;VS132;abbreviation +E0174;VS133;abbreviation +E0175;VS134;abbreviation +E0176;VS135;abbreviation +E0177;VS136;abbreviation +E0178;VS137;abbreviation +E0179;VS138;abbreviation +E017A;VS139;abbreviation +E017B;VS140;abbreviation +E017C;VS141;abbreviation +E017D;VS142;abbreviation +E017E;VS143;abbreviation +E017F;VS144;abbreviation +E0180;VS145;abbreviation +E0181;VS146;abbreviation +E0182;VS147;abbreviation +E0183;VS148;abbreviation +E0184;VS149;abbreviation +E0185;VS150;abbreviation +E0186;VS151;abbreviation 
+E0187;VS152;abbreviation +E0188;VS153;abbreviation +E0189;VS154;abbreviation +E018A;VS155;abbreviation +E018B;VS156;abbreviation +E018C;VS157;abbreviation +E018D;VS158;abbreviation +E018E;VS159;abbreviation +E018F;VS160;abbreviation +E0190;VS161;abbreviation +E0191;VS162;abbreviation +E0192;VS163;abbreviation +E0193;VS164;abbreviation +E0194;VS165;abbreviation +E0195;VS166;abbreviation +E0196;VS167;abbreviation +E0197;VS168;abbreviation +E0198;VS169;abbreviation +E0199;VS170;abbreviation +E019A;VS171;abbreviation +E019B;VS172;abbreviation +E019C;VS173;abbreviation +E019D;VS174;abbreviation +E019E;VS175;abbreviation +E019F;VS176;abbreviation +E01A0;VS177;abbreviation +E01A1;VS178;abbreviation +E01A2;VS179;abbreviation +E01A3;VS180;abbreviation +E01A4;VS181;abbreviation +E01A5;VS182;abbreviation +E01A6;VS183;abbreviation +E01A7;VS184;abbreviation +E01A8;VS185;abbreviation +E01A9;VS186;abbreviation +E01AA;VS187;abbreviation +E01AB;VS188;abbreviation +E01AC;VS189;abbreviation +E01AD;VS190;abbreviation +E01AE;VS191;abbreviation +E01AF;VS192;abbreviation +E01B0;VS193;abbreviation +E01B1;VS194;abbreviation +E01B2;VS195;abbreviation +E01B3;VS196;abbreviation +E01B4;VS197;abbreviation +E01B5;VS198;abbreviation +E01B6;VS199;abbreviation +E01B7;VS200;abbreviation +E01B8;VS201;abbreviation +E01B9;VS202;abbreviation +E01BA;VS203;abbreviation +E01BB;VS204;abbreviation +E01BC;VS205;abbreviation +E01BD;VS206;abbreviation +E01BE;VS207;abbreviation +E01BF;VS208;abbreviation +E01C0;VS209;abbreviation +E01C1;VS210;abbreviation +E01C2;VS211;abbreviation +E01C3;VS212;abbreviation +E01C4;VS213;abbreviation +E01C5;VS214;abbreviation +E01C6;VS215;abbreviation +E01C7;VS216;abbreviation +E01C8;VS217;abbreviation +E01C9;VS218;abbreviation +E01CA;VS219;abbreviation +E01CB;VS220;abbreviation +E01CC;VS221;abbreviation +E01CD;VS222;abbreviation +E01CE;VS223;abbreviation +E01CF;VS224;abbreviation +E01D0;VS225;abbreviation +E01D1;VS226;abbreviation +E01D2;VS227;abbreviation 
+E01D3;VS228;abbreviation +E01D4;VS229;abbreviation +E01D5;VS230;abbreviation +E01D6;VS231;abbreviation +E01D7;VS232;abbreviation +E01D8;VS233;abbreviation +E01D9;VS234;abbreviation +E01DA;VS235;abbreviation +E01DB;VS236;abbreviation +E01DC;VS237;abbreviation +E01DD;VS238;abbreviation +E01DE;VS239;abbreviation +E01DF;VS240;abbreviation +E01E0;VS241;abbreviation +E01E1;VS242;abbreviation +E01E2;VS243;abbreviation +E01E3;VS244;abbreviation +E01E4;VS245;abbreviation +E01E5;VS246;abbreviation +E01E6;VS247;abbreviation +E01E7;VS248;abbreviation +E01E8;VS249;abbreviation +E01E9;VS250;abbreviation +E01EA;VS251;abbreviation +E01EB;VS252;abbreviation +E01EC;VS253;abbreviation +E01ED;VS254;abbreviation +E01EE;VS255;abbreviation +E01EF;VS256;abbreviation # EOF diff --git a/lib/unicore/NamedSequences.txt b/lib/unicore/NamedSequences.txt index 0c270410df..e14c39505b 100644 --- a/lib/unicore/NamedSequences.txt +++ b/lib/unicore/NamedSequences.txt @@ -1,8 +1,8 @@ -# NamedSequences-6.0.0.txt -# Date: 2010-05-18, 10:48:00 PDT [KW] +# NamedSequences-6.1.0.txt +# Date: 2011-07-26, 19:47:00 GMT [KW] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. 
# For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ # @@ -431,6 +431,13 @@ TAMIL SYLLABLE KSSAU; 0B95 0BCD 0BB7 0BCC TAMIL SYLLABLE SHRII; 0BB6 0BCD 0BB0 0BC0 +# Sinhala medial consonants and "reph" form +# Provisional 2010-05-13, Approved 2011-08-05 + +SINHALA CONSONANT SIGN YANSAYA;0DCA 200D 0DBA +SINHALA CONSONANT SIGN RAKAARAANSAYA;0DCA 200D 0DBB +SINHALA CONSONANT SIGN REPAYA;0DBB 0DCA 200D + GEORGIAN LETTER U-BRJGU;10E3 0302 KHMER CONSONANT SIGN COENG KA;17D2 1780 KHMER CONSONANT SIGN COENG KHA;17D2 1781 diff --git a/lib/unicore/NamedSqProv.txt b/lib/unicore/NamedSqProv.txt index 9658de8451..c7561948f8 100644 --- a/lib/unicore/NamedSqProv.txt +++ b/lib/unicore/NamedSqProv.txt @@ -1,8 +1,8 @@ -# NamedSequencesProv-6.0.0.txt -# Date: 2010-05-18, 10:49:00 PDT [KW] +# NamedSequencesProv-6.1.0.txt +# Date: 2011-07-26, 19:46:00 GMT [KW] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ # @@ -34,12 +34,7 @@ # Provisional entries for NamedSequences.txt. -# Sinhala medial consonants and "reph" form -# Added to provisional named sequences, 2010-05-13 - -SINHALA CONSONANT SIGN YANSAYA;0DCA 200D 0DBA -SINHALA CONSONANT SIGN RAKAARAANSAYA;0DCA 200D 0DBB -SINHALA CONSONANT SIGN REPAYA;0DBB 0DCA 200D +# There are currently no provisional named sequences. # ================================================ diff --git a/lib/unicore/NamesList.txt b/lib/unicore/NamesList.txt index 4f698c7339..19ecbdd55a 100644 --- a/lib/unicore/NamesList.txt +++ b/lib/unicore/NamesList.txt @@ -1,13 +1,13 @@ -@@@ The Unicode Standard 6.0 -@@@+ U60M100817.lst - Final Unicode 6.0 names list. +@@@ The Unicode Standard 6.1 +@@@+ U61M111117.lst + Final Unicode 6.1 names list. 
This file is semi-automatically derived from UnicodeData.txt and a set of manually created annotations using a script to select or suppress information from the data file. The rules used for this process are aimed at readability for the human reader, at the expense of some details; therefore, this file should not be parsed for machine-readable information. -@+ Copyright (c) 1991-2010 Unicode, Inc. +@+ Copyright (c) 1991-2012 Unicode, Inc. For terms of use, see http://www.unicode.org/terms_of_use.html @@ 0000 C0 Controls and Basic Latin (Basic Latin) 007F @@+ @@ -120,7 +120,7 @@ 0024 DOLLAR SIGN = milreis, escudo * glyph may have one or two vertical bars - * other currency symbol characters: 20A0-20B8 + * other currency symbol characters: 20A0-20B9 x (currency sign - 00A4) x (heavy dollar sign - 1F4B2) 0025 PERCENT SIGN @@ -466,7 +466,7 @@ x (lira sign - 20A4) x (roman semuncia sign - 10192) 00A4 CURRENCY SIGN - * other currency symbol characters: 20A0-20B5 + * other currency symbol characters: 20A0-20B9 x (dollar sign - 0024) 00A5 YEN SIGN = yuan sign @@ -551,6 +551,7 @@ x (bullet operator - 2219) x (dot operator - 22C5) x (word separator middle dot - 2E31) + x (raised dot - 2E33) x (katakana middle dot - 30FB) 00B8 CEDILLA * this is a spacing character @@ -657,7 +658,7 @@ = Eszett * German * uppercase is "SS" - * in origin a ligature of 017F and 0073 + * typographically the glyph for this character can be based on a ligature of 017F with either 0073 or with an old-style glyph for 007A (the latter similar in appearance to 0292). Both forms exist interchangeably today. 
x (greek small letter beta - 03B2) x (latin capital letter sharp s - 1E9E) 00E0 LATIN SMALL LETTER A WITH GRAVE @@ -848,6 +849,7 @@ * there are three major glyph variants : 0067 0327 0124 LATIN CAPITAL LETTER H WITH CIRCUMFLEX + * lowercase in Nawdm is 0266 : 0048 0302 0125 LATIN SMALL LETTER H WITH CIRCUMFLEX * Esperanto @@ -1747,10 +1749,12 @@ * uppercase is A78D 0266 LATIN SMALL LETTER H WITH HOOK * breathy-voiced glottal fricative + * uppercase is A7AA + * uppercase in Nawdm is 0124 x (modifier letter small h with hook - 02B1) 0267 LATIN SMALL LETTER HENG WITH HOOK * voiceless coarticulated velar and palatoalveolar fricative - * "tj" or "kj" or "sj" in some Swedish dialects + * "sj" in some Swedish dialects 0268 LATIN SMALL LETTER I WITH STROKE = barred i, i bar * high central unrounded vowel @@ -3405,6 +3409,8 @@ x (colon - 003A) 058A ARMENIAN HYPHEN = yentamna +@ Currency symbol +058F ARMENIAN DRAM SIGN @@ 0590 Hebrew 05FF @ Cantillation marks 0591 HEBREW ACCENT ETNAHTA @@ -3550,6 +3556,8 @@ 0601 ARABIC SIGN SANAH 0602 ARABIC FOOTNOTE MARKER 0603 ARABIC SIGN SAFHA +0604 ARABIC SIGN SAMVAT + * used for writing Samvat era dates in Urdu @ Radix symbols 0606 ARABIC-INDIC CUBE ROOT x (cube root - 221B) @@ -3568,6 +3576,7 @@ 060C ARABIC COMMA * also used with Thaana and Syriac in modern text x (comma - 002C) + x (turned comma - 2E32) 060D ARABIC DATE SEPARATOR @ Poetic marks 060E ARABIC POETIC VERSE SIGN @@ -3602,6 +3611,7 @@ 061B ARABIC SEMICOLON * also used with Thaana and Syriac in modern text x (semicolon - 003B) + x (turned semicolon - 2E35) 061E ARABIC TRIPLE DOT PUNCTUATION MARK 061F ARABIC QUESTION MARK * also used with Thaana and Syriac in modern text @@ -3610,6 +3620,7 @@ @ Addition for Kashmiri 0620 ARABIC LETTER KASHMIRI YEH @ Based on ISO 8859-6 +@+ Arabic letter names follow romanization conventions derived from ISO 8859-6. These differ from the Literary Arabic pronunciation of the letter names. 
For example, U+0628 ARABIC LETTER BEH has a Literary Arabic pronunciation of ba'. 0621 ARABIC LETTER HAMZA x (modifier letter right half ring - 02BE) 0622 ARABIC LETTER ALEF WITH MADDA ABOVE @@ -3666,7 +3677,11 @@ 0648 ARABIC LETTER WAW 0649 ARABIC LETTER ALEF MAKSURA * represents YEH-shaped letter with no dots in any positional form + * not intended for use in combination with 0654 + x (arabic letter yeh with hamza above - 0626) 064A ARABIC LETTER YEH + * loses its dots when used in combination with 0654 + * retains its dots when used in combination with other combining marks @ Points from ISO 8859-6 064B ARABIC FATHATAN 064C ARABIC DAMMATAN @@ -3683,6 +3698,8 @@ @ Combining maddah and hamza 0653 ARABIC MADDAH ABOVE 0654 ARABIC HAMZA ABOVE + * not restricted to hamza semantics + * may also occur as a diacritic forming new letters 0655 ARABIC HAMZA BELOW @ Other combining marks 0656 ARABIC SUBSCRIPT ALEF @@ -3739,8 +3756,12 @@ * Koranic Arabic 0672 ARABIC LETTER ALEF WITH WAVY HAMZA ABOVE * Baluchi, Kashmiri +@ Deprecated letter 0673 ARABIC LETTER ALEF WITH WAVY HAMZA BELOW * Kashmiri + * this character is deprecated and its use is strongly discouraged + * use the sequence 0627 065F instead +@ Extended Arabic letters 0674 ARABIC LETTER HIGH HAMZA * Kazakh * forms digraphs @@ -4461,6 +4482,60 @@ 085B MANDAIC GEMINATION MARK @ Punctuation 085E MANDAIC PUNCTUATION +@@ 08A0 Arabic Extended-A 08FF +@ Extended Arabic letters for African languages +08A0 ARABIC LETTER BEH WITH SMALL V BELOW +08A2 ARABIC LETTER JEEM WITH TWO DOTS ABOVE +08A3 ARABIC LETTER TAH WITH TWO DOTS ABOVE +08A4 ARABIC LETTER FEH WITH DOT BELOW AND THREE DOTS ABOVE +08A5 ARABIC LETTER QAF WITH DOT BELOW +08A6 ARABIC LETTER LAM WITH DOUBLE BAR +08A7 ARABIC LETTER MEEM WITH THREE DOTS ABOVE +08A8 ARABIC LETTER YEH WITH TWO DOTS BELOW AND HAMZA ABOVE +08A9 ARABIC LETTER YEH WITH TWO DOTS BELOW AND DOT ABOVE +@ Dependent consonants for Rohingya +08AA ARABIC LETTER REH WITH LOOP + = bottya-reh +08AB ARABIC 
LETTER WAW WITH DOT WITHIN + = nota-wa +08AC ARABIC LETTER ROHINGYA YEH + = bottya-yeh +@ Extended vowel signs for Rohingya +08E4 ARABIC CURLY FATHA +08E5 ARABIC CURLY DAMMA +08E6 ARABIC CURLY KASRA +08E7 ARABIC CURLY FATHATAN +08E8 ARABIC CURLY DAMMATAN +08E9 ARABIC CURLY KASRATAN +@ Tone marks for Rohingya +08EA ARABIC TONE ONE DOT ABOVE +08EB ARABIC TONE TWO DOTS ABOVE +08EC ARABIC TONE LOOP ABOVE +08ED ARABIC TONE ONE DOT BELOW +08EE ARABIC TONE TWO DOTS BELOW +08EF ARABIC TONE LOOP BELOW +@ Koranic annotation signs +08F0 ARABIC OPEN FATHATAN + = successive fathatan +08F1 ARABIC OPEN DAMMATAN + = successive dammatan +08F2 ARABIC OPEN KASRATAN + = successive kasratan +08F3 ARABIC SMALL HIGH WAW +@ Extended vowel signs for African languages +08F4 ARABIC FATHA WITH RING +08F5 ARABIC FATHA WITH DOT ABOVE +08F6 ARABIC KASRA WITH DOT BELOW + * also used in Philippine languages +08F7 ARABIC LEFT ARROWHEAD ABOVE +08F8 ARABIC RIGHT ARROWHEAD ABOVE +08F9 ARABIC LEFT ARROWHEAD BELOW +08FA ARABIC RIGHT ARROWHEAD BELOW +08FB ARABIC DOUBLE RIGHT ARROWHEAD ABOVE +08FC ARABIC DOUBLE RIGHT ARROWHEAD ABOVE WITH DOT +08FD ARABIC RIGHT ARROWHEAD ABOVE WITH DOT +@ Extended vowel sign for Philippine languages +08FE ARABIC DAMMA WITH DOT @@ 0900 Devanagari 097F @@+ @ Various signs @@ -4591,6 +4666,7 @@ x (combining grave accent - 0300) 0954 DEVANAGARI ACUTE ACCENT x (combining acute accent - 0301) +@ Dependent vowel sign 0955 DEVANAGARI VOWEL SIGN CANDRA LONG E * used in transliteration of Avestan @ Dependent vowel signs for Kashmiri @@ -4775,14 +4851,14 @@ 09ED BENGALI DIGIT SEVEN 09EE BENGALI DIGIT EIGHT 09EF BENGALI DIGIT NINE -@ Bengali-specific additions +@ Additions for Assamese 09F0 BENGALI LETTER RA WITH MIDDLE DIAGONAL - * Assamese 09F1 BENGALI LETTER RA WITH LOWER DIAGONAL = bengali letter va with lower diagonal (1.0) - * Assamese +@ Currency signs 09F2 BENGALI RUPEE MARK 09F3 BENGALI RUPEE SIGN +@ Historic symbols for fractional values 09F4 BENGALI CURRENCY NUMERATOR ONE * 
not in current usage 09F5 BENGALI CURRENCY NUMERATOR TWO @@ -4792,7 +4868,9 @@ 09F7 BENGALI CURRENCY NUMERATOR FOUR 09F8 BENGALI CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR 09F9 BENGALI CURRENCY DENOMINATOR SIXTEEN +@ Sign 09FA BENGALI ISSHAR +@ Currency sign 09FB BENGALI GANDA MARK @@ 0A00 Gurmukhi 0A7F @ Various signs @@ -5015,8 +5093,11 @@ 0AED GUJARATI DIGIT SEVEN 0AEE GUJARATI DIGIT EIGHT 0AEF GUJARATI DIGIT NINE +@ Abbreviation sign +0AF0 GUJARATI ABBREVIATION SIGN @ Currency sign 0AF1 GUJARATI RUPEE SIGN + * preferred spelling is 0AB0 0AC2 0AF0 @@ 0B00 Oriya 0B7F @ Various signs 0B01 ORIYA SIGN CANDRABINDU @@ -5133,8 +5214,9 @@ 0B6D ORIYA DIGIT SEVEN 0B6E ORIYA DIGIT EIGHT 0B6F ORIYA DIGIT NINE -@ Oriya-specific additions +@ Sign 0B70 ORIYA ISSHAR +@ Additional consonant 0B71 ORIYA LETTER WA x (oriya letter o - 0B13) x (oriya letter va - 0B35) @@ -5483,12 +5565,9 @@ 0CEF KANNADA DIGIT NINE @ Signs used in Sanskrit 0CF1 KANNADA SIGN JIHVAMULIYA - * marks a velar fricative occurring only before unvoiced velar stops - x (tibetan sign lce tsa can - 0F88) + x (vedic sign jihvamuliya - 1CF5) 0CF2 KANNADA SIGN UPADHMANIYA - * marks a bilabial fricative occurring only before unvoiced labial stops - x (tibetan sign mchu can - 0F89) - x (vedic sign ardhavisarga - 1CF2) + x (vedic sign upadhmaniya - 1CF6) @@ 0D00 Malayalam 0D7F @ Various signs 0D02 MALAYALAM SIGN ANUSVARA @@ -6075,6 +6154,9 @@ # 0EAB 0E99 0EDD LAO HO MO # 0EAB 0EA1 +@ Consonants for Khmu +0EDE LAO LETTER KHMU GO +0EDF LAO LETTER KHMU NYO @@ 0F00 Tibetan 0FFF @+ The Tibetan script is called the Bodhi script in Bhutan. 
@ Syllable @@ -6290,10 +6372,10 @@ 0F87 TIBETAN SIGN YANG RTAGS @ Transliteration head letters 0F88 TIBETAN SIGN LCE TSA CAN - x (kannada sign jihvamuliya - 0CF1) + x (vedic sign jihvamuliya - 1CF5) x (mongolian letter ali gali damaru - 1882) 0F89 TIBETAN SIGN MCHU CAN - x (kannada sign upadhmaniya - 0CF2) + x (vedic sign upadhmaniya - 1CF6) x (mongolian letter ali gali inverted ubadama - 1884) 0F8A TIBETAN SIGN GRU CAN RGYINGS * always followed by 0F82 @@ -6663,6 +6745,9 @@ 10C3 GEORGIAN CAPITAL LETTER WE 10C4 GEORGIAN CAPITAL LETTER HAR 10C5 GEORGIAN CAPITAL LETTER HOE +@ Additional letters for Ossetian +10C7 GEORGIAN CAPITAL LETTER YN +10CD GEORGIAN CAPITAL LETTER AEN @ Mkhedruli @+ This is the modern secular alphabet, which is caseless. 10D0 GEORGIAN LETTER AN @@ -6716,6 +6801,10 @@ @ Modifier letter 10FC MODIFIER LETTER GEORGIAN NAR # <super> 10DC +@ Additional letters for Ossetian and Abkhaz +10FD GEORGIAN LETTER AEN +10FE GEORGIAN LETTER HARD SIGN +10FF GEORGIAN LETTER LABIAL SIGN @@ 1100 Hangul Jamo 11FF @+ The aliases in this block represent the Jamo short names. 
@ Initial consonants @@ -6739,15 +6828,21 @@ = BB 1109 HANGUL CHOSEONG SIOS = S + * voiceless lenis alveolar fricative 110A HANGUL CHOSEONG SSANGSIOS = SS + * voiceless fortis alveolar fricative 110B HANGUL CHOSEONG IEUNG + * zero sound 110C HANGUL CHOSEONG CIEUC = J + * voiceless or voiced lenis alveolar affricate 110D HANGUL CHOSEONG SSANGCIEUC = JJ + * voiceless unaspirated fortis alveolar affricate 110E HANGUL CHOSEONG CHIEUCH = C + * voiceless aspirated alveolar affricate 110F HANGUL CHOSEONG KHIEUKH = K 1110 HANGUL CHOSEONG THIEUTH @@ -6756,6 +6851,7 @@ = P 1112 HANGUL CHOSEONG HIEUH = H + * voiceless glottal fricative 1113 HANGUL CHOSEONG NIEUN-KIYEOK 1114 HANGUL CHOSEONG SSANGNIEUN 1115 HANGUL CHOSEONG NIEUN-TIKEUT @@ -6798,10 +6894,15 @@ 113A HANGUL CHOSEONG SIOS-PHIEUPH 113B HANGUL CHOSEONG SIOS-HIEUH 113C HANGUL CHOSEONG CHITUEUMSIOS + * voiceless lenis dental fricative 113D HANGUL CHOSEONG CHITUEUMSSANGSIOS + * voicless fortis dental fricative 113E HANGUL CHOSEONG CEONGCHIEUMSIOS + * voiceless lenis retroflex fricative 113F HANGUL CHOSEONG CEONGCHIEUMSSANGSIOS + * voiceless fortis retroflex fricative 1140 HANGUL CHOSEONG PANSIOS + * voiced alveolar fricative 1141 HANGUL CHOSEONG IEUNG-KIYEOK 1142 HANGUL CHOSEONG IEUNG-TIKEUT 1143 HANGUL CHOSEONG IEUNG-MIEUM @@ -6814,19 +6915,27 @@ 114A HANGUL CHOSEONG IEUNG-THIEUTH 114B HANGUL CHOSEONG IEUNG-PHIEUPH 114C HANGUL CHOSEONG YESIEUNG + * velar nasal consonant 114D HANGUL CHOSEONG CIEUC-IEUNG 114E HANGUL CHOSEONG CHITUEUMCIEUC + * voiceless or voiced lenis dental affricate 114F HANGUL CHOSEONG CHITUEUMSSANGCIEUC + * voiceless unaspirated fortis dental affricate 1150 HANGUL CHOSEONG CEONGCHIEUMCIEUC + * voiceless or voiced lenis retroflex affricate 1151 HANGUL CHOSEONG CEONGCHIEUMSSANGCIEUC + * voiceless unaspirated fortis retroflex affricate 1152 HANGUL CHOSEONG CHIEUCH-KHIEUKH 1153 HANGUL CHOSEONG CHIEUCH-HIEUH 1154 HANGUL CHOSEONG CHITUEUMCHIEUCH + * voiceless aspirated dental affricate 1155 HANGUL CHOSEONG 
CEONGCHIEUMCHIEUCH + * voiceless aspirated retroflex affricate 1156 HANGUL CHOSEONG PHIEUPH-PIEUP 1157 HANGUL CHOSEONG KAPYEOUNPHIEUPH 1158 HANGUL CHOSEONG SSANGHIEUH 1159 HANGUL CHOSEONG YEORINHIEUH + * glottal stop 115A HANGUL CHOSEONG KIYEOK-TIKEUT 115B HANGUL CHOSEONG NIEUN-SIOS 115C HANGUL CHOSEONG NIEUN-CIEUC @@ -6918,6 +7027,7 @@ 119C HANGUL JUNGSEONG I-EU 119D HANGUL JUNGSEONG I-ARAEA 119E HANGUL JUNGSEONG ARAEA + * rounded open-mid back vowel 119F HANGUL JUNGSEONG ARAEA-EO 11A0 HANGUL JUNGSEONG ARAEA-U 11A1 HANGUL JUNGSEONG ARAEA-I @@ -6970,6 +7080,7 @@ = SS 11BC HANGUL JONGSEONG IEUNG = NG + * velar nasal consonant 11BD HANGUL JONGSEONG CIEUC = J 11BE HANGUL JONGSEONG CHIEUCH @@ -8599,7 +8710,7 @@ * 17B1 is the normal variant of this vowel 17B3 KHMER INDEPENDENT VOWEL QAU @ Inherent vowels -@+ These are for phonetic transcription to distinguish Indic language inherent vowels from Khmer inherent vowels. These characters are included solely for compatibility with particular applications; their use in other contexts is discouraged. +@+ These are invisible combining marks for phonetic transcription to distinguish Indic language inherent vowels from Khmer inherent vowels. These characters are included solely for compatibility with particular applications; their use in other contexts is discouraged. 
17B4 KHMER VOWEL INHERENT AQ 17B5 KHMER VOWEL INHERENT AA @ Dependent vowel signs @@ -9338,6 +9449,8 @@ 1A2C TAI THAM LETTER NYA 1A2D TAI THAM LETTER RATA 1A2E TAI THAM LETTER HIGH RATHA + * an alternative glyph with the upper part shaped like 1A33 is used in Thailand and Laos + * contrast the sequence 1A2D 1A5B 1A2F TAI THAM LETTER DA 1A30 TAI THAM LETTER LOW RATHA 1A31 TAI THAM LETTER RANA @@ -9744,10 +9857,17 @@ = e 1BA9 SUNDANESE VOWEL SIGN PANEULEUNG = eu -@ Virama +@ Viramas 1BAA SUNDANESE SIGN PAMAAEH = virama * does not form conjuncts +1BAB SUNDANESE SIGN VIRAMA + * forms conjuncts in older orthography +@ Consonant signs +1BAC SUNDANESE CONSONANT SIGN PASANGAN MA + = subjoined ma +1BAD SUNDANESE CONSONANT SIGN PASANGAN WA + = subjoined wa @ Additional consonants 1BAE SUNDANESE LETTER KHA 1BAF SUNDANESE LETTER SYA @@ -9762,11 +9882,21 @@ 1BB7 SUNDANESE DIGIT SEVEN 1BB8 SUNDANESE DIGIT EIGHT 1BB9 SUNDANESE DIGIT NINE +@ Sign +1BBA SUNDANESE AVAGRAHA +@ Historic letters +1BBB SUNDANESE LETTER REU + * vocalic r +1BBC SUNDANESE LETTER LEU + * vocalic l +1BBD SUNDANESE LETTER BHA +1BBE SUNDANESE LETTER FINAL K +1BBF SUNDANESE LETTER FINAL M @@ 1BC0 Batak 1BFF @ Letters @+ Annotations for letters indicate different usage among the various alphabets sharing the Batak script. 
1BC0 BATAK LETTER A - * letter a or ha for Karo and Pakpak + * letter a or ha for Karo and Pakpak 1BC1 BATAK LETTER SIMALUNGUN A 1BC2 BATAK LETTER HA * Toba letter ha or ka @@ -9989,6 +10119,20 @@ @ Punctuation 1C7E OL CHIKI PUNCTUATION MUCAAD 1C7F OL CHIKI PUNCTUATION DOUBLE MUCAAD +@@ 1CC0 Sundanese Supplement 1CCF +@ Punctuation +1CC0 SUNDANESE PUNCTUATION BINDU SURYA + * sun +1CC1 SUNDANESE PUNCTUATION BINDU PANGLONG + * half moon +1CC2 SUNDANESE PUNCTUATION BINDU PURNAMA + * full moon +1CC3 SUNDANESE PUNCTUATION BINDU CAKRA + * wheel +1CC4 SUNDANESE PUNCTUATION BINDU LEU SATANGA +1CC5 SUNDANESE PUNCTUATION BINDU KA SATANGA +1CC6 SUNDANESE PUNCTUATION BINDU DA SATANGA +1CC7 SUNDANESE PUNCTUATION BINDU BA SATANGA @@ 1CD0 Vedic Extensions 1CFF @ Tone marks for the Samaveda 1CD0 VEDIC TONE KARSHANA @@ -10074,8 +10218,18 @@ @+ Ardhavisarga denotes the sounds jihvamuliya and upadhmaniya (velar and bilabial voicelss fricatives) in Sanskrit. Its use is not limited to Vedic. 1CF2 VEDIC SIGN ARDHAVISARGA = vaidika jihvaamuuliiya upadhmaaniiya +1CF3 VEDIC SIGN ROTATED ARDHAVISARGA +@ Sign for Yajurvedic +1CF4 VEDIC TONE CANDRA ABOVE +@ Signs +1CF5 VEDIC SIGN JIHVAMULIYA + * marks a velar fricative occurring only before unvoiced velar stops x (kannada sign jihvamuliya - 0CF1) + x (tibetan sign lce tsa can - 0F88) +1CF6 VEDIC SIGN UPADHMANIYA + * marks a bilabial fricative occurring only before unvoiced labial stops x (kannada sign upadhmaniya - 0CF2) + x (tibetan sign mchu can - 0F89) @@ 1D00 Phonetic Extensions 1D7F @+ These are non-IPA phonetic extensions, mostly for the Uralic Phonetic Alphabet (UPA). @+ The small capitals, superscript, and subscript forms are for phonetic representations where style variations are semantically important. 
@@ -11594,6 +11748,7 @@ 2013 EN DASH 2014 EM DASH * may be used in pairs to offset parenthetical text + x (two-em dash - 2E3A) x (katakana-hiragana prolonged sound mark - 30FC) 2015 HORIZONTAL BAR = quotation dash @@ -11608,6 +11763,8 @@ x (low line - 005F) x (combining double low line - 0333) # 0020 0333 +@ Quotation marks and apostrophe +@+ Use of quotation marks differs by language. The character names cannot reflect actual usage for all languages. 2018 LEFT SINGLE QUOTATION MARK = single turned comma quotation mark * this is the preferred character (as opposed to 201B) @@ -11646,8 +11803,10 @@ 201F DOUBLE HIGH-REVERSED-9 QUOTATION MARK = double reversed comma quotation mark * has same semantic as 201C, but differs in appearance +@ General punctuation 2020 DAGGER = obelisk, obelus, long cross + x (turned dagger - 2E38) 2021 DOUBLE DAGGER = diesis, double obelisk 2022 BULLET @@ -11674,6 +11833,7 @@ x (presentation form for vertical horizontal ellipsis - FE19) # 002E 002E 002E 2027 HYPHENATION POINT + * visible symbol used to indicate correct positions for word breaking, as in dic·tion·ar·ies @ Format characters 2028 LINE SEPARATOR * may be used to represent this semantic unambiguously @@ -11733,6 +11893,7 @@ 2038 CARET x (up arrowhead - 2303) x (modifier letter low circumflex accent - A788) +@ Quotation marks 2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK = left pointing single guillemet * usually opening, sometimes closing @@ -11745,6 +11906,7 @@ x (greater-than sign - 003E) x (right-pointing angle bracket - 232A) x (right angle bracket - 3009) +@ General punctuation 203B REFERENCE MARK = Japanese kome = Urdu paragraph separator @@ -12061,8 +12223,8 @@ x (box drawings down single and horizontal double - 2564) x (postal mark - 3012) 20B9 INDIAN RUPEE SIGN - * official Rupee currency sign for India - * contrasts with script-specific Rupee signs and abbreviations + * official rupee currency sign for India + * contrasts with script-specific rupee signs and 
abbreviations x (devanagari letter ra - 0930) @@ 20D0 Combining Diacritical Marks for Symbols 20FF @ Combining diacritical marks for symbols @@ -12205,7 +12367,7 @@ x (copyright sign - 00A9) x (circled latin capital letter p - 24C5) 2118 SCRIPT CAPITAL P - = Weierstrass elliptic function + % WEIERSTRASS ELLIPTIC FUNCTION * actually this has the form of a lowercase calligraphic p, despite its name 2119 DOUBLE-STRUCK CAPITAL P # <font> 0050 latin capital letter p @@ -12640,16 +12802,18 @@ x (north east white arrow - 2B00) 21E7 UPWARDS WHITE ARROW = shift + = level 2 select (ISO 9995-7) 21E8 RIGHTWARDS WHITE ARROW + = group select (ISO 9995-7) 21E9 DOWNWARDS WHITE ARROW 21EA UPWARDS WHITE ARROW FROM BAR = caps lock 21EB UPWARDS WHITE ARROW ON PEDESTAL = level 2 lock 21EC UPWARDS WHITE ARROW ON PEDESTAL WITH HORIZONTAL BAR - = caps lock + = capitals (caps) lock 21ED UPWARDS WHITE ARROW ON PEDESTAL WITH VERTICAL BAR - = numerics lock + = numeric lock 21EE UPWARDS WHITE DOUBLE ARROW = level 3 select 21EF UPWARDS WHITE DOUBLE ARROW ON PEDESTAL @@ -12750,8 +12914,11 @@ * generic division operator x (solidus - 002F) x (fraction slash - 2044) + x (mathematical rising diagonal - 27CB) 2216 SET MINUS x (reverse solidus - 005C) + x (mathematical falling diagonal - 27CD) + x (reverse solidus operator - 29F5) 2217 ASTERISK OPERATOR x (asterisk - 002A) 2218 RING OPERATOR @@ -12781,7 +12948,7 @@ 2221 MEASURED ANGLE 2222 SPHERICAL ANGLE = angle arc -@ Operators +@ Relations 2223 DIVIDES = such that = APL stile @@ -13263,6 +13430,7 @@ x (equal and parallel to - 22D5) 2318 PLACE OF INTEREST SIGN = command key (1.0) + = operating system key (ISO 9995-7) 2319 TURNED NOT SIGN = line marker @ User interface symbols @@ -13686,21 +13854,28 @@ @+ * from ISO 2047 x (arabic question mark - 061F) @@ 2440 Optical Character Recognition 245F -@ OCR +@ OCR-A 2440 OCR HOOK 2441 OCR CHAIR 2442 OCR FORK 2443 OCR INVERTED FORK 2444 OCR BELT BUCKLE 2445 OCR BOW TIE + = unique asterisk x (bowtie - 22C8) 
+@ MICR +@+ These magnetic ink character recognition symbols are used on checks. The are derived from the E-13B font and are standardized in ISO 1004:1995. The Unicode character names include several misnomers. 2446 OCR BRANCH BANK IDENTIFICATION = transit 2447 OCR AMOUNT OF CHECK + = amount 2448 OCR DASH + % MICR ON US SYMBOL = on us 2449 OCR CUSTOMER ACCOUNT NUMBER + % MICR DASH SYMBOL = dash +@ OCR 244A OCR DOUBLE BACKSLASH @@ 2460 Enclosed Alphanumerics 24FF @ Circled numbers @@ -14249,6 +14424,7 @@ 25A8 SQUARE WITH UPPER RIGHT TO LOWER LEFT FILL 25A9 SQUARE WITH DIAGONAL CROSSHATCH FILL 25AA BLACK SMALL SQUARE + = square bullet x (black very small square - 2B1D) 25AB WHITE SMALL SQUARE x (white very small square - 2B1E) @@ -14710,6 +14886,7 @@ = legal term, jurisprudence 2697 ALEMBIC = chemical term, chemistry + x (alchemical symbol for retort - 1F76D) 2698 FLOWER = botanical term x (flower punctuation mark - 2055) @@ -15246,12 +15423,21 @@ 27CA VERTICAL BAR WITH HORIZONTAL STROKE x (parallel with horizontal stroke - 2AF2) x (triple vertical bar with horizontal stroke - 2AF5) +@ Miscellaneous symbol +27CB MATHEMATICAL RISING DIAGONAL + = \diagup + x (division slash - 2215) @ Division operator 27CC LONG DIVISION * graphically extends over the dividend x (division sign - 00F7) x (division slash - 2215) x (square root - 221A) +@ Miscellaneous symbol +27CD MATHEMATICAL FALLING DIAGONAL + = \diagdown + x (set minus - 2216) + x (reverse solidus operator - 29F5) @ Operators 27CE SQUARED LOGICAL AND = box min @@ -15318,6 +15504,7 @@ 27E5 WHITE SQUARE WITH RIGHTWARDS TICK = will always be (modal operator) @ Mathematical brackets +@+ These bracket characters are also used as punctuation outside of a mathematical context. 
27E6 MATHEMATICAL LEFT WHITE SQUARE BRACKET = z notation left bag bracket x (left white square bracket - 301A) @@ -16032,6 +16219,7 @@ 29F5 REVERSE SOLIDUS OPERATOR x (reverse solidus - 005C) x (set minus - 2216) + x (mathematical falling diagonal - 27CD) 29F6 SOLIDUS WITH OVERBAR 29F7 REVERSE SOLIDUS WITH HORIZONTAL STROKE x (apl functional symbol backslash bar - 2340) @@ -16911,6 +17099,11 @@ 2CF1 COPTIC COMBINING SPIRITUS LENIS x (combining comma above - 0313) x (combining cyrillic psili pneumata - 0486) +@ Bohairic Coptic letters +2CF2 COPTIC CAPITAL LETTER BOHAIRIC KHEI + x (coptic capital letter khei - 03E6) +2CF3 COPTIC SMALL LETTER BOHAIRIC KHEI + x (coptic small letter khei - 03E7) @ Old Nubian punctuation 2CF9 COPTIC OLD NUBIAN FULL STOP 2CFA COPTIC OLD NUBIAN DIRECT QUESTION MARK @@ -16962,6 +17155,9 @@ 2D23 GEORGIAN SMALL LETTER WE 2D24 GEORGIAN SMALL LETTER HAR 2D25 GEORGIAN SMALL LETTER HOE +@ Additional letters for Ossetian +2D27 GEORGIAN SMALL LETTER YN +2D2D GEORGIAN SMALL LETTER AEN @@ 2D30 Tifinagh 2D7F @ Letters 2D30 TIFINAGH LETTER YA @@ -17022,6 +17218,8 @@ 2D64 TIFINAGH LETTER TAWELLEMET YAZ = harpoon yaz 2D65 TIFINAGH LETTER YAZZ +2D66 TIFINAGH LETTER YE +2D67 TIFINAGH LETTER YO @ Modifier letter 2D6F TIFINAGH MODIFIER LETTER LABIALIZATION MARK = tamatart @@ -17265,6 +17463,36 @@ 2E31 WORD SEPARATOR MIDDLE DOT * used in Avestan, Samaritan, ... 
x (middle dot - 00B7) +@ Palaeotype transliteration symbol +2E32 TURNED COMMA + * indicates nasalization + x (arabic comma - 060C) +@ Historic punctuation +2E33 RAISED DOT + * glyph position intermediate between 002E and 00B7 + x (full stop - 002E) + x (middle dot - 00B7) +2E34 RAISED COMMA + x (comma - 002C) +@ Palaeotype transliteration symbols +2E35 TURNED SEMICOLON + * indicates sudden glottal closure + x (arabic semicolon - 061B) +2E36 DAGGER WITH LEFT GUARD + * indicates retracted pronunciation +2E37 DAGGER WITH RIGHT GUARD + * indicates advanced pronunciation +2E38 TURNED DAGGER + * indicates retroflex pronunciation + x (dagger - 2020) +2E39 TOP HALF SECTION SIGN + * indicates pronunciation on one side of the mouth only + x (section sign - 00A7) +@ Dashes +2E3A TWO-EM DASH + = omission dash + x (em dash - 2014) +2E3B THREE-EM DASH @@ 2E80 CJK Radicals Supplement 2EFF @ CJK radicals supplement 2E80 CJK RADICAL REPEAT @@ -18499,6 +18727,7 @@ 3146 HANGUL LETTER SSANGSIOS # 110A hangul choseong ssangsios 3147 HANGUL LETTER IEUNG + * zero sound as initial or velar nasal consonant as final # 110B hangul choseong ieung 3148 HANGUL LETTER CIEUC # 110C hangul choseong cieuc @@ -18513,6 +18742,7 @@ 314D HANGUL LETTER PHIEUPH # 1111 hangul choseong phieuph 314E HANGUL LETTER HIEUH + * voiceless glottal fricative # 1112 hangul choseong hieuh 314F HANGUL LETTER A # 1161 hangul jungseong a @@ -18614,12 +18844,13 @@ 317E HANGUL LETTER SIOS-CIEUC # 1136 hangul choseong sios-cieuc 317F HANGUL LETTER PANSIOS + * voiced alveolar fricative # 1140 hangul choseong pansios 3180 HANGUL LETTER SSANGIEUNG = ssangyesieung # 1147 hangul choseong ssangieung 3181 HANGUL LETTER YESIEUNG - * old velar nasal + * velar nasal consonant # 114C hangul choseong yesieung 3182 HANGUL LETTER YESIEUNG-SIOS # 11F1 hangul jongseong yesieung-sios @@ -18630,7 +18861,7 @@ 3185 HANGUL LETTER SSANGHIEUH # 1158 hangul choseong ssanghieuh 3186 HANGUL LETTER YEORINHIEUH - * old glottal stop + * glottal stop # 
1159 hangul choseong yeorinhieuh 3187 HANGUL LETTER YO-YA # 1184 hangul jungseong yo-ya @@ -18645,6 +18876,7 @@ 318C HANGUL LETTER YU-I # 1194 hangul jungseong yu-i 318D HANGUL LETTER ARAEA + * rounded open-mid back vowel # 119E hangul jungseong araea 318E HANGUL LETTER ARAEAE # 11A1 hangul jungseong araea-i @@ -19770,6 +20002,8 @@ # <square> 0047 0048 007A 3394 SQUARE THZ # <square> 0054 0048 007A +@ Abbreviations involving liter symbols +@+ The glyphs for these squared abbreviations may use the SI symbol for liter, "l" or "L", instead of a script l. 3395 SQUARE MU L # <square> 03BC 2113 3396 SQUARE ML @@ -19778,6 +20012,7 @@ # <square> 0064 2113 3398 SQUARE KL # <square> 006B 2113 +@ Squared Latin abbreviations 3399 SQUARE FM # <square> 0066 006D 339A SQUARE NM @@ -20054,7 +20289,7 @@ 4DFD HEXAGRAM FOR SMALL PREPONDERANCE 4DFE HEXAGRAM FOR AFTER COMPLETION 4DFF HEXAGRAM FOR BEFORE COMPLETION -@@ 4E00 CJK Unified Ideographs 9FCB +@@ 4E00 CJK Unified Ideographs 9FCC @@ A000 Yi Syllables A48F @@+ @ Syllables @@ -21767,6 +22002,14 @@ A672 COMBINING CYRILLIC THOUSAND MILLIONS SIGN @ Punctuation mark A673 SLAVONIC ASTERISK @ Combining marks for Old Cyrillic +A674 COMBINING CYRILLIC LETTER UKRAINIAN IE +A675 COMBINING CYRILLIC LETTER I +A676 COMBINING CYRILLIC LETTER YI +A677 COMBINING CYRILLIC LETTER U +A678 COMBINING CYRILLIC LETTER HARD SIGN +A679 COMBINING CYRILLIC LETTER YERU +A67A COMBINING CYRILLIC LETTER SOFT SIGN +A67B COMBINING CYRILLIC LETTER OMEGA A67C COMBINING CYRILLIC KAVYKA * indicates an alternative reading to part of a word x (combining breve - 0306) @@ -21805,6 +22048,8 @@ A694 CYRILLIC CAPITAL LETTER HWE A695 CYRILLIC SMALL LETTER HWE A696 CYRILLIC CAPITAL LETTER SHWE A697 CYRILLIC SMALL LETTER SHWE +@ Combining mark for Old Cyrillic +A69F COMBINING CYRILLIC LETTER IOTIFIED E @@ A6A0 Bamum A6FF @ Syllables A6A0 BAMUM LETTER A @@ -22086,7 +22331,7 @@ A78C LATIN SMALL LETTER SALTILLO x (latin letter glottal stop - 0294) x (modifier letter apostrophe - 
02BC) x (modifier letter glottal stop - 02C0) -@ African letter +@ Additional letter A78D LATIN CAPITAL LETTER TURNED H * used in the Dan/Gio orthography in Liberia * lowercase is 0265 @@ -22094,9 +22339,14 @@ A78D LATIN CAPITAL LETTER TURNED H A78E LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT * voiceless lateral retroflex fricative * used to transcribe Toda -@ Janalif letters +@ Additional letters A790 LATIN CAPITAL LETTER N WITH DESCENDER A791 LATIN SMALL LETTER N WITH DESCENDER + * Janalif +A792 LATIN CAPITAL LETTER C WITH BAR + = Cambrian symbol +A793 LATIN SMALL LETTER C WITH BAR + * Nanai @ Latvian letters for pre-1921 orthography A7A0 LATIN CAPITAL LETTER G WITH OBLIQUE STROKE A7A1 LATIN SMALL LETTER G WITH OBLIQUE STROKE @@ -22110,6 +22360,17 @@ A7A8 LATIN CAPITAL LETTER S WITH OBLIQUE STROKE A7A9 LATIN SMALL LETTER S WITH OBLIQUE STROKE * also used in pre-1950 Lower Sorbian orthography x (latin small letter long s with diagonal stroke - 1E9C) +@ Additional letter +A7AA LATIN CAPITAL LETTER H WITH HOOK + * lowercase is 0266 + * used in Chad +@ Additions for Extended IPA +A7F8 MODIFIER LETTER CAPITAL H WITH STROKE + * faucalized + # <super> 0126 +A7F9 MODIFIER LETTER SMALL LIGATURE OE + * labialized: open-rounded + # <super> 0153 @ Addition for UPA A7FA LATIN LETTER SMALL CAPITAL TURNED M @ Ancient Roman epigraphic letters @@ -23002,6 +23263,46 @@ AADE TAI VIET SYMBOL HO HOI * marks start of text in songs and poems AADF TAI VIET SYMBOL KOI KOI * marks end of text in songs and poems +@@ AAE0 Meetei Mayek Extensions AAFF +@+ The characters in this block are extensions for historical orthographies of Meetei and are not specified in the Manupuri Government order No. 1/2/78-SS/E. 
+@ Independent vowel signs +AAE0 MEETEI MAYEK LETTER E +AAE1 MEETEI MAYEK LETTER O +@ Consonants +AAE2 MEETEI MAYEK LETTER CHA +AAE3 MEETEI MAYEK LETTER NYA +AAE4 MEETEI MAYEK LETTER TTA +AAE5 MEETEI MAYEK LETTER TTHA +AAE6 MEETEI MAYEK LETTER DDA +AAE7 MEETEI MAYEK LETTER DDHA +AAE8 MEETEI MAYEK LETTER NNA +AAE9 MEETEI MAYEK LETTER SHA +AAEA MEETEI MAYEK LETTER SSA +@ Dependent vowel signs +AAEB MEETEI MAYEK VOWEL SIGN II +AAEC MEETEI MAYEK VOWEL SIGN UU +AAED MEETEI MAYEK VOWEL SIGN AAI +AAEE MEETEI MAYEK VOWEL SIGN AU +AAEF MEETEI MAYEK VOWEL SIGN AAU +@ Punctuation +AAF0 MEETEI MAYEK CHEIKHAN + = danda +AAF1 MEETEI MAYEK AHANG KHUDAM + = question mark +@ Sign +AAF2 MEETEI MAYEK ANJI + * a philosophical sign + x (devanagari om - 0950) +@ Repetition marks +@+ These marks have fallen into disuse. +AAF3 MEETEI MAYEK SYLLABLE REPETITION MARK +AAF4 MEETEI MAYEK WORD REPETITION MARK +@ Sign +AAF5 MEETEI MAYEK VOWEL SIGN VISARGA +@ Virama +AAF6 MEETEI MAYEK VIRAMA + * used to form conjuncts in historical orthographies + x (myanmar sign virama - 1039) @@ AB00 Ethiopic Extended-A AB2F @ Gamo-Gofa-Dawro and Basketo AB01 ETHIOPIC SYLLABLE TTHU @@ -23238,6 +23539,7 @@ D7FB HANGUL JONGSEONG PHIEUPH-THIEUTH @@ F900 CJK Compatibility Ideographs FAFF @@+ @+ This block, despite its name, contains a number of unified CJK ideographs. Those characters are individually identified by annotations. +@+ Subheaders identifying sources for subranges do not indicate required usage or preclude mappings to other sources. For example, many pronunciation variants from KS X 1001:1998 are also mapped to a J source. 
@ Pronunciation variants from KS X 1001:1998 F900 CJK COMPATIBILITY IDEOGRAPH-F900 : 8C48 @@ -23847,6 +24149,11 @@ FA2C CJK COMPATIBILITY IDEOGRAPH-FA2C : 9928 FA2D CJK COMPATIBILITY IDEOGRAPH-FA2D : 9DB4 +@ Korean compatibility ideographs +FA2E CJK COMPATIBILITY IDEOGRAPH-FA2E + : 90DE +FA2F CJK COMPATIBILITY IDEOGRAPH-FA2F + : 96B7 @ JIS X 0213 compatibility ideographs FA30 CJK COMPATIBILITY IDEOGRAPH-FA30 : 4FAE @@ -25589,7 +25896,7 @@ FE0C VARIATION SELECTOR-13 FE0D VARIATION SELECTOR-14 FE0E VARIATION SELECTOR-15 FE0F VARIATION SELECTOR-16 -@@ FE10 Vertical forms FE1F +@@ FE10 Vertical Forms FE1F @+ These characters are compatibility characters needed to map to GB 18030. @ Glyphs for vertical variants FE10 PRESENTATION FORM FOR VERTICAL COMMA @@ -27624,6 +27931,102 @@ FFFF <not a character> 10939 LYDIAN LETTER C @ Punctuation 1093F LYDIAN TRIANGULAR MARK +@@ 10980 Meroitic Hieroglyphs 1099F +@ Vowel letters +10980 MEROITIC HIEROGLYPHIC LETTER A + x (egyptian hieroglyph a001 - 13000) +10981 MEROITIC HIEROGLYPHIC LETTER E + x (egyptian hieroglyph h006 - 13184) +10982 MEROITIC HIEROGLYPHIC LETTER I + x (egyptian hieroglyph a026 - 1301E) +10983 MEROITIC HIEROGLYPHIC LETTER O + x (egyptian hieroglyph f001 - 130FE) +@ Consonant letters +10984 MEROITIC HIEROGLYPHIC LETTER YA + x (egyptian hieroglyph m017a - 131CC) +10985 MEROITIC HIEROGLYPHIC LETTER WA + x (egyptian hieroglyph v004 - 1336F) +10986 MEROITIC HIEROGLYPHIC LETTER BA + x (egyptian hieroglyph e011 - 130DE) +10987 MEROITIC HIEROGLYPHIC LETTER BA-2 + x (egyptian hieroglyph d058 - 130C0) +10988 MEROITIC HIEROGLYPHIC LETTER PA + x (egyptian hieroglyph q003 - 132AA) +10989 MEROITIC HIEROGLYPHIC LETTER MA + x (egyptian hieroglyph g017 - 13153) +1098A MEROITIC HIEROGLYPHIC LETTER NA + x (egyptian hieroglyph n035 - 13216) +1098B MEROITIC HIEROGLYPHIC LETTER NA-2 +1098C MEROITIC HIEROGLYPHIC LETTER NE + x (egyptian hieroglyph m022a - 131D2) +1098D MEROITIC HIEROGLYPHIC LETTER NE-2 +1098E MEROITIC HIEROGLYPHIC 
LETTER RA + x (egyptian hieroglyph d021 - 1308B) +1098F MEROITIC HIEROGLYPHIC LETTER RA-2 +10990 MEROITIC HIEROGLYPHIC LETTER LA + x (egyptian hieroglyph e023 - 130ED) +10991 MEROITIC HIEROGLYPHIC LETTER KHA + x (egyptian hieroglyph aa001 - 1340D) +10992 MEROITIC HIEROGLYPHIC LETTER HHA + x (egyptian hieroglyph w011 - 133BC) +10993 MEROITIC HIEROGLYPHIC LETTER SA + x (egyptian hieroglyph m008 - 131B7) +10994 MEROITIC HIEROGLYPHIC LETTER SA-2 + x (egyptian hieroglyph o034 - 13283) +10995 MEROITIC HIEROGLYPHIC LETTER SE + x (egyptian hieroglyph o034 - 13283) +10996 MEROITIC HIEROGLYPHIC LETTER KA + x (egyptian hieroglyph g038 - 1316C) +10997 MEROITIC HIEROGLYPHIC LETTER QA + x (egyptian hieroglyph n029 - 1320E) +10998 MEROITIC HIEROGLYPHIC LETTER TA + x (egyptian hieroglyph v013 - 1337F) +10999 MEROITIC HIEROGLYPHIC LETTER TA-2 + x (egyptian hieroglyph n016 - 131FE) +1099A MEROITIC HIEROGLYPHIC LETTER TE + x (egyptian hieroglyph n016 - 131FE) + x (egyptian hieroglyph o004 - 13254) +1099B MEROITIC HIEROGLYPHIC LETTER TE-2 + x (egyptian hieroglyph o004 - 13254) +1099C MEROITIC HIEROGLYPHIC LETTER TO + x (egyptian hieroglyph n021 - 13205) +1099D MEROITIC HIEROGLYPHIC LETTER DA + x (egyptian hieroglyph d006 - 1307B) +@ Symbols +1099E MEROITIC HIEROGLYPHIC SYMBOL VIDJ + x (ankh - 2625) + x (egyptian hieroglyph s034 - 132F9) +1099F MEROITIC HIEROGLYPHIC SYMBOL VIDJ-2 +@@ 109A0 Meroitic Cursive 109FF +@ Vowel letters +109A0 MEROITIC CURSIVE LETTER A +109A1 MEROITIC CURSIVE LETTER E +109A2 MEROITIC CURSIVE LETTER I +109A3 MEROITIC CURSIVE LETTER O +@ Consonant letters +109A4 MEROITIC CURSIVE LETTER YA +109A5 MEROITIC CURSIVE LETTER WA +109A6 MEROITIC CURSIVE LETTER BA +109A7 MEROITIC CURSIVE LETTER PA +109A8 MEROITIC CURSIVE LETTER MA +109A9 MEROITIC CURSIVE LETTER NA +109AA MEROITIC CURSIVE LETTER NE +109AB MEROITIC CURSIVE LETTER RA +109AC MEROITIC CURSIVE LETTER LA +109AD MEROITIC CURSIVE LETTER KHA +109AE MEROITIC CURSIVE LETTER HHA +109AF MEROITIC CURSIVE LETTER SA 
+109B0 MEROITIC CURSIVE LETTER ARCHAIC SA +109B1 MEROITIC CURSIVE LETTER SE +109B2 MEROITIC CURSIVE LETTER KA +109B3 MEROITIC CURSIVE LETTER QA +109B4 MEROITIC CURSIVE LETTER TA +109B5 MEROITIC CURSIVE LETTER TE +109B6 MEROITIC CURSIVE LETTER TO +109B7 MEROITIC CURSIVE LETTER DA +@ Logograms +109BE MEROITIC CURSIVE LOGOGRAM RMT +109BF MEROITIC CURSIVE LOGOGRAM IMN @@ 10A00 Kharoshthi 10A5F @ Vowels 10A00 KHAROSHTHI LETTER A @@ -28255,6 +28658,346 @@ FFFF <not a character> * paragraph delimiter 110C0 KAITHI DANDA 110C1 KAITHI DOUBLE DANDA +@@ 110D0 Sora Sompeng 110FF +@ Consonants +110D0 SORA SOMPENG LETTER SAH +110D1 SORA SOMPENG LETTER TAH +110D2 SORA SOMPENG LETTER BAH +110D3 SORA SOMPENG LETTER CAH +110D4 SORA SOMPENG LETTER DAH +110D5 SORA SOMPENG LETTER GAH +110D6 SORA SOMPENG LETTER MAH +110D7 SORA SOMPENG LETTER NGAH +110D8 SORA SOMPENG LETTER LAH +110D9 SORA SOMPENG LETTER NAH +110DA SORA SOMPENG LETTER VAH +110DB SORA SOMPENG LETTER PAH +110DC SORA SOMPENG LETTER YAH +110DD SORA SOMPENG LETTER RAH +110DE SORA SOMPENG LETTER HAH +110DF SORA SOMPENG LETTER KAH +110E0 SORA SOMPENG LETTER JAH +110E1 SORA SOMPENG LETTER NYAH +@ Vowels +110E2 SORA SOMPENG LETTER AH +110E3 SORA SOMPENG LETTER EEH +110E4 SORA SOMPENG LETTER IH +110E5 SORA SOMPENG LETTER UH +110E6 SORA SOMPENG LETTER OH +110E7 SORA SOMPENG LETTER EH +@ Other letter +110E8 SORA SOMPENG LETTER MAE +@ Digits +110F0 SORA SOMPENG DIGIT ZERO +110F1 SORA SOMPENG DIGIT ONE +110F2 SORA SOMPENG DIGIT TWO +110F3 SORA SOMPENG DIGIT THREE +110F4 SORA SOMPENG DIGIT FOUR +110F5 SORA SOMPENG DIGIT FIVE +110F6 SORA SOMPENG DIGIT SIX +110F7 SORA SOMPENG DIGIT SEVEN +110F8 SORA SOMPENG DIGIT EIGHT +110F9 SORA SOMPENG DIGIT NINE +@@ 11100 Chakma 1114F +@ Various signs +11100 CHAKMA SIGN CANDRABINDU + = caanaphupudaa +11101 CHAKMA SIGN ANUSVARA + = ekaphudaa +11102 CHAKMA SIGN VISARGA + = dviphudaa +@ Independent vowels +11103 CHAKMA LETTER AA + = pichapujhaa aa +11104 CHAKMA LETTER I + = delabhaangagaa i +11105 CHAKMA 
LETTER U + = bacacu u +11106 CHAKMA LETTER E + = lejaubaa e +@ Consonants +11107 CHAKMA LETTER KAA + = cucyaangyaa kaa +11108 CHAKMA LETTER KHAA + = grajaangyaa khaa +11109 CHAKMA LETTER GAA + = caandyaa gaa +1110A CHAKMA LETTER GHAA + = tinaddaalyaa ghaa +1110B CHAKMA LETTER NGAA + = cilaama ngaa +1110C CHAKMA LETTER CAA + = dvibhalyaa caa +1110D CHAKMA LETTER CHAA + = majaraa chaa +1110E CHAKMA LETTER JAA + = dvipadalaa haa +1110F CHAKMA LETTER JHAA + = uraauraa jhaa +11110 CHAKMA LETTER NYAA + = silaacyaa nyaa +11111 CHAKMA LETTER TTAA + = dviyaadaat ttaa +11112 CHAKMA LETTER TTHAA + = phudaadviyaat tthaa +11113 CHAKMA LETTER DDAA + = aadudaangaat ddaa +11114 CHAKMA LETTER DDHAA + = lejabharaat ddhaa +11115 CHAKMA LETTER NNAA + = pettttuyaa nnaa +11116 CHAKMA LETTER TAA + = ghangadaat taa +11117 CHAKMA LETTER THAA + = jagadaat thaa +11118 CHAKMA LETTER DAA + = dolaniit daa +11119 CHAKMA LETTER DHAA + = talamuyaat dhaa +1111A CHAKMA LETTER NAA + = phaarabaanyaa naa +1111B CHAKMA LETTER PAA + = paalyaa paa +1111C CHAKMA LETTER PHAA + = ubaraphudaa phaa +1111D CHAKMA LETTER BAA + = ubaramuyaa baa +1111E CHAKMA LETTER BHAA + = ciraddaalyaa bhaa +1111F CHAKMA LETTER MAA + = bugatpadalaa maa +11120 CHAKMA LETTER YYAA + = cimayyaa yyaa +11121 CHAKMA LETTER YAA + = jilyaa yaa +11122 CHAKMA LETTER RAA + = dvidaayyaa raa +11123 CHAKMA LETTER LAA + = talamuyaa laa +11124 CHAKMA LETTER WAA + = bajhonyaa waa +11125 CHAKMA LETTER SAA + = bhudibukyaa saa +11126 CHAKMA LETTER HAA + = ubaramuyaa haa +@ Dependent vowel signs +11127 CHAKMA VOWEL SIGN A + = ubaratulyaa a +11128 CHAKMA VOWEL SIGN I + = bahryaa i +11129 CHAKMA VOWEL SIGN II + = baaniiphadaa ii +1112A CHAKMA VOWEL SIGN U + = ekattaana u +1112B CHAKMA VOWEL SIGN UU + = dvittaana uu +1112C CHAKMA VOWEL SIGN E + = ekaara e +1112D CHAKMA VOWEL SIGN AI + = delabhaanga ai +1112E CHAKMA VOWEL SIGN O + = okaara o + : 11131 11127 +1112F CHAKMA VOWEL SIGN AU + = aukaara au + : 11132 11127 +11130 CHAKMA VOWEL SIGN OI + = oikaara 
oi +11131 CHAKMA O MARK +11132 CHAKMA AU MARK +@ Various signs +11133 CHAKMA VIRAMA + * used to form conjuncts + x (myanmar sign virama - 1039) +11134 CHAKMA MAAYYAA + * killer + x (myanmar sign asat - 103A) +@ Digits +11136 CHAKMA DIGIT ZERO +11137 CHAKMA DIGIT ONE +11138 CHAKMA DIGIT TWO +11139 CHAKMA DIGIT THREE +1113A CHAKMA DIGIT FOUR +1113B CHAKMA DIGIT FIVE +1113C CHAKMA DIGIT SIX +1113D CHAKMA DIGIT SEVEN +1113E CHAKMA DIGIT EIGHT +1113F CHAKMA DIGIT NINE +@ Punctuation +11140 CHAKMA SECTION MARK + = phulacihna +11141 CHAKMA DANDA + = ekacilyaa +11142 CHAKMA DOUBLE DANDA + = dvicilyaa +11143 CHAKMA QUESTION MARK + = pujhaar +@@ 11180 Sharada 111DF +@ Various signs +11180 SHARADA SIGN CANDRABINDU +11181 SHARADA SIGN ANUSVARA +11182 SHARADA SIGN VISARGA +@ Independent vowels +11183 SHARADA LETTER A +11184 SHARADA LETTER AA +11185 SHARADA LETTER I +11186 SHARADA LETTER II +11187 SHARADA LETTER U +11188 SHARADA LETTER UU +11189 SHARADA LETTER VOCALIC R +1118A SHARADA LETTER VOCALIC RR +1118B SHARADA LETTER VOCALIC L +1118C SHARADA LETTER VOCALIC LL +1118D SHARADA LETTER E +1118E SHARADA LETTER AI +1118F SHARADA LETTER O +11190 SHARADA LETTER AU +@ Consonants +11191 SHARADA LETTER KA +11192 SHARADA LETTER KHA +11193 SHARADA LETTER GA +11194 SHARADA LETTER GHA +11195 SHARADA LETTER NGA +11196 SHARADA LETTER CA +11197 SHARADA LETTER CHA +11198 SHARADA LETTER JA +11199 SHARADA LETTER JHA +1119A SHARADA LETTER NYA +1119B SHARADA LETTER TTA +1119C SHARADA LETTER TTHA +1119D SHARADA LETTER DDA +1119E SHARADA LETTER DDHA +1119F SHARADA LETTER NNA +111A0 SHARADA LETTER TA +111A1 SHARADA LETTER THA +111A2 SHARADA LETTER DA +111A3 SHARADA LETTER DHA +111A4 SHARADA LETTER NA +111A5 SHARADA LETTER PA +111A6 SHARADA LETTER PHA +111A7 SHARADA LETTER BA +111A8 SHARADA LETTER BHA +111A9 SHARADA LETTER MA +111AA SHARADA LETTER YA +111AB SHARADA LETTER RA +111AC SHARADA LETTER LA +111AD SHARADA LETTER LLA +111AE SHARADA LETTER VA +111AF SHARADA LETTER SHA +111B0 SHARADA LETTER 
SSA +111B1 SHARADA LETTER SA +111B2 SHARADA LETTER HA +@ Dependent vowel signs +111B3 SHARADA VOWEL SIGN AA +111B4 SHARADA VOWEL SIGN I +111B5 SHARADA VOWEL SIGN II +111B6 SHARADA VOWEL SIGN U +111B7 SHARADA VOWEL SIGN UU +111B8 SHARADA VOWEL SIGN VOCALIC R +111B9 SHARADA VOWEL SIGN VOCALIC RR +111BA SHARADA VOWEL SIGN VOCALIC L +111BB SHARADA VOWEL SIGN VOCALIC LL +111BC SHARADA VOWEL SIGN E +111BD SHARADA VOWEL SIGN AI +111BE SHARADA VOWEL SIGN O +111BF SHARADA VOWEL SIGN AU +@ Virama +111C0 SHARADA SIGN VIRAMA +@ Various signs +111C1 SHARADA SIGN AVAGRAHA +111C2 SHARADA SIGN JIHVAMULIYA +111C3 SHARADA SIGN UPADHMANIYA +111C4 SHARADA OM +@ Punctuation +111C5 SHARADA DANDA +111C6 SHARADA DOUBLE DANDA +111C7 SHARADA ABBREVIATION SIGN +111C8 SHARADA SEPARATOR +@ Digits +111D0 SHARADA DIGIT ZERO +111D1 SHARADA DIGIT ONE +111D2 SHARADA DIGIT TWO +111D3 SHARADA DIGIT THREE +111D4 SHARADA DIGIT FOUR +111D5 SHARADA DIGIT FIVE +111D6 SHARADA DIGIT SIX +111D7 SHARADA DIGIT SEVEN +111D8 SHARADA DIGIT EIGHT +111D9 SHARADA DIGIT NINE +@@ 11680 Takri 116CF +@ Independent vowels +11680 TAKRI LETTER A +11681 TAKRI LETTER AA +11682 TAKRI LETTER I +11683 TAKRI LETTER II +11684 TAKRI LETTER U +11685 TAKRI LETTER UU +11686 TAKRI LETTER E +11687 TAKRI LETTER AI +11688 TAKRI LETTER O +11689 TAKRI LETTER AU +@ Consonants +1168A TAKRI LETTER KA +1168B TAKRI LETTER KHA +1168C TAKRI LETTER GA +1168D TAKRI LETTER GHA +1168E TAKRI LETTER NGA +1168F TAKRI LETTER CA +11690 TAKRI LETTER CHA +11691 TAKRI LETTER JA +11692 TAKRI LETTER JHA +11693 TAKRI LETTER NYA +11694 TAKRI LETTER TTA +11695 TAKRI LETTER TTHA +11696 TAKRI LETTER DDA +11697 TAKRI LETTER DDHA +11698 TAKRI LETTER NNA +11699 TAKRI LETTER TA +1169A TAKRI LETTER THA +1169B TAKRI LETTER DA +1169C TAKRI LETTER DHA +1169D TAKRI LETTER NA +1169E TAKRI LETTER PA +1169F TAKRI LETTER PHA +116A0 TAKRI LETTER BA +116A1 TAKRI LETTER BHA +116A2 TAKRI LETTER MA +116A3 TAKRI LETTER YA +116A4 TAKRI LETTER RA +116A5 TAKRI LETTER LA +116A6 TAKRI 
LETTER VA +116A7 TAKRI LETTER SHA +116A8 TAKRI LETTER SA +116A9 TAKRI LETTER HA +116AA TAKRI LETTER RRA +@ Various signs +116AB TAKRI SIGN ANUSVARA +116AC TAKRI SIGN VISARGA +@ Dependent vowel signs +116AD TAKRI VOWEL SIGN AA +116AE TAKRI VOWEL SIGN I +116AF TAKRI VOWEL SIGN II +116B0 TAKRI VOWEL SIGN U +116B1 TAKRI VOWEL SIGN UU +116B2 TAKRI VOWEL SIGN E +116B3 TAKRI VOWEL SIGN AI +116B4 TAKRI VOWEL SIGN O +116B5 TAKRI VOWEL SIGN AU +@ Virama +116B6 TAKRI SIGN VIRAMA +@ Nukta +116B7 TAKRI SIGN NUKTA +@ Digits +116C0 TAKRI DIGIT ZERO +116C1 TAKRI DIGIT ONE +116C2 TAKRI DIGIT TWO +116C3 TAKRI DIGIT THREE +116C4 TAKRI DIGIT FOUR +116C5 TAKRI DIGIT FIVE +116C6 TAKRI DIGIT SIX +116C7 TAKRI DIGIT SEVEN +116C8 TAKRI DIGIT EIGHT +116C9 TAKRI DIGIT NINE @@ 12000 Cuneiform 123FF @ Signs 12000 CUNEIFORM SIGN A @@ -30499,7 +31242,7 @@ FFFF <not a character> 1342D EGYPTIAN HIEROGLYPH AA031 1342E EGYPTIAN HIEROGLYPH AA032 @@ 16800 Bamum Supplement 16A3F -@ Characters found through Phase A +@ Characters found through Phase A 16800 BAMUM LETTER PHASE-A NGKUE MFON 16801 BAMUM LETTER PHASE-A GBIEE FON 16802 BAMUM LETTER PHASE-A PON MFON PIPAEMGBIEE @@ -30587,7 +31330,7 @@ FFFF <not a character> 16854 BAMUM LETTER PHASE-A NEN 16855 BAMUM LETTER PHASE-A NAQ 16856 BAMUM LETTER PHASE-A MBAQ -@ Characters found through Phase B +@ Characters found through Phase B 16857 BAMUM LETTER PHASE-B NSHUET 16858 BAMUM LETTER PHASE-B TU MAEMGBIEE 16859 BAMUM LETTER PHASE-B SIEE @@ -30645,7 +31388,7 @@ FFFF <not a character> 1688C BAMUM LETTER PHASE-B MA 1688D BAMUM LETTER PHASE-B KIQ 1688E BAMUM LETTER PHASE-B NGOM -@ Characters found through Phase C +@ Characters found through Phase C 1688F BAMUM LETTER PHASE-C NGKUE MAEMBA 16890 BAMUM LETTER PHASE-C NZA 16891 BAMUM LETTER PHASE-C YUM @@ -30745,7 +31488,7 @@ FFFF <not a character> 168EE BAMUM LETTER PHASE-C PIN 168EF BAMUM LETTER PHASE-C PEN 168F0 BAMUM LETTER PHASE-C TET -@ Characters found through Phase D +@ Characters found through Phase D 
168F1 BAMUM LETTER PHASE-D MBUO 168F2 BAMUM LETTER PHASE-D WAP 168F3 BAMUM LETTER PHASE-D NJI @@ -30870,7 +31613,7 @@ FFFF <not a character> 16964 BAMUM LETTER PHASE-D SAQ 16965 BAMUM LETTER PHASE-D FAA * used before 169B8 for faamae '8' in Phases A-D -@ Characters found through Phase E +@ Characters found through Phase E 16966 BAMUM LETTER PHASE-E NDAP * i in Phase F 16967 BAMUM LETTER PHASE-E TOON @@ -31045,7 +31788,7 @@ FFFF <not a character> 16A01 BAMUM LETTER PHASE-E FAQ 16A02 BAMUM LETTER PHASE-E GHOM * used after 169F9 for koghom '10' in Phases A-D -@ Characters found through Phase F +@ Characters found through Phase F 16A03 BAMUM LETTER PHASE-F KA 16A04 BAMUM LETTER PHASE-F U 16A05 BAMUM LETTER PHASE-F KU @@ -31101,6 +31844,194 @@ FFFF <not a character> 16A37 BAMUM LETTER PHASE-F SAMBA 16A38 BAMUM LETTER PHASE-F VUEQ * used after 169F9 for kovue '9' in Phases A-D +@@ 16F00 Miao 16F9F +@ Consonant onsets +16F00 MIAO LETTER PA + * used for ba in Dry Yi +16F01 MIAO LETTER BA +16F02 MIAO LETTER YI PA + * used for pa in Dry Yi +16F03 MIAO LETTER PLA + * used in Sichuan Hmong +16F04 MIAO LETTER MA +16F05 MIAO LETTER MHA +16F06 MIAO LETTER ARCHAIC MA + * used in Pollard's early orthography +16F07 MIAO LETTER FA +16F08 MIAO LETTER VA +16F09 MIAO LETTER VFA + * used in Black Yi +16F0A MIAO LETTER TA + * used for da in Dry Yi +16F0B MIAO LETTER DA +16F0C MIAO LETTER YI TTA + * used in Hei Yi +16F0D MIAO LETTER YI TA + * used for ta in Dry Yi +16F0E MIAO LETTER TTA +16F0F MIAO LETTER DDA +16F10 MIAO LETTER NA +16F11 MIAO LETTER NHA +16F12 MIAO LETTER YI NNA + * used in Hei Yi +16F13 MIAO LETTER ARCHAIC NA + * used in Pollard's early orthography +16F14 MIAO LETTER NNA +16F15 MIAO LETTER NNHA +16F16 MIAO LETTER LA +16F17 MIAO LETTER LYA + * used in Black Yi +16F18 MIAO LETTER LHA +16F19 MIAO LETTER LHYA + * used in Black Yi +16F1A MIAO LETTER TLHA +16F1B MIAO LETTER DLHA +16F1C MIAO LETTER TLHYA +16F1D MIAO LETTER DLHYA +16F1E MIAO LETTER KA + * used for ga in Dry Yi 
+16F1F MIAO LETTER GA +16F20 MIAO LETTER YI KA + * used for ka in Dry Yi +16F21 MIAO LETTER QA +16F22 MIAO LETTER QGA +16F23 MIAO LETTER NGA +16F24 MIAO LETTER NGHA +16F25 MIAO LETTER ARCHAIC NGA + * used in Pollard's early orthography +16F26 MIAO LETTER HA +16F27 MIAO LETTER XA +@+ * archaic character used in a post-1949 reformed orthography +16F28 MIAO LETTER GHA +16F29 MIAO LETTER GHHA +16F2A MIAO LETTER TSSA +16F2B MIAO LETTER DZZA +16F2C MIAO LETTER NYA +16F2D MIAO LETTER NYHA +16F2E MIAO LETTER TSHA + * used for dzha in Dry Yi +16F2F MIAO LETTER DZHA +16F30 MIAO LETTER YI TSHA + * used for tsha in Dry Yi +16F31 MIAO LETTER YI DZHA + * used in Hei Yi +16F32 MIAO LETTER REFORMED TSHA +@+ * archaic character used in a post-1949 reformed orthography +16F33 MIAO LETTER SHA +16F34 MIAO LETTER SSA +16F35 MIAO LETTER ZHA + * used in Black Yi +16F36 MIAO LETTER ZSHA + * used in Black Yi +16F37 MIAO LETTER TSA + * used for dza in Dry Yi +16F38 MIAO LETTER DZA +16F39 MIAO LETTER YI TSA + * used for tsa in Dry Yi +16F3A MIAO LETTER SA +16F3B MIAO LETTER ZA +16F3C MIAO LETTER ZSA + * used in Black Yi +16F3D MIAO LETTER ZZA +16F3E MIAO LETTER ZZSA + * used in Black Yi +16F3F MIAO LETTER ARCHAIC ZZA + * used in Pollard's early orthography +16F40 MIAO LETTER ZZYA + * used in Black Yi +16F41 MIAO LETTER ZZSYA + * used in Black Yi +16F42 MIAO LETTER WA +16F43 MIAO LETTER AH + * glottal stop +16F44 MIAO LETTER HHA + * used in Black Yi +@ Modifiers +16F50 MIAO LETTER NASALIZATION +16F51 MIAO SIGN ASPIRATION +16F52 MIAO SIGN REFORMED VOICING +@+ * archaic character used in a post-1949 reformed orthography +16F53 MIAO SIGN REFORMED ASPIRATION +@+ * archaic character used in a post-1949 reformed orthography +@ Vowels and finals +16F54 MIAO VOWEL SIGN A +16F55 MIAO VOWEL SIGN AA + * used in Eastern Lisu +16F56 MIAO VOWEL SIGN AHH + * used in Gan Yi +16F57 MIAO VOWEL SIGN AN +16F58 MIAO VOWEL SIGN ANG + * also used for aw +16F59 MIAO VOWEL SIGN O +16F5A MIAO VOWEL SIGN OO +16F5B MIAO 
VOWEL SIGN WO + * used in Hei Yi +16F5C MIAO VOWEL SIGN W +16F5D MIAO VOWEL SIGN E +16F5E MIAO VOWEL SIGN EN +16F5F MIAO VOWEL SIGN ENG +16F60 MIAO VOWEL SIGN OEY +16F61 MIAO VOWEL SIGN I +16F62 MIAO VOWEL SIGN IA +16F63 MIAO VOWEL SIGN IAN +16F64 MIAO VOWEL SIGN IANG + * also used for iaw +16F65 MIAO VOWEL SIGN IO +16F66 MIAO VOWEL SIGN IE +16F67 MIAO VOWEL SIGN II + * used in Eastern Lisu +16F68 MIAO VOWEL SIGN IU +16F69 MIAO VOWEL SIGN ING + * also used for in +16F6A MIAO VOWEL SIGN U +16F6B MIAO VOWEL SIGN UA +16F6C MIAO VOWEL SIGN UAN +16F6D MIAO VOWEL SIGN UANG + * also used for uaw +16F6E MIAO VOWEL SIGN UU + * used in Eastern Lisu +16F6F MIAO VOWEL SIGN UEI +16F70 MIAO VOWEL SIGN UNG +16F71 MIAO VOWEL SIGN Y +16F72 MIAO VOWEL SIGN YI +16F73 MIAO VOWEL SIGN AE +16F74 MIAO VOWEL SIGN AEE + * used in Eastern Lisu +16F75 MIAO VOWEL SIGN ERR +16F76 MIAO VOWEL SIGN ROUNDED ERR + * used in Eastern Lisu +16F77 MIAO VOWEL SIGN ER +16F78 MIAO VOWEL SIGN ROUNDED ER + * used in Eastern Lisu +16F79 MIAO VOWEL SIGN AI +16F7A MIAO VOWEL SIGN EI +16F7B MIAO VOWEL SIGN AU +16F7C MIAO VOWEL SIGN OU +16F7D MIAO VOWEL SIGN N +16F7E MIAO VOWEL SIGN NG +@ Positioning tone marks +@+ These are used to position the vowel off of the baseline position to indicate a changed tone. +16F8F MIAO TONE RIGHT +16F90 MIAO TONE TOP RIGHT +16F91 MIAO TONE ABOVE +16F92 MIAO TONE BELOW +@ Baseline tone marks +@+ These are used in Chuxiong Ahmao instead of the positioning tone marks. +16F93 MIAO LETTER TONE-2 +16F94 MIAO LETTER TONE-3 +16F95 MIAO LETTER TONE-4 +16F96 MIAO LETTER TONE-5 +16F97 MIAO LETTER TONE-6 +16F98 MIAO LETTER TONE-7 +16F99 MIAO LETTER TONE-8 +@ Archaic baseline tone marks +@+ These are archaic characters used in a post-1949 reformed orthography. 
+16F9A MIAO LETTER REFORMED TONE-1 +16F9B MIAO LETTER REFORMED TONE-2 +16F9C MIAO LETTER REFORMED TONE-4 +16F9D MIAO LETTER REFORMED TONE-5 +16F9E MIAO LETTER REFORMED TONE-6 +16F9F MIAO LETTER REFORMED TONE-8 @@ 1B000 Kana Supplement 1B0FF @ Historic Katakana 1B000 KATAKANA LETTER ARCHAIC E @@ -34056,6 +34987,355 @@ FFFF <not a character> # <font> 0038 digit eight 1D7FF MATHEMATICAL MONOSPACE DIGIT NINE # <font> 0039 digit nine +@@ 1EE00 Arabic Mathematical Alphabetic Symbols 1EEFF +@ Isolated symbols +1EE00 ARABIC MATHEMATICAL ALEF + x (arabic letter alef isolated form - FE8D) + # <font> 0627 arabic letter alef +1EE01 ARABIC MATHEMATICAL BEH + x (arabic letter beh isolated form - FE8F) + # <font> 0628 arabic letter beh +1EE02 ARABIC MATHEMATICAL JEEM + x (arabic letter jeem isolated form - FE9D) + # <font> 062C arabic letter jeem +1EE03 ARABIC MATHEMATICAL DAL + x (arabic letter dal isolated form - FEA9) + # <font> 062F arabic letter dal +1EE05 ARABIC MATHEMATICAL WAW + x (arabic letter waw isolated form - FEED) + # <font> 0648 arabic letter waw +1EE06 ARABIC MATHEMATICAL ZAIN + x (arabic letter zain isolated form - FEAF) + # <font> 0632 arabic letter zain +1EE07 ARABIC MATHEMATICAL HAH + x (arabic letter hah isolated form - FEA1) + # <font> 062D arabic letter hah +1EE08 ARABIC MATHEMATICAL TAH + x (arabic letter tah isolated form - FEC1) + # <font> 0637 arabic letter tah +1EE09 ARABIC MATHEMATICAL YEH + x (arabic letter yeh isolated form - FEF1) + # <font> 064A arabic letter yeh +1EE0A ARABIC MATHEMATICAL KAF + x (arabic letter kaf isolated form - FED9) + # <font> 0643 arabic letter kaf +1EE0B ARABIC MATHEMATICAL LAM + x (arabic letter lam isolated form - FEDD) + # <font> 0644 arabic letter lam +1EE0C ARABIC MATHEMATICAL MEEM + x (arabic letter meem isolated form - FEE1) + # <font> 0645 arabic letter meem +1EE0D ARABIC MATHEMATICAL NOON + x (arabic letter noon isolated form - FEE5) + # <font> 0646 arabic letter noon +1EE0E ARABIC MATHEMATICAL SEEN + x (arabic 
letter seen isolated form - FEB1) + # <font> 0633 arabic letter seen +1EE0F ARABIC MATHEMATICAL AIN + x (arabic letter ain isolated form - FEC9) + # <font> 0639 arabic letter ain +1EE10 ARABIC MATHEMATICAL FEH + x (arabic letter feh isolated form - FED1) + # <font> 0641 arabic letter feh +1EE11 ARABIC MATHEMATICAL SAD + x (arabic letter sad isolated form - FEB9) + # <font> 0635 arabic letter sad +1EE12 ARABIC MATHEMATICAL QAF + x (arabic letter qaf isolated form - FED5) + # <font> 0642 arabic letter qaf +1EE13 ARABIC MATHEMATICAL REH + x (arabic letter reh isolated form - FEAD) + # <font> 0631 arabic letter reh +1EE14 ARABIC MATHEMATICAL SHEEN + x (arabic letter sheen isolated form - FEB5) + # <font> 0634 arabic letter sheen +1EE15 ARABIC MATHEMATICAL TEH + x (arabic letter teh isolated form - FE95) + # <font> 062A arabic letter teh +1EE16 ARABIC MATHEMATICAL THEH + x (arabic letter theh isolated form - FE99) + # <font> 062B arabic letter theh +1EE17 ARABIC MATHEMATICAL KHAH + x (arabic letter khah isolated form - FEA5) + # <font> 062E arabic letter khah +1EE18 ARABIC MATHEMATICAL THAL + x (arabic letter thal isolated form - FEAB) + # <font> 0630 arabic letter thal +1EE19 ARABIC MATHEMATICAL DAD + x (arabic letter dad isolated form - FEBD) + # <font> 0636 arabic letter dad +1EE1A ARABIC MATHEMATICAL ZAH + x (arabic letter zah isolated form - FEC5) + # <font> 0638 arabic letter zah +1EE1B ARABIC MATHEMATICAL GHAIN + x (arabic letter ghain isolated form - FECD) + # <font> 063A arabic letter ghain +1EE1C ARABIC MATHEMATICAL DOTLESS BEH + x (arabic letter dotless beh - 066E) + # <font> 066E arabic letter dotless beh +1EE1D ARABIC MATHEMATICAL DOTLESS NOON + x (arabic letter noon ghunna isolated form - FB9E) + # <font> 06BA arabic letter noon ghunna +1EE1E ARABIC MATHEMATICAL DOTLESS FEH + x (arabic letter dotless feh - 06A1) + # <font> 06A1 arabic letter dotless feh +1EE1F ARABIC MATHEMATICAL DOTLESS QAF + x (arabic letter dotless qaf - 066F) + # <font> 066F arabic 
letter dotless qaf +@ Initial symbols +1EE21 ARABIC MATHEMATICAL INITIAL BEH + x (arabic letter beh initial form - FE91) + # <font> 0628 arabic letter beh +1EE22 ARABIC MATHEMATICAL INITIAL JEEM + x (arabic letter jeem initial form - FE9F) + # <font> 062C arabic letter jeem +1EE24 ARABIC MATHEMATICAL INITIAL HEH + x (arabic letter heh initial form - FEEB) + # <font> 0647 arabic letter heh +1EE27 ARABIC MATHEMATICAL INITIAL HAH + x (arabic letter hah initial form - FEA3) + # <font> 062D arabic letter hah +1EE29 ARABIC MATHEMATICAL INITIAL YEH + x (arabic letter yeh initial form - FEF3) + # <font> 064A arabic letter yeh +1EE2A ARABIC MATHEMATICAL INITIAL KAF + x (arabic letter kaf initial form - FEDB) + # <font> 0643 arabic letter kaf +1EE2B ARABIC MATHEMATICAL INITIAL LAM + x (arabic letter lam initial form - FEDF) + # <font> 0644 arabic letter lam +1EE2C ARABIC MATHEMATICAL INITIAL MEEM + x (arabic letter meem initial form - FEE3) + # <font> 0645 arabic letter meem +1EE2D ARABIC MATHEMATICAL INITIAL NOON + x (arabic letter noon initial form - FEE7) + # <font> 0646 arabic letter noon +1EE2E ARABIC MATHEMATICAL INITIAL SEEN + x (arabic letter seen initial form - FEB3) + # <font> 0633 arabic letter seen +1EE2F ARABIC MATHEMATICAL INITIAL AIN + x (arabic letter ain initial form - FECB) + # <font> 0639 arabic letter ain +1EE30 ARABIC MATHEMATICAL INITIAL FEH + x (arabic letter feh initial form - FED3) + # <font> 0641 arabic letter feh +1EE31 ARABIC MATHEMATICAL INITIAL SAD + x (arabic letter sad initial form - FEBB) + # <font> 0635 arabic letter sad +1EE32 ARABIC MATHEMATICAL INITIAL QAF + x (arabic letter qaf initial form - FED7) + # <font> 0642 arabic letter qaf +1EE34 ARABIC MATHEMATICAL INITIAL SHEEN + x (arabic letter sheen initial form - FEB7) + # <font> 0634 arabic letter sheen +1EE35 ARABIC MATHEMATICAL INITIAL TEH + x (arabic letter teh initial form - FE97) + # <font> 062A arabic letter teh +1EE36 ARABIC MATHEMATICAL INITIAL THEH + x (arabic letter theh initial 
form - FE9B) + # <font> 062B arabic letter theh +1EE37 ARABIC MATHEMATICAL INITIAL KHAH + x (arabic letter khah initial form - FEA7) + # <font> 062E arabic letter khah +1EE39 ARABIC MATHEMATICAL INITIAL DAD + x (arabic letter dad initial form - FEBF) + # <font> 0636 arabic letter dad +1EE3B ARABIC MATHEMATICAL INITIAL GHAIN + x (arabic letter ghain initial form - FECF) + # <font> 063A arabic letter ghain +@ Tailed symbols +1EE42 ARABIC MATHEMATICAL TAILED JEEM + # <font> 062C arabic letter jeem +1EE47 ARABIC MATHEMATICAL TAILED HAH + # <font> 062D arabic letter hah +1EE49 ARABIC MATHEMATICAL TAILED YEH + # <font> 064A arabic letter yeh +1EE4B ARABIC MATHEMATICAL TAILED LAM + # <font> 0644 arabic letter lam +1EE4D ARABIC MATHEMATICAL TAILED NOON + # <font> 0646 arabic letter noon +1EE4E ARABIC MATHEMATICAL TAILED SEEN + # <font> 0633 arabic letter seen +1EE4F ARABIC MATHEMATICAL TAILED AIN + # <font> 0639 arabic letter ain +1EE51 ARABIC MATHEMATICAL TAILED SAD + # <font> 0635 arabic letter sad +1EE52 ARABIC MATHEMATICAL TAILED QAF + # <font> 0642 arabic letter qaf +1EE54 ARABIC MATHEMATICAL TAILED SHEEN + # <font> 0634 arabic letter sheen +1EE57 ARABIC MATHEMATICAL TAILED KHAH + # <font> 062E arabic letter khah +1EE59 ARABIC MATHEMATICAL TAILED DAD + # <font> 0636 arabic letter dad +1EE5B ARABIC MATHEMATICAL TAILED GHAIN + # <font> 063A arabic letter ghain +1EE5D ARABIC MATHEMATICAL TAILED DOTLESS NOON + # <font> 06BA arabic letter noon ghunna +1EE5F ARABIC MATHEMATICAL TAILED DOTLESS QAF + # <font> 066F arabic letter dotless qaf +@ Stretched symbols +1EE61 ARABIC MATHEMATICAL STRETCHED BEH + # <font> 0628 arabic letter beh +1EE62 ARABIC MATHEMATICAL STRETCHED JEEM + # <font> 062C arabic letter jeem +1EE64 ARABIC MATHEMATICAL STRETCHED HEH + # <font> 0647 arabic letter heh +1EE67 ARABIC MATHEMATICAL STRETCHED HAH + # <font> 062D arabic letter hah +1EE68 ARABIC MATHEMATICAL STRETCHED TAH + # <font> 0637 arabic letter tah +1EE69 ARABIC MATHEMATICAL STRETCHED YEH + # 
<font> 064A arabic letter yeh +1EE6A ARABIC MATHEMATICAL STRETCHED KAF + # <font> 0643 arabic letter kaf +1EE6C ARABIC MATHEMATICAL STRETCHED MEEM + # <font> 0645 arabic letter meem +1EE6D ARABIC MATHEMATICAL STRETCHED NOON + # <font> 0646 arabic letter noon +1EE6E ARABIC MATHEMATICAL STRETCHED SEEN + # <font> 0633 arabic letter seen +1EE6F ARABIC MATHEMATICAL STRETCHED AIN + # <font> 0639 arabic letter ain +1EE70 ARABIC MATHEMATICAL STRETCHED FEH + # <font> 0641 arabic letter feh +1EE71 ARABIC MATHEMATICAL STRETCHED SAD + # <font> 0635 arabic letter sad +1EE72 ARABIC MATHEMATICAL STRETCHED QAF + # <font> 0642 arabic letter qaf +1EE74 ARABIC MATHEMATICAL STRETCHED SHEEN + # <font> 0634 arabic letter sheen +1EE75 ARABIC MATHEMATICAL STRETCHED TEH + # <font> 062A arabic letter teh +1EE76 ARABIC MATHEMATICAL STRETCHED THEH + # <font> 062B arabic letter theh +1EE77 ARABIC MATHEMATICAL STRETCHED KHAH + # <font> 062E arabic letter khah +1EE79 ARABIC MATHEMATICAL STRETCHED DAD + # <font> 0636 arabic letter dad +1EE7A ARABIC MATHEMATICAL STRETCHED ZAH + # <font> 0638 arabic letter zah +1EE7B ARABIC MATHEMATICAL STRETCHED GHAIN + # <font> 063A arabic letter ghain +1EE7C ARABIC MATHEMATICAL STRETCHED DOTLESS BEH + # <font> 066E arabic letter dotless beh +1EE7E ARABIC MATHEMATICAL STRETCHED DOTLESS FEH + # <font> 06A1 arabic letter dotless feh +@ Looped symbols +1EE80 ARABIC MATHEMATICAL LOOPED ALEF + # <font> 0627 arabic letter alef +1EE81 ARABIC MATHEMATICAL LOOPED BEH + # <font> 0628 arabic letter beh +1EE82 ARABIC MATHEMATICAL LOOPED JEEM + # <font> 062C arabic letter jeem +1EE83 ARABIC MATHEMATICAL LOOPED DAL + # <font> 062F arabic letter dal +1EE84 ARABIC MATHEMATICAL LOOPED HEH + # <font> 0647 arabic letter heh +1EE85 ARABIC MATHEMATICAL LOOPED WAW + # <font> 0648 arabic letter waw +1EE86 ARABIC MATHEMATICAL LOOPED ZAIN + # <font> 0632 arabic letter zain +1EE87 ARABIC MATHEMATICAL LOOPED HAH + # <font> 062D arabic letter hah +1EE88 ARABIC MATHEMATICAL LOOPED TAH + # 
<font> 0637 arabic letter tah +1EE89 ARABIC MATHEMATICAL LOOPED YEH + # <font> 064A arabic letter yeh +1EE8B ARABIC MATHEMATICAL LOOPED LAM + # <font> 0644 arabic letter lam +1EE8C ARABIC MATHEMATICAL LOOPED MEEM + # <font> 0645 arabic letter meem +1EE8D ARABIC MATHEMATICAL LOOPED NOON + # <font> 0646 arabic letter noon +1EE8E ARABIC MATHEMATICAL LOOPED SEEN + # <font> 0633 arabic letter seen +1EE8F ARABIC MATHEMATICAL LOOPED AIN + # <font> 0639 arabic letter ain +1EE90 ARABIC MATHEMATICAL LOOPED FEH + # <font> 0641 arabic letter feh +1EE91 ARABIC MATHEMATICAL LOOPED SAD + # <font> 0635 arabic letter sad +1EE92 ARABIC MATHEMATICAL LOOPED QAF + # <font> 0642 arabic letter qaf +1EE93 ARABIC MATHEMATICAL LOOPED REH + # <font> 0631 arabic letter reh +1EE94 ARABIC MATHEMATICAL LOOPED SHEEN + # <font> 0634 arabic letter sheen +1EE95 ARABIC MATHEMATICAL LOOPED TEH + # <font> 062A arabic letter teh +1EE96 ARABIC MATHEMATICAL LOOPED THEH + # <font> 062B arabic letter theh +1EE97 ARABIC MATHEMATICAL LOOPED KHAH + # <font> 062E arabic letter khah +1EE98 ARABIC MATHEMATICAL LOOPED THAL + # <font> 0630 arabic letter thal +1EE99 ARABIC MATHEMATICAL LOOPED DAD + # <font> 0636 arabic letter dad +1EE9A ARABIC MATHEMATICAL LOOPED ZAH + # <font> 0638 arabic letter zah +1EE9B ARABIC MATHEMATICAL LOOPED GHAIN + # <font> 063A arabic letter ghain +@ Double-struck symbols +1EEA1 ARABIC MATHEMATICAL DOUBLE-STRUCK BEH + # <font> 0628 arabic letter beh +1EEA2 ARABIC MATHEMATICAL DOUBLE-STRUCK JEEM + # <font> 062C arabic letter jeem +1EEA3 ARABIC MATHEMATICAL DOUBLE-STRUCK DAL + # <font> 062F arabic letter dal +1EEA5 ARABIC MATHEMATICAL DOUBLE-STRUCK WAW + # <font> 0648 arabic letter waw +1EEA6 ARABIC MATHEMATICAL DOUBLE-STRUCK ZAIN + # <font> 0632 arabic letter zain +1EEA7 ARABIC MATHEMATICAL DOUBLE-STRUCK HAH + # <font> 062D arabic letter hah +1EEA8 ARABIC MATHEMATICAL DOUBLE-STRUCK TAH + # <font> 0637 arabic letter tah +1EEA9 ARABIC MATHEMATICAL DOUBLE-STRUCK YEH + # <font> 064A arabic 
letter yeh +1EEAB ARABIC MATHEMATICAL DOUBLE-STRUCK LAM + # <font> 0644 arabic letter lam +1EEAC ARABIC MATHEMATICAL DOUBLE-STRUCK MEEM + # <font> 0645 arabic letter meem +1EEAD ARABIC MATHEMATICAL DOUBLE-STRUCK NOON + # <font> 0646 arabic letter noon +1EEAE ARABIC MATHEMATICAL DOUBLE-STRUCK SEEN + # <font> 0633 arabic letter seen +1EEAF ARABIC MATHEMATICAL DOUBLE-STRUCK AIN + # <font> 0639 arabic letter ain +1EEB0 ARABIC MATHEMATICAL DOUBLE-STRUCK FEH + # <font> 0641 arabic letter feh +1EEB1 ARABIC MATHEMATICAL DOUBLE-STRUCK SAD + # <font> 0635 arabic letter sad +1EEB2 ARABIC MATHEMATICAL DOUBLE-STRUCK QAF + # <font> 0642 arabic letter qaf +1EEB3 ARABIC MATHEMATICAL DOUBLE-STRUCK REH + # <font> 0631 arabic letter reh +1EEB4 ARABIC MATHEMATICAL DOUBLE-STRUCK SHEEN + # <font> 0634 arabic letter sheen +1EEB5 ARABIC MATHEMATICAL DOUBLE-STRUCK TEH + # <font> 062A arabic letter teh +1EEB6 ARABIC MATHEMATICAL DOUBLE-STRUCK THEH + # <font> 062B arabic letter theh +1EEB7 ARABIC MATHEMATICAL DOUBLE-STRUCK KHAH + # <font> 062E arabic letter khah +1EEB8 ARABIC MATHEMATICAL DOUBLE-STRUCK THAL + # <font> 0630 arabic letter thal +1EEB9 ARABIC MATHEMATICAL DOUBLE-STRUCK DAD + # <font> 0636 arabic letter dad +1EEBA ARABIC MATHEMATICAL DOUBLE-STRUCK ZAH + # <font> 0638 arabic letter zah +1EEBB ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN + # <font> 063A arabic letter ghain +@ Stretching operators +@+ The following operators stretch based on the width of the text that is displayed below or above them. 
+1EEF0 ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL + * used in Arabic mathematics to denote summation + * stretched at the tatweel + x (n-ary summation - 2211) +1EEF1 ARABIC MATHEMATICAL OPERATOR HAH WITH DAL + * used in Persian mathematics to denote limits + * stretched between the hah and the dal @@ 1F000 Mahjong Tiles 1F02F @ Prevailing wind tiles 1F000 MAHJONG TILE EAST WIND @@ -34238,7 +35518,8 @@ FFFF <not a character> 1F092 DOMINO TILE VERTICAL-06-05 1F093 DOMINO TILE VERTICAL-06-06 @@ 1F0A0 Playing Cards 1F0FF -@+ These characters are used to represent the 52-card and 56-card variants of modern playing cards, as well as the 56-card Minor Arcana of the Western Tarot. +@+ These characters are used to represent the 52-card and 56-card variants of modern playing cards, as well as the 56-card Minor Arcana of the Western Tarot. The glyphs shown in the charts have only a symbolic and schematic equivalence to particular varieties of actual playing cards. +@ Back of card 1F0A0 PLAYING CARD BACK @ Spades or swords 1F0A1 PLAYING CARD ACE OF SPADES @@ -34293,6 +35574,7 @@ FFFF <not a character> 1F0CC PLAYING CARD KNIGHT OF DIAMONDS 1F0CD PLAYING CARD QUEEN OF DIAMONDS 1F0CE PLAYING CARD KING OF DIAMONDS +@ Joker 1F0CF PLAYING CARD BLACK JOKER @ Clubs or wands 1F0D1 PLAYING CARD ACE OF CLUBS @@ -34309,6 +35591,7 @@ FFFF <not a character> 1F0DC PLAYING CARD KNIGHT OF CLUBS 1F0DD PLAYING CARD QUEEN OF CLUBS 1F0DE PLAYING CARD KING OF CLUBS +@ Joker 1F0DF PLAYING CARD WHITE JOKER * may also be red @@ 1F100 Enclosed Alphanumeric Supplement 1F1FF @@ -34533,6 +35816,17 @@ FFFF <not a character> 1F167 NEGATIVE CIRCLED LATIN CAPITAL LETTER X 1F168 NEGATIVE CIRCLED LATIN CAPITAL LETTER Y 1F169 NEGATIVE CIRCLED LATIN CAPITAL LETTER Z +@ Raised squared Latin sequences +1F16A RAISED MC SIGN + = marque de commerce + * used in Canada + x (trade mark sign - 2122) + # <super> 004D 0043 +1F16B RAISED MD SIGN + = marque déposée + * used in Canada + x (registered sign - 00AE) 
+ # <super> 004D 0044 @ White on black squared Latin letters @+ The square edges may be slightly rounded. 1F170 NEGATIVE SQUARED LATIN CAPITAL LETTER A @@ -35330,6 +36624,7 @@ FFFF <not a character> @ Comic style symbols 1F4A0 DIAMOND SHAPE WITH A DOT INSIDE = kawaii, cute + * meaning of cuteness is based on association of glyph with shape of a flower x (white diamond with centred dot - 27D0) 1F4A1 ELECTRIC LIGHT BULB = idea @@ -35565,6 +36860,16 @@ FFFF <not a character> 1F53D DOWN-POINTING SMALL RED TRIANGLE = play arrow down x (black down-pointing small triangle - 25BE) +@ Religious symbols +1F540 CIRCLED CROSS POMMEE + * Orthodox typikon symbol for great feast service +1F541 CROSS POMMEE WITH HALF-CIRCLE BELOW + * Orthodox typikon symbol for vigil service +1F542 CROSS POMMEE + * Orthodox typikon symbol for Polyeleos + x (four teardrop-spoked asterisk - 2722) +1F543 NOTCHED LEFT SEMICIRCLE WITH THREE DOTS + * Orthodox typikon symbol for lower rank feast @ Clock face symbols 1F550 CLOCK FACE ONE OCLOCK x (watch - 231A) @@ -35602,6 +36907,7 @@ FFFF <not a character> @@ 1F600 Emoticons 1F64F @+ The emoticons have been organized by mouth shape to make it easier to locate the different characters in the code chart. 
@ Faces +1F600 GRINNING FACE 1F601 GRINNING FACE WITH SMILING EYES 1F602 FACE WITH TEARS OF JOY 1F603 SMILING FACE WITH OPEN MOUTH @@ -35620,33 +36926,45 @@ FFFF <not a character> 1F60F SMIRKING FACE 1F610 NEUTRAL FACE * used for the West Wind in some Mahjong annotation +1F611 EXPRESSIONLESS FACE 1F612 UNAMUSED FACE 1F613 FACE WITH COLD SWEAT 1F614 PENSIVE FACE +1F615 CONFUSED FACE 1F616 CONFOUNDED FACE +1F617 KISSING FACE 1F618 FACE THROWING A KISS +1F619 KISSING FACE WITH SMILING EYES 1F61A KISSING FACE WITH CLOSED EYES +1F61B FACE WITH STUCK-OUT TONGUE 1F61C FACE WITH STUCK-OUT TONGUE AND WINKING EYE * kidding, not serious 1F61D FACE WITH STUCK-OUT TONGUE AND TIGHTLY-CLOSED EYES * kidding, not serious 1F61E DISAPPOINTED FACE x (white frowning face - 2639) +1F61F WORRIED FACE 1F620 ANGRY FACE 1F621 POUTING FACE 1F622 CRYING FACE 1F623 PERSEVERING FACE 1F624 FACE WITH LOOK OF TRIUMPH 1F625 DISAPPOINTED BUT RELIEVED FACE +1F626 FROWNING FACE WITH OPEN MOUTH +1F627 ANGUISHED FACE 1F628 FEARFUL FACE 1F629 WEARY FACE 1F62A SLEEPY FACE 1F62B TIRED FACE +1F62C GRIMACING FACE 1F62D LOUDLY CRYING FACE +1F62E FACE WITH OPEN MOUTH +1F62F HUSHED FACE 1F630 FACE WITH OPEN MOUTH AND COLD SWEAT 1F631 FACE SCREAMING IN FEAR 1F632 ASTONISHED FACE 1F633 FLUSHED FACE +1F634 SLEEPING FACE 1F635 DIZZY FACE 1F636 FACE WITHOUT MOUTH * used for the South Wind in some Mahjong annotation @@ -35981,6 +37299,7 @@ FFFF <not a character> 1F76C ALCHEMICAL SYMBOL FOR BATH OF VAPOURS = balneum vaporis 1F76D ALCHEMICAL SYMBOL FOR RETORT + x (alembic - 2697) @ Time 1F76E ALCHEMICAL SYMBOL FOR HOUR x (hourglass - 231B) diff --git a/lib/unicore/NormalizationCorrections.txt b/lib/unicore/NormalizationCorrections.txt index 9c9c2e4420..61800b82ad 100644 --- a/lib/unicore/NormalizationCorrections.txt +++ b/lib/unicore/NormalizationCorrections.txt @@ -1,14 +1,14 @@ -# NormalizationCorrections-6.0.0.txt -# Date: 2010-05-19, 11:21:00 PDT [KW] +# NormalizationCorrections-6.1.0.txt +# Date: 2011-06-23, 
00:46:00 GMT [KW, LI] # # This file is a normative contributory data file in the # Unicode Character Database. # -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # -# The normalization stabilization policy of the Unicode -# Consortium ordinarily precludes any change to the decomposition +# The normalization stability policy of the Unicode Consortium +# ordinarily precludes any change to the decomposition # for any character, once established in a relevant version # of the UnicodeData.txt data file. However, under certain # exceptional (and rare) conditions, an error in a decomposition diff --git a/lib/unicore/PropList.txt b/lib/unicore/PropList.txt index eeeb81845e..f9dcb2ae74 100644 --- a/lib/unicore/PropList.txt +++ b/lib/unicore/PropList.txt @@ -1,8 +1,8 @@ -# PropList-6.0.0.txt -# Date: 2010-08-19, 00:48:28 GMT [MD] +# PropList-6.1.0.txt +# Date: 2011-11-30, 01:49:54 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. 
# For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ @@ -50,6 +50,7 @@ 2212 ; Dash # Sm MINUS SIGN 2E17 ; Dash # Pd DOUBLE OBLIQUE HYPHEN 2E1A ; Dash # Pd HYPHEN WITH DIAERESIS +2E3A..2E3B ; Dash # Pd [2] TWO-EM DASH..THREE-EM DASH 301C ; Dash # Pd WAVE DASH 3030 ; Dash # Pd WAVY DASH 30A0 ; Dash # Pd KATAKANA-HIRAGANA DOUBLE HYPHEN @@ -58,7 +59,7 @@ FE58 ; Dash # Pd SMALL EM DASH FE63 ; Dash # Pd SMALL HYPHEN-MINUS FF0D ; Dash # Pd FULLWIDTH HYPHEN-MINUS -# Total code points: 25 +# Total code points: 27 # ================================================ @@ -158,6 +159,7 @@ A92F ; Terminal_Punctuation # Po KAYAH LI SIGN SHYA A9C7..A9C9 ; Terminal_Punctuation # Po [3] JAVANESE PADA PANGKAT..JAVANESE PADA LUNGSI AA5D..AA5F ; Terminal_Punctuation # Po [3] CHAM PUNCTUATION DANDA..CHAM PUNCTUATION TRIPLE DANDA AADF ; Terminal_Punctuation # Po TAI VIET SYMBOL KOI KOI +AAF0..AAF1 ; Terminal_Punctuation # Po [2] MEETEI MAYEK CHEIKHAN..MEETEI MAYEK AHANG KHUDAM ABEB ; Terminal_Punctuation # Po MEETEI MAYEK CHEIKHEI FE50..FE52 ; Terminal_Punctuation # Po [3] SMALL COMMA..SMALL FULL STOP FE54..FE57 ; Terminal_Punctuation # Po [4] SMALL SEMICOLON..SMALL EXCLAMATION MARK @@ -175,9 +177,11 @@ FF64 ; Terminal_Punctuation # Po HALFWIDTH IDEOGRAPHIC COMMA 10B3A..10B3F ; Terminal_Punctuation # Po [6] TINY TWO DOTS OVER ONE DOT PUNCTUATION..LARGE ONE RING OVER TWO RINGS PUNCTUATION 11047..1104D ; Terminal_Punctuation # Po [7] BRAHMI DANDA..BRAHMI PUNCTUATION LOTUS 110BE..110C1 ; Terminal_Punctuation # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA +11141..11143 ; Terminal_Punctuation # Po [3] CHAKMA DANDA..CHAKMA QUESTION MARK +111C5..111C6 ; Terminal_Punctuation # Po [2] SHARADA DANDA..SHARADA DOUBLE DANDA 12470..12473 ; Terminal_Punctuation # Po [4] CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER..CUNEIFORM PUNCTUATION SIGN DIAGONAL TRICOLON -# Total code points: 169 +# Total code points: 176 # 
================================================ @@ -320,8 +324,41 @@ FF3E ; Other_Math # Sk FULLWIDTH CIRCUMFLEX ACCENT 1D7AA..1D7C2 ; Other_Math # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA 1D7C4..1D7CB ; Other_Math # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA 1D7CE..1D7FF ; Other_Math # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE - -# Total code points: 1217 +1EE00..1EE03 ; Other_Math # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; Other_Math # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; Other_Math # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; Other_Math # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; Other_Math # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; Other_Math # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; Other_Math # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; Other_Math # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; Other_Math # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; Other_Math # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; Other_Math # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; Other_Math # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; Other_Math # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; Other_Math # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; Other_Math # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; Other_Math # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; Other_Math # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; Other_Math # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; Other_Math # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; Other_Math # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; Other_Math # Lo ARABIC MATHEMATICAL TAILED DOTLESS 
QAF +1EE61..1EE62 ; Other_Math # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; Other_Math # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; Other_Math # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; Other_Math # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; Other_Math # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; Other_Math # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; Other_Math # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; Other_Math # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; Other_Math # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; Other_Math # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; Other_Math # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; Other_Math # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN + +# Total code points: 1358 # ================================================ @@ -365,6 +402,8 @@ FF41..FF46 ; Hex_Digit # L& [6] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L 081B..0823 ; Other_Alphabetic # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A 0825..0827 ; Other_Alphabetic # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U 0829..082C ; Other_Alphabetic # Mn [4] SAMARITAN VOWEL SIGN LONG I..SAMARITAN VOWEL SIGN SUKUN +08E4..08E9 ; Other_Alphabetic # Mn [6] ARABIC CURLY FATHA..ARABIC CURLY KASRATAN +08F0..08FE ; Other_Alphabetic # Mn [15] ARABIC OPEN FATHATAN..ARABIC DAMMA WITH DOT 0900..0902 ; Other_Alphabetic # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA 0903 ; Other_Alphabetic # Mc DEVANAGARI SIGN VISARGA 093A ; 
Other_Alphabetic # Mn DEVANAGARI VOWEL SIGN OE @@ -525,6 +564,7 @@ FF41..FF46 ; Hex_Digit # L& [6] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L 1BA2..1BA5 ; Other_Alphabetic # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU 1BA6..1BA7 ; Other_Alphabetic # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG 1BA8..1BA9 ; Other_Alphabetic # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG +1BAC..1BAD ; Other_Alphabetic # Mc [2] SUNDANESE CONSONANT SIGN PASANGAN MA..SUNDANESE CONSONANT SIGN PASANGAN WA 1BE7 ; Other_Alphabetic # Mc BATAK VOWEL SIGN E 1BE8..1BE9 ; Other_Alphabetic # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE 1BEA..1BEC ; Other_Alphabetic # Mc [3] BATAK VOWEL SIGN I..BATAK VOWEL SIGN O @@ -534,9 +574,11 @@ FF41..FF46 ; Hex_Digit # L& [6] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L 1C24..1C2B ; Other_Alphabetic # Mc [8] LEPCHA SUBJOINED LETTER YA..LEPCHA VOWEL SIGN UU 1C2C..1C33 ; Other_Alphabetic # Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T 1C34..1C35 ; Other_Alphabetic # Mc [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG -1CF2 ; Other_Alphabetic # Mc VEDIC SIGN ARDHAVISARGA +1CF2..1CF3 ; Other_Alphabetic # Mc [2] VEDIC SIGN ARDHAVISARGA..VEDIC SIGN ROTATED ARDHAVISARGA 24B6..24E9 ; Other_Alphabetic # So [52] CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN SMALL LETTER Z 2DE0..2DFF ; Other_Alphabetic # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS +A674..A67B ; Other_Alphabetic # Mn [8] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC LETTER OMEGA +A69F ; Other_Alphabetic # Mn COMBINING CYRILLIC LETTER IOTIFIED E A823..A824 ; Other_Alphabetic # Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I A825..A826 ; Other_Alphabetic # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E A827 ; Other_Alphabetic # Mc SYLOTI NAGRI VOWEL SIGN OO @@ -564,6 +606,10 @@ AAB0 ; Other_Alphabetic # Mn TAI VIET MAI KANG 
AAB2..AAB4 ; Other_Alphabetic # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U AAB7..AAB8 ; Other_Alphabetic # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA AABE ; Other_Alphabetic # Mn TAI VIET VOWEL AM +AAEB ; Other_Alphabetic # Mc MEETEI MAYEK VOWEL SIGN II +AAEC..AAED ; Other_Alphabetic # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI +AAEE..AAEF ; Other_Alphabetic # Mc [2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU +AAF5 ; Other_Alphabetic # Mc MEETEI MAYEK VOWEL SIGN VISARGA ABE3..ABE4 ; Other_Alphabetic # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP ABE5 ; Other_Alphabetic # Mn MEETEI MAYEK VOWEL SIGN ANAP ABE6..ABE7 ; Other_Alphabetic # Mc [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP @@ -581,8 +627,23 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA 110B0..110B2 ; Other_Alphabetic # Mc [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II 110B3..110B6 ; Other_Alphabetic # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI 110B7..110B8 ; Other_Alphabetic # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU - -# Total code points: 795 +11100..11102 ; Other_Alphabetic # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11127..1112B ; Other_Alphabetic # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112C ; Other_Alphabetic # Mc CHAKMA VOWEL SIGN E +1112D..11132 ; Other_Alphabetic # Mn [6] CHAKMA VOWEL SIGN AI..CHAKMA AU MARK +11180..11181 ; Other_Alphabetic # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +11182 ; Other_Alphabetic # Mc SHARADA SIGN VISARGA +111B3..111B5 ; Other_Alphabetic # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II +111B6..111BE ; Other_Alphabetic # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +111BF ; Other_Alphabetic # Mc SHARADA VOWEL SIGN AU +116AB ; Other_Alphabetic # Mn TAKRI SIGN ANUSVARA +116AC ; Other_Alphabetic # Mc TAKRI SIGN VISARGA +116AD ; Other_Alphabetic # Mn TAKRI VOWEL SIGN AA +116AE..116AF ; Other_Alphabetic # Mc [2] TAKRI 
VOWEL SIGN I..TAKRI VOWEL SIGN II +116B0..116B5 ; Other_Alphabetic # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +16F51..16F7E ; Other_Alphabetic # Mc [46] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN NG + +# Total code points: 922 # ================================================ @@ -591,16 +652,15 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA 3021..3029 ; Ideographic # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE 3038..303A ; Ideographic # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY 3400..4DB5 ; Ideographic # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5 -4E00..9FCB ; Ideographic # Lo [20940] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCB -F900..FA2D ; Ideographic # Lo [302] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA2D -FA30..FA6D ; Ideographic # Lo [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D +4E00..9FCC ; Ideographic # Lo [20941] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCC +F900..FA6D ; Ideographic # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 20000..2A6D6 ; Ideographic # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6 2A700..2B734 ; Ideographic # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734 2B740..2B81D ; Ideographic # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D 2F800..2FA1D ; Ideographic # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D -# Total code points: 75630 +# Total code points: 75633 # ================================================ @@ -645,6 +705,7 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM 07EB..07F3 ; Diacritic # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE 07F4..07F5 ; Diacritic # Lm [2] NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE 
0818..0819 ; Diacritic # Mn [2] SAMARITAN MARK OCCLUSION..SAMARITAN MARK DAGESH +08E4..08FE ; Diacritic # Mn [27] ARABIC CURLY FATHA..ARABIC DAMMA WITH DOT 093C ; Diacritic # Mn DEVANAGARI SIGN NUKTA 094D ; Diacritic # Mn DEVANAGARI SIGN VIRAMA 0951..0954 ; Diacritic # Mn [4] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI ACUTE ACCENT @@ -689,6 +750,7 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM 1B44 ; Diacritic # Mc BALINESE ADEG ADEG 1B6B..1B73 ; Diacritic # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG 1BAA ; Diacritic # Mc SUNDANESE SIGN PAMAAEH +1BAB ; Diacritic # Mn SUNDANESE SIGN VIRAMA 1C36..1C37 ; Diacritic # Mn [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA 1C78..1C7D ; Diacritic # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD 1CD0..1CD2 ; Diacritic # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA @@ -697,8 +759,8 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM 1CE1 ; Diacritic # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA 1CE2..1CE8 ; Diacritic # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL 1CED ; Diacritic # Mn VEDIC SIGN TIRYAK -1D2C..1D61 ; Diacritic # Lm [54] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI -1D62..1D6A ; Diacritic # L& [9] LATIN SUBSCRIPT SMALL LETTER I..GREEK SUBSCRIPT SMALL LETTER CHI +1CF4 ; Diacritic # Mn VEDIC TONE CANDRA ABOVE +1D2C..1D6A ; Diacritic # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI 1DC4..1DCF ; Diacritic # Mn [12] COMBINING MACRON-ACUTE..COMBINING ZIGZAG BELOW 1DFD..1DFF ; Diacritic # Mn [3] COMBINING ALMOST EQUAL TO BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW 1FBD ; Diacritic # Sk GREEK KORONIS @@ -709,7 +771,8 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM 1FFD..1FFE ; Diacritic # Sk [2] GREEK OXIA..GREEK DASIA 2CEF..2CF1 ; Diacritic # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS 2E2F ; 
Diacritic # Lm VERTICAL TILDE -302A..302F ; Diacritic # Mn [6] IDEOGRAPHIC LEVEL TONE MARK..HANGUL DOUBLE DOT TONE MARK +302A..302D ; Diacritic # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK +302E..302F ; Diacritic # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK 3099..309A ; Diacritic # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK 309B..309C ; Diacritic # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK 30FC ; Diacritic # Lm KATAKANA-HIRAGANA PROLONGED SOUND MARK @@ -720,6 +783,7 @@ A6F0..A6F1 ; Diacritic # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINI A717..A71F ; Diacritic # Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK A720..A721 ; Diacritic # Sk [2] MODIFIER LETTER STRESS AND HIGH TONE..MODIFIER LETTER STRESS AND LOW TONE A788 ; Diacritic # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT +A7F8..A7F9 ; Diacritic # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE A8C4 ; Diacritic # Mn SAURASHTRA SIGN VIRAMA A8E0..A8F1 ; Diacritic # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA A92B..A92D ; Diacritic # Mn [3] KAYAH LI TONE PLOPHU..KAYAH LI TONE CALYA PLOPHU @@ -732,6 +796,7 @@ AABF ; Diacritic # Mn TAI VIET TONE MAI EK AAC0 ; Diacritic # Lo TAI VIET TONE MAI NUENG AAC1 ; Diacritic # Mn TAI VIET TONE MAI THO AAC2 ; Diacritic # Lo TAI VIET TONE MAI SONG +AAF6 ; Diacritic # Mn MEETEI MAYEK VIRAMA ABEC ; Diacritic # Mc MEETEI MAYEK LUM IYEK ABED ; Diacritic # Mn MEETEI MAYEK APUN IYEK FB1E ; Diacritic # Mn HEBREW POINT JUDEO-SPANISH VARIKA @@ -742,13 +807,19 @@ FF70 ; Diacritic # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND FF9E..FF9F ; Diacritic # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK FFE3 ; Diacritic # Sk FULLWIDTH MACRON 110B9..110BA ; Diacritic # Mn [2] KAITHI SIGN VIRAMA..KAITHI 
SIGN NUKTA +11133..11134 ; Diacritic # Mn [2] CHAKMA VIRAMA..CHAKMA MAAYYAA +111C0 ; Diacritic # Mc SHARADA SIGN VIRAMA +116B6 ; Diacritic # Mc TAKRI SIGN VIRAMA +116B7 ; Diacritic # Mn TAKRI SIGN NUKTA +16F8F..16F92 ; Diacritic # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW +16F93..16F9F ; Diacritic # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 1D167..1D169 ; Diacritic # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 1D16D..1D172 ; Diacritic # Mc [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5 1D17B..1D182 ; Diacritic # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE 1D185..1D18B ; Diacritic # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE 1D1AA..1D1AD ; Diacritic # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO -# Total code points: 639 +# Total code points: 693 # ================================================ @@ -758,6 +829,7 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON 07FA ; Extender # Lm NKO LAJANYALAN 0E46 ; Extender # Lm THAI CHARACTER MAIYAMOK 0EC6 ; Extender # Lm LAO KO LA +180A ; Extender # Po MONGOLIAN NIRUGU 1843 ; Extender # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN 1AA7 ; Extender # Lm TAI THAM SIGN MAI YAMOK 1C36 ; Extender # Mn LEPCHA SIGN RAN @@ -771,27 +843,33 @@ A60C ; Extender # Lm VAI SYLLABLE LENGTHENER A9CF ; Extender # Lm JAVANESE PANGRANGKEP AA70 ; Extender # Lm MYANMAR MODIFIER LETTER KHAMTI REDUPLICATION AADD ; Extender # Lm TAI VIET SYMBOL SAM +AAF3..AAF4 ; Extender # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK FF70 ; Extender # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK -# Total code points: 28 +# Total code points: 31 # ================================================ +00AA ; Other_Lowercase # Lo FEMININE ORDINAL INDICATOR +00BA ; Other_Lowercase # Lo MASCULINE ORDINAL INDICATOR 02B0..02B8 ; Other_Lowercase # Lm [9] MODIFIER LETTER SMALL 
H..MODIFIER LETTER SMALL Y 02C0..02C1 ; Other_Lowercase # Lm [2] MODIFIER LETTER GLOTTAL STOP..MODIFIER LETTER REVERSED GLOTTAL STOP 02E0..02E4 ; Other_Lowercase # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP 0345 ; Other_Lowercase # Mn COMBINING GREEK YPOGEGRAMMENI 037A ; Other_Lowercase # Lm GREEK YPOGEGRAMMENI -1D2C..1D61 ; Other_Lowercase # Lm [54] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI +1D2C..1D6A ; Other_Lowercase # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI 1D78 ; Other_Lowercase # Lm MODIFIER LETTER CYRILLIC EN 1D9B..1DBF ; Other_Lowercase # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA -2090..2094 ; Other_Lowercase # Lm [5] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER SCHWA +2071 ; Other_Lowercase # Lm SUPERSCRIPT LATIN SMALL LETTER I +207F ; Other_Lowercase # Lm SUPERSCRIPT LATIN SMALL LETTER N +2090..209C ; Other_Lowercase # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T 2170..217F ; Other_Lowercase # Nl [16] SMALL ROMAN NUMERAL ONE..SMALL ROMAN NUMERAL ONE THOUSAND 24D0..24E9 ; Other_Lowercase # So [26] CIRCLED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z -2C7D ; Other_Lowercase # Lm MODIFIER LETTER CAPITAL V +2C7C..2C7D ; Other_Lowercase # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V A770 ; Other_Lowercase # Lm MODIFIER LETTER US +A7F8..A7F9 ; Other_Lowercase # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE -# Total code points: 159 +# Total code points: 183 # ================================================ @@ -838,11 +916,12 @@ FFFFE..FFFFF ; Noncharacter_Code_Point # Cn [2] <noncharacter-FFFFE>..<noncha 0DCF ; Other_Grapheme_Extend # Mc SINHALA VOWEL SIGN AELA-PILLA 0DDF ; Other_Grapheme_Extend # Mc SINHALA VOWEL SIGN GAYANUKITTA 200C..200D ; Other_Grapheme_Extend # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER +302E..302F ; Other_Grapheme_Extend # Mc 
[2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK FF9E..FF9F ; Other_Grapheme_Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK 1D165 ; Other_Grapheme_Extend # Mc MUSICAL SYMBOL COMBINING STEM 1D16E..1D172 ; Other_Grapheme_Extend # Mc [5] MUSICAL SYMBOL COMBINING FLAG-1..MUSICAL SYMBOL COMBINING FLAG-5 -# Total code points: 23 +# Total code points: 25 # ================================================ @@ -868,7 +947,7 @@ FF9E..FF9F ; Other_Grapheme_Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND # ================================================ 3400..4DB5 ; Unified_Ideograph # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5 -4E00..9FCB ; Unified_Ideograph # Lo [20940] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCB +4E00..9FCC ; Unified_Ideograph # Lo [20941] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCC FA0E..FA0F ; Unified_Ideograph # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA0E..CJK COMPATIBILITY IDEOGRAPH-FA0F FA11 ; Unified_Ideograph # Lo CJK COMPATIBILITY IDEOGRAPH-FA11 FA13..FA14 ; Unified_Ideograph # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA13..CJK COMPATIBILITY IDEOGRAPH-FA14 @@ -880,12 +959,13 @@ FA27..FA29 ; Unified_Ideograph # Lo [3] CJK COMPATIBILITY IDEOGRAPH-FA27..C 2A700..2B734 ; Unified_Ideograph # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734 2B740..2B81D ; Unified_Ideograph # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D -# Total code points: 74616 +# Total code points: 74617 # ================================================ 034F ; Other_Default_Ignorable_Code_Point # Mn COMBINING GRAPHEME JOINER 115F..1160 ; Other_Default_Ignorable_Code_Point # Lo [2] HANGUL CHOSEONG FILLER..HANGUL JUNGSEONG FILLER +17B4..17B5 ; Other_Default_Ignorable_Code_Point # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA 2065..2069 ; Other_Default_Ignorable_Code_Point # Cn [5] <reserved-2065>..<reserved-2069> 3164 ; 
Other_Default_Ignorable_Code_Point # Lo HANGUL FILLER FFA0 ; Other_Default_Ignorable_Code_Point # Lo HALFWIDTH HANGUL FILLER @@ -895,7 +975,7 @@ E0002..E001F ; Other_Default_Ignorable_Code_Point # Cn [30] <reserved-E0002>.. E0080..E00FF ; Other_Default_Ignorable_Code_Point # Cn [128] <reserved-E0080>..<reserved-E00FF> E01F0..E0FFF ; Other_Default_Ignorable_Code_Point # Cn [3600] <reserved-E01F0>..<reserved-E0FFF> -# Total code points: 3778 +# Total code points: 3780 # ================================================ @@ -923,7 +1003,7 @@ E0020..E007F ; Deprecated # Cf [96] TAG SPACE..CANCEL TAG 03F3 ; Soft_Dotted # L& GREEK LETTER YOT 0456 ; Soft_Dotted # L& CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I 0458 ; Soft_Dotted # L& CYRILLIC SMALL LETTER JE -1D62 ; Soft_Dotted # L& LATIN SUBSCRIPT SMALL LETTER I +1D62 ; Soft_Dotted # Lm LATIN SUBSCRIPT SMALL LETTER I 1D96 ; Soft_Dotted # L& LATIN SMALL LETTER I WITH RETROFLEX HOOK 1DA4 ; Soft_Dotted # Lm MODIFIER LETTER SMALL I WITH STROKE 1DA8 ; Soft_Dotted # Lm MODIFIER LETTER SMALL J WITH CROSSED-TAIL @@ -931,7 +1011,7 @@ E0020..E007F ; Deprecated # Cf [96] TAG SPACE..CANCEL TAG 1ECB ; Soft_Dotted # L& LATIN SMALL LETTER I WITH DOT BELOW 2071 ; Soft_Dotted # Lm SUPERSCRIPT LATIN SMALL LETTER I 2148..2149 ; Soft_Dotted # L& [2] DOUBLE-STRUCK ITALIC SMALL I..DOUBLE-STRUCK ITALIC SMALL J -2C7C ; Soft_Dotted # L& LATIN SUBSCRIPT SMALL LETTER J +2C7C ; Soft_Dotted # Lm LATIN SUBSCRIPT SMALL LETTER J 1D422..1D423 ; Soft_Dotted # L& [2] MATHEMATICAL BOLD SMALL I..MATHEMATICAL BOLD SMALL J 1D456..1D457 ; Soft_Dotted # L& [2] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL ITALIC SMALL J 1D48A..1D48B ; Soft_Dotted # L& [2] MATHEMATICAL BOLD ITALIC SMALL I..MATHEMATICAL BOLD ITALIC SMALL J @@ -1014,6 +1094,7 @@ A8CE..A8CF ; STerm # Po [2] SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA A92F ; STerm # Po KAYAH LI SIGN SHYA A9C8..A9C9 ; STerm # Po [2] JAVANESE PADA LINGSA..JAVANESE PADA LUNGSI AA5D..AA5F ; STerm # Po [3] CHAM PUNCTUATION 
DANDA..CHAM PUNCTUATION TRIPLE DANDA +AAF0..AAF1 ; STerm # Po [2] MEETEI MAYEK CHEIKHAN..MEETEI MAYEK AHANG KHUDAM ABEB ; STerm # Po MEETEI MAYEK CHEIKHEI FE52 ; STerm # Po SMALL FULL STOP FE56..FE57 ; STerm # Po [2] SMALL QUESTION MARK..SMALL EXCLAMATION MARK @@ -1024,8 +1105,10 @@ FF61 ; STerm # Po HALFWIDTH IDEOGRAPHIC FULL STOP 10A56..10A57 ; STerm # Po [2] KHAROSHTHI PUNCTUATION DANDA..KHAROSHTHI PUNCTUATION DOUBLE DANDA 11047..11048 ; STerm # Po [2] BRAHMI DANDA..BRAHMI DOUBLE DANDA 110BE..110C1 ; STerm # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA +11141..11143 ; STerm # Po [3] CHAKMA DANDA..CHAKMA QUESTION MARK +111C5..111C6 ; STerm # Po [2] SHARADA DANDA..SHARADA DOUBLE DANDA -# Total code points: 76 +# Total code points: 83 # ================================================ @@ -1072,14 +1155,15 @@ E0100..E01EF ; Variation_Selector # Mn [240] VARIATION SELECTOR-17..VARIATION S 007E ; Pattern_Syntax # Sm TILDE 00A1 ; Pattern_Syntax # Po INVERTED EXCLAMATION MARK 00A2..00A5 ; Pattern_Syntax # Sc [4] CENT SIGN..YEN SIGN -00A6..00A7 ; Pattern_Syntax # So [2] BROKEN BAR..SECTION SIGN +00A6 ; Pattern_Syntax # So BROKEN BAR +00A7 ; Pattern_Syntax # Po SECTION SIGN 00A9 ; Pattern_Syntax # So COPYRIGHT SIGN 00AB ; Pattern_Syntax # Pi LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 00AC ; Pattern_Syntax # Sm NOT SIGN 00AE ; Pattern_Syntax # So REGISTERED SIGN 00B0 ; Pattern_Syntax # So DEGREE SIGN 00B1 ; Pattern_Syntax # Sm PLUS-MINUS SIGN -00B6 ; Pattern_Syntax # So PILCROW SIGN +00B6 ; Pattern_Syntax # Po PILCROW SIGN 00BB ; Pattern_Syntax # Pf RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK 00BF ; Pattern_Syntax # Po INVERTED QUESTION MARK 00D7 ; Pattern_Syntax # Sm MULTIPLICATION SIGN @@ -1173,11 +1257,7 @@ E0100..E01EF ; Variation_Selector # Mn [240] VARIATION SELECTOR-17..VARIATION S 27C0..27C4 ; Pattern_Syntax # Sm [5] THREE DIMENSIONAL ANGLE..OPEN SUPERSET 27C5 ; Pattern_Syntax # Ps LEFT S-SHAPED BAG DELIMITER 27C6 ; Pattern_Syntax # Pe RIGHT S-SHAPED BAG DELIMITER 
-27C7..27CA ; Pattern_Syntax # Sm [4] OR WITH DOT INSIDE..VERTICAL BAR WITH HORIZONTAL STROKE -27CB ; Pattern_Syntax # Cn <reserved-27CB> -27CC ; Pattern_Syntax # Sm LONG DIVISION -27CD ; Pattern_Syntax # Cn <reserved-27CD> -27CE..27E5 ; Pattern_Syntax # Sm [24] SQUARED LOGICAL AND..WHITE SQUARE WITH RIGHTWARDS TICK +27C7..27E5 ; Pattern_Syntax # Sm [31] OR WITH DOT INSIDE..WHITE SQUARE WITH RIGHTWARDS TICK 27E6 ; Pattern_Syntax # Ps MATHEMATICAL LEFT WHITE SQUARE BRACKET 27E7 ; Pattern_Syntax # Pe MATHEMATICAL RIGHT WHITE SQUARE BRACKET 27E8 ; Pattern_Syntax # Ps MATHEMATICAL LEFT ANGLE BRACKET @@ -1260,8 +1340,9 @@ E0100..E01EF ; Variation_Selector # Mn [240] VARIATION SELECTOR-17..VARIATION S 2E29 ; Pattern_Syntax # Pe RIGHT DOUBLE PARENTHESIS 2E2A..2E2E ; Pattern_Syntax # Po [5] TWO DOTS OVER ONE DOT PUNCTUATION..REVERSED QUESTION MARK 2E2F ; Pattern_Syntax # Lm VERTICAL TILDE -2E30..2E31 ; Pattern_Syntax # Po [2] RING POINT..WORD SEPARATOR MIDDLE DOT -2E32..2E7F ; Pattern_Syntax # Cn [78] <reserved-2E32>..<reserved-2E7F> +2E30..2E39 ; Pattern_Syntax # Po [10] RING POINT..TOP HALF SECTION SIGN +2E3A..2E3B ; Pattern_Syntax # Pd [2] TWO-EM DASH..THREE-EM DASH +2E3C..2E7F ; Pattern_Syntax # Cn [68] <reserved-2E3C>..<reserved-2E7F> 3001..3003 ; Pattern_Syntax # Po [3] IDEOGRAPHIC COMMA..DITTO MARK 3008 ; Pattern_Syntax # Ps LEFT ANGLE BRACKET 3009 ; Pattern_Syntax # Pe RIGHT ANGLE BRACKET diff --git a/lib/unicore/PropValueAliases.txt b/lib/unicore/PropValueAliases.txt index 819e800e13..2f7bde28ec 100644 --- a/lib/unicore/PropValueAliases.txt +++ b/lib/unicore/PropValueAliases.txt @@ -1,15 +1,14 @@ -# PropertyValueAliases-6.0.0.txt -# Date: 2010-07-17, 22:44:06 GMT [MD] +# PropertyValueAliases-6.1.0.txt +# Date: 2011-12-07, 23:40:57 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. 
# For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ # # This file contains aliases for property values used in the UCD. # These names can be used for XML formats of UCD data, for regular-expression # property tests, and other programmatic textual descriptions of Unicode data. -# For information on which properties are normative, see UCD.html. # # The names may be translated in appropriate environments, and additional # aliases may be useful. @@ -23,7 +22,6 @@ # property value name is used. # # Second Field: The second field is an abbreviated name. -# If there is no abbreviated name available, the field is marked with "n/a". # # Third Field: The third field is a long name. # @@ -35,7 +33,7 @@ # Loose matching should be applied to all property names and property values, with # the exception of String Property values. With loose matching of property names and # values, the case distinctions, whitespace, and '_' are ignored. For Numeric Property -# values, numeric equivalences are applied: thus "01.00" is equivalent to "1". +# values, numeric equivalencies are applied: thus "01.00" is equivalent to "1". # # NOTE: Property value names are NOT unique across properties. 
For example: # @@ -57,61 +55,62 @@ # ASCII_Hex_Digit (AHex) -AHex; N ; No ; F ; False -AHex; Y ; Yes ; T ; True +AHex; N ; No ; F ; False +AHex; Y ; Yes ; T ; True # Age (age) -age; n/a ; 1.1 -age; n/a ; 2.0 -age; n/a ; 2.1 -age; n/a ; 3.0 -age; n/a ; 3.1 -age; n/a ; 3.2 -age; n/a ; 4.0 -age; n/a ; 4.1 -age; n/a ; 5.0 -age; n/a ; 5.1 -age; n/a ; 5.2 -age; n/a ; 6.0 -age; n/a ; unassigned +age; 1.1 ; V1_1 +age; 2.0 ; V2_0 +age; 2.1 ; V2_1 +age; 3.0 ; V3_0 +age; 3.1 ; V3_1 +age; 3.2 ; V3_2 +age; 4.0 ; V4_0 +age; 4.1 ; V4_1 +age; 5.0 ; V5_0 +age; 5.1 ; V5_1 +age; 5.2 ; V5_2 +age; 6.0 ; V6_0 +age; 6.1 ; V6_1 +age; NA ; Unassigned # Alphabetic (Alpha) -Alpha; N ; No ; F ; False -Alpha; Y ; Yes ; T ; True +Alpha; N ; No ; F ; False +Alpha; Y ; Yes ; T ; True # Bidi_Class (bc) -bc ; AL ; Arabic_Letter -bc ; AN ; Arabic_Number -bc ; B ; Paragraph_Separator -bc ; BN ; Boundary_Neutral -bc ; CS ; Common_Separator -bc ; EN ; European_Number -bc ; ES ; European_Separator -bc ; ET ; European_Terminator -bc ; L ; Left_To_Right -bc ; LRE ; Left_To_Right_Embedding -bc ; LRO ; Left_To_Right_Override -bc ; NSM ; Nonspacing_Mark -bc ; ON ; Other_Neutral -bc ; PDF ; Pop_Directional_Format -bc ; R ; Right_To_Left -bc ; RLE ; Right_To_Left_Embedding -bc ; RLO ; Right_To_Left_Override -bc ; S ; Segment_Separator -bc ; WS ; White_Space +bc ; AL ; Arabic_Letter +bc ; AN ; Arabic_Number +bc ; B ; Paragraph_Separator +bc ; BN ; Boundary_Neutral +bc ; CS ; Common_Separator +bc ; EN ; European_Number +bc ; ES ; European_Separator +bc ; ET ; European_Terminator +bc ; L ; Left_To_Right +bc ; LRE ; Left_To_Right_Embedding +bc ; LRO ; Left_To_Right_Override +bc ; NSM ; Nonspacing_Mark +bc ; ON ; Other_Neutral +bc ; PDF ; Pop_Directional_Format +bc ; R ; Right_To_Left +bc ; RLE ; Right_To_Left_Embedding +bc ; RLO ; Right_To_Left_Override +bc ; S ; Segment_Separator +bc ; WS ; White_Space # Bidi_Control (Bidi_C) -Bidi_C; N ; No ; F ; False -Bidi_C; Y ; Yes ; T ; True +Bidi_C; N ; No ; F ; False 
+Bidi_C; Y ; Yes ; T ; True # Bidi_Mirrored (Bidi_M) -Bidi_M; N ; No ; F ; False -Bidi_M; Y ; Yes ; T ; True +Bidi_M; N ; No ; F ; False +Bidi_M; Y ; Yes ; T ; True # Bidi_Mirroring_Glyph (bmg) @@ -119,239 +118,286 @@ Bidi_M; Y ; Yes ; T # Block (blk) -blk; n/a ; Aegean_Numbers -blk; n/a ; Alchemical_Symbols -blk; n/a ; Alphabetic_Presentation_Forms -blk; n/a ; Ancient_Greek_Musical_Notation -blk; n/a ; Ancient_Greek_Numbers -blk; n/a ; Ancient_Symbols -blk; n/a ; Arabic -blk; n/a ; Arabic_Presentation_Forms_A ; Arabic_Presentation_Forms-A -blk; n/a ; Arabic_Presentation_Forms_B -blk; n/a ; Arabic_Supplement -blk; n/a ; Armenian -blk; n/a ; Arrows -blk; n/a ; Avestan -blk; n/a ; Balinese -blk; n/a ; Bamum -blk; n/a ; Bamum_Supplement -blk; n/a ; Basic_Latin ; ASCII -blk; n/a ; Batak -blk; n/a ; Bengali -blk; n/a ; Block_Elements -blk; n/a ; Bopomofo -blk; n/a ; Bopomofo_Extended -blk; n/a ; Box_Drawing -blk; n/a ; Brahmi -blk; n/a ; Braille_Patterns -blk; n/a ; Buginese -blk; n/a ; Buhid -blk; n/a ; Byzantine_Musical_Symbols -blk; n/a ; Carian -blk; n/a ; Cham -blk; n/a ; Cherokee -blk; n/a ; CJK_Compatibility -blk; n/a ; CJK_Compatibility_Forms -blk; n/a ; CJK_Compatibility_Ideographs -blk; n/a ; CJK_Compatibility_Ideographs_Supplement -blk; n/a ; CJK_Radicals_Supplement -blk; n/a ; CJK_Strokes -blk; n/a ; CJK_Symbols_And_Punctuation -blk; n/a ; CJK_Unified_Ideographs -blk; n/a ; CJK_Unified_Ideographs_Extension_A -blk; n/a ; CJK_Unified_Ideographs_Extension_B -blk; n/a ; CJK_Unified_Ideographs_Extension_C -blk; n/a ; CJK_Unified_Ideographs_Extension_D -blk; n/a ; Combining_Diacritical_Marks -blk; n/a ; Combining_Diacritical_Marks_For_Symbols; Combining_Marks_For_Symbols -blk; n/a ; Combining_Diacritical_Marks_Supplement -blk; n/a ; Combining_Half_Marks -blk; n/a ; Common_Indic_Number_Forms -blk; n/a ; Control_Pictures -blk; n/a ; Coptic -blk; n/a ; Counting_Rod_Numerals -blk; n/a ; Cuneiform -blk; n/a ; Cuneiform_Numbers_And_Punctuation -blk; n/a ; 
Currency_Symbols -blk; n/a ; Cypriot_Syllabary -blk; n/a ; Cyrillic -blk; n/a ; Cyrillic_Extended_A -blk; n/a ; Cyrillic_Extended_B -blk; n/a ; Cyrillic_Supplement ; Cyrillic_Supplementary -blk; n/a ; Deseret -blk; n/a ; Devanagari -blk; n/a ; Devanagari_Extended -blk; n/a ; Dingbats -blk; n/a ; Domino_Tiles -blk; n/a ; Egyptian_Hieroglyphs -blk; n/a ; Emoticons -blk; n/a ; Enclosed_Alphanumeric_Supplement -blk; n/a ; Enclosed_Alphanumerics -blk; n/a ; Enclosed_CJK_Letters_And_Months -blk; n/a ; Enclosed_Ideographic_Supplement -blk; n/a ; Ethiopic -blk; n/a ; Ethiopic_Extended -blk; n/a ; Ethiopic_Extended_A -blk; n/a ; Ethiopic_Supplement -blk; n/a ; General_Punctuation -blk; n/a ; Geometric_Shapes -blk; n/a ; Georgian -blk; n/a ; Georgian_Supplement -blk; n/a ; Glagolitic -blk; n/a ; Gothic -blk; n/a ; Greek_And_Coptic ; Greek -blk; n/a ; Greek_Extended -blk; n/a ; Gujarati -blk; n/a ; Gurmukhi -blk; n/a ; Halfwidth_And_Fullwidth_Forms -blk; n/a ; Hangul_Compatibility_Jamo -blk; n/a ; Hangul_Jamo -blk; n/a ; Hangul_Jamo_Extended_A -blk; n/a ; Hangul_Jamo_Extended_B -blk; n/a ; Hangul_Syllables -blk; n/a ; Hanunoo -blk; n/a ; Hebrew -blk; n/a ; High_Private_Use_Surrogates -blk; n/a ; High_Surrogates -blk; n/a ; Hiragana -blk; n/a ; Ideographic_Description_Characters -blk; n/a ; Imperial_Aramaic -blk; n/a ; Inscriptional_Pahlavi -blk; n/a ; Inscriptional_Parthian -blk; n/a ; IPA_Extensions -blk; n/a ; Javanese -blk; n/a ; Kaithi -blk; n/a ; Kana_Supplement -blk; n/a ; Kanbun -blk; n/a ; Kangxi_Radicals -blk; n/a ; Kannada -blk; n/a ; Katakana -blk; n/a ; Katakana_Phonetic_Extensions -blk; n/a ; Kayah_Li -blk; n/a ; Kharoshthi -blk; n/a ; Khmer -blk; n/a ; Khmer_Symbols -blk; n/a ; Lao -blk; n/a ; Latin_1_Supplement ; Latin_1 -blk; n/a ; Latin_Extended_A -blk; n/a ; Latin_Extended_Additional -blk; n/a ; Latin_Extended_B -blk; n/a ; Latin_Extended_C -blk; n/a ; Latin_Extended_D -blk; n/a ; Lepcha -blk; n/a ; Letterlike_Symbols -blk; n/a ; Limbu -blk; n/a ; 
Linear_B_Ideograms -blk; n/a ; Linear_B_Syllabary -blk; n/a ; Lisu -blk; n/a ; Low_Surrogates -blk; n/a ; Lycian -blk; n/a ; Lydian -blk; n/a ; Mahjong_Tiles -blk; n/a ; Malayalam -blk; n/a ; Mandaic -blk; n/a ; Mathematical_Alphanumeric_Symbols -blk; n/a ; Mathematical_Operators -blk; n/a ; Meetei_Mayek -blk; n/a ; Miscellaneous_Mathematical_Symbols_A -blk; n/a ; Miscellaneous_Mathematical_Symbols_B -blk; n/a ; Miscellaneous_Symbols -blk; n/a ; Miscellaneous_Symbols_And_Arrows -blk; n/a ; Miscellaneous_Symbols_And_Pictographs -blk; n/a ; Miscellaneous_Technical -blk; n/a ; Modifier_Tone_Letters -blk; n/a ; Mongolian -blk; n/a ; Musical_Symbols -blk; n/a ; Myanmar -blk; n/a ; Myanmar_Extended_A -blk; n/a ; New_Tai_Lue -blk; n/a ; NKo -blk; n/a ; No_Block -blk; n/a ; Number_Forms -blk; n/a ; Ogham -blk; n/a ; Ol_Chiki -blk; n/a ; Old_Italic -blk; n/a ; Old_Persian -blk; n/a ; Old_South_Arabian -blk; n/a ; Old_Turkic -blk; n/a ; Optical_Character_Recognition -blk; n/a ; Oriya -blk; n/a ; Osmanya -blk; n/a ; Phags_Pa -blk; n/a ; Phaistos_Disc -blk; n/a ; Phoenician -blk; n/a ; Phonetic_Extensions -blk; n/a ; Phonetic_Extensions_Supplement -blk; n/a ; Playing_Cards -blk; n/a ; Private_Use_Area ; Private_Use -blk; n/a ; Rejang -blk; n/a ; Rumi_Numeral_Symbols -blk; n/a ; Runic -blk; n/a ; Samaritan -blk; n/a ; Saurashtra -blk; n/a ; Shavian -blk; n/a ; Sinhala -blk; n/a ; Small_Form_Variants -blk; n/a ; Spacing_Modifier_Letters -blk; n/a ; Specials -blk; n/a ; Sundanese -blk; n/a ; Superscripts_And_Subscripts -blk; n/a ; Supplemental_Arrows_A -blk; n/a ; Supplemental_Arrows_B -blk; n/a ; Supplemental_Mathematical_Operators -blk; n/a ; Supplemental_Punctuation -blk; n/a ; Supplementary_Private_Use_Area_A -blk; n/a ; Supplementary_Private_Use_Area_B -blk; n/a ; Syloti_Nagri -blk; n/a ; Syriac -blk; n/a ; Tagalog -blk; n/a ; Tagbanwa -blk; n/a ; Tags -blk; n/a ; Tai_Le -blk; n/a ; Tai_Tham -blk; n/a ; Tai_Viet -blk; n/a ; Tai_Xuan_Jing_Symbols -blk; n/a ; Tamil -blk; n/a ; 
Telugu -blk; n/a ; Thaana -blk; n/a ; Thai -blk; n/a ; Tibetan -blk; n/a ; Tifinagh -blk; n/a ; Transport_And_Map_Symbols -blk; n/a ; Ugaritic -blk; n/a ; Unified_Canadian_Aboriginal_Syllabics; Canadian_Syllabics -blk; n/a ; Unified_Canadian_Aboriginal_Syllabics_Extended -blk; n/a ; Vai -blk; n/a ; Variation_Selectors -blk; n/a ; Variation_Selectors_Supplement -blk; n/a ; Vedic_Extensions -blk; n/a ; Vertical_Forms -blk; n/a ; Yi_Radicals -blk; n/a ; Yi_Syllables -blk; n/a ; Yijing_Hexagram_Symbols +blk; Aegean_Numbers ; Aegean_Numbers +blk; Alchemical ; Alchemical_Symbols +blk; Alphabetic_PF ; Alphabetic_Presentation_Forms +blk; Ancient_Greek_Music ; Ancient_Greek_Musical_Notation +blk; Ancient_Greek_Numbers ; Ancient_Greek_Numbers +blk; Ancient_Symbols ; Ancient_Symbols +blk; Arabic ; Arabic +blk; Arabic_Ext_A ; Arabic_Extended_A +blk; Arabic_Math ; Arabic_Mathematical_Alphabetic_Symbols +blk; Arabic_PF_A ; Arabic_Presentation_Forms_A ; Arabic_Presentation_Forms-A +blk; Arabic_PF_B ; Arabic_Presentation_Forms_B +blk; Arabic_Sup ; Arabic_Supplement +blk; Armenian ; Armenian +blk; Arrows ; Arrows +blk; ASCII ; Basic_Latin +blk; Avestan ; Avestan +blk; Balinese ; Balinese +blk; Bamum ; Bamum +blk; Bamum_Sup ; Bamum_Supplement +blk; Batak ; Batak +blk; Bengali ; Bengali +blk; Block_Elements ; Block_Elements +blk; Bopomofo ; Bopomofo +blk; Bopomofo_Ext ; Bopomofo_Extended +blk; Box_Drawing ; Box_Drawing +blk; Brahmi ; Brahmi +blk; Braille ; Braille_Patterns +blk; Buginese ; Buginese +blk; Buhid ; Buhid +blk; Byzantine_Music ; Byzantine_Musical_Symbols +blk; Carian ; Carian +blk; Chakma ; Chakma +blk; Cham ; Cham +blk; Cherokee ; Cherokee +blk; CJK ; CJK_Unified_Ideographs +blk; CJK_Compat ; CJK_Compatibility +blk; CJK_Compat_Forms ; CJK_Compatibility_Forms +blk; CJK_Compat_Ideographs ; CJK_Compatibility_Ideographs +blk; CJK_Compat_Ideographs_Sup ; CJK_Compatibility_Ideographs_Supplement +blk; CJK_Ext_A ; CJK_Unified_Ideographs_Extension_A +blk; CJK_Ext_B ; 
CJK_Unified_Ideographs_Extension_B +blk; CJK_Ext_C ; CJK_Unified_Ideographs_Extension_C +blk; CJK_Ext_D ; CJK_Unified_Ideographs_Extension_D +blk; CJK_Radicals_Sup ; CJK_Radicals_Supplement +blk; CJK_Strokes ; CJK_Strokes +blk; CJK_Symbols ; CJK_Symbols_And_Punctuation +blk; Compat_Jamo ; Hangul_Compatibility_Jamo +blk; Control_Pictures ; Control_Pictures +blk; Coptic ; Coptic +blk; Counting_Rod ; Counting_Rod_Numerals +blk; Cuneiform ; Cuneiform +blk; Cuneiform_Numbers ; Cuneiform_Numbers_And_Punctuation +blk; Currency_Symbols ; Currency_Symbols +blk; Cypriot_Syllabary ; Cypriot_Syllabary +blk; Cyrillic ; Cyrillic +blk; Cyrillic_Ext_A ; Cyrillic_Extended_A +blk; Cyrillic_Ext_B ; Cyrillic_Extended_B +blk; Cyrillic_Sup ; Cyrillic_Supplement ; Cyrillic_Supplementary +blk; Deseret ; Deseret +blk; Devanagari ; Devanagari +blk; Devanagari_Ext ; Devanagari_Extended +blk; Diacriticals ; Combining_Diacritical_Marks +blk; Diacriticals_For_Symbols ; Combining_Diacritical_Marks_For_Symbols; Combining_Marks_For_Symbols +blk; Diacriticals_Sup ; Combining_Diacritical_Marks_Supplement +blk; Dingbats ; Dingbats +blk; Domino ; Domino_Tiles +blk; Egyptian_Hieroglyphs ; Egyptian_Hieroglyphs +blk; Emoticons ; Emoticons +blk; Enclosed_Alphanum ; Enclosed_Alphanumerics +blk; Enclosed_Alphanum_Sup ; Enclosed_Alphanumeric_Supplement +blk; Enclosed_CJK ; Enclosed_CJK_Letters_And_Months +blk; Enclosed_Ideographic_Sup ; Enclosed_Ideographic_Supplement +blk; Ethiopic ; Ethiopic +blk; Ethiopic_Ext ; Ethiopic_Extended +blk; Ethiopic_Ext_A ; Ethiopic_Extended_A +blk; Ethiopic_Sup ; Ethiopic_Supplement +blk; Geometric_Shapes ; Geometric_Shapes +blk; Georgian ; Georgian +blk; Georgian_Sup ; Georgian_Supplement +blk; Glagolitic ; Glagolitic +blk; Gothic ; Gothic +blk; Greek ; Greek_And_Coptic +blk; Greek_Ext ; Greek_Extended +blk; Gujarati ; Gujarati +blk; Gurmukhi ; Gurmukhi +blk; Half_And_Full_Forms ; Halfwidth_And_Fullwidth_Forms +blk; Half_Marks ; Combining_Half_Marks +blk; Hangul ; 
Hangul_Syllables +blk; Hanunoo ; Hanunoo +blk; Hebrew ; Hebrew +blk; High_PU_Surrogates ; High_Private_Use_Surrogates +blk; High_Surrogates ; High_Surrogates +blk; Hiragana ; Hiragana +blk; IDC ; Ideographic_Description_Characters +blk; Imperial_Aramaic ; Imperial_Aramaic +blk; Indic_Number_Forms ; Common_Indic_Number_Forms +blk; Inscriptional_Pahlavi ; Inscriptional_Pahlavi +blk; Inscriptional_Parthian ; Inscriptional_Parthian +blk; IPA_Ext ; IPA_Extensions +blk; Jamo ; Hangul_Jamo +blk; Jamo_Ext_A ; Hangul_Jamo_Extended_A +blk; Jamo_Ext_B ; Hangul_Jamo_Extended_B +blk; Javanese ; Javanese +blk; Kaithi ; Kaithi +blk; Kana_Sup ; Kana_Supplement +blk; Kanbun ; Kanbun +blk; Kangxi ; Kangxi_Radicals +blk; Kannada ; Kannada +blk; Katakana ; Katakana +blk; Katakana_Ext ; Katakana_Phonetic_Extensions +blk; Kayah_Li ; Kayah_Li +blk; Kharoshthi ; Kharoshthi +blk; Khmer ; Khmer +blk; Khmer_Symbols ; Khmer_Symbols +blk; Lao ; Lao +blk; Latin_1_Sup ; Latin_1_Supplement ; Latin_1 +blk; Latin_Ext_A ; Latin_Extended_A +blk; Latin_Ext_Additional ; Latin_Extended_Additional +blk; Latin_Ext_B ; Latin_Extended_B +blk; Latin_Ext_C ; Latin_Extended_C +blk; Latin_Ext_D ; Latin_Extended_D +blk; Lepcha ; Lepcha +blk; Letterlike_Symbols ; Letterlike_Symbols +blk; Limbu ; Limbu +blk; Linear_B_Ideograms ; Linear_B_Ideograms +blk; Linear_B_Syllabary ; Linear_B_Syllabary +blk; Lisu ; Lisu +blk; Low_Surrogates ; Low_Surrogates +blk; Lycian ; Lycian +blk; Lydian ; Lydian +blk; Mahjong ; Mahjong_Tiles +blk; Malayalam ; Malayalam +blk; Mandaic ; Mandaic +blk; Math_Alphanum ; Mathematical_Alphanumeric_Symbols +blk; Math_Operators ; Mathematical_Operators +blk; Meetei_Mayek ; Meetei_Mayek +blk; Meetei_Mayek_Ext ; Meetei_Mayek_Extensions +blk; Meroitic_Cursive ; Meroitic_Cursive +blk; Meroitic_Hieroglyphs ; Meroitic_Hieroglyphs +blk; Miao ; Miao +blk; Misc_Arrows ; Miscellaneous_Symbols_And_Arrows +blk; Misc_Math_Symbols_A ; Miscellaneous_Mathematical_Symbols_A +blk; Misc_Math_Symbols_B ; 
Miscellaneous_Mathematical_Symbols_B +blk; Misc_Pictographs ; Miscellaneous_Symbols_And_Pictographs +blk; Misc_Symbols ; Miscellaneous_Symbols +blk; Misc_Technical ; Miscellaneous_Technical +blk; Modifier_Letters ; Spacing_Modifier_Letters +blk; Modifier_Tone_Letters ; Modifier_Tone_Letters +blk; Mongolian ; Mongolian +blk; Music ; Musical_Symbols +blk; Myanmar ; Myanmar +blk; Myanmar_Ext_A ; Myanmar_Extended_A +blk; NB ; No_Block +blk; New_Tai_Lue ; New_Tai_Lue +blk; NKo ; NKo +blk; Number_Forms ; Number_Forms +blk; OCR ; Optical_Character_Recognition +blk; Ogham ; Ogham +blk; Ol_Chiki ; Ol_Chiki +blk; Old_Italic ; Old_Italic +blk; Old_Persian ; Old_Persian +blk; Old_South_Arabian ; Old_South_Arabian +blk; Old_Turkic ; Old_Turkic +blk; Oriya ; Oriya +blk; Osmanya ; Osmanya +blk; Phags_Pa ; Phags_Pa +blk; Phaistos ; Phaistos_Disc +blk; Phoenician ; Phoenician +blk; Phonetic_Ext ; Phonetic_Extensions +blk; Phonetic_Ext_Sup ; Phonetic_Extensions_Supplement +blk; Playing_Cards ; Playing_Cards +blk; PUA ; Private_Use_Area ; Private_Use +blk; Punctuation ; General_Punctuation +blk; Rejang ; Rejang +blk; Rumi ; Rumi_Numeral_Symbols +blk; Runic ; Runic +blk; Samaritan ; Samaritan +blk; Saurashtra ; Saurashtra +blk; Sharada ; Sharada +blk; Shavian ; Shavian +blk; Sinhala ; Sinhala +blk; Small_Forms ; Small_Form_Variants +blk; Sora_Sompeng ; Sora_Sompeng +blk; Specials ; Specials +blk; Sundanese ; Sundanese +blk; Sundanese_Sup ; Sundanese_Supplement +blk; Sup_Arrows_A ; Supplemental_Arrows_A +blk; Sup_Arrows_B ; Supplemental_Arrows_B +blk; Sup_Math_Operators ; Supplemental_Mathematical_Operators +blk; Sup_PUA_A ; Supplementary_Private_Use_Area_A +blk; Sup_PUA_B ; Supplementary_Private_Use_Area_B +blk; Sup_Punctuation ; Supplemental_Punctuation +blk; Super_And_Sub ; Superscripts_And_Subscripts +blk; Syloti_Nagri ; Syloti_Nagri +blk; Syriac ; Syriac +blk; Tagalog ; Tagalog +blk; Tagbanwa ; Tagbanwa +blk; Tags ; Tags +blk; Tai_Le ; Tai_Le +blk; Tai_Tham ; Tai_Tham +blk; 
Tai_Viet ; Tai_Viet +blk; Tai_Xuan_Jing ; Tai_Xuan_Jing_Symbols +blk; Takri ; Takri +blk; Tamil ; Tamil +blk; Telugu ; Telugu +blk; Thaana ; Thaana +blk; Thai ; Thai +blk; Tibetan ; Tibetan +blk; Tifinagh ; Tifinagh +blk; Transport_And_Map ; Transport_And_Map_Symbols +blk; UCAS ; Unified_Canadian_Aboriginal_Syllabics; Canadian_Syllabics +blk; UCAS_Ext ; Unified_Canadian_Aboriginal_Syllabics_Extended +blk; Ugaritic ; Ugaritic +blk; Vai ; Vai +blk; Vedic_Ext ; Vedic_Extensions +blk; Vertical_Forms ; Vertical_Forms +blk; VS ; Variation_Selectors +blk; VS_Sup ; Variation_Selectors_Supplement +blk; Yi_Radicals ; Yi_Radicals +blk; Yi_Syllables ; Yi_Syllables +blk; Yijing ; Yijing_Hexagram_Symbols # Canonical_Combining_Class (ccc) -ccc; 0; NR ; Not_Reordered -ccc; 1; OV ; Overlay -ccc; 7; NK ; Nukta -ccc; 8; KV ; Kana_Voicing -ccc; 9; VR ; Virama -ccc; 200; ATBL ; Attached_Below_Left -ccc; 202; ATB ; Attached_Below -ccc; 214; ATA ; Attached_Above -ccc; 216; ATAR ; Attached_Above_Right -ccc; 218; BL ; Below_Left -ccc; 220; B ; Below -ccc; 222; BR ; Below_Right -ccc; 224; L ; Left -ccc; 226; R ; Right -ccc; 228; AL ; Above_Left -ccc; 230; A ; Above -ccc; 232; AR ; Above_Right -ccc; 233; DB ; Double_Below -ccc; 234; DA ; Double_Above -ccc; 240; IS ; Iota_Subscript +ccc; 0; NR ; Not_Reordered +ccc; 1; OV ; Overlay +ccc; 7; NK ; Nukta +ccc; 8; KV ; Kana_Voicing +ccc; 9; VR ; Virama +ccc; 10; CCC10 ; CCC10 +ccc; 11; CCC11 ; CCC11 +ccc; 12; CCC12 ; CCC12 +ccc; 13; CCC13 ; CCC13 +ccc; 14; CCC14 ; CCC14 +ccc; 15; CCC15 ; CCC15 +ccc; 16; CCC16 ; CCC16 +ccc; 17; CCC17 ; CCC17 +ccc; 18; CCC18 ; CCC18 +ccc; 19; CCC19 ; CCC19 +ccc; 20; CCC20 ; CCC20 +ccc; 21; CCC21 ; CCC21 +ccc; 22; CCC22 ; CCC22 +ccc; 23; CCC23 ; CCC23 +ccc; 24; CCC24 ; CCC24 +ccc; 25; CCC25 ; CCC25 +ccc; 26; CCC26 ; CCC26 +ccc; 27; CCC27 ; CCC27 +ccc; 28; CCC28 ; CCC28 +ccc; 29; CCC29 ; CCC29 +ccc; 30; CCC30 ; CCC30 +ccc; 31; CCC31 ; CCC31 +ccc; 32; CCC32 ; CCC32 +ccc; 33; CCC33 ; CCC33 +ccc; 34; CCC34 ; CCC34 +ccc; 
35; CCC35 ; CCC35 +ccc; 36; CCC36 ; CCC36 +ccc; 84; CCC84 ; CCC84 +ccc; 91; CCC91 ; CCC91 +ccc; 103; CCC103 ; CCC103 +ccc; 107; CCC107 ; CCC107 +ccc; 118; CCC118 ; CCC118 +ccc; 122; CCC122 ; CCC122 +ccc; 129; CCC129 ; CCC129 +ccc; 130; CCC130 ; CCC130 +ccc; 132; CCC133 ; CCC133 +ccc; 200; ATBL ; Attached_Below_Left +ccc; 202; ATB ; Attached_Below +ccc; 214; ATA ; Attached_Above +ccc; 216; ATAR ; Attached_Above_Right +ccc; 218; BL ; Below_Left +ccc; 220; B ; Below +ccc; 222; BR ; Below_Right +ccc; 224; L ; Left +ccc; 226; R ; Right +ccc; 228; AL ; Above_Left +ccc; 230; A ; Above +ccc; 232; AR ; Above_Right +ccc; 233; DB ; Double_Below +ccc; 234; DA ; Double_Above +ccc; 240; IS ; Iota_Subscript # Case_Folding (cf) @@ -359,53 +405,53 @@ ccc; 240; IS ; Iota_Subscript # Case_Ignorable (CI) -CI ; N ; No ; F ; False -CI ; Y ; Yes ; T ; True +CI ; N ; No ; F ; False +CI ; Y ; Yes ; T ; True # Cased (Cased) -Cased; N ; No ; F ; False -Cased; Y ; Yes ; T ; True +Cased; N ; No ; F ; False +Cased; Y ; Yes ; T ; True # Changes_When_Casefolded (CWCF) -CWCF; N ; No ; F ; False -CWCF; Y ; Yes ; T ; True +CWCF; N ; No ; F ; False +CWCF; Y ; Yes ; T ; True # Changes_When_Casemapped (CWCM) -CWCM; N ; No ; F ; False -CWCM; Y ; Yes ; T ; True +CWCM; N ; No ; F ; False +CWCM; Y ; Yes ; T ; True # Changes_When_Lowercased (CWL) -CWL; N ; No ; F ; False -CWL; Y ; Yes ; T ; True +CWL; N ; No ; F ; False +CWL; Y ; Yes ; T ; True # Changes_When_NFKC_Casefolded (CWKCF) -CWKCF; N ; No ; F ; False -CWKCF; Y ; Yes ; T ; True +CWKCF; N ; No ; F ; False +CWKCF; Y ; Yes ; T ; True # Changes_When_Titlecased (CWT) -CWT; N ; No ; F ; False -CWT; Y ; Yes ; T ; True +CWT; N ; No ; F ; False +CWT; Y ; Yes ; T ; True # Changes_When_Uppercased (CWU) -CWU; N ; No ; F ; False -CWU; Y ; Yes ; T ; True +CWU; N ; No ; F ; False +CWU; Y ; Yes ; T ; True # Composition_Exclusion (CE) -CE ; N ; No ; F ; False -CE ; Y ; Yes ; T ; True +CE ; N ; No ; F ; False +CE ; Y ; Yes ; T ; True # Dash (Dash) -Dash; N ; No ; F ; 
False -Dash; Y ; Yes ; T ; True +Dash; N ; No ; F ; False +Dash; Y ; Yes ; T ; True # Decomposition_Mapping (dm) @@ -413,73 +459,73 @@ Dash; Y ; Yes ; T # Decomposition_Type (dt) -dt ; Can ; Canonical ; can -dt ; Com ; Compat ; com -dt ; Enc ; Circle ; enc -dt ; Fin ; Final ; fin -dt ; Font ; font -dt ; Fra ; Fraction ; fra -dt ; Init ; Initial ; init -dt ; Iso ; Isolated ; iso -dt ; Med ; Medial ; med -dt ; Nar ; Narrow ; nar -dt ; Nb ; Nobreak ; nb -dt ; None ; none -dt ; Sml ; Small ; sml -dt ; Sqr ; Square ; sqr -dt ; Sub ; sub -dt ; Sup ; Super ; sup -dt ; Vert ; Vertical ; vert -dt ; Wide ; wide +dt ; Can ; Canonical ; can +dt ; Com ; Compat ; com +dt ; Enc ; Circle ; enc +dt ; Fin ; Final ; fin +dt ; Font ; Font ; font +dt ; Fra ; Fraction ; fra +dt ; Init ; Initial ; init +dt ; Iso ; Isolated ; iso +dt ; Med ; Medial ; med +dt ; Nar ; Narrow ; nar +dt ; Nb ; Nobreak ; nb +dt ; None ; None ; none +dt ; Sml ; Small ; sml +dt ; Sqr ; Square ; sqr +dt ; Sub ; Sub ; sub +dt ; Sup ; Super ; sup +dt ; Vert ; Vertical ; vert +dt ; Wide ; Wide ; wide # Default_Ignorable_Code_Point (DI) -DI ; N ; No ; F ; False -DI ; Y ; Yes ; T ; True +DI ; N ; No ; F ; False +DI ; Y ; Yes ; T ; True # Deprecated (Dep) -Dep; N ; No ; F ; False -Dep; Y ; Yes ; T ; True +Dep; N ; No ; F ; False +Dep; Y ; Yes ; T ; True # Diacritic (Dia) -Dia; N ; No ; F ; False -Dia; Y ; Yes ; T ; True +Dia; N ; No ; F ; False +Dia; Y ; Yes ; T ; True # East_Asian_Width (ea) -ea ; A ; Ambiguous -ea ; F ; Fullwidth -ea ; H ; Halfwidth -ea ; N ; Neutral -ea ; Na ; Narrow -ea ; W ; Wide +ea ; A ; Ambiguous +ea ; F ; Fullwidth +ea ; H ; Halfwidth +ea ; N ; Neutral +ea ; Na ; Narrow +ea ; W ; Wide # Expands_On_NFC (XO_NFC) -XO_NFC; N ; No ; F ; False -XO_NFC; Y ; Yes ; T ; True +XO_NFC; N ; No ; F ; False +XO_NFC; Y ; Yes ; T ; True # Expands_On_NFD (XO_NFD) -XO_NFD; N ; No ; F ; False -XO_NFD; Y ; Yes ; T ; True +XO_NFD; N ; No ; F ; False +XO_NFD; Y ; Yes ; T ; True # Expands_On_NFKC (XO_NFKC) -XO_NFKC; 
N ; No ; F ; False -XO_NFKC; Y ; Yes ; T ; True +XO_NFKC; N ; No ; F ; False +XO_NFKC; Y ; Yes ; T ; True # Expands_On_NFKD (XO_NFKD) -XO_NFKD; N ; No ; F ; False -XO_NFKD; Y ; Yes ; T ; True +XO_NFKD; N ; No ; F ; False +XO_NFKD; Y ; Yes ; T ; True # Extender (Ext) -Ext; N ; No ; F ; False -Ext; Y ; Yes ; T ; True +Ext; N ; No ; F ; False +Ext; Y ; Yes ; T ; True # FC_NFKC_Closure (FC_NFKC) @@ -487,118 +533,118 @@ Ext; Y ; Yes ; T # Full_Composition_Exclusion (Comp_Ex) -Comp_Ex; N ; No ; F ; False -Comp_Ex; Y ; Yes ; T ; True +Comp_Ex; N ; No ; F ; False +Comp_Ex; Y ; Yes ; T ; True # General_Category (gc) -gc ; C ; Other # Cc | Cf | Cn | Co | Cs -gc ; Cc ; Control ; cntrl -gc ; Cf ; Format -gc ; Cn ; Unassigned -gc ; Co ; Private_Use -gc ; Cs ; Surrogate -gc ; L ; Letter # Ll | Lm | Lo | Lt | Lu -gc ; LC ; Cased_Letter # Ll | Lt | Lu -gc ; Ll ; Lowercase_Letter -gc ; Lm ; Modifier_Letter -gc ; Lo ; Other_Letter -gc ; Lt ; Titlecase_Letter -gc ; Lu ; Uppercase_Letter -gc ; M ; Mark # Mc | Me | Mn -gc ; Mc ; Spacing_Mark -gc ; Me ; Enclosing_Mark -gc ; Mn ; Nonspacing_Mark -gc ; N ; Number # Nd | Nl | No -gc ; Nd ; Decimal_Number ; digit -gc ; Nl ; Letter_Number -gc ; No ; Other_Number -gc ; P ; Punctuation ; punct # Pc | Pd | Pe | Pf | Pi | Po | Ps -gc ; Pc ; Connector_Punctuation -gc ; Pd ; Dash_Punctuation -gc ; Pe ; Close_Punctuation -gc ; Pf ; Final_Punctuation -gc ; Pi ; Initial_Punctuation -gc ; Po ; Other_Punctuation -gc ; Ps ; Open_Punctuation -gc ; S ; Symbol # Sc | Sk | Sm | So -gc ; Sc ; Currency_Symbol -gc ; Sk ; Modifier_Symbol -gc ; Sm ; Math_Symbol -gc ; So ; Other_Symbol -gc ; Z ; Separator # Zl | Zp | Zs -gc ; Zl ; Line_Separator -gc ; Zp ; Paragraph_Separator -gc ; Zs ; Space_Separator +gc ; C ; Other # Cc | Cf | Cn | Co | Cs +gc ; Cc ; Control ; cntrl +gc ; Cf ; Format +gc ; Cn ; Unassigned +gc ; Co ; Private_Use +gc ; Cs ; Surrogate +gc ; L ; Letter # Ll | Lm | Lo | Lt | Lu +gc ; LC ; Cased_Letter # Ll | Lt | Lu +gc ; Ll ; Lowercase_Letter +gc 
; Lm ; Modifier_Letter +gc ; Lo ; Other_Letter +gc ; Lt ; Titlecase_Letter +gc ; Lu ; Uppercase_Letter +gc ; M ; Mark ; Combining_Mark # Mc | Me | Mn +gc ; Mc ; Spacing_Mark +gc ; Me ; Enclosing_Mark +gc ; Mn ; Nonspacing_Mark +gc ; N ; Number # Nd | Nl | No +gc ; Nd ; Decimal_Number ; digit +gc ; Nl ; Letter_Number +gc ; No ; Other_Number +gc ; P ; Punctuation ; punct # Pc | Pd | Pe | Pf | Pi | Po | Ps +gc ; Pc ; Connector_Punctuation +gc ; Pd ; Dash_Punctuation +gc ; Pe ; Close_Punctuation +gc ; Pf ; Final_Punctuation +gc ; Pi ; Initial_Punctuation +gc ; Po ; Other_Punctuation +gc ; Ps ; Open_Punctuation +gc ; S ; Symbol # Sc | Sk | Sm | So +gc ; Sc ; Currency_Symbol +gc ; Sk ; Modifier_Symbol +gc ; Sm ; Math_Symbol +gc ; So ; Other_Symbol +gc ; Z ; Separator # Zl | Zp | Zs +gc ; Zl ; Line_Separator +gc ; Zp ; Paragraph_Separator +gc ; Zs ; Space_Separator # Grapheme_Base (Gr_Base) -Gr_Base; N ; No ; F ; False -Gr_Base; Y ; Yes ; T ; True +Gr_Base; N ; No ; F ; False +Gr_Base; Y ; Yes ; T ; True # Grapheme_Cluster_Break (GCB) -GCB; CN ; Control -GCB; CR ; CR -GCB; EX ; Extend -GCB; L ; L -GCB; LF ; LF -GCB; LV ; LV -GCB; LVT ; LVT -GCB; PP ; Prepend -GCB; SM ; SpacingMark -GCB; T ; T -GCB; V ; V -GCB; XX ; Other +GCB; CN ; Control +GCB; CR ; CR +GCB; EX ; Extend +GCB; L ; L +GCB; LF ; LF +GCB; LV ; LV +GCB; LVT ; LVT +GCB; PP ; Prepend +GCB; SM ; SpacingMark +GCB; T ; T +GCB; V ; V +GCB; XX ; Other # Grapheme_Extend (Gr_Ext) -Gr_Ext; N ; No ; F ; False -Gr_Ext; Y ; Yes ; T ; True +Gr_Ext; N ; No ; F ; False +Gr_Ext; Y ; Yes ; T ; True # Grapheme_Link (Gr_Link) -Gr_Link; N ; No ; F ; False -Gr_Link; Y ; Yes ; T ; True +Gr_Link; N ; No ; F ; False +Gr_Link; Y ; Yes ; T ; True # Hangul_Syllable_Type (hst) -hst; L ; Leading_Jamo -hst; LV ; LV_Syllable -hst; LVT ; LVT_Syllable -hst; NA ; Not_Applicable -hst; T ; Trailing_Jamo -hst; V ; Vowel_Jamo +hst; L ; Leading_Jamo +hst; LV ; LV_Syllable +hst; LVT ; LVT_Syllable +hst; NA ; Not_Applicable +hst; T ; Trailing_Jamo 
+hst; V ; Vowel_Jamo # Hex_Digit (Hex) -Hex; N ; No ; F ; False -Hex; Y ; Yes ; T ; True +Hex; N ; No ; F ; False +Hex; Y ; Yes ; T ; True # Hyphen (Hyphen) -Hyphen; N ; No ; F ; False -Hyphen; Y ; Yes ; T ; True +Hyphen; N ; No ; F ; False +Hyphen; Y ; Yes ; T ; True # IDS_Binary_Operator (IDSB) -IDSB; N ; No ; F ; False -IDSB; Y ; Yes ; T ; True +IDSB; N ; No ; F ; False +IDSB; Y ; Yes ; T ; True # IDS_Trinary_Operator (IDST) -IDST; N ; No ; F ; False -IDST; Y ; Yes ; T ; True +IDST; N ; No ; F ; False +IDST; Y ; Yes ; T ; True # ID_Continue (IDC) -IDC; N ; No ; F ; False -IDC; Y ; Yes ; T ; True +IDC; N ; No ; F ; False +IDC; Y ; Yes ; T ; True # ID_Start (IDS) -IDS; N ; No ; F ; False -IDS; Y ; Yes ; T ; True +IDS; N ; No ; F ; False +IDS; Y ; Yes ; T ; True # ISO_Comment (isc) @@ -606,188 +652,233 @@ IDS; Y ; Yes ; T # Ideographic (Ideo) -Ideo; N ; No ; F ; False -Ideo; Y ; Yes ; T ; True +Ideo; N ; No ; F ; False +Ideo; Y ; Yes ; T ; True + +# Indic_Matra_Category (InMC) + +InMC; Bottom ; Bottom +InMC; Bottom_And_Right ; Bottom_And_Right +InMC; Invisible ; Invisible +InMC; Left ; Left +InMC; Left_And_Right ; Left_And_Right +InMC; NA ; NA +InMC; Overstruck ; Overstruck +InMC; Right ; Right +InMC; Top ; Top +InMC; Top_And_Bottom ; Top_And_Bottom +InMC; Top_And_Bottom_And_Right ; Top_And_Bottom_And_Right +InMC; Top_And_Left ; Top_And_Left +InMC; Top_And_Left_And_Right ; Top_And_Left_And_Right +InMC; Top_And_Right ; Top_And_Right +InMC; Visual_Order_Left ; Visual_Order_Left + +# Indic_Syllabic_Category (InSC) + +InSC; Avagraha ; Avagraha +InSC; Bindu ; Bindu +InSC; Consonant ; Consonant +InSC; Consonant_Dead ; Consonant_Dead +InSC; Consonant_Final ; Consonant_Final +InSC; Consonant_Head_Letter ; Consonant_Head_Letter +InSC; Consonant_Medial ; Consonant_Medial +InSC; Consonant_Placeholder ; Consonant_Placeholder +InSC; Consonant_Repha ; Consonant_Repha +InSC; Consonant_Subjoined ; Consonant_Subjoined +InSC; Modifying_Letter ; Modifying_Letter +InSC; Nukta ; Nukta 
+InSC; Other ; Other +InSC; Register_Shifter ; Register_Shifter +InSC; Tone_Letter ; Tone_Letter +InSC; Tone_Mark ; Tone_Mark +InSC; Virama ; Virama +InSC; Visarga ; Visarga +InSC; Vowel ; Vowel +InSC; Vowel_Dependent ; Vowel_Dependent +InSC; Vowel_Independent ; Vowel_Independent # Jamo_Short_Name (JSN) # @missing: 0000..10FFFF; Jamo_Short_Name; <none> -JSN; A ; A -JSN; AE ; AE -JSN; B ; B -JSN; BB ; BB -JSN; BS ; BS -JSN; C ; C -JSN; D ; D -JSN; DD ; DD -JSN; E ; E -JSN; EO ; EO -JSN; EU ; EU -JSN; G ; G -JSN; GG ; GG -JSN; GS ; GS -JSN; H ; H -JSN; I ; I -JSN; J ; J -JSN; JJ ; JJ -JSN; K ; K -JSN; L ; L -JSN; LB ; LB -JSN; LG ; LG -JSN; LH ; LH -JSN; LM ; LM -JSN; LP ; LP -JSN; LS ; LS -JSN; LT ; LT -JSN; M ; M -JSN; N ; N -JSN; NG ; NG -JSN; NH ; NH -JSN; NJ ; NJ -JSN; O ; O -JSN; OE ; OE -JSN; P ; P -JSN; R ; R -JSN; S ; S -JSN; SS ; SS -JSN; T ; T -JSN; U ; U -JSN; WA ; WA -JSN; WAE ; WAE -JSN; WE ; WE -JSN; WEO ; WEO -JSN; WI ; WI -JSN; YA ; YA -JSN; YAE ; YAE -JSN; YE ; YE -JSN; YEO ; YEO -JSN; YI ; YI -JSN; YO ; YO -JSN; YU ; YU +JSN; A ; A +JSN; AE ; AE +JSN; B ; B +JSN; BB ; BB +JSN; BS ; BS +JSN; C ; C +JSN; D ; D +JSN; DD ; DD +JSN; E ; E +JSN; EO ; EO +JSN; EU ; EU +JSN; G ; G +JSN; GG ; GG +JSN; GS ; GS +JSN; H ; H +JSN; I ; I +JSN; J ; J +JSN; JJ ; JJ +JSN; K ; K +JSN; L ; L +JSN; LB ; LB +JSN; LG ; LG +JSN; LH ; LH +JSN; LM ; LM +JSN; LP ; LP +JSN; LS ; LS +JSN; LT ; LT +JSN; M ; M +JSN; N ; N +JSN; NG ; NG +JSN; NH ; NH +JSN; NJ ; NJ +JSN; O ; O +JSN; OE ; OE +JSN; P ; P +JSN; R ; R +JSN; S ; S +JSN; SS ; SS +JSN; T ; T +JSN; U ; U +JSN; WA ; WA +JSN; WAE ; WAE +JSN; WE ; WE +JSN; WEO ; WEO +JSN; WI ; WI +JSN; YA ; YA +JSN; YAE ; YAE +JSN; YE ; YE +JSN; YEO ; YEO +JSN; YI ; YI +JSN; YO ; YO +JSN; YU ; YU # Join_Control (Join_C) -Join_C; N ; No ; F ; False -Join_C; Y ; Yes ; T ; True +Join_C; N ; No ; F ; False +Join_C; Y ; Yes ; T ; True # Joining_Group (jg) -jg ; n/a ; Ain -jg ; n/a ; Alaph -jg ; n/a ; Alef -jg ; n/a ; Beh -jg ; n/a ; Beth -jg ; 
n/a ; Burushaski_Yeh_Barree -jg ; n/a ; Dal -jg ; n/a ; Dalath_Rish -jg ; n/a ; E -jg ; n/a ; Farsi_Yeh -jg ; n/a ; Fe -jg ; n/a ; Feh -jg ; n/a ; Final_Semkath -jg ; n/a ; Gaf -jg ; n/a ; Gamal -jg ; n/a ; Hah -jg ; n/a ; He -jg ; n/a ; Heh -jg ; n/a ; Heh_Goal -jg ; n/a ; Heth -jg ; n/a ; Kaf -jg ; n/a ; Kaph -jg ; n/a ; Khaph -jg ; n/a ; Knotted_Heh -jg ; n/a ; Lam -jg ; n/a ; Lamadh -jg ; n/a ; Meem -jg ; n/a ; Mim -jg ; n/a ; No_Joining_Group -jg ; n/a ; Noon -jg ; n/a ; Nun -jg ; n/a ; Nya -jg ; n/a ; Pe -jg ; n/a ; Qaf -jg ; n/a ; Qaph -jg ; n/a ; Reh -jg ; n/a ; Reversed_Pe -jg ; n/a ; Sad -jg ; n/a ; Sadhe -jg ; n/a ; Seen -jg ; n/a ; Semkath -jg ; n/a ; Shin -jg ; n/a ; Swash_Kaf -jg ; n/a ; Syriac_Waw -jg ; n/a ; Tah -jg ; n/a ; Taw -jg ; n/a ; Teh_Marbuta -jg ; n/a ; Teh_Marbuta_Goal ; Hamza_On_Heh_Goal -jg ; n/a ; Teth -jg ; n/a ; Waw -jg ; n/a ; Yeh -jg ; n/a ; Yeh_Barree -jg ; n/a ; Yeh_With_Tail -jg ; n/a ; Yudh -jg ; n/a ; Yudh_He -jg ; n/a ; Zain -jg ; n/a ; Zhain +jg ; Ain ; Ain +jg ; Alaph ; Alaph +jg ; Alef ; Alef +jg ; Beh ; Beh +jg ; Beth ; Beth +jg ; Burushaski_Yeh_Barree ; Burushaski_Yeh_Barree +jg ; Dal ; Dal +jg ; Dalath_Rish ; Dalath_Rish +jg ; E ; E +jg ; Farsi_Yeh ; Farsi_Yeh +jg ; Fe ; Fe +jg ; Feh ; Feh +jg ; Final_Semkath ; Final_Semkath +jg ; Gaf ; Gaf +jg ; Gamal ; Gamal +jg ; Hah ; Hah +jg ; He ; He +jg ; Heh ; Heh +jg ; Heh_Goal ; Heh_Goal +jg ; Heth ; Heth +jg ; Kaf ; Kaf +jg ; Kaph ; Kaph +jg ; Khaph ; Khaph +jg ; Knotted_Heh ; Knotted_Heh +jg ; Lam ; Lam +jg ; Lamadh ; Lamadh +jg ; Meem ; Meem +jg ; Mim ; Mim +jg ; No_Joining_Group ; No_Joining_Group +jg ; Noon ; Noon +jg ; Nun ; Nun +jg ; Nya ; Nya +jg ; Pe ; Pe +jg ; Qaf ; Qaf +jg ; Qaph ; Qaph +jg ; Reh ; Reh +jg ; Reversed_Pe ; Reversed_Pe +jg ; Rohingya_Yeh ; Rohingya_Yeh +jg ; Sad ; Sad +jg ; Sadhe ; Sadhe +jg ; Seen ; Seen +jg ; Semkath ; Semkath +jg ; Shin ; Shin +jg ; Swash_Kaf ; Swash_Kaf +jg ; Syriac_Waw ; Syriac_Waw +jg ; Tah ; Tah +jg ; Taw ; Taw +jg ; 
Teh_Marbuta ; Teh_Marbuta +jg ; Teh_Marbuta_Goal ; Hamza_On_Heh_Goal +jg ; Teth ; Teth +jg ; Waw ; Waw +jg ; Yeh ; Yeh +jg ; Yeh_Barree ; Yeh_Barree +jg ; Yeh_With_Tail ; Yeh_With_Tail +jg ; Yudh ; Yudh +jg ; Yudh_He ; Yudh_He +jg ; Zain ; Zain +jg ; Zhain ; Zhain # Joining_Type (jt) -jt ; C ; Join_Causing -jt ; D ; Dual_Joining -jt ; L ; Left_Joining -jt ; R ; Right_Joining -jt ; T ; Transparent -jt ; U ; Non_Joining +jt ; C ; Join_Causing +jt ; D ; Dual_Joining +jt ; L ; Left_Joining +jt ; R ; Right_Joining +jt ; T ; Transparent +jt ; U ; Non_Joining # Line_Break (lb) -lb ; AI ; Ambiguous -lb ; AL ; Alphabetic -lb ; B2 ; Break_Both -lb ; BA ; Break_After -lb ; BB ; Break_Before -lb ; BK ; Mandatory_Break -lb ; CB ; Contingent_Break -lb ; CL ; Close_Punctuation -lb ; CM ; Combining_Mark -lb ; CP ; Close_Parenthesis -lb ; CR ; Carriage_Return -lb ; EX ; Exclamation -lb ; GL ; Glue -lb ; H2 ; H2 -lb ; H3 ; H3 -lb ; HY ; Hyphen -lb ; ID ; Ideographic -lb ; IN ; Inseparable ; Inseperable -lb ; IS ; Infix_Numeric -lb ; JL ; JL -lb ; JT ; JT -lb ; JV ; JV -lb ; LF ; Line_Feed -lb ; NL ; Next_Line -lb ; NS ; Nonstarter -lb ; NU ; Numeric -lb ; OP ; Open_Punctuation -lb ; PO ; Postfix_Numeric -lb ; PR ; Prefix_Numeric -lb ; QU ; Quotation -lb ; SA ; Complex_Context -lb ; SG ; Surrogate -lb ; SP ; Space -lb ; SY ; Break_Symbols -lb ; WJ ; Word_Joiner -lb ; XX ; Unknown -lb ; ZW ; ZWSpace +lb ; AI ; Ambiguous +lb ; AL ; Alphabetic +lb ; B2 ; Break_Both +lb ; BA ; Break_After +lb ; BB ; Break_Before +lb ; BK ; Mandatory_Break +lb ; CB ; Contingent_Break +lb ; CJ ; Conditional_Japanese_Starter +lb ; CL ; Close_Punctuation +lb ; CM ; Combining_Mark +lb ; CP ; Close_Parenthesis +lb ; CR ; Carriage_Return +lb ; EX ; Exclamation +lb ; GL ; Glue +lb ; H2 ; H2 +lb ; H3 ; H3 +lb ; HL ; Hebrew_Letter +lb ; HY ; Hyphen +lb ; ID ; Ideographic +lb ; IN ; Inseparable ; Inseperable +lb ; IS ; Infix_Numeric +lb ; JL ; JL +lb ; JT ; JT +lb ; JV ; JV +lb ; LF ; Line_Feed +lb ; NL ; Next_Line 
+lb ; NS ; Nonstarter +lb ; NU ; Numeric +lb ; OP ; Open_Punctuation +lb ; PO ; Postfix_Numeric +lb ; PR ; Prefix_Numeric +lb ; QU ; Quotation +lb ; SA ; Complex_Context +lb ; SG ; Surrogate +lb ; SP ; Space +lb ; SY ; Break_Symbols +lb ; WJ ; Word_Joiner +lb ; XX ; Unknown +lb ; ZW ; ZWSpace # Logical_Order_Exception (LOE) -LOE; N ; No ; F ; False -LOE; Y ; Yes ; T ; True +LOE; N ; No ; F ; False +LOE; Y ; Yes ; T ; True # Lowercase (Lower) -Lower; N ; No ; F ; False -Lower; Y ; Yes ; T ; True +Lower; N ; No ; F ; False +Lower; Y ; Yes ; T ; True # Lowercase_Mapping (lc) @@ -795,19 +886,19 @@ Lower; Y ; Yes ; T # Math (Math) -Math; N ; No ; F ; False -Math; Y ; Yes ; T ; True +Math; N ; No ; F ; False +Math; Y ; Yes ; T ; True # NFC_Quick_Check (NFC_QC) -NFC_QC; M ; Maybe -NFC_QC; N ; No -NFC_QC; Y ; Yes +NFC_QC; M ; Maybe +NFC_QC; N ; No +NFC_QC; Y ; Yes # NFD_Quick_Check (NFD_QC) -NFD_QC; N ; No -NFD_QC; Y ; Yes +NFD_QC; N ; No +NFD_QC; Y ; Yes # NFKC_Casefold (NFKC_CF) @@ -815,14 +906,14 @@ NFD_QC; Y ; Yes # NFKC_Quick_Check (NFKC_QC) -NFKC_QC; M ; Maybe -NFKC_QC; N ; No -NFKC_QC; Y ; Yes +NFKC_QC; M ; Maybe +NFKC_QC; N ; No +NFKC_QC; Y ; Yes # NFKD_Quick_Check (NFKD_QC) -NFKD_QC; N ; No -NFKD_QC; Y ; Yes +NFKD_QC; N ; No +NFKD_QC; Y ; Yes # Name (na) @@ -834,15 +925,15 @@ NFKD_QC; Y ; Yes # Noncharacter_Code_Point (NChar) -NChar; N ; No ; F ; False -NChar; Y ; Yes ; T ; True +NChar; N ; No ; F ; False +NChar; Y ; Yes ; T ; True # Numeric_Type (nt) -nt ; De ; Decimal -nt ; Di ; Digit -nt ; None ; None -nt ; Nu ; Numeric +nt ; De ; Decimal +nt ; Di ; Digit +nt ; None ; None +nt ; Nu ; Numeric # Numeric_Value (nv) @@ -850,186 +941,197 @@ nt ; Nu ; Numeric # Other_Alphabetic (OAlpha) -OAlpha; N ; No ; F ; False -OAlpha; Y ; Yes ; T ; True +OAlpha; N ; No ; F ; False +OAlpha; Y ; Yes ; T ; True # Other_Default_Ignorable_Code_Point (ODI) -ODI; N ; No ; F ; False -ODI; Y ; Yes ; T ; True +ODI; N ; No ; F ; False +ODI; Y ; Yes ; T ; True # Other_Grapheme_Extend 
(OGr_Ext) -OGr_Ext; N ; No ; F ; False -OGr_Ext; Y ; Yes ; T ; True +OGr_Ext; N ; No ; F ; False +OGr_Ext; Y ; Yes ; T ; True # Other_ID_Continue (OIDC) -OIDC; N ; No ; F ; False -OIDC; Y ; Yes ; T ; True +OIDC; N ; No ; F ; False +OIDC; Y ; Yes ; T ; True # Other_ID_Start (OIDS) -OIDS; N ; No ; F ; False -OIDS; Y ; Yes ; T ; True +OIDS; N ; No ; F ; False +OIDS; Y ; Yes ; T ; True # Other_Lowercase (OLower) -OLower; N ; No ; F ; False -OLower; Y ; Yes ; T ; True +OLower; N ; No ; F ; False +OLower; Y ; Yes ; T ; True # Other_Math (OMath) -OMath; N ; No ; F ; False -OMath; Y ; Yes ; T ; True +OMath; N ; No ; F ; False +OMath; Y ; Yes ; T ; True # Other_Uppercase (OUpper) -OUpper; N ; No ; F ; False -OUpper; Y ; Yes ; T ; True +OUpper; N ; No ; F ; False +OUpper; Y ; Yes ; T ; True # Pattern_Syntax (Pat_Syn) -Pat_Syn; N ; No ; F ; False -Pat_Syn; Y ; Yes ; T ; True +Pat_Syn; N ; No ; F ; False +Pat_Syn; Y ; Yes ; T ; True # Pattern_White_Space (Pat_WS) -Pat_WS; N ; No ; F ; False -Pat_WS; Y ; Yes ; T ; True +Pat_WS; N ; No ; F ; False +Pat_WS; Y ; Yes ; T ; True # Quotation_Mark (QMark) -QMark; N ; No ; F ; False -QMark; Y ; Yes ; T ; True +QMark; N ; No ; F ; False +QMark; Y ; Yes ; T ; True # Radical (Radical) -Radical; N ; No ; F ; False -Radical; Y ; Yes ; T ; True +Radical; N ; No ; F ; False +Radical; Y ; Yes ; T ; True # STerm (STerm) -STerm; N ; No ; F ; False -STerm; Y ; Yes ; T ; True +STerm; N ; No ; F ; False +STerm; Y ; Yes ; T ; True # Script (sc) -sc ; Arab ; Arabic -sc ; Armi ; Imperial_Aramaic -sc ; Armn ; Armenian -sc ; Avst ; Avestan -sc ; Bali ; Balinese -sc ; Bamu ; Bamum -sc ; Batk ; Batak -sc ; Beng ; Bengali -sc ; Bopo ; Bopomofo -sc ; Brah ; Brahmi -sc ; Brai ; Braille -sc ; Bugi ; Buginese -sc ; Buhd ; Buhid -sc ; Cans ; Canadian_Aboriginal -sc ; Cari ; Carian -sc ; Cham ; Cham -sc ; Cher ; Cherokee -sc ; Copt ; Coptic ; Qaac -sc ; Cprt ; Cypriot -sc ; Cyrl ; Cyrillic -sc ; Deva ; Devanagari -sc ; Dsrt ; Deseret -sc ; Egyp ; 
Egyptian_Hieroglyphs -sc ; Ethi ; Ethiopic -sc ; Geor ; Georgian -sc ; Glag ; Glagolitic -sc ; Goth ; Gothic -sc ; Grek ; Greek -sc ; Gujr ; Gujarati -sc ; Guru ; Gurmukhi -sc ; Hang ; Hangul -sc ; Hani ; Han -sc ; Hano ; Hanunoo -sc ; Hebr ; Hebrew -sc ; Hira ; Hiragana -sc ; Hrkt ; Katakana_Or_Hiragana -sc ; Ital ; Old_Italic -sc ; Java ; Javanese -sc ; Kali ; Kayah_Li -sc ; Kana ; Katakana -sc ; Khar ; Kharoshthi -sc ; Khmr ; Khmer -sc ; Knda ; Kannada -sc ; Kthi ; Kaithi -sc ; Lana ; Tai_Tham -sc ; Laoo ; Lao -sc ; Latn ; Latin -sc ; Lepc ; Lepcha -sc ; Limb ; Limbu -sc ; Linb ; Linear_B -sc ; Lisu ; Lisu -sc ; Lyci ; Lycian -sc ; Lydi ; Lydian -sc ; Mand ; Mandaic -sc ; Mlym ; Malayalam -sc ; Mong ; Mongolian -sc ; Mtei ; Meetei_Mayek -sc ; Mymr ; Myanmar -sc ; Nkoo ; Nko -sc ; Ogam ; Ogham -sc ; Olck ; Ol_Chiki -sc ; Orkh ; Old_Turkic -sc ; Orya ; Oriya -sc ; Osma ; Osmanya -sc ; Phag ; Phags_Pa -sc ; Phli ; Inscriptional_Pahlavi -sc ; Phnx ; Phoenician -sc ; Prti ; Inscriptional_Parthian -sc ; Rjng ; Rejang -sc ; Runr ; Runic -sc ; Samr ; Samaritan -sc ; Sarb ; Old_South_Arabian -sc ; Saur ; Saurashtra -sc ; Shaw ; Shavian -sc ; Sinh ; Sinhala -sc ; Sund ; Sundanese -sc ; Sylo ; Syloti_Nagri -sc ; Syrc ; Syriac -sc ; Tagb ; Tagbanwa -sc ; Tale ; Tai_Le -sc ; Talu ; New_Tai_Lue -sc ; Taml ; Tamil -sc ; Tavt ; Tai_Viet -sc ; Telu ; Telugu -sc ; Tfng ; Tifinagh -sc ; Tglg ; Tagalog -sc ; Thaa ; Thaana -sc ; Thai ; Thai -sc ; Tibt ; Tibetan -sc ; Ugar ; Ugaritic -sc ; Vaii ; Vai -sc ; Xpeo ; Old_Persian -sc ; Xsux ; Cuneiform -sc ; Yiii ; Yi -sc ; Zinh ; Inherited ; Qaai -sc ; Zyyy ; Common -sc ; Zzzz ; Unknown +sc ; Arab ; Arabic +sc ; Armi ; Imperial_Aramaic +sc ; Armn ; Armenian +sc ; Avst ; Avestan +sc ; Bali ; Balinese +sc ; Bamu ; Bamum +sc ; Batk ; Batak +sc ; Beng ; Bengali +sc ; Bopo ; Bopomofo +sc ; Brah ; Brahmi +sc ; Brai ; Braille +sc ; Bugi ; Buginese +sc ; Buhd ; Buhid +sc ; Cakm ; Chakma +sc ; Cans ; Canadian_Aboriginal +sc ; Cari ; Carian +sc ; 
Cham ; Cham +sc ; Cher ; Cherokee +sc ; Copt ; Coptic ; Qaac +sc ; Cprt ; Cypriot +sc ; Cyrl ; Cyrillic +sc ; Deva ; Devanagari +sc ; Dsrt ; Deseret +sc ; Egyp ; Egyptian_Hieroglyphs +sc ; Ethi ; Ethiopic +sc ; Geor ; Georgian +sc ; Glag ; Glagolitic +sc ; Goth ; Gothic +sc ; Grek ; Greek +sc ; Gujr ; Gujarati +sc ; Guru ; Gurmukhi +sc ; Hang ; Hangul +sc ; Hani ; Han +sc ; Hano ; Hanunoo +sc ; Hebr ; Hebrew +sc ; Hira ; Hiragana +sc ; Hrkt ; Katakana_Or_Hiragana +sc ; Ital ; Old_Italic +sc ; Java ; Javanese +sc ; Kali ; Kayah_Li +sc ; Kana ; Katakana +sc ; Khar ; Kharoshthi +sc ; Khmr ; Khmer +sc ; Knda ; Kannada +sc ; Kthi ; Kaithi +sc ; Lana ; Tai_Tham +sc ; Laoo ; Lao +sc ; Latn ; Latin +sc ; Lepc ; Lepcha +sc ; Limb ; Limbu +sc ; Linb ; Linear_B +sc ; Lisu ; Lisu +sc ; Lyci ; Lycian +sc ; Lydi ; Lydian +sc ; Mand ; Mandaic +sc ; Merc ; Meroitic_Cursive +sc ; Mero ; Meroitic_Hieroglyphs +sc ; Mlym ; Malayalam +sc ; Mong ; Mongolian +sc ; Mtei ; Meetei_Mayek +sc ; Mymr ; Myanmar +sc ; Nkoo ; Nko +sc ; Ogam ; Ogham +sc ; Olck ; Ol_Chiki +sc ; Orkh ; Old_Turkic +sc ; Orya ; Oriya +sc ; Osma ; Osmanya +sc ; Phag ; Phags_Pa +sc ; Phli ; Inscriptional_Pahlavi +sc ; Phnx ; Phoenician +sc ; Plrd ; Miao +sc ; Prti ; Inscriptional_Parthian +sc ; Rjng ; Rejang +sc ; Runr ; Runic +sc ; Samr ; Samaritan +sc ; Sarb ; Old_South_Arabian +sc ; Saur ; Saurashtra +sc ; Shaw ; Shavian +sc ; Shrd ; Sharada +sc ; Sinh ; Sinhala +sc ; Sora ; Sora_Sompeng +sc ; Sund ; Sundanese +sc ; Sylo ; Syloti_Nagri +sc ; Syrc ; Syriac +sc ; Tagb ; Tagbanwa +sc ; Takr ; Takri +sc ; Tale ; Tai_Le +sc ; Talu ; New_Tai_Lue +sc ; Taml ; Tamil +sc ; Tavt ; Tai_Viet +sc ; Telu ; Telugu +sc ; Tfng ; Tifinagh +sc ; Tglg ; Tagalog +sc ; Thaa ; Thaana +sc ; Thai ; Thai +sc ; Tibt ; Tibetan +sc ; Ugar ; Ugaritic +sc ; Vaii ; Vai +sc ; Xpeo ; Old_Persian +sc ; Xsux ; Cuneiform +sc ; Yiii ; Yi +sc ; Zinh ; Inherited ; Qaai +sc ; Zyyy ; Common +sc ; Zzzz ; Unknown + +# Script_Extensions (scx) + +# @missing: 
0000..10FFFF; Script_Extensions; <script> # Sentence_Break (SB) -SB ; AT ; ATerm -SB ; CL ; Close -SB ; CR ; CR -SB ; EX ; Extend -SB ; FO ; Format -SB ; LE ; OLetter -SB ; LF ; LF -SB ; LO ; Lower -SB ; NU ; Numeric -SB ; SC ; SContinue -SB ; SE ; Sep -SB ; SP ; Sp -SB ; ST ; STerm -SB ; UP ; Upper -SB ; XX ; Other +SB ; AT ; ATerm +SB ; CL ; Close +SB ; CR ; CR +SB ; EX ; Extend +SB ; FO ; Format +SB ; LE ; OLetter +SB ; LF ; LF +SB ; LO ; Lower +SB ; NU ; Numeric +SB ; SC ; SContinue +SB ; SE ; Sep +SB ; SP ; Sp +SB ; ST ; STerm +SB ; UP ; Upper +SB ; XX ; Other # Simple_Case_Folding (scf) @@ -1049,13 +1151,13 @@ SB ; XX ; Other # Soft_Dotted (SD) -SD ; N ; No ; F ; False -SD ; Y ; Yes ; T ; True +SD ; N ; No ; F ; False +SD ; Y ; Yes ; T ; True # Terminal_Punctuation (Term) -Term; N ; No ; F ; False -Term; Y ; Yes ; T ; True +Term; N ; No ; F ; False +Term; Y ; Yes ; T ; True # Titlecase_Mapping (tc) @@ -1067,13 +1169,13 @@ Term; Y ; Yes ; T # Unified_Ideograph (UIdeo) -UIdeo; N ; No ; F ; False -UIdeo; Y ; Yes ; T ; True +UIdeo; N ; No ; F ; False +UIdeo; Y ; Yes ; T ; True # Uppercase (Upper) -Upper; N ; No ; F ; False -Upper; Y ; Yes ; T ; True +Upper; N ; No ; F ; False +Upper; Y ; Yes ; T ; True # Uppercase_Mapping (uc) @@ -1081,39 +1183,39 @@ Upper; Y ; Yes ; T # Variation_Selector (VS) -VS ; N ; No ; F ; False -VS ; Y ; Yes ; T ; True +VS ; N ; No ; F ; False +VS ; Y ; Yes ; T ; True # White_Space (WSpace) -WSpace; N ; No ; F ; False -WSpace; Y ; Yes ; T ; True +WSpace; N ; No ; F ; False +WSpace; Y ; Yes ; T ; True # Word_Break (WB) -WB ; CR ; CR -WB ; EX ; ExtendNumLet -WB ; Extend ; Extend -WB ; FO ; Format -WB ; KA ; Katakana -WB ; LE ; ALetter -WB ; LF ; LF -WB ; MB ; MidNumLet -WB ; ML ; MidLetter -WB ; MN ; MidNum -WB ; NL ; Newline -WB ; NU ; Numeric -WB ; XX ; Other +WB ; CR ; CR +WB ; EX ; ExtendNumLet +WB ; Extend ; Extend +WB ; FO ; Format +WB ; KA ; Katakana +WB ; LE ; ALetter +WB ; LF ; LF +WB ; MB ; MidNumLet +WB ; ML ; MidLetter +WB ; MN 
; MidNum +WB ; NL ; Newline +WB ; NU ; Numeric +WB ; XX ; Other # XID_Continue (XIDC) -XIDC; N ; No ; F ; False -XIDC; Y ; Yes ; T ; True +XIDC; N ; No ; F ; False +XIDC; Y ; Yes ; T ; True # XID_Start (XIDS) -XIDS; N ; No ; F ; False -XIDS; Y ; Yes ; T ; True +XIDS; N ; No ; F ; False +XIDS; Y ; Yes ; T ; True # cjkAccountingNumeric (cjkAccountingNumeric) diff --git a/lib/unicore/PropertyAliases.txt b/lib/unicore/PropertyAliases.txt index 9a4a8c77b2..f891ff254e 100644 --- a/lib/unicore/PropertyAliases.txt +++ b/lib/unicore/PropertyAliases.txt @@ -1,15 +1,14 @@ -# PropertyAliases-6.0.0.txt -# Date: 2010-05-18, 00:49:38 GMT [MD] +# PropertyAliases-6.1.0.txt +# Date: 2011-12-07, 23:40:57 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ # # This file contains aliases for properties used in the UCD. # These names can be used for XML formats of UCD data, for regular-expression # property tests, and other programmatic textual descriptions of Unicode data. -# For information on which properties are normative, see UCD.html. # # The names may be translated in appropriate environments, and additional # aliases may be useful. @@ -27,7 +26,7 @@ # Loose matching should be applied to all property names and property values, with # the exception of String Property values. With loose matching of property names and # values, the case distinctions, whitespace, and '_' are ignored. For Numeric Property -# values, numeric equivalences are applied: thus "01.00" is equivalent to "1". +# values, numeric equivalencies are applied: thus "01.00" is equivalent to "1". # # NOTE: Property value names are NOT unique across properties. 
For example: # @@ -50,140 +49,143 @@ # ================================================ # Numeric Properties # ================================================ -cjkAccountingNumeric; kAccountingNumeric -cjkOtherNumeric; kOtherNumeric -cjkPrimaryNumeric; kPrimaryNumeric -nv ; Numeric_Value +cjkAccountingNumeric ; kAccountingNumeric +cjkOtherNumeric ; kOtherNumeric +cjkPrimaryNumeric ; kPrimaryNumeric +nv ; Numeric_Value # ================================================ # String Properties # ================================================ -bmg ; Bidi_Mirroring_Glyph -cf ; Case_Folding -cjkCompatibilityVariant; kCompatibilityVariant -dm ; Decomposition_Mapping -FC_NFKC ; FC_NFKC_Closure -lc ; Lowercase_Mapping -NFKC_CF ; NFKC_Casefold -scf ; Simple_Case_Folding ; sfc -slc ; Simple_Lowercase_Mapping -stc ; Simple_Titlecase_Mapping -suc ; Simple_Uppercase_Mapping -tc ; Titlecase_Mapping -uc ; Uppercase_Mapping +bmg ; Bidi_Mirroring_Glyph +cf ; Case_Folding +cjkCompatibilityVariant ; kCompatibilityVariant +dm ; Decomposition_Mapping +FC_NFKC ; FC_NFKC_Closure +lc ; Lowercase_Mapping +NFKC_CF ; NFKC_Casefold +scf ; Simple_Case_Folding ; sfc +slc ; Simple_Lowercase_Mapping +stc ; Simple_Titlecase_Mapping +suc ; Simple_Uppercase_Mapping +tc ; Titlecase_Mapping +uc ; Uppercase_Mapping # ================================================ # Miscellaneous Properties # ================================================ -cjkIICore ; kIICore -cjkIRG_GSource; kIRG_GSource -cjkIRG_HSource; kIRG_HSource -cjkIRG_JSource; kIRG_JSource -cjkIRG_KPSource; kIRG_KPSource -cjkIRG_KSource; kIRG_KSource -cjkIRG_MSource; kIRG_MSource -cjkIRG_TSource; kIRG_TSource -cjkIRG_USource; kIRG_USource -cjkIRG_VSource; kIRG_VSource -cjkRSUnicode; kRSUnicode ; Unicode_Radical_Stroke; URS -isc ; ISO_Comment -JSN ; Jamo_Short_Name -na ; Name -na1 ; Unicode_1_Name -Name_Alias; Name_Alias +cjkIICore ; kIICore +cjkIRG_GSource ; kIRG_GSource +cjkIRG_HSource ; kIRG_HSource +cjkIRG_JSource ; kIRG_JSource 
+cjkIRG_KPSource ; kIRG_KPSource +cjkIRG_KSource ; kIRG_KSource +cjkIRG_MSource ; kIRG_MSource +cjkIRG_TSource ; kIRG_TSource +cjkIRG_USource ; kIRG_USource +cjkIRG_VSource ; kIRG_VSource +cjkRSUnicode ; kRSUnicode ; Unicode_Radical_Stroke; URS +isc ; ISO_Comment +JSN ; Jamo_Short_Name +na ; Name +na1 ; Unicode_1_Name +Name_Alias ; Name_Alias +scx ; Script_Extensions # ================================================ # Catalog Properties # ================================================ -age ; Age -blk ; Block -sc ; Script +age ; Age +blk ; Block +sc ; Script # ================================================ # Enumerated Properties # ================================================ -bc ; Bidi_Class -ccc ; Canonical_Combining_Class -dt ; Decomposition_Type -ea ; East_Asian_Width -gc ; General_Category -GCB ; Grapheme_Cluster_Break -hst ; Hangul_Syllable_Type -jg ; Joining_Group -jt ; Joining_Type -lb ; Line_Break -NFC_QC ; NFC_Quick_Check -NFD_QC ; NFD_Quick_Check -NFKC_QC ; NFKC_Quick_Check -NFKD_QC ; NFKD_Quick_Check -nt ; Numeric_Type -SB ; Sentence_Break -WB ; Word_Break +bc ; Bidi_Class +ccc ; Canonical_Combining_Class +dt ; Decomposition_Type +ea ; East_Asian_Width +gc ; General_Category +GCB ; Grapheme_Cluster_Break +hst ; Hangul_Syllable_Type +InMC ; Indic_Matra_Category +InSC ; Indic_Syllabic_Category +jg ; Joining_Group +jt ; Joining_Type +lb ; Line_Break +NFC_QC ; NFC_Quick_Check +NFD_QC ; NFD_Quick_Check +NFKC_QC ; NFKC_Quick_Check +NFKD_QC ; NFKD_Quick_Check +nt ; Numeric_Type +SB ; Sentence_Break +WB ; Word_Break # ================================================ # Binary Properties # ================================================ -AHex ; ASCII_Hex_Digit -Alpha ; Alphabetic -Bidi_C ; Bidi_Control -Bidi_M ; Bidi_Mirrored -Cased ; Cased -CE ; Composition_Exclusion -CI ; Case_Ignorable -Comp_Ex ; Full_Composition_Exclusion -CWCF ; Changes_When_Casefolded -CWCM ; Changes_When_Casemapped -CWKCF ; Changes_When_NFKC_Casefolded -CWL ; 
Changes_When_Lowercased -CWT ; Changes_When_Titlecased -CWU ; Changes_When_Uppercased -Dash ; Dash -Dep ; Deprecated -DI ; Default_Ignorable_Code_Point -Dia ; Diacritic -Ext ; Extender -Gr_Base ; Grapheme_Base -Gr_Ext ; Grapheme_Extend -Gr_Link ; Grapheme_Link -Hex ; Hex_Digit -Hyphen ; Hyphen -IDC ; ID_Continue -Ideo ; Ideographic -IDS ; ID_Start -IDSB ; IDS_Binary_Operator -IDST ; IDS_Trinary_Operator -Join_C ; Join_Control -LOE ; Logical_Order_Exception -Lower ; Lowercase -Math ; Math -NChar ; Noncharacter_Code_Point -OAlpha ; Other_Alphabetic -ODI ; Other_Default_Ignorable_Code_Point -OGr_Ext ; Other_Grapheme_Extend -OIDC ; Other_ID_Continue -OIDS ; Other_ID_Start -OLower ; Other_Lowercase -OMath ; Other_Math -OUpper ; Other_Uppercase -Pat_Syn ; Pattern_Syntax -Pat_WS ; Pattern_White_Space -QMark ; Quotation_Mark -Radical ; Radical -SD ; Soft_Dotted -STerm ; STerm -Term ; Terminal_Punctuation -UIdeo ; Unified_Ideograph -Upper ; Uppercase -VS ; Variation_Selector -WSpace ; White_Space ; space -XIDC ; XID_Continue -XIDS ; XID_Start -XO_NFC ; Expands_On_NFC -XO_NFD ; Expands_On_NFD -XO_NFKC ; Expands_On_NFKC -XO_NFKD ; Expands_On_NFKD +AHex ; ASCII_Hex_Digit +Alpha ; Alphabetic +Bidi_C ; Bidi_Control +Bidi_M ; Bidi_Mirrored +Cased ; Cased +CE ; Composition_Exclusion +CI ; Case_Ignorable +Comp_Ex ; Full_Composition_Exclusion +CWCF ; Changes_When_Casefolded +CWCM ; Changes_When_Casemapped +CWKCF ; Changes_When_NFKC_Casefolded +CWL ; Changes_When_Lowercased +CWT ; Changes_When_Titlecased +CWU ; Changes_When_Uppercased +Dash ; Dash +Dep ; Deprecated +DI ; Default_Ignorable_Code_Point +Dia ; Diacritic +Ext ; Extender +Gr_Base ; Grapheme_Base +Gr_Ext ; Grapheme_Extend +Gr_Link ; Grapheme_Link +Hex ; Hex_Digit +Hyphen ; Hyphen +IDC ; ID_Continue +Ideo ; Ideographic +IDS ; ID_Start +IDSB ; IDS_Binary_Operator +IDST ; IDS_Trinary_Operator +Join_C ; Join_Control +LOE ; Logical_Order_Exception +Lower ; Lowercase +Math ; Math +NChar ; Noncharacter_Code_Point +OAlpha ; 
Other_Alphabetic +ODI ; Other_Default_Ignorable_Code_Point +OGr_Ext ; Other_Grapheme_Extend +OIDC ; Other_ID_Continue +OIDS ; Other_ID_Start +OLower ; Other_Lowercase +OMath ; Other_Math +OUpper ; Other_Uppercase +Pat_Syn ; Pattern_Syntax +Pat_WS ; Pattern_White_Space +QMark ; Quotation_Mark +Radical ; Radical +SD ; Soft_Dotted +STerm ; STerm +Term ; Terminal_Punctuation +UIdeo ; Unified_Ideograph +Upper ; Uppercase +VS ; Variation_Selector +WSpace ; White_Space ; space +XIDC ; XID_Continue +XIDS ; XID_Start +XO_NFC ; Expands_On_NFC +XO_NFD ; Expands_On_NFD +XO_NFKC ; Expands_On_NFKC +XO_NFKD ; Expands_On_NFKD # ================================================ -# Total: 112 +# Total: 115 # EOF diff --git a/lib/unicore/ReadMe.txt b/lib/unicore/ReadMe.txt index 89d5cb39ef..9fd93d8960 100644 --- a/lib/unicore/ReadMe.txt +++ b/lib/unicore/ReadMe.txt @@ -1,7 +1,7 @@ -# Date: 2010-10-05, 16:26:38 PDT [KW] +# Date: 2012-01-26, 22:03:00 GMT [KW] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2012 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # # For documentation, see NamesList.html, @@ -10,4 +10,4 @@ # This directory contains final data files -for the Unicode Character Database (UCD) for Unicode 6.0.0. +for the Unicode Character Database (UCD) for Unicode 6.1.0. diff --git a/lib/unicore/ScriptExtensions.txt b/lib/unicore/ScriptExtensions.txt index 0ddb8bcb73..301ccc21f4 100644 --- a/lib/unicore/ScriptExtensions.txt +++ b/lib/unicore/ScriptExtensions.txt @@ -1,15 +1,22 @@ -# ScriptExtensions-6.0.0.txt -# Date: 2010-08-30, 01:48:36 GMT [MD] +# ScriptExtensions-6.1.0.txt +# Date: 2011-12-05, 22:51:22 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. 
# For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ +# The Script_Extensions property indicates which characters are commonly used +# with a limited number of scripts, but with more than one. +# The property is provisional: values are expected to change over time as more information becomes available. +# For each code point, there is one or more property values. Each such value is a Script property value. +# For more information, see: +# UAX #24: http://www.unicode.org/reports/tr24/ and +# UAX #44: http://www.unicode.org/reports/tr44/ # -# The Script Extensions contain data about characters that belong to multiple scripts. -# This data is provisional, and expected to change over time, as more information becomes available. -# The script values are space-delimited short values, such as Hang for Hangul. -# For more information, see UAX #24: http://www.unicode.org/reports/tr24/. +# All code points not explicitly listed for Script_Extensions +# have as their value the corresponding Script property value +# +# @missing: 0000..10FFFF; <script> # ================================================ @@ -19,11 +26,10 @@ # Script_Extensions=Arab Syrc -0640 ; Arab Syrc # Lm ARABIC TATWEEL 064B..0655 ; Arab Syrc # Mn [11] ARABIC FATHATAN..ARABIC HAMZA BELOW 0670 ; Arab Syrc # Mn ARABIC LETTER SUPERSCRIPT ALEF -# Total code points: 13 +# Total code points: 12 # ================================================ @@ -53,6 +59,16 @@ FDFD ; Arab Thaa # So ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHE # ================================================ +# Script_Extensions=Cprt Linb + +10100..10102 ; Cprt Linb # Po [3] AEGEAN WORD SEPARATOR LINE..AEGEAN CHECK MARK +10107..10133 ; Cprt Linb # No [45] AEGEAN NUMBER ONE..AEGEAN NUMBER NINETY THOUSAND +10137..1013F ; Cprt Linb # So [9] AEGEAN WEIGHT BASE UNIT..AEGEAN MEASURE THIRD SUBUNIT + +# Total code points: 57 + +# ================================================ + # 
Script_Extensions=Hira Kana 3031..3035 ; Hira Kana # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF @@ -76,6 +92,14 @@ FF9E..FF9F ; Hira Kana # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFW # ================================================ +# Script_Extensions=Arab Mand Syrc + +0640 ; Arab Mand Syrc # Lm ARABIC TATWEEL + +# Total code points: 1 + +# ================================================ + # Script_Extensions=Arab Syrc Thaa 060C ; Arab Syrc Thaa # Po ARABIC COMMA @@ -140,6 +164,17 @@ FE45..FE46 ; Bopo Hang Hani Hira Kana # Po [2] SESAME DOT..WHITE SESAME DOT # ================================================ +# Script_Extensions=Deva Gujr Guru Kthi Takr + +A830..A835 ; Deva Gujr Guru Kthi Takr # No [6] NORTH INDIC FRACTION ONE QUARTER..NORTH INDIC FRACTION THREE SIXTEENTHS +A836..A837 ; Deva Gujr Guru Kthi Takr # So [2] NORTH INDIC QUARTER MARK..NORTH INDIC PLACEHOLDER MARK +A838 ; Deva Gujr Guru Kthi Takr # Sc NORTH INDIC RUPEE MARK +A839 ; Deva Gujr Guru Kthi Takr # So NORTH INDIC QUANTITY MARK + +# Total code points: 10 + +# ================================================ + # Script_Extensions=Bopo Hang Hani Hira Kana Yiii 3001..3002 ; Bopo Hang Hani Hira Kana Yiii # Po [2] IDEOGRAPHIC COMMA..IDEOGRAPHIC FULL STOP diff --git a/lib/unicore/Scripts.txt b/lib/unicore/Scripts.txt index 70a670703a..2516f889d6 100644 --- a/lib/unicore/Scripts.txt +++ b/lib/unicore/Scripts.txt @@ -1,8 +1,8 @@ -# Scripts-6.0.0.txt -# Date: 2010-08-19, 00:48:47 GMT [MD] +# Scripts-6.1.0.txt +# Date: 2011-11-27, 05:10:50 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. 
# For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ @@ -47,7 +47,8 @@ 00A0 ; Common # Zs NO-BREAK SPACE 00A1 ; Common # Po INVERTED EXCLAMATION MARK 00A2..00A5 ; Common # Sc [4] CENT SIGN..YEN SIGN -00A6..00A7 ; Common # So [2] BROKEN BAR..SECTION SIGN +00A6 ; Common # So BROKEN BAR +00A7 ; Common # Po SECTION SIGN 00A8 ; Common # Sk DIAERESIS 00A9 ; Common # So COPYRIGHT SIGN 00AB ; Common # Pi LEFT-POINTING DOUBLE ANGLE QUOTATION MARK @@ -60,8 +61,7 @@ 00B2..00B3 ; Common # No [2] SUPERSCRIPT TWO..SUPERSCRIPT THREE 00B4 ; Common # Sk ACUTE ACCENT 00B5 ; Common # L& MICRO SIGN -00B6 ; Common # So PILCROW SIGN -00B7 ; Common # Po MIDDLE DOT +00B6..00B7 ; Common # Po [2] PILCROW SIGN..MIDDLE DOT 00B8 ; Common # Sk CEDILLA 00B9 ; Common # No SUPERSCRIPT ONE 00BB ; Common # Pf RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK @@ -90,7 +90,6 @@ 0660..0669 ; Common # Nd [10] ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE 06DD ; Common # Cf ARABIC END OF AYAH 0964..0965 ; Common # Po [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA -0970 ; Common # Po DEVANAGARI ABBREVIATION SIGN 0E3F ; Common # Sc THAI CURRENCY SYMBOL BAHT 0FD5..0FD8 ; Common # So [4] RIGHT-FACING SVASTI SIGN..LEFT-FACING SVASTI SIGN WITH DOTS 10FB ; Common # Po GEORGIAN PARAGRAPH SEPARATOR @@ -102,7 +101,8 @@ 1CE1 ; Common # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA 1CE9..1CEC ; Common # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL 1CEE..1CF1 ; Common # Lo [4] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ANUSVARA UBHAYATO MUKHA -1CF2 ; Common # Mc VEDIC SIGN ARDHAVISARGA +1CF2..1CF3 ; Common # Mc [2] VEDIC SIGN ARDHAVISARGA..VEDIC SIGN ROTATED ARDHAVISARGA +1CF5..1CF6 ; Common # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA 2000..200A ; Common # Zs [11] EN QUAD..HAIR SPACE 200B ; Common # Cf ZERO WIDTH SPACE 200E..200F ; Common # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK @@ -247,9 
+247,7 @@ 27C0..27C4 ; Common # Sm [5] THREE DIMENSIONAL ANGLE..OPEN SUPERSET 27C5 ; Common # Ps LEFT S-SHAPED BAG DELIMITER 27C6 ; Common # Pe RIGHT S-SHAPED BAG DELIMITER -27C7..27CA ; Common # Sm [4] OR WITH DOT INSIDE..VERTICAL BAR WITH HORIZONTAL STROKE -27CC ; Common # Sm LONG DIVISION -27CE..27E5 ; Common # Sm [24] SQUARED LOGICAL AND..WHITE SQUARE WITH RIGHTWARDS TICK +27C7..27E5 ; Common # Sm [31] OR WITH DOT INSIDE..WHITE SQUARE WITH RIGHTWARDS TICK 27E6 ; Common # Ps MATHEMATICAL LEFT WHITE SQUARE BRACKET 27E7 ; Common # Pe MATHEMATICAL RIGHT WHITE SQUARE BRACKET 27E8 ; Common # Ps MATHEMATICAL LEFT ANGLE BRACKET @@ -329,7 +327,8 @@ 2E29 ; Common # Pe RIGHT DOUBLE PARENTHESIS 2E2A..2E2E ; Common # Po [5] TWO DOTS OVER ONE DOT PUNCTUATION..REVERSED QUESTION MARK 2E2F ; Common # Lm VERTICAL TILDE -2E30..2E31 ; Common # Po [2] RING POINT..WORD SEPARATOR MIDDLE DOT +2E30..2E39 ; Common # Po [10] RING POINT..TOP HALF SECTION SIGN +2E3A..2E3B ; Common # Pd [2] TWO-EM DASH..THREE-EM DASH 2FF0..2FFB ; Common # So [12] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID 3000 ; Common # Zs IDEOGRAPHIC SPACE 3001..3003 ; Common # Po [3] IDEOGRAPHIC COMMA..DITTO MARK @@ -373,7 +372,9 @@ 3196..319F ; Common # So [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK 31C0..31E3 ; Common # So [36] CJK STROKE T..CJK STROKE Q 3220..3229 ; Common # No [10] PARENTHESIZED IDEOGRAPH ONE..PARENTHESIZED IDEOGRAPH TEN -322A..3250 ; Common # So [39] PARENTHESIZED IDEOGRAPH MOON..PARTNERSHIP SIGN +322A..3247 ; Common # So [30] PARENTHESIZED IDEOGRAPH MOON..CIRCLED IDEOGRAPH KOTO +3248..324F ; Common # No [8] CIRCLED NUMBER TEN ON BLACK SQUARE..CIRCLED NUMBER EIGHTY ON BLACK SQUARE +3250 ; Common # So PARTNERSHIP SIGN 3251..325F ; Common # No [15] CIRCLED NUMBER TWENTY ONE..CIRCLED NUMBER THIRTY FIVE 327F ; Common # So KOREAN STANDARD SYMBOL 3280..3289 ; Common # No [10] CIRCLED IDEOGRAPH ONE..CIRCLED IDEOGRAPH TEN @@ -481,8 
+482,7 @@ FFE9..FFEC ; Common # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS FFED..FFEE ; Common # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE FFF9..FFFB ; Common # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATION TERMINATOR FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER -10100..10101 ; Common # Po [2] AEGEAN WORD SEPARATOR LINE..AEGEAN WORD SEPARATOR DOT -10102 ; Common # So AEGEAN CHECK MARK +10100..10102 ; Common # Po [3] AEGEAN WORD SEPARATOR LINE..AEGEAN CHECK MARK 10107..10133 ; Common # No [45] AEGEAN NUMBER ONE..AEGEAN NUMBER NINETY THOUSAND 10137..1013F ; Common # So [9] AEGEAN WEIGHT BASE UNIT..AEGEAN MEASURE THIRD SUBUNIT 10190..1019B ; Common # So [12] ROMAN SEXTANS SIGN..ROMAN CENTURIAL SIGN @@ -548,7 +548,7 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR 1F0D1..1F0DF ; Common # So [15] PLAYING CARD ACE OF CLUBS..PLAYING CARD WHITE JOKER 1F100..1F10A ; Common # No [11] DIGIT ZERO FULL STOP..DIGIT NINE COMMA 1F110..1F12E ; Common # So [31] PARENTHESIZED LATIN CAPITAL LETTER A..CIRCLED WZ -1F130..1F169 ; Common # So [58] SQUARED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z +1F130..1F16B ; Common # So [60] SQUARED LATIN CAPITAL LETTER A..RAISED MD SIGN 1F170..1F19A ; Common # So [43] NEGATIVE SQUARED LATIN CAPITAL LETTER A..SQUARED VS 1F1E6..1F1FF ; Common # So [26] REGIONAL INDICATOR SYMBOL LETTER A..REGIONAL INDICATOR SYMBOL LETTER Z 1F201..1F202 ; Common # So [2] SQUARED KATAKANA KOKO..SQUARED KATAKANA SA @@ -567,33 +567,23 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR 1F442..1F4F7 ; Common # So [182] EAR..CAMERA 1F4F9..1F4FC ; Common # So [4] VIDEO CAMERA..VIDEOCASSETTE 1F500..1F53D ; Common # So [62] TWISTED RIGHTWARDS ARROWS..DOWN-POINTING SMALL RED TRIANGLE +1F540..1F543 ; Common # So [4] CIRCLED CROSS POMMEE..NOTCHED LEFT SEMICIRCLE WITH THREE DOTS 1F550..1F567 ; Common # So [24] CLOCK FACE ONE 
OCLOCK..CLOCK FACE TWELVE-THIRTY -1F5FB..1F5FF ; Common # So [5] MOUNT FUJI..MOYAI -1F601..1F610 ; Common # So [16] GRINNING FACE WITH SMILING EYES..NEUTRAL FACE -1F612..1F614 ; Common # So [3] UNAMUSED FACE..PENSIVE FACE -1F616 ; Common # So CONFOUNDED FACE -1F618 ; Common # So FACE THROWING A KISS -1F61A ; Common # So KISSING FACE WITH CLOSED EYES -1F61C..1F61E ; Common # So [3] FACE WITH STUCK-OUT TONGUE AND WINKING EYE..DISAPPOINTED FACE -1F620..1F625 ; Common # So [6] ANGRY FACE..DISAPPOINTED BUT RELIEVED FACE -1F628..1F62B ; Common # So [4] FEARFUL FACE..TIRED FACE -1F62D ; Common # So LOUDLY CRYING FACE -1F630..1F633 ; Common # So [4] FACE WITH OPEN MOUTH AND COLD SWEAT..FLUSHED FACE -1F635..1F640 ; Common # So [12] DIZZY FACE..WEARY CAT FACE +1F5FB..1F640 ; Common # So [70] MOUNT FUJI..WEARY CAT FACE 1F645..1F64F ; Common # So [11] FACE WITH NO GOOD GESTURE..PERSON WITH FOLDED HANDS 1F680..1F6C5 ; Common # So [70] ROCKET..LEFT LUGGAGE 1F700..1F773 ; Common # So [116] ALCHEMICAL SYMBOL FOR QUINTESSENCE..ALCHEMICAL SYMBOL FOR HALF OUNCE E0001 ; Common # Cf LANGUAGE TAG E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG -# Total code points: 6379 +# Total code points: 6412 # ================================================ 0041..005A ; Latin # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z 0061..007A ; Latin # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z -00AA ; Latin # L& FEMININE ORDINAL INDICATOR -00BA ; Latin # L& MASCULINE ORDINAL INDICATOR +00AA ; Latin # Lo FEMININE ORDINAL INDICATOR +00BA ; Latin # Lo MASCULINE ORDINAL INDICATOR 00C0..00D6 ; Latin # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS 00D8..00F6 ; Latin # L& [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS 00F8..01BA ; Latin # L& [195] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL @@ -607,7 +597,7 @@ E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG 02E0..02E4 ; Latin # Lm [5] MODIFIER 
LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP 1D00..1D25 ; Latin # L& [38] LATIN LETTER SMALL CAPITAL A..LATIN LETTER AIN 1D2C..1D5C ; Latin # Lm [49] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL AIN -1D62..1D65 ; Latin # L& [4] LATIN SUBSCRIPT SMALL LETTER I..LATIN SUBSCRIPT SMALL LETTER V +1D62..1D65 ; Latin # Lm [4] LATIN SUBSCRIPT SMALL LETTER I..LATIN SUBSCRIPT SMALL LETTER V 1D6B..1D77 ; Latin # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G 1D79..1D9A ; Latin # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK 1D9B..1DBE ; Latin # Lm [36] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL EZH @@ -621,22 +611,23 @@ E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG 2160..2182 ; Latin # Nl [35] ROMAN NUMERAL ONE..ROMAN NUMERAL TEN THOUSAND 2183..2184 ; Latin # L& [2] ROMAN NUMERAL REVERSED ONE HUNDRED..LATIN SMALL LETTER REVERSED C 2185..2188 ; Latin # Nl [4] ROMAN NUMERAL SIX LATE FORM..ROMAN NUMERAL ONE HUNDRED THOUSAND -2C60..2C7C ; Latin # L& [29] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN SUBSCRIPT SMALL LETTER J -2C7D ; Latin # Lm MODIFIER LETTER CAPITAL V +2C60..2C7B ; Latin # L& [28] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN LETTER SMALL CAPITAL TURNED E +2C7C..2C7D ; Latin # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V 2C7E..2C7F ; Latin # L& [2] LATIN CAPITAL LETTER S WITH SWASH TAIL..LATIN CAPITAL LETTER Z WITH SWASH TAIL A722..A76F ; Latin # L& [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN SMALL LETTER CON A770 ; Latin # Lm MODIFIER LETTER US A771..A787 ; Latin # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T A78B..A78E ; Latin # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT -A790..A791 ; Latin # L& [2] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER N WITH DESCENDER -A7A0..A7A9 ; Latin # L& [10] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN SMALL LETTER S WITH OBLIQUE 
STROKE +A790..A793 ; Latin # L& [4] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER C WITH BAR +A7A0..A7AA ; Latin # L& [11] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN CAPITAL LETTER H WITH HOOK +A7F8..A7F9 ; Latin # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE A7FA ; Latin # L& LATIN LETTER SMALL CAPITAL TURNED M A7FB..A7FF ; Latin # Lo [5] LATIN EPIGRAPHIC LETTER REVERSED F..LATIN EPIGRAPHIC LETTER ARCHAIC M FB00..FB06 ; Latin # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST FF21..FF3A ; Latin # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z FF41..FF5A ; Latin # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z -# Total code points: 1267 +# Total code points: 1272 # ================================================ @@ -656,7 +647,7 @@ FF41..FF5A ; Latin # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 03F7..03FF ; Greek # L& [9] GREEK CAPITAL LETTER SHO..GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL 1D26..1D2A ; Greek # L& [5] GREEK LETTER SMALL CAPITAL GAMMA..GREEK LETTER SMALL CAPITAL PSI 1D5D..1D61 ; Greek # Lm [5] MODIFIER LETTER SMALL BETA..MODIFIER LETTER SMALL CHI -1D66..1D6A ; Greek # L& [5] GREEK SUBSCRIPT SMALL LETTER BETA..GREEK SUBSCRIPT SMALL LETTER CHI +1D66..1D6A ; Greek # Lm [5] GREEK SUBSCRIPT SMALL LETTER BETA..GREEK SUBSCRIPT SMALL LETTER CHI 1DBF ; Greek # Lm MODIFIER LETTER SMALL THETA 1F00..1F15 ; Greek # L& [22] GREEK SMALL LETTER ALPHA WITH PSILI..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA 1F18..1F1D ; Greek # L& [6] GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA @@ -710,12 +701,13 @@ A66E ; Cyrillic # Lo CYRILLIC LETTER MULTIOCULAR O A66F ; Cyrillic # Mn COMBINING CYRILLIC VZMET A670..A672 ; Cyrillic # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN A673 ; Cyrillic # Po SLAVONIC ASTERISK -A67C..A67D ; Cyrillic # Mn [2] COMBINING 
CYRILLIC KAVYKA..COMBINING CYRILLIC PAYEROK +A674..A67D ; Cyrillic # Mn [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK A67E ; Cyrillic # Po CYRILLIC KAVYKA A67F ; Cyrillic # Lm CYRILLIC PAYEROK A680..A697 ; Cyrillic # L& [24] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER SHWE +A69F ; Cyrillic # Mn COMBINING CYRILLIC LETTER IOTIFIED E -# Total code points: 408 +# Total code points: 417 # ================================================ @@ -724,9 +716,10 @@ A680..A697 ; Cyrillic # L& [24] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL 055A..055F ; Armenian # Po [6] ARMENIAN APOSTROPHE..ARMENIAN ABBREVIATION MARK 0561..0587 ; Armenian # L& [39] ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LIGATURE ECH YIWN 058A ; Armenian # Pd ARMENIAN HYPHEN +058F ; Armenian # Sc ARMENIAN DRAM SIGN FB13..FB17 ; Armenian # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH -# Total code points: 90 +# Total code points: 91 # ================================================ @@ -757,7 +750,7 @@ FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATU # ================================================ -0600..0603 ; Arabic # Cf [4] ARABIC NUMBER SIGN..ARABIC SIGN SAFHA +0600..0604 ; Arabic # Cf [5] ARABIC NUMBER SIGN..ARABIC SIGN SAMVAT 0606..0608 ; Arabic # Sm [3] ARABIC-INDIC CUBE ROOT..ARABIC RAY 0609..060A ; Arabic # Po [2] ARABIC-INDIC PER MILLE SIGN..ARABIC-INDIC PER TEN THOUSAND SIGN 060B ; Arabic # Sc AFGHANI SIGN @@ -786,6 +779,9 @@ FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATU 06FD..06FE ; Arabic # So [2] ARABIC SIGN SINDHI AMPERSAND..ARABIC SIGN SINDHI POSTPOSITION MEN 06FF ; Arabic # Lo ARABIC LETTER HEH WITH INVERTED V 0750..077F ; Arabic # Lo [48] ARABIC LETTER BEH WITH THREE DOTS HORIZONTALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS ABOVE +08A0 ; Arabic # Lo ARABIC LETTER BEH WITH SMALL V BELOW +08A2..08AC ; Arabic # Lo [11] ARABIC LETTER JEEM WITH TWO DOTS ABOVE..ARABIC LETTER 
ROHINGYA YEH +08E4..08FE ; Arabic # Mn [27] ARABIC CURLY FATHA..ARABIC DAMMA WITH DOT FB50..FBB1 ; Arabic # Lo [98] ARABIC LETTER ALEF WASLA ISOLATED FORM..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM FBB2..FBC1 ; Arabic # Sk [16] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL SMALL TAH BELOW FBD3..FD3D ; Arabic # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM @@ -796,8 +792,42 @@ FDFC ; Arabic # Sc RIAL SIGN FE70..FE74 ; Arabic # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN ISOLATED FORM FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM 10E60..10E7E ; Arabic # No [31] RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS - -# Total code points: 1051 +1EE00..1EE03 ; Arabic # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; Arabic # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; Arabic # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; Arabic # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; Arabic # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; Arabic # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; Arabic # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; Arabic # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; Arabic # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; Arabic # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; Arabic # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; Arabic # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; Arabic # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; Arabic # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; Arabic # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; Arabic # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; Arabic # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; Arabic # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; 
Arabic # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; Arabic # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; Arabic # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; Arabic # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; Arabic # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; Arabic # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; Arabic # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; Arabic # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; Arabic # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; Arabic # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; Arabic # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; Arabic # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; Arabic # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; Arabic # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; Arabic # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN +1EEF0..1EEF1 ; Arabic # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL + +# Total code points: 1234 # ================================================ @@ -838,6 +868,7 @@ FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LA 0958..0961 ; Devanagari # Lo [10] DEVANAGARI LETTER QA..DEVANAGARI LETTER VOCALIC LL 0962..0963 ; Devanagari # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL 0966..096F ; Devanagari # Nd [10] DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE +0970 ; Devanagari # Po DEVANAGARI ABBREVIATION SIGN 0971 ; Devanagari # Lm DEVANAGARI SIGN HIGH 
SPACING DOT 0972..0977 ; Devanagari # Lo [6] DEVANAGARI LETTER CANDRA A..DEVANAGARI LETTER UUE 0979..097F ; Devanagari # Lo [7] DEVANAGARI LETTER ZHA..DEVANAGARI LETTER BBA @@ -846,7 +877,7 @@ A8F2..A8F7 ; Devanagari # Lo [6] DEVANAGARI SIGN SPACING CANDRABINDU..DEVAN A8F8..A8FA ; Devanagari # Po [3] DEVANAGARI SIGN PUSHPIKA..DEVANAGARI CARET A8FB ; Devanagari # Lo DEVANAGARI HEADSTROKE -# Total code points: 150 +# Total code points: 151 # ================================================ @@ -927,9 +958,10 @@ A8FB ; Devanagari # Lo DEVANAGARI HEADSTROKE 0AE0..0AE1 ; Gujarati # Lo [2] GUJARATI LETTER VOCALIC RR..GUJARATI LETTER VOCALIC LL 0AE2..0AE3 ; Gujarati # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL 0AE6..0AEF ; Gujarati # Nd [10] GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE +0AF0 ; Gujarati # Po GUJARATI ABBREVIATION SIGN 0AF1 ; Gujarati # Sc GUJARATI RUPEE SIGN -# Total code points: 83 +# Total code points: 84 # ================================================ @@ -1119,16 +1151,18 @@ A8FB ; Devanagari # Lo DEVANAGARI HEADSTROKE 0EC6 ; Lao # Lm LAO KO LA 0EC8..0ECD ; Lao # Mn [6] LAO TONE MAI EK..LAO NIGGAHITA 0ED0..0ED9 ; Lao # Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE -0EDC..0EDD ; Lao # Lo [2] LAO HO NO..LAO HO MO +0EDC..0EDF ; Lao # Lo [4] LAO HO NO..LAO LETTER KHMU NYO -# Total code points: 65 +# Total code points: 67 # ================================================ 0F00 ; Tibetan # Lo TIBETAN SYLLABLE OM 0F01..0F03 ; Tibetan # So [3] TIBETAN MARK GTER YIG MGO TRUNCATED A..TIBETAN MARK GTER YIG MGO -UM GTER TSHEG MA 0F04..0F12 ; Tibetan # Po [15] TIBETAN MARK INITIAL YIG MGO MDUN MA..TIBETAN MARK RGYA GRAM SHAD -0F13..0F17 ; Tibetan # So [5] TIBETAN MARK CARET -DZUD RTAGS ME LONG CAN..TIBETAN ASTROLOGICAL SIGN SGRA GCAN -CHAR RTAGS +0F13 ; Tibetan # So TIBETAN MARK CARET -DZUD RTAGS ME LONG CAN +0F14 ; Tibetan # Po TIBETAN MARK GTER TSHEG +0F15..0F17 ; Tibetan # So [3] TIBETAN LOGOTYPE SIGN CHAD RTAGS..TIBETAN ASTROLOGICAL SIGN SGRA 
GCAN -CHAR RTAGS 0F18..0F19 ; Tibetan # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS 0F1A..0F1F ; Tibetan # So [6] TIBETAN SIGN RDEL DKAR GCIG..TIBETAN SIGN RDEL DKAR RDEL NAG 0F20..0F29 ; Tibetan # Nd [10] TIBETAN DIGIT ZERO..TIBETAN DIGIT NINE @@ -1212,16 +1246,21 @@ AA7B ; Myanmar # Mc MYANMAR SIGN PAO KAREN TONE # ================================================ 10A0..10C5 ; Georgian # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; Georgian # L& GEORGIAN CAPITAL LETTER YN +10CD ; Georgian # L& GEORGIAN CAPITAL LETTER AEN 10D0..10FA ; Georgian # Lo [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN 10FC ; Georgian # Lm MODIFIER LETTER GEORGIAN NAR +10FD..10FF ; Georgian # Lo [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN 2D00..2D25 ; Georgian # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE +2D27 ; Georgian # L& GEORGIAN SMALL LETTER YN +2D2D ; Georgian # L& GEORGIAN SMALL LETTER AEN -# Total code points: 120 +# Total code points: 127 # ================================================ 1100..11FF ; Hangul # Lo [256] HANGUL CHOSEONG KIYEOK..HANGUL JONGSEONG SSANGNIEUN -302E..302F ; Hangul # Mn [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK +302E..302F ; Hangul # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK 3131..318E ; Hangul # Lo [94] HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE 3200..321E ; Hangul # So [31] PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED KOREAN CHARACTER O HU 3260..327E ; Hangul # So [31] CIRCLED HANGUL KIYEOK..CIRCLED HANGUL IEUNG U @@ -1256,8 +1295,7 @@ FFDA..FFDC ; Hangul # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL L 1312..1315 ; Ethiopic # Lo [4] ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE 1318..135A ; Ethiopic # Lo [67] ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA 135D..135F ; Ethiopic # Mn [3] ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK..ETHIOPIC COMBINING GEMINATION MARK -1360 ; Ethiopic # So ETHIOPIC SECTION MARK 
-1361..1368 ; Ethiopic # Po [8] ETHIOPIC WORDSPACE..ETHIOPIC PARAGRAPH SEPARATOR +1360..1368 ; Ethiopic # Po [9] ETHIOPIC SECTION MARK..ETHIOPIC PARAGRAPH SEPARATOR 1369..137C ; Ethiopic # No [20] ETHIOPIC DIGIT ONE..ETHIOPIC NUMBER TEN THOUSAND 1380..138F ; Ethiopic # Lo [16] ETHIOPIC SYLLABLE SEBATBEIT MWA..ETHIOPIC SYLLABLE PWE 1390..1399 ; Ethiopic # So [10] ETHIOPIC TONAL MARK YIZET..ETHIOPIC TONAL MARK KURT @@ -1313,7 +1351,7 @@ AB28..AB2E ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE BBA..ETHIOPIC SYLLABLE BBO # ================================================ 1780..17B3 ; Khmer # Lo [52] KHMER LETTER KA..KHMER INDEPENDENT VOWEL QAU -17B4..17B5 ; Khmer # Cf [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA +17B4..17B5 ; Khmer # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA 17B6 ; Khmer # Mc KHMER VOWEL SIGN AA 17B7..17BD ; Khmer # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA 17BE..17C5 ; Khmer # Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU @@ -1393,16 +1431,15 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK 3038..303A ; Han # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY 303B ; Han # Lm VERTICAL IDEOGRAPHIC ITERATION MARK 3400..4DB5 ; Han # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5 -4E00..9FCB ; Han # Lo [20940] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCB -F900..FA2D ; Han # Lo [302] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA2D -FA30..FA6D ; Han # Lo [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D +4E00..9FCC ; Han # Lo [20941] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCC +F900..FA6D ; Han # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D FA70..FAD9 ; Han # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 20000..2A6D6 ; Han # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6 2A700..2B734 ; Han # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED 
IDEOGRAPH-2B734 2B740..2B81D ; Han # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D 2F800..2FA1D ; Han # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D -# Total code points: 75960 +# Total code points: 75963 # ================================================ @@ -1447,6 +1484,7 @@ A490..A4C6 ; Yi # So [55] YI RADICAL QOT..YI RADICAL KE 1CD4..1CE0 ; Inherited # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA 1CE2..1CE8 ; Inherited # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL 1CED ; Inherited # Mn VEDIC SIGN TIRYAK +1CF4 ; Inherited # Mn VEDIC TONE CANDRA ABOVE 1DC0..1DE6 ; Inherited # Mn [39] COMBINING DOTTED GRAVE ACCENT..COMBINING LATIN SMALL LETTER Z 1DFC..1DFF ; Inherited # Mn [4] COMBINING DOUBLE INVERTED BREVE BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW 200C..200D ; Inherited # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER @@ -1466,7 +1504,7 @@ FE20..FE26 ; Inherited # Mn [7] COMBINING LIGATURE LEFT HALF..COMBINING CON 1D1AA..1D1AD ; Inherited # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO E0100..E01EF ; Inherited # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 523 +# Total code points: 524 # ================================================ @@ -1587,11 +1625,12 @@ E0100..E01EF ; Inherited # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-2 2CE5..2CEA ; Coptic # So [6] COPTIC SYMBOL MI RO..COPTIC SYMBOL SHIMA SIMA 2CEB..2CEE ; Coptic # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA 2CEF..2CF1 ; Coptic # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS +2CF2..2CF3 ; Coptic # L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI 2CF9..2CFC ; Coptic # Po [4] COPTIC OLD NUBIAN FULL STOP..COPTIC OLD NUBIAN VERSE DIVIDER 2CFD ; Coptic # No COPTIC FRACTION ONE HALF 2CFE..2CFF 
; Coptic # Po [2] COPTIC FULL STOP..COPTIC MORPHOLOGICAL DIVIDER -# Total code points: 135 +# Total code points: 137 # ================================================ @@ -1614,12 +1653,12 @@ E0100..E01EF ; Inherited # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-2 # ================================================ -2D30..2D65 ; Tifinagh # Lo [54] TIFINAGH LETTER YA..TIFINAGH LETTER YAZZ +2D30..2D67 ; Tifinagh # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO 2D6F ; Tifinagh # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK 2D70 ; Tifinagh # Po TIFINAGH SEPARATOR MARK 2D7F ; Tifinagh # Mn TIFINAGH CONSONANT JOINER -# Total code points: 57 +# Total code points: 59 # ================================================ @@ -1729,10 +1768,14 @@ A874..A877 ; Phags_Pa # Po [4] PHAGS-PA SINGLE HEAD MARK..PHAGS-PA MARK DOU 1BA6..1BA7 ; Sundanese # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG 1BA8..1BA9 ; Sundanese # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG 1BAA ; Sundanese # Mc SUNDANESE SIGN PAMAAEH +1BAB ; Sundanese # Mn SUNDANESE SIGN VIRAMA +1BAC..1BAD ; Sundanese # Mc [2] SUNDANESE CONSONANT SIGN PASANGAN MA..SUNDANESE CONSONANT SIGN PASANGAN WA 1BAE..1BAF ; Sundanese # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA 1BB0..1BB9 ; Sundanese # Nd [10] SUNDANESE DIGIT ZERO..SUNDANESE DIGIT NINE +1BBA..1BBF ; Sundanese # Lo [6] SUNDANESE AVAGRAHA..SUNDANESE LETTER FINAL M +1CC0..1CC7 ; Sundanese # Po [8] SUNDANESE PUNCTUATION BINDU SURYA..SUNDANESE PUNCTUATION BINDU BA SATANGA -# Total code points: 55 +# Total code points: 72 # ================================================ @@ -1940,6 +1983,15 @@ A9DE..A9DF ; Javanese # Po [2] JAVANESE PADA TIRTA TUMETES..JAVANESE PADA I # ================================================ +AAE0..AAEA ; Meetei_Mayek # Lo [11] MEETEI MAYEK LETTER E..MEETEI MAYEK LETTER SSA +AAEB ; Meetei_Mayek # Mc MEETEI MAYEK VOWEL SIGN II +AAEC..AAED ; Meetei_Mayek # Mn [2] MEETEI MAYEK VOWEL SIGN 
UU..MEETEI MAYEK VOWEL SIGN AAI +AAEE..AAEF ; Meetei_Mayek # Mc [2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU +AAF0..AAF1 ; Meetei_Mayek # Po [2] MEETEI MAYEK CHEIKHAN..MEETEI MAYEK AHANG KHUDAM +AAF2 ; Meetei_Mayek # Lo MEETEI MAYEK ANJI +AAF3..AAF4 ; Meetei_Mayek # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK +AAF5 ; Meetei_Mayek # Mc MEETEI MAYEK VOWEL SIGN VISARGA +AAF6 ; Meetei_Mayek # Mn MEETEI MAYEK VIRAMA ABC0..ABE2 ; Meetei_Mayek # Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM ABE3..ABE4 ; Meetei_Mayek # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP ABE5 ; Meetei_Mayek # Mn MEETEI MAYEK VOWEL SIGN ANAP @@ -1951,7 +2003,7 @@ ABEC ; Meetei_Mayek # Mc MEETEI MAYEK LUM IYEK ABED ; Meetei_Mayek # Mn MEETEI MAYEK APUN IYEK ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE -# Total code points: 56 +# Total code points: 79 # ================================================ @@ -2040,4 +2092,74 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI # Total code points: 29 +# ================================================ + +11100..11102 ; Chakma # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11103..11126 ; Chakma # Lo [36] CHAKMA LETTER AA..CHAKMA LETTER HAA +11127..1112B ; Chakma # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112C ; Chakma # Mc CHAKMA VOWEL SIGN E +1112D..11134 ; Chakma # Mn [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA +11136..1113F ; Chakma # Nd [10] CHAKMA DIGIT ZERO..CHAKMA DIGIT NINE +11140..11143 ; Chakma # Po [4] CHAKMA SECTION MARK..CHAKMA QUESTION MARK + +# Total code points: 67 + +# ================================================ + +109A0..109B7 ; Meroitic_Cursive # Lo [24] MEROITIC CURSIVE LETTER A..MEROITIC CURSIVE LETTER DA +109BE..109BF ; Meroitic_Cursive # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN + +# Total code points: 26 + +# 
================================================ + +10980..1099F ; Meroitic_Hieroglyphs # Lo [32] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC HIEROGLYPHIC SYMBOL VIDJ-2 + +# Total code points: 32 + +# ================================================ + +16F00..16F44 ; Miao # Lo [69] MIAO LETTER PA..MIAO LETTER HHA +16F50 ; Miao # Lo MIAO LETTER NASALIZATION +16F51..16F7E ; Miao # Mc [46] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN NG +16F8F..16F92 ; Miao # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW +16F93..16F9F ; Miao # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 + +# Total code points: 133 + +# ================================================ + +11180..11181 ; Sharada # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +11182 ; Sharada # Mc SHARADA SIGN VISARGA +11183..111B2 ; Sharada # Lo [48] SHARADA LETTER A..SHARADA LETTER HA +111B3..111B5 ; Sharada # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II +111B6..111BE ; Sharada # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +111BF..111C0 ; Sharada # Mc [2] SHARADA VOWEL SIGN AU..SHARADA SIGN VIRAMA +111C1..111C4 ; Sharada # Lo [4] SHARADA SIGN AVAGRAHA..SHARADA OM +111C5..111C8 ; Sharada # Po [4] SHARADA DANDA..SHARADA SEPARATOR +111D0..111D9 ; Sharada # Nd [10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE + +# Total code points: 83 + +# ================================================ + +110D0..110E8 ; Sora_Sompeng # Lo [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE +110F0..110F9 ; Sora_Sompeng # Nd [10] SORA SOMPENG DIGIT ZERO..SORA SOMPENG DIGIT NINE + +# Total code points: 35 + +# ================================================ + +11680..116AA ; Takri # Lo [43] TAKRI LETTER A..TAKRI LETTER RRA +116AB ; Takri # Mn TAKRI SIGN ANUSVARA +116AC ; Takri # Mc TAKRI SIGN VISARGA +116AD ; Takri # Mn TAKRI VOWEL SIGN AA +116AE..116AF ; Takri # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II +116B0..116B5 ; Takri # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +116B6 ; Takri # Mc TAKRI SIGN VIRAMA +116B7 ; 
Takri # Mn TAKRI SIGN NUKTA +116C0..116C9 ; Takri # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE + +# Total code points: 66 + # EOF diff --git a/lib/unicore/SpecialCasing.txt b/lib/unicore/SpecialCasing.txt index 62a0ec9f3a..d650b6d9dc 100644 --- a/lib/unicore/SpecialCasing.txt +++ b/lib/unicore/SpecialCasing.txt @@ -1,8 +1,8 @@ -# SpecialCasing-6.0.0.txt -# Date: 2010-05-18, 00:49:39 GMT [MD] +# SpecialCasing-6.1.0.txt +# Date: 2011-11-27, 05:10:51 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ # @@ -47,7 +47,9 @@ # * Additional contexts # * Additional fields # ================================================================================ -# @missing 0000..10FFFF; <slc>; <stc>; <suc> + +# @missing: 0000..10FFFF; <slc>; <stc>; <suc>; + # ================================================================================ # Unconditional mappings # ================================================================================ diff --git a/lib/unicore/StandardizedVariants.txt b/lib/unicore/StandardizedVariants.txt index a55af629b4..331b831e32 100644 --- a/lib/unicore/StandardizedVariants.txt +++ b/lib/unicore/StandardizedVariants.txt @@ -1,13 +1,13 @@ -# StandardizedVariants-6.0.0.txt -# Date: 2010-05-19, 11:22:00 PDT [KW] +# StandardizedVariants-6.1.0.txt +# Date: 2011-11-10, 20:28:00 GMT [KW, LI] # -# Specification of the variant sequences that are defined in the +# Specification of the variation sequences that are defined in the # Unicode Standard. # # This file is a normative contributory data file in the # Unicode Character Database. # -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # # Standardized variation sequences are defined in this file. 
@@ -21,9 +21,9 @@ # # For more information on standardized variation sequences, # see Section 16.4, Variation Selectors, -# in The Unicode Standard, Version 6.0. +# in The Unicode Standard, Version 6.1. # -# For more information on the Ideographic Variation Database +# For more information on the Ideographic Variation Database, # see http://www.unicode.org/ivd/ # # Format: @@ -44,7 +44,7 @@ 2273 FE00; following the slant of the lower leg; # GREATER-THAN OR EQUIVALENT TO # The following two entries were originally defined for Unicode 3.2 # but were determined to be in error and were removed from the list -# of standardized variation sequences. The entries are left commented +# of standardized variation sequences. The entries are left commented out # in the file for the historical record of changes made to the data. #2278 FE00; with vertical stroke; # NEITHER LESS-THAN NOR GREATER-THAN #2279 FE00; with vertical stroke; # NEITHER GREATER-THAN NOR LESS-THAN @@ -141,3 +141,225 @@ A868 FE00; phags-pa letter reversed shaping subjoined ya; # PHAGS-PA SUBJOINED L 1887 180D; fourth form; final # MONGOLIAN LETTER ALI GALI A 1888 180B; second form; final # MONGOLIAN LETTER ALI GALI I 188A 180B; second form; initial medial # MONGOLIAN LETTER ALI GALI NGA + +# Emoji variation sequences for use as part of keycap symbols + +0023 FE0E; text style; # NUMBER SIGN +0023 FE0F; emoji style; # NUMBER SIGN +0030 FE0E; text style; # DIGIT ZERO +0030 FE0F; emoji style; # DIGIT ZERO +0031 FE0E; text style; # DIGIT ONE +0031 FE0F; emoji style; # DIGIT ONE +0032 FE0E; text style; # DIGIT TWO +0032 FE0F; emoji style; # DIGIT TWO +0033 FE0E; text style; # DIGIT THREE +0033 FE0F; emoji style; # DIGIT THREE +0034 FE0E; text style; # DIGIT FOUR +0034 FE0F; emoji style; # DIGIT FOUR +0035 FE0E; text style; # DIGIT FIVE +0035 FE0F; emoji style; # DIGIT FIVE +0036 FE0E; text style; # DIGIT SIX +0036 FE0F; emoji style; # DIGIT SIX +0037 FE0E; text style; # DIGIT SEVEN +0037 FE0F; emoji style; # DIGIT 
SEVEN +0038 FE0E; text style; # DIGIT EIGHT +0038 FE0F; emoji style; # DIGIT EIGHT +0039 FE0E; text style; # DIGIT NINE +0039 FE0F; emoji style; # DIGIT NINE + +# Emoji variation sequences + +203C FE0E; text style; # DOUBLE EXCLAMATION MARK +203C FE0F; emoji style; # DOUBLE EXCLAMATION MARK +2049 FE0E; text style; # EXCLAMATION QUESTION MARK +2049 FE0F; emoji style; # EXCLAMATION QUESTION MARK +2139 FE0E; text style; # INFORMATION SOURCE +2139 FE0F; emoji style; # INFORMATION SOURCE +2194 FE0E; text style; # LEFT RIGHT ARROW +2194 FE0F; emoji style; # LEFT RIGHT ARROW +2195 FE0E; text style; # UP DOWN ARROW +2195 FE0F; emoji style; # UP DOWN ARROW +2196 FE0E; text style; # NORTH WEST ARROW +2196 FE0F; emoji style; # NORTH WEST ARROW +2197 FE0E; text style; # NORTH EAST ARROW +2197 FE0F; emoji style; # NORTH EAST ARROW +2198 FE0E; text style; # SOUTH EAST ARROW +2198 FE0F; emoji style; # SOUTH EAST ARROW +2199 FE0E; text style; # SOUTH WEST ARROW +2199 FE0F; emoji style; # SOUTH WEST ARROW +21A9 FE0E; text style; # LEFTWARDS ARROW WITH HOOK +21A9 FE0F; emoji style; # LEFTWARDS ARROW WITH HOOK +21AA FE0E; text style; # RIGHTWARDS ARROW WITH HOOK +21AA FE0F; emoji style; # RIGHTWARDS ARROW WITH HOOK +231A FE0E; text style; # WATCH +231A FE0F; emoji style; # WATCH +231B FE0E; text style; # HOURGLASS +231B FE0F; emoji style; # HOURGLASS +24C2 FE0E; text style; # CIRCLED LATIN CAPITAL LETTER M +24C2 FE0F; emoji style; # CIRCLED LATIN CAPITAL LETTER M +25AA FE0E; text style; # BLACK SMALL SQUARE +25AA FE0F; emoji style; # BLACK SMALL SQUARE +25AB FE0E; text style; # WHITE SMALL SQUARE +25AB FE0F; emoji style; # WHITE SMALL SQUARE +25B6 FE0E; text style; # BLACK RIGHT-POINTING TRIANGLE +25B6 FE0F; emoji style; # BLACK RIGHT-POINTING TRIANGLE +25C0 FE0E; text style; # BLACK LEFT-POINTING TRIANGLE +25C0 FE0F; emoji style; # BLACK LEFT-POINTING TRIANGLE +25FB FE0E; text style; # WHITE MEDIUM SQUARE +25FB FE0F; emoji style; # WHITE MEDIUM SQUARE +25FC FE0E; text style; # BLACK 
MEDIUM SQUARE +25FC FE0F; emoji style; # BLACK MEDIUM SQUARE +25FD FE0E; text style; # WHITE MEDIUM SMALL SQUARE +25FD FE0F; emoji style; # WHITE MEDIUM SMALL SQUARE +25FE FE0E; text style; # BLACK MEDIUM SMALL SQUARE +25FE FE0F; emoji style; # BLACK MEDIUM SMALL SQUARE +2600 FE0E; text style; # BLACK SUN WITH RAYS +2600 FE0F; emoji style; # BLACK SUN WITH RAYS +2601 FE0E; text style; # CLOUD +2601 FE0F; emoji style; # CLOUD +260E FE0E; text style; # BLACK TELEPHONE +260E FE0F; emoji style; # BLACK TELEPHONE +2611 FE0E; text style; # BALLOT BOX WITH CHECK +2611 FE0F; emoji style; # BALLOT BOX WITH CHECK +2614 FE0E; text style; # UMBRELLA WITH RAIN DROPS +2614 FE0F; emoji style; # UMBRELLA WITH RAIN DROPS +2615 FE0E; text style; # HOT BEVERAGE +2615 FE0F; emoji style; # HOT BEVERAGE +261D FE0E; text style; # WHITE UP POINTING INDEX +261D FE0F; emoji style; # WHITE UP POINTING INDEX +263A FE0E; text style; # WHITE SMILING FACE +263A FE0F; emoji style; # WHITE SMILING FACE +2648 FE0E; text style; # ARIES +2648 FE0F; emoji style; # ARIES +2649 FE0E; text style; # TAURUS +2649 FE0F; emoji style; # TAURUS +264A FE0E; text style; # GEMINI +264A FE0F; emoji style; # GEMINI +264B FE0E; text style; # CANCER +264B FE0F; emoji style; # CANCER +264C FE0E; text style; # LEO +264C FE0F; emoji style; # LEO +264D FE0E; text style; # VIRGO +264D FE0F; emoji style; # VIRGO +264E FE0E; text style; # LIBRA +264E FE0F; emoji style; # LIBRA +264F FE0E; text style; # SCORPIUS +264F FE0F; emoji style; # SCORPIUS +2650 FE0E; text style; # SAGITTARIUS +2650 FE0F; emoji style; # SAGITTARIUS +2651 FE0E; text style; # CAPRICORN +2651 FE0F; emoji style; # CAPRICORN +2652 FE0E; text style; # AQUARIUS +2652 FE0F; emoji style; # AQUARIUS +2653 FE0E; text style; # PISCES +2653 FE0F; emoji style; # PISCES +2660 FE0E; text style; # BLACK SPADE SUIT +2660 FE0F; emoji style; # BLACK SPADE SUIT +2663 FE0E; text style; # BLACK CLUB SUIT +2663 FE0F; emoji style; # BLACK CLUB SUIT +2665 FE0E; text style; # 
BLACK HEART SUIT +2665 FE0F; emoji style; # BLACK HEART SUIT +2666 FE0E; text style; # BLACK DIAMOND SUIT +2666 FE0F; emoji style; # BLACK DIAMOND SUIT +2668 FE0E; text style; # HOT SPRINGS +2668 FE0F; emoji style; # HOT SPRINGS +267B FE0E; text style; # BLACK UNIVERSAL RECYCLING SYMBOL +267B FE0F; emoji style; # BLACK UNIVERSAL RECYCLING SYMBOL +267F FE0E; text style; # WHEELCHAIR SYMBOL +267F FE0F; emoji style; # WHEELCHAIR SYMBOL +2693 FE0E; text style; # ANCHOR +2693 FE0F; emoji style; # ANCHOR +26A0 FE0E; text style; # WARNING SIGN +26A0 FE0F; emoji style; # WARNING SIGN +26A1 FE0E; text style; # HIGH VOLTAGE SIGN +26A1 FE0F; emoji style; # HIGH VOLTAGE SIGN +26AA FE0E; text style; # MEDIUM WHITE CIRCLE +26AA FE0F; emoji style; # MEDIUM WHITE CIRCLE +26AB FE0E; text style; # MEDIUM BLACK CIRCLE +26AB FE0F; emoji style; # MEDIUM BLACK CIRCLE +26BD FE0E; text style; # SOCCER BALL +26BD FE0F; emoji style; # SOCCER BALL +26BE FE0E; text style; # BASEBALL +26BE FE0F; emoji style; # BASEBALL +26C4 FE0E; text style; # SNOWMAN WITHOUT SNOW +26C4 FE0F; emoji style; # SNOWMAN WITHOUT SNOW +26C5 FE0E; text style; # SUN BEHIND CLOUD +26C5 FE0F; emoji style; # SUN BEHIND CLOUD +26D4 FE0E; text style; # NO ENTRY +26D4 FE0F; emoji style; # NO ENTRY +26EA FE0E; text style; # CHURCH +26EA FE0F; emoji style; # CHURCH +26F2 FE0E; text style; # FOUNTAIN +26F2 FE0F; emoji style; # FOUNTAIN +26F3 FE0E; text style; # FLAG IN HOLE +26F3 FE0F; emoji style; # FLAG IN HOLE +26F5 FE0E; text style; # SAILBOAT +26F5 FE0F; emoji style; # SAILBOAT +26FA FE0E; text style; # TENT +26FA FE0F; emoji style; # TENT +26FD FE0E; text style; # FUEL PUMP +26FD FE0F; emoji style; # FUEL PUMP +2702 FE0E; text style; # BLACK SCISSORS +2702 FE0F; emoji style; # BLACK SCISSORS +2708 FE0E; text style; # AIRPLANE +2708 FE0F; emoji style; # AIRPLANE +2709 FE0E; text style; # ENVELOPE +2709 FE0F; emoji style; # ENVELOPE +270C FE0E; text style; # VICTORY HAND +270C FE0F; emoji style; # VICTORY HAND +270F FE0E; 
text style; # PENCIL +270F FE0F; emoji style; # PENCIL +2712 FE0E; text style; # BLACK NIB +2712 FE0F; emoji style; # BLACK NIB +2714 FE0E; text style; # HEAVY CHECK MARK +2714 FE0F; emoji style; # HEAVY CHECK MARK +2716 FE0E; text style; # HEAVY MULTIPLICATION X +2716 FE0F; emoji style; # HEAVY MULTIPLICATION X +2733 FE0E; text style; # EIGHT SPOKED ASTERISK +2733 FE0F; emoji style; # EIGHT SPOKED ASTERISK +2734 FE0E; text style; # EIGHT POINTED BLACK STAR +2734 FE0F; emoji style; # EIGHT POINTED BLACK STAR +2744 FE0E; text style; # SNOWFLAKE +2744 FE0F; emoji style; # SNOWFLAKE +2747 FE0E; text style; # SPARKLE +2747 FE0F; emoji style; # SPARKLE +2757 FE0E; text style; # HEAVY EXCLAMATION MARK SYMBOL +2757 FE0F; emoji style; # HEAVY EXCLAMATION MARK SYMBOL +2764 FE0E; text style; # HEAVY BLACK HEART +2764 FE0F; emoji style; # HEAVY BLACK HEART +27A1 FE0E; text style; # BLACK RIGHTWARDS ARROW +27A1 FE0F; emoji style; # BLACK RIGHTWARDS ARROW +2934 FE0E; text style; # ARROW POINTING RIGHTWARDS THEN CURVING UPWARDS +2934 FE0F; emoji style; # ARROW POINTING RIGHTWARDS THEN CURVING UPWARDS +2935 FE0E; text style; # ARROW POINTING RIGHTWARDS THEN CURVING DOWNWARDS +2935 FE0F; emoji style; # ARROW POINTING RIGHTWARDS THEN CURVING DOWNWARDS +2B05 FE0E; text style; # LEFTWARDS BLACK ARROW +2B05 FE0F; emoji style; # LEFTWARDS BLACK ARROW +2B06 FE0E; text style; # UPWARDS BLACK ARROW +2B06 FE0F; emoji style; # UPWARDS BLACK ARROW +2B07 FE0E; text style; # DOWNWARDS BLACK ARROW +2B07 FE0F; emoji style; # DOWNWARDS BLACK ARROW +2B1B FE0E; text style; # BLACK LARGE SQUARE +2B1B FE0F; emoji style; # BLACK LARGE SQUARE +2B1C FE0E; text style; # WHITE LARGE SQUARE +2B1C FE0F; emoji style; # WHITE LARGE SQUARE +2B50 FE0E; text style; # WHITE MEDIUM STAR +2B50 FE0F; emoji style; # WHITE MEDIUM STAR +2B55 FE0E; text style; # HEAVY LARGE CIRCLE +2B55 FE0F; emoji style; # HEAVY LARGE CIRCLE +303D FE0E; text style; # PART ALTERNATION MARK +303D FE0F; emoji style; # PART ALTERNATION 
MARK +3297 FE0E; text style; # CIRCLED IDEOGRAPH CONGRATULATION +3297 FE0F; emoji style; # CIRCLED IDEOGRAPH CONGRATULATION +3299 FE0E; text style; # CIRCLED IDEOGRAPH SECRET +3299 FE0F; emoji style; # CIRCLED IDEOGRAPH SECRET +1F004 FE0E; text style; # MAHJONG TILE RED DRAGON +1F004 FE0F; emoji style; # MAHJONG TILE RED DRAGON +1F17F FE0E; text style; # NEGATIVE SQUARED LATIN CAPITAL LETTER P +1F17F FE0F; emoji style; # NEGATIVE SQUARED LATIN CAPITAL LETTER P +1F21A FE0E; text style; # SQUARED CJK UNIFIED IDEOGRAPH-7121 +1F21A FE0F; emoji style; # SQUARED CJK UNIFIED IDEOGRAPH-7121 +1F22F FE0E; text style; # SQUARED CJK UNIFIED IDEOGRAPH-6307 +1F22F FE0F; emoji style; # SQUARED CJK UNIFIED IDEOGRAPH-6307 + +# EOF diff --git a/lib/unicore/UnicodeData.txt b/lib/unicore/UnicodeData.txt index 8d7222b137..9f204050c6 100644 --- a/lib/unicore/UnicodeData.txt +++ b/lib/unicore/UnicodeData.txt @@ -165,10 +165,10 @@ 00A4;CURRENCY SIGN;Sc;0;ET;;;;;N;;;;; 00A5;YEN SIGN;Sc;0;ET;;;;;N;;;;; 00A6;BROKEN BAR;So;0;ON;;;;;N;BROKEN VERTICAL BAR;;;; -00A7;SECTION SIGN;So;0;ON;;;;;N;;;;; +00A7;SECTION SIGN;Po;0;ON;;;;;N;;;;; 00A8;DIAERESIS;Sk;0;ON;<compat> 0020 0308;;;;N;SPACING DIAERESIS;;;; 00A9;COPYRIGHT SIGN;So;0;ON;;;;;N;;;;; -00AA;FEMININE ORDINAL INDICATOR;Ll;0;L;<super> 0061;;;;N;;;;; +00AA;FEMININE ORDINAL INDICATOR;Lo;0;L;<super> 0061;;;;N;;;;; 00AB;LEFT-POINTING DOUBLE ANGLE QUOTATION MARK;Pi;0;ON;;;;;Y;LEFT POINTING GUILLEMET;;;; 00AC;NOT SIGN;Sm;0;ON;;;;;N;;;;; 00AD;SOFT HYPHEN;Cf;0;BN;;;;;N;;;;; @@ -180,11 +180,11 @@ 00B3;SUPERSCRIPT THREE;No;0;EN;<super> 0033;;3;3;N;SUPERSCRIPT DIGIT THREE;;;; 00B4;ACUTE ACCENT;Sk;0;ON;<compat> 0020 0301;;;;N;SPACING ACUTE;;;; 00B5;MICRO SIGN;Ll;0;L;<compat> 03BC;;;;N;;;039C;;039C -00B6;PILCROW SIGN;So;0;ON;;;;;N;PARAGRAPH SIGN;;;; +00B6;PILCROW SIGN;Po;0;ON;;;;;N;PARAGRAPH SIGN;;;; 00B7;MIDDLE DOT;Po;0;ON;;;;;N;;;;; 00B8;CEDILLA;Sk;0;ON;<compat> 0020 0327;;;;N;SPACING CEDILLA;;;; 00B9;SUPERSCRIPT ONE;No;0;EN;<super> 
0031;;1;1;N;SUPERSCRIPT DIGIT ONE;;;; -00BA;MASCULINE ORDINAL INDICATOR;Ll;0;L;<super> 006F;;;;N;;;;; +00BA;MASCULINE ORDINAL INDICATOR;Lo;0;L;<super> 006F;;;;N;;;;; 00BB;RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK;Pf;0;ON;;;;;Y;RIGHT POINTING GUILLEMET;;;; 00BC;VULGAR FRACTION ONE QUARTER;No;0;ON;<fraction> 0031 2044 0034;;;1/4;N;FRACTION ONE QUARTER;;;; 00BD;VULGAR FRACTION ONE HALF;No;0;ON;<fraction> 0031 2044 0032;;;1/2;N;FRACTION ONE HALF;;;; @@ -612,7 +612,7 @@ 0263;LATIN SMALL LETTER GAMMA;Ll;0;L;;;;;N;;;0194;;0194 0264;LATIN SMALL LETTER RAMS HORN;Ll;0;L;;;;;N;LATIN SMALL LETTER BABY GAMMA;;;; 0265;LATIN SMALL LETTER TURNED H;Ll;0;L;;;;;N;;;A78D;;A78D -0266;LATIN SMALL LETTER H WITH HOOK;Ll;0;L;;;;;N;LATIN SMALL LETTER H HOOK;;;; +0266;LATIN SMALL LETTER H WITH HOOK;Ll;0;L;;;;;N;LATIN SMALL LETTER H HOOK;;A7AA;;A7AA 0267;LATIN SMALL LETTER HENG WITH HOOK;Ll;0;L;;;;;N;LATIN SMALL LETTER HENG HOOK;;;; 0268;LATIN SMALL LETTER I WITH STROKE;Ll;0;L;;;;;N;LATIN SMALL LETTER BARRED I;;0197;;0197 0269;LATIN SMALL LETTER IOTA;Ll;0;L;;;;;N;;;0196;;0196 @@ -1394,6 +1394,7 @@ 0587;ARMENIAN SMALL LIGATURE ECH YIWN;Ll;0;L;<compat> 0565 0582;;;;N;;;;; 0589;ARMENIAN FULL STOP;Po;0;L;;;;;N;ARMENIAN PERIOD;;;; 058A;ARMENIAN HYPHEN;Pd;0;ON;;;;;N;;;;; +058F;ARMENIAN DRAM SIGN;Sc;0;ET;;;;;N;;;;; 0591;HEBREW ACCENT ETNAHTA;Mn;220;NSM;;;;;N;;;;; 0592;HEBREW ACCENT SEGOL;Mn;230;NSM;;;;;N;;;;; 0593;HEBREW ACCENT SHALSHELET;Mn;230;NSM;;;;;N;;;;; @@ -1485,6 +1486,7 @@ 0601;ARABIC SIGN SANAH;Cf;0;AN;;;;;N;;;;; 0602;ARABIC FOOTNOTE MARKER;Cf;0;AN;;;;;N;;;;; 0603;ARABIC SIGN SAFHA;Cf;0;AN;;;;;N;;;;; +0604;ARABIC SIGN SAMVAT;Cf;0;AN;;;;;N;;;;; 0606;ARABIC-INDIC CUBE ROOT;Sm;0;ON;;;;;N;;;;; 0607;ARABIC-INDIC FOURTH ROOT;Sm;0;ON;;;;;N;;;;; 0608;ARABIC RAY;Sm;0;AL;;;;;N;;;;; @@ -1747,7 +1749,7 @@ 070B;SYRIAC HARKLEAN OBELUS;Po;0;AL;;;;;N;;;;; 070C;SYRIAC HARKLEAN METOBELUS;Po;0;AL;;;;;N;;;;; 070D;SYRIAC HARKLEAN ASTERISCUS;Po;0;AL;;;;;N;;;;; -070F;SYRIAC ABBREVIATION MARK;Cf;0;AN;;;;;N;;;;; 
+070F;SYRIAC ABBREVIATION MARK;Cf;0;AL;;;;;N;;;;; 0710;SYRIAC LETTER ALAPH;Lo;0;AL;;;;;N;;;;; 0711;SYRIAC LETTER SUPERSCRIPT ALAPH;Mn;36;NSM;;;;;N;;;;; 0712;SYRIAC LETTER BETH;Lo;0;AL;;;;;N;;;;; @@ -2057,6 +2059,45 @@ 085A;MANDAIC VOCALIZATION MARK;Mn;220;NSM;;;;;N;;;;; 085B;MANDAIC GEMINATION MARK;Mn;220;NSM;;;;;N;;;;; 085E;MANDAIC PUNCTUATION;Po;0;R;;;;;N;;;;; +08A0;ARABIC LETTER BEH WITH SMALL V BELOW;Lo;0;AL;;;;;N;;;;; +08A2;ARABIC LETTER JEEM WITH TWO DOTS ABOVE;Lo;0;AL;;;;;N;;;;; +08A3;ARABIC LETTER TAH WITH TWO DOTS ABOVE;Lo;0;AL;;;;;N;;;;; +08A4;ARABIC LETTER FEH WITH DOT BELOW AND THREE DOTS ABOVE;Lo;0;AL;;;;;N;;;;; +08A5;ARABIC LETTER QAF WITH DOT BELOW;Lo;0;AL;;;;;N;;;;; +08A6;ARABIC LETTER LAM WITH DOUBLE BAR;Lo;0;AL;;;;;N;;;;; +08A7;ARABIC LETTER MEEM WITH THREE DOTS ABOVE;Lo;0;AL;;;;;N;;;;; +08A8;ARABIC LETTER YEH WITH TWO DOTS BELOW AND HAMZA ABOVE;Lo;0;AL;;;;;N;;;;; +08A9;ARABIC LETTER YEH WITH TWO DOTS BELOW AND DOT ABOVE;Lo;0;AL;;;;;N;;;;; +08AA;ARABIC LETTER REH WITH LOOP;Lo;0;AL;;;;;N;;;;; +08AB;ARABIC LETTER WAW WITH DOT WITHIN;Lo;0;AL;;;;;N;;;;; +08AC;ARABIC LETTER ROHINGYA YEH;Lo;0;AL;;;;;N;;;;; +08E4;ARABIC CURLY FATHA;Mn;230;NSM;;;;;N;;;;; +08E5;ARABIC CURLY DAMMA;Mn;230;NSM;;;;;N;;;;; +08E6;ARABIC CURLY KASRA;Mn;220;NSM;;;;;N;;;;; +08E7;ARABIC CURLY FATHATAN;Mn;230;NSM;;;;;N;;;;; +08E8;ARABIC CURLY DAMMATAN;Mn;230;NSM;;;;;N;;;;; +08E9;ARABIC CURLY KASRATAN;Mn;220;NSM;;;;;N;;;;; +08EA;ARABIC TONE ONE DOT ABOVE;Mn;230;NSM;;;;;N;;;;; +08EB;ARABIC TONE TWO DOTS ABOVE;Mn;230;NSM;;;;;N;;;;; +08EC;ARABIC TONE LOOP ABOVE;Mn;230;NSM;;;;;N;;;;; +08ED;ARABIC TONE ONE DOT BELOW;Mn;220;NSM;;;;;N;;;;; +08EE;ARABIC TONE TWO DOTS BELOW;Mn;220;NSM;;;;;N;;;;; +08EF;ARABIC TONE LOOP BELOW;Mn;220;NSM;;;;;N;;;;; +08F0;ARABIC OPEN FATHATAN;Mn;27;NSM;;;;;N;;;;; +08F1;ARABIC OPEN DAMMATAN;Mn;28;NSM;;;;;N;;;;; +08F2;ARABIC OPEN KASRATAN;Mn;29;NSM;;;;;N;;;;; +08F3;ARABIC SMALL HIGH WAW;Mn;230;NSM;;;;;N;;;;; +08F4;ARABIC FATHA WITH RING;Mn;230;NSM;;;;;N;;;;; 
+08F5;ARABIC FATHA WITH DOT ABOVE;Mn;230;NSM;;;;;N;;;;; +08F6;ARABIC KASRA WITH DOT BELOW;Mn;220;NSM;;;;;N;;;;; +08F7;ARABIC LEFT ARROWHEAD ABOVE;Mn;230;NSM;;;;;N;;;;; +08F8;ARABIC RIGHT ARROWHEAD ABOVE;Mn;230;NSM;;;;;N;;;;; +08F9;ARABIC LEFT ARROWHEAD BELOW;Mn;220;NSM;;;;;N;;;;; +08FA;ARABIC RIGHT ARROWHEAD BELOW;Mn;220;NSM;;;;;N;;;;; +08FB;ARABIC DOUBLE RIGHT ARROWHEAD ABOVE;Mn;230;NSM;;;;;N;;;;; +08FC;ARABIC DOUBLE RIGHT ARROWHEAD ABOVE WITH DOT;Mn;230;NSM;;;;;N;;;;; +08FD;ARABIC RIGHT ARROWHEAD ABOVE WITH DOT;Mn;230;NSM;;;;;N;;;;; +08FE;ARABIC DAMMA WITH DOT;Mn;230;NSM;;;;;N;;;;; 0900;DEVANAGARI SIGN INVERTED CANDRABINDU;Mn;0;NSM;;;;;N;;;;; 0901;DEVANAGARI SIGN CANDRABINDU;Mn;0;NSM;;;;;N;;;;; 0902;DEVANAGARI SIGN ANUSVARA;Mn;0;NSM;;;;;N;;;;; @@ -2437,6 +2478,7 @@ 0AED;GUJARATI DIGIT SEVEN;Nd;0;L;;7;7;7;N;;;;; 0AEE;GUJARATI DIGIT EIGHT;Nd;0;L;;8;8;8;N;;;;; 0AEF;GUJARATI DIGIT NINE;Nd;0;L;;9;9;9;N;;;;; +0AF0;GUJARATI ABBREVIATION SIGN;Po;0;L;;;;;N;;;;; 0AF1;GUJARATI RUPEE SIGN;Sc;0;ET;;;;;N;;;;; 0B01;ORIYA SIGN CANDRABINDU;Mn;0;NSM;;;;;N;;;;; 0B02;ORIYA SIGN ANUSVARA;Mc;0;L;;;;;N;;;;; @@ -3109,6 +3151,8 @@ 0ED9;LAO DIGIT NINE;Nd;0;L;;9;9;9;N;;;;; 0EDC;LAO HO NO;Lo;0;L;<compat> 0EAB 0E99;;;;N;;;;; 0EDD;LAO HO MO;Lo;0;L;<compat> 0EAB 0EA1;;;;N;;;;; +0EDE;LAO LETTER KHMU GO;Lo;0;L;;;;;N;;;;; +0EDF;LAO LETTER KHMU NYO;Lo;0;L;;;;;N;;;;; 0F00;TIBETAN SYLLABLE OM;Lo;0;L;;;;;N;;;;; 0F01;TIBETAN MARK GTER YIG MGO TRUNCATED A;So;0;L;;;;;N;;;;; 0F02;TIBETAN MARK GTER YIG MGO -UM RNAM BCAD MA;So;0;L;;;;;N;;;;; @@ -3129,7 +3173,7 @@ 0F11;TIBETAN MARK RIN CHEN SPUNGS SHAD;Po;0;L;;;;;N;TIBETAN RINCHANPHUNGSHAD;;;; 0F12;TIBETAN MARK RGYA GRAM SHAD;Po;0;L;;;;;N;;;;; 0F13;TIBETAN MARK CARET -DZUD RTAGS ME LONG CAN;So;0;L;;;;;N;;;;; -0F14;TIBETAN MARK GTER TSHEG;So;0;L;;;;;N;TIBETAN COMMA;;;; +0F14;TIBETAN MARK GTER TSHEG;Po;0;L;;;;;N;TIBETAN COMMA;;;; 0F15;TIBETAN LOGOTYPE SIGN CHAD RTAGS;So;0;L;;;;;N;;;;; 0F16;TIBETAN LOGOTYPE SIGN LHAG RTAGS;So;0;L;;;;;N;;;;; 0F17;TIBETAN 
ASTROLOGICAL SIGN SGRA GCAN -CHAR RTAGS;So;0;L;;;;;N;;;;; @@ -3518,6 +3562,8 @@ 10C3;GEORGIAN CAPITAL LETTER WE;Lu;0;L;;;;;N;;;;2D23; 10C4;GEORGIAN CAPITAL LETTER HAR;Lu;0;L;;;;;N;;;;2D24; 10C5;GEORGIAN CAPITAL LETTER HOE;Lu;0;L;;;;;N;;;;2D25; +10C7;GEORGIAN CAPITAL LETTER YN;Lu;0;L;;;;;N;;;;2D27; +10CD;GEORGIAN CAPITAL LETTER AEN;Lu;0;L;;;;;N;;;;2D2D; 10D0;GEORGIAN LETTER AN;Lo;0;L;;;;;N;GEORGIAN SMALL LETTER AN;;;; 10D1;GEORGIAN LETTER BAN;Lo;0;L;;;;;N;GEORGIAN SMALL LETTER BAN;;;; 10D2;GEORGIAN LETTER GAN;Lo;0;L;;;;;N;GEORGIAN SMALL LETTER GAN;;;; @@ -3563,6 +3609,9 @@ 10FA;GEORGIAN LETTER AIN;Lo;0;L;;;;;N;;;;; 10FB;GEORGIAN PARAGRAPH SEPARATOR;Po;0;L;;;;;N;;;;; 10FC;MODIFIER LETTER GEORGIAN NAR;Lm;0;L;<super> 10DC;;;;N;;;;; +10FD;GEORGIAN LETTER AEN;Lo;0;L;;;;;N;;;;; +10FE;GEORGIAN LETTER HARD SIGN;Lo;0;L;;;;;N;;;;; +10FF;GEORGIAN LETTER LABIAL SIGN;Lo;0;L;;;;;N;;;;; 1100;HANGUL CHOSEONG KIYEOK;Lo;0;L;;;;;N;;;;; 1101;HANGUL CHOSEONG SSANGKIYEOK;Lo;0;L;;;;;N;;;;; 1102;HANGUL CHOSEONG NIEUN;Lo;0;L;;;;;N;;;;; @@ -4148,7 +4197,7 @@ 135D;ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK;Mn;230;NSM;;;;;N;;;;; 135E;ETHIOPIC COMBINING VOWEL LENGTH MARK;Mn;230;NSM;;;;;N;;;;; 135F;ETHIOPIC COMBINING GEMINATION MARK;Mn;230;NSM;;;;;N;;;;; -1360;ETHIOPIC SECTION MARK;So;0;L;;;;;N;;;;; +1360;ETHIOPIC SECTION MARK;Po;0;L;;;;;N;;;;; 1361;ETHIOPIC WORDSPACE;Po;0;L;;;;;N;;;;; 1362;ETHIOPIC FULL STOP;Po;0;L;;;;;N;;;;; 1363;ETHIOPIC COMMA;Po;0;L;;;;;N;;;;; @@ -5171,8 +5220,8 @@ 17B1;KHMER INDEPENDENT VOWEL QOO TYPE ONE;Lo;0;L;;;;;N;;;;; 17B2;KHMER INDEPENDENT VOWEL QOO TYPE TWO;Lo;0;L;;;;;N;;;;; 17B3;KHMER INDEPENDENT VOWEL QAU;Lo;0;L;;;;;N;;;;; -17B4;KHMER VOWEL INHERENT AQ;Cf;0;L;;;;;N;;;;; -17B5;KHMER VOWEL INHERENT AA;Cf;0;L;;;;;N;;;;; +17B4;KHMER VOWEL INHERENT AQ;Mn;0;NSM;;;;;N;;;;; +17B5;KHMER VOWEL INHERENT AA;Mn;0;NSM;;;;;N;;;;; 17B6;KHMER VOWEL SIGN AA;Mc;0;L;;;;;N;;;;; 17B7;KHMER VOWEL SIGN I;Mn;0;NSM;;;;;N;;;;; 17B8;KHMER VOWEL SIGN II;Mn;0;NSM;;;;;N;;;;; @@ -5996,6 
+6045,9 @@ 1BA8;SUNDANESE VOWEL SIGN PAMEPET;Mn;0;NSM;;;;;N;;;;; 1BA9;SUNDANESE VOWEL SIGN PANEULEUNG;Mn;0;NSM;;;;;N;;;;; 1BAA;SUNDANESE SIGN PAMAAEH;Mc;9;L;;;;;N;;;;; +1BAB;SUNDANESE SIGN VIRAMA;Mn;9;NSM;;;;;N;;;;; +1BAC;SUNDANESE CONSONANT SIGN PASANGAN MA;Mc;0;L;;;;;N;;;;; +1BAD;SUNDANESE CONSONANT SIGN PASANGAN WA;Mc;0;L;;;;;N;;;;; 1BAE;SUNDANESE LETTER KHA;Lo;0;L;;;;;N;;;;; 1BAF;SUNDANESE LETTER SYA;Lo;0;L;;;;;N;;;;; 1BB0;SUNDANESE DIGIT ZERO;Nd;0;L;;0;0;0;N;;;;; @@ -6008,6 +6060,12 @@ 1BB7;SUNDANESE DIGIT SEVEN;Nd;0;L;;7;7;7;N;;;;; 1BB8;SUNDANESE DIGIT EIGHT;Nd;0;L;;8;8;8;N;;;;; 1BB9;SUNDANESE DIGIT NINE;Nd;0;L;;9;9;9;N;;;;; +1BBA;SUNDANESE AVAGRAHA;Lo;0;L;;;;;N;;;;; +1BBB;SUNDANESE LETTER REU;Lo;0;L;;;;;N;;;;; +1BBC;SUNDANESE LETTER LEU;Lo;0;L;;;;;N;;;;; +1BBD;SUNDANESE LETTER BHA;Lo;0;L;;;;;N;;;;; +1BBE;SUNDANESE LETTER FINAL K;Lo;0;L;;;;;N;;;;; +1BBF;SUNDANESE LETTER FINAL M;Lo;0;L;;;;;N;;;;; 1BC0;BATAK LETTER A;Lo;0;L;;;;;N;;;;; 1BC1;BATAK LETTER SIMALUNGUN A;Lo;0;L;;;;;N;;;;; 1BC2;BATAK LETTER HA;Lo;0;L;;;;;N;;;;; @@ -6186,6 +6244,14 @@ 1C7D;OL CHIKI AHAD;Lm;0;L;;;;;N;;;;; 1C7E;OL CHIKI PUNCTUATION MUCAAD;Po;0;L;;;;;N;;;;; 1C7F;OL CHIKI PUNCTUATION DOUBLE MUCAAD;Po;0;L;;;;;N;;;;; +1CC0;SUNDANESE PUNCTUATION BINDU SURYA;Po;0;L;;;;;N;;;;; +1CC1;SUNDANESE PUNCTUATION BINDU PANGLONG;Po;0;L;;;;;N;;;;; +1CC2;SUNDANESE PUNCTUATION BINDU PURNAMA;Po;0;L;;;;;N;;;;; +1CC3;SUNDANESE PUNCTUATION BINDU CAKRA;Po;0;L;;;;;N;;;;; +1CC4;SUNDANESE PUNCTUATION BINDU LEU SATANGA;Po;0;L;;;;;N;;;;; +1CC5;SUNDANESE PUNCTUATION BINDU KA SATANGA;Po;0;L;;;;;N;;;;; +1CC6;SUNDANESE PUNCTUATION BINDU DA SATANGA;Po;0;L;;;;;N;;;;; +1CC7;SUNDANESE PUNCTUATION BINDU BA SATANGA;Po;0;L;;;;;N;;;;; 1CD0;VEDIC TONE KARSHANA;Mn;230;NSM;;;;;N;;;;; 1CD1;VEDIC TONE SHARA;Mn;230;NSM;;;;;N;;;;; 1CD2;VEDIC TONE PRENKHA;Mn;230;NSM;;;;;N;;;;; @@ -6221,6 +6287,10 @@ 1CF0;VEDIC SIGN RTHANG LONG ANUSVARA;Lo;0;L;;;;;N;;;;; 1CF1;VEDIC SIGN ANUSVARA UBHAYATO MUKHA;Lo;0;L;;;;;N;;;;; 1CF2;VEDIC SIGN 
ARDHAVISARGA;Mc;0;L;;;;;N;;;;; +1CF3;VEDIC SIGN ROTATED ARDHAVISARGA;Mc;0;L;;;;;N;;;;; +1CF4;VEDIC TONE CANDRA ABOVE;Mn;230;NSM;;;;;N;;;;; +1CF5;VEDIC SIGN JIHVAMULIYA;Lo;0;L;;;;;N;;;;; +1CF6;VEDIC SIGN UPADHMANIYA;Lo;0;L;;;;;N;;;;; 1D00;LATIN LETTER SMALL CAPITAL A;Ll;0;L;;;;;N;;;;; 1D01;LATIN LETTER SMALL CAPITAL AE;Ll;0;L;;;;;N;;;;; 1D02;LATIN SMALL LETTER TURNED AE;Ll;0;L;;;;;N;;;;; @@ -6319,15 +6389,15 @@ 1D5F;MODIFIER LETTER SMALL DELTA;Lm;0;L;<super> 03B4;;;;N;;;;; 1D60;MODIFIER LETTER SMALL GREEK PHI;Lm;0;L;<super> 03C6;;;;N;;;;; 1D61;MODIFIER LETTER SMALL CHI;Lm;0;L;<super> 03C7;;;;N;;;;; -1D62;LATIN SUBSCRIPT SMALL LETTER I;Ll;0;L;<sub> 0069;;;;N;;;;; -1D63;LATIN SUBSCRIPT SMALL LETTER R;Ll;0;L;<sub> 0072;;;;N;;;;; -1D64;LATIN SUBSCRIPT SMALL LETTER U;Ll;0;L;<sub> 0075;;;;N;;;;; -1D65;LATIN SUBSCRIPT SMALL LETTER V;Ll;0;L;<sub> 0076;;;;N;;;;; -1D66;GREEK SUBSCRIPT SMALL LETTER BETA;Ll;0;L;<sub> 03B2;;;;N;;;;; -1D67;GREEK SUBSCRIPT SMALL LETTER GAMMA;Ll;0;L;<sub> 03B3;;;;N;;;;; -1D68;GREEK SUBSCRIPT SMALL LETTER RHO;Ll;0;L;<sub> 03C1;;;;N;;;;; -1D69;GREEK SUBSCRIPT SMALL LETTER PHI;Ll;0;L;<sub> 03C6;;;;N;;;;; -1D6A;GREEK SUBSCRIPT SMALL LETTER CHI;Ll;0;L;<sub> 03C7;;;;N;;;;; +1D62;LATIN SUBSCRIPT SMALL LETTER I;Lm;0;L;<sub> 0069;;;;N;;;;; +1D63;LATIN SUBSCRIPT SMALL LETTER R;Lm;0;L;<sub> 0072;;;;N;;;;; +1D64;LATIN SUBSCRIPT SMALL LETTER U;Lm;0;L;<sub> 0075;;;;N;;;;; +1D65;LATIN SUBSCRIPT SMALL LETTER V;Lm;0;L;<sub> 0076;;;;N;;;;; +1D66;GREEK SUBSCRIPT SMALL LETTER BETA;Lm;0;L;<sub> 03B2;;;;N;;;;; +1D67;GREEK SUBSCRIPT SMALL LETTER GAMMA;Lm;0;L;<sub> 03B3;;;;N;;;;; +1D68;GREEK SUBSCRIPT SMALL LETTER RHO;Lm;0;L;<sub> 03C1;;;;N;;;;; +1D69;GREEK SUBSCRIPT SMALL LETTER PHI;Lm;0;L;<sub> 03C6;;;;N;;;;; +1D6A;GREEK SUBSCRIPT SMALL LETTER CHI;Lm;0;L;<sub> 03C7;;;;N;;;;; 1D6B;LATIN SMALL LETTER UE;Ll;0;L;;;;;N;;;;; 1D6C;LATIN SMALL LETTER B WITH MIDDLE TILDE;Ll;0;L;;;;;N;;;;; 1D6D;LATIN SMALL LETTER D WITH MIDDLE TILDE;Ll;0;L;;;;;N;;;;; @@ -8827,7 +8897,9 @@ 
27C8;REVERSE SOLIDUS PRECEDING SUBSET;Sm;0;ON;;;;;Y;;;;; 27C9;SUPERSET PRECEDING SOLIDUS;Sm;0;ON;;;;;Y;;;;; 27CA;VERTICAL BAR WITH HORIZONTAL STROKE;Sm;0;ON;;;;;N;;;;; +27CB;MATHEMATICAL RISING DIAGONAL;Sm;0;ON;;;;;Y;;;;; 27CC;LONG DIVISION;Sm;0;ON;;;;;Y;;;;; +27CD;MATHEMATICAL FALLING DIAGONAL;Sm;0;ON;;;;;Y;;;;; 27CE;SQUARED LOGICAL AND;Sm;0;ON;;;;;N;;;;; 27CF;SQUARED LOGICAL OR;Sm;0;ON;;;;;N;;;;; 27D0;WHITE DIAMOND WITH CENTRED DOT;Sm;0;ON;;;;;N;;;;; @@ -9855,7 +9927,7 @@ 2C79;LATIN SMALL LETTER TURNED R WITH TAIL;Ll;0;L;;;;;N;;;;; 2C7A;LATIN SMALL LETTER O WITH LOW RING INSIDE;Ll;0;L;;;;;N;;;;; 2C7B;LATIN LETTER SMALL CAPITAL TURNED E;Ll;0;L;;;;;N;;;;; -2C7C;LATIN SUBSCRIPT SMALL LETTER J;Ll;0;L;<sub> 006A;;;;N;;;;; +2C7C;LATIN SUBSCRIPT SMALL LETTER J;Lm;0;L;<sub> 006A;;;;N;;;;; 2C7D;MODIFIER LETTER CAPITAL V;Lm;0;L;<super> 0056;;;;N;;;;; 2C7E;LATIN CAPITAL LETTER S WITH SWASH TAIL;Lu;0;L;;;;;N;;;;023F; 2C7F;LATIN CAPITAL LETTER Z WITH SWASH TAIL;Lu;0;L;;;;;N;;;;0240; @@ -9973,6 +10045,8 @@ 2CEF;COPTIC COMBINING NI ABOVE;Mn;230;NSM;;;;;N;;;;; 2CF0;COPTIC COMBINING SPIRITUS ASPER;Mn;230;NSM;;;;;N;;;;; 2CF1;COPTIC COMBINING SPIRITUS LENIS;Mn;230;NSM;;;;;N;;;;; +2CF2;COPTIC CAPITAL LETTER BOHAIRIC KHEI;Lu;0;L;;;;;N;;;;2CF3; +2CF3;COPTIC SMALL LETTER BOHAIRIC KHEI;Ll;0;L;;;;;N;;;2CF2;;2CF2 2CF9;COPTIC OLD NUBIAN FULL STOP;Po;0;ON;;;;;N;;;;; 2CFA;COPTIC OLD NUBIAN DIRECT QUESTION MARK;Po;0;ON;;;;;N;;;;; 2CFB;COPTIC OLD NUBIAN INDIRECT QUESTION MARK;Po;0;ON;;;;;N;;;;; @@ -10018,6 +10092,8 @@ 2D23;GEORGIAN SMALL LETTER WE;Ll;0;L;;;;;N;;;10C3;;10C3 2D24;GEORGIAN SMALL LETTER HAR;Ll;0;L;;;;;N;;;10C4;;10C4 2D25;GEORGIAN SMALL LETTER HOE;Ll;0;L;;;;;N;;;10C5;;10C5 +2D27;GEORGIAN SMALL LETTER YN;Ll;0;L;;;;;N;;;10C7;;10C7 +2D2D;GEORGIAN SMALL LETTER AEN;Ll;0;L;;;;;N;;;10CD;;10CD 2D30;TIFINAGH LETTER YA;Lo;0;L;;;;;N;;;;; 2D31;TIFINAGH LETTER YAB;Lo;0;L;;;;;N;;;;; 2D32;TIFINAGH LETTER YABH;Lo;0;L;;;;;N;;;;; @@ -10072,6 +10148,8 @@ 2D63;TIFINAGH LETTER YAZ;Lo;0;L;;;;;N;;;;; 
2D64;TIFINAGH LETTER TAWELLEMET YAZ;Lo;0;L;;;;;N;;;;; 2D65;TIFINAGH LETTER YAZZ;Lo;0;L;;;;;N;;;;; +2D66;TIFINAGH LETTER YE;Lo;0;L;;;;;N;;;;; +2D67;TIFINAGH LETTER YO;Lo;0;L;;;;;N;;;;; 2D6F;TIFINAGH MODIFIER LETTER LABIALIZATION MARK;Lm;0;L;<super> 2D61;;;;N;;;;; 2D70;TIFINAGH SEPARATOR MARK;Po;0;L;;;;;N;;;;; 2D7F;TIFINAGH CONSONANT JOINER;Mn;9;NSM;;;;;N;;;;; @@ -10236,6 +10314,16 @@ 2E2F;VERTICAL TILDE;Lm;0;ON;;;;;N;;;;; 2E30;RING POINT;Po;0;ON;;;;;N;;;;; 2E31;WORD SEPARATOR MIDDLE DOT;Po;0;ON;;;;;N;;;;; +2E32;TURNED COMMA;Po;0;ON;;;;;N;;;;; +2E33;RAISED DOT;Po;0;ON;;;;;N;;;;; +2E34;RAISED COMMA;Po;0;ON;;;;;N;;;;; +2E35;TURNED SEMICOLON;Po;0;ON;;;;;N;;;;; +2E36;DAGGER WITH LEFT GUARD;Po;0;ON;;;;;N;;;;; +2E37;DAGGER WITH RIGHT GUARD;Po;0;ON;;;;;N;;;;; +2E38;TURNED DAGGER;Po;0;ON;;;;;N;;;;; +2E39;TOP HALF SECTION SIGN;Po;0;ON;;;;;N;;;;; +2E3A;TWO-EM DASH;Pd;0;ON;;;;;N;;;;; +2E3B;THREE-EM DASH;Pd;0;ON;;;;;N;;;;; 2E80;CJK RADICAL REPEAT;So;0;ON;;;;;N;;;;; 2E81;CJK RADICAL CLIFF;So;0;ON;;;;;N;;;;; 2E82;CJK RADICAL SECOND ONE;So;0;ON;;;;;N;;;;; @@ -10623,8 +10711,8 @@ 302B;IDEOGRAPHIC RISING TONE MARK;Mn;228;NSM;;;;;N;;;;; 302C;IDEOGRAPHIC DEPARTING TONE MARK;Mn;232;NSM;;;;;N;;;;; 302D;IDEOGRAPHIC ENTERING TONE MARK;Mn;222;NSM;;;;;N;;;;; -302E;HANGUL SINGLE DOT TONE MARK;Mn;224;NSM;;;;;N;;;;; -302F;HANGUL DOUBLE DOT TONE MARK;Mn;224;NSM;;;;;N;;;;; +302E;HANGUL SINGLE DOT TONE MARK;Mc;224;L;;;;;N;;;;; +302F;HANGUL DOUBLE DOT TONE MARK;Mc;224;L;;;;;N;;;;; 3030;WAVY DASH;Pd;0;ON;;;;;N;;;;; 3031;VERTICAL KANA REPEAT MARK;Lm;0;L;;;;;N;;;;; 3032;VERTICAL KANA REPEAT WITH VOICED SOUND MARK;Lm;0;L;;;;;N;;;;; @@ -11131,14 +11219,14 @@ 3245;CIRCLED IDEOGRAPH KINDERGARTEN;So;0;L;<circle> 5E7C;;;;N;;;;; 3246;CIRCLED IDEOGRAPH SCHOOL;So;0;L;<circle> 6587;;;;N;;;;; 3247;CIRCLED IDEOGRAPH KOTO;So;0;L;<circle> 7B8F;;;;N;;;;; -3248;CIRCLED NUMBER TEN ON BLACK SQUARE;So;0;L;;;;;N;;;;; -3249;CIRCLED NUMBER TWENTY ON BLACK SQUARE;So;0;L;;;;;N;;;;; -324A;CIRCLED NUMBER THIRTY ON BLACK 
SQUARE;So;0;L;;;;;N;;;;; -324B;CIRCLED NUMBER FORTY ON BLACK SQUARE;So;0;L;;;;;N;;;;; -324C;CIRCLED NUMBER FIFTY ON BLACK SQUARE;So;0;L;;;;;N;;;;; -324D;CIRCLED NUMBER SIXTY ON BLACK SQUARE;So;0;L;;;;;N;;;;; -324E;CIRCLED NUMBER SEVENTY ON BLACK SQUARE;So;0;L;;;;;N;;;;; -324F;CIRCLED NUMBER EIGHTY ON BLACK SQUARE;So;0;L;;;;;N;;;;; +3248;CIRCLED NUMBER TEN ON BLACK SQUARE;No;0;L;;;;10;N;;;;; +3249;CIRCLED NUMBER TWENTY ON BLACK SQUARE;No;0;L;;;;20;N;;;;; +324A;CIRCLED NUMBER THIRTY ON BLACK SQUARE;No;0;L;;;;30;N;;;;; +324B;CIRCLED NUMBER FORTY ON BLACK SQUARE;No;0;L;;;;40;N;;;;; +324C;CIRCLED NUMBER FIFTY ON BLACK SQUARE;No;0;L;;;;50;N;;;;; +324D;CIRCLED NUMBER SIXTY ON BLACK SQUARE;No;0;L;;;;60;N;;;;; +324E;CIRCLED NUMBER SEVENTY ON BLACK SQUARE;No;0;L;;;;70;N;;;;; +324F;CIRCLED NUMBER EIGHTY ON BLACK SQUARE;No;0;L;;;;80;N;;;;; 3250;PARTNERSHIP SIGN;So;0;ON;<square> 0050 0054 0045;;;;N;;;;; 3251;CIRCLED NUMBER TWENTY ONE;No;0;ON;<circle> 0032 0031;;;21;N;;;;; 3252;CIRCLED NUMBER TWENTY TWO;No;0;ON;<circle> 0032 0032;;;22;N;;;;; @@ -11637,7 +11725,7 @@ 4DFE;HEXAGRAM FOR AFTER COMPLETION;So;0;ON;;;;;N;;;;; 4DFF;HEXAGRAM FOR BEFORE COMPLETION;So;0;ON;;;;;N;;;;; 4E00;<CJK Ideograph, First>;Lo;0;L;;;;;N;;;;; -9FCB;<CJK Ideograph, Last>;Lo;0;L;;;;;N;;;;; +9FCC;<CJK Ideograph, Last>;Lo;0;L;;;;;N;;;;; A000;YI SYLLABLE IT;Lo;0;L;;;;;N;;;;; A001;YI SYLLABLE IX;Lo;0;L;;;;;N;;;;; A002;YI SYLLABLE I;Lo;0;L;;;;;N;;;;; @@ -13258,6 +13346,14 @@ A670;COMBINING CYRILLIC TEN MILLIONS SIGN;Me;0;NSM;;;;;N;;;;; A671;COMBINING CYRILLIC HUNDRED MILLIONS SIGN;Me;0;NSM;;;;;N;;;;; A672;COMBINING CYRILLIC THOUSAND MILLIONS SIGN;Me;0;NSM;;;;;N;;;;; A673;SLAVONIC ASTERISK;Po;0;ON;;;;;N;;;;; +A674;COMBINING CYRILLIC LETTER UKRAINIAN IE;Mn;230;NSM;;;;;N;;;;; +A675;COMBINING CYRILLIC LETTER I;Mn;230;NSM;;;;;N;;;;; +A676;COMBINING CYRILLIC LETTER YI;Mn;230;NSM;;;;;N;;;;; +A677;COMBINING CYRILLIC LETTER U;Mn;230;NSM;;;;;N;;;;; +A678;COMBINING CYRILLIC LETTER HARD SIGN;Mn;230;NSM;;;;;N;;;;; 
+A679;COMBINING CYRILLIC LETTER YERU;Mn;230;NSM;;;;;N;;;;; +A67A;COMBINING CYRILLIC LETTER SOFT SIGN;Mn;230;NSM;;;;;N;;;;; +A67B;COMBINING CYRILLIC LETTER OMEGA;Mn;230;NSM;;;;;N;;;;; A67C;COMBINING CYRILLIC KAVYKA;Mn;230;NSM;;;;;N;;;;; A67D;COMBINING CYRILLIC PAYEROK;Mn;230;NSM;;;;;N;;;;; A67E;CYRILLIC KAVYKA;Po;0;ON;;;;;N;;;;; @@ -13286,6 +13382,7 @@ A694;CYRILLIC CAPITAL LETTER HWE;Lu;0;L;;;;;N;;;;A695; A695;CYRILLIC SMALL LETTER HWE;Ll;0;L;;;;;N;;;A694;;A694 A696;CYRILLIC CAPITAL LETTER SHWE;Lu;0;L;;;;;N;;;;A697; A697;CYRILLIC SMALL LETTER SHWE;Ll;0;L;;;;;N;;;A696;;A696 +A69F;COMBINING CYRILLIC LETTER IOTIFIED E;Mn;230;NSM;;;;;N;;;;; A6A0;BAMUM LETTER A;Lo;0;L;;;;;N;;;;; A6A1;BAMUM LETTER KA;Lo;0;L;;;;;N;;;;; A6A2;BAMUM LETTER U;Lo;0;L;;;;;N;;;;; @@ -13519,6 +13616,8 @@ A78D;LATIN CAPITAL LETTER TURNED H;Lu;0;L;;;;;N;;;;0265; A78E;LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT;Ll;0;L;;;;;N;;;;; A790;LATIN CAPITAL LETTER N WITH DESCENDER;Lu;0;L;;;;;N;;;;A791; A791;LATIN SMALL LETTER N WITH DESCENDER;Ll;0;L;;;;;N;;;A790;;A790 +A792;LATIN CAPITAL LETTER C WITH BAR;Lu;0;L;;;;;N;;;;A793; +A793;LATIN SMALL LETTER C WITH BAR;Ll;0;L;;;;;N;;;A792;;A792 A7A0;LATIN CAPITAL LETTER G WITH OBLIQUE STROKE;Lu;0;L;;;;;N;;;;A7A1; A7A1;LATIN SMALL LETTER G WITH OBLIQUE STROKE;Ll;0;L;;;;;N;;;A7A0;;A7A0 A7A2;LATIN CAPITAL LETTER K WITH OBLIQUE STROKE;Lu;0;L;;;;;N;;;;A7A3; @@ -13529,6 +13628,9 @@ A7A6;LATIN CAPITAL LETTER R WITH OBLIQUE STROKE;Lu;0;L;;;;;N;;;;A7A7; A7A7;LATIN SMALL LETTER R WITH OBLIQUE STROKE;Ll;0;L;;;;;N;;;A7A6;;A7A6 A7A8;LATIN CAPITAL LETTER S WITH OBLIQUE STROKE;Lu;0;L;;;;;N;;;;A7A9; A7A9;LATIN SMALL LETTER S WITH OBLIQUE STROKE;Ll;0;L;;;;;N;;;A7A8;;A7A8 +A7AA;LATIN CAPITAL LETTER H WITH HOOK;Lu;0;L;;;;;N;;;;0266; +A7F8;MODIFIER LETTER CAPITAL H WITH STROKE;Lm;0;L;<super> 0126;;;;N;;;;; +A7F9;MODIFIER LETTER SMALL LIGATURE OE;Lm;0;L;<super> 0153;;;;N;;;;; A7FA;LATIN LETTER SMALL CAPITAL TURNED M;Ll;0;L;;;;;N;;;;; A7FB;LATIN EPIGRAPHIC LETTER REVERSED 
F;Lo;0;L;;;;;N;;;;; A7FC;LATIN EPIGRAPHIC LETTER REVERSED P;Lo;0;L;;;;;N;;;;; @@ -14142,6 +14244,29 @@ AADC;TAI VIET SYMBOL NUENG;Lo;0;L;;;;;N;;;;; AADD;TAI VIET SYMBOL SAM;Lm;0;L;;;;;N;;;;; AADE;TAI VIET SYMBOL HO HOI;Po;0;L;;;;;N;;;;; AADF;TAI VIET SYMBOL KOI KOI;Po;0;L;;;;;N;;;;; +AAE0;MEETEI MAYEK LETTER E;Lo;0;L;;;;;N;;;;; +AAE1;MEETEI MAYEK LETTER O;Lo;0;L;;;;;N;;;;; +AAE2;MEETEI MAYEK LETTER CHA;Lo;0;L;;;;;N;;;;; +AAE3;MEETEI MAYEK LETTER NYA;Lo;0;L;;;;;N;;;;; +AAE4;MEETEI MAYEK LETTER TTA;Lo;0;L;;;;;N;;;;; +AAE5;MEETEI MAYEK LETTER TTHA;Lo;0;L;;;;;N;;;;; +AAE6;MEETEI MAYEK LETTER DDA;Lo;0;L;;;;;N;;;;; +AAE7;MEETEI MAYEK LETTER DDHA;Lo;0;L;;;;;N;;;;; +AAE8;MEETEI MAYEK LETTER NNA;Lo;0;L;;;;;N;;;;; +AAE9;MEETEI MAYEK LETTER SHA;Lo;0;L;;;;;N;;;;; +AAEA;MEETEI MAYEK LETTER SSA;Lo;0;L;;;;;N;;;;; +AAEB;MEETEI MAYEK VOWEL SIGN II;Mc;0;L;;;;;N;;;;; +AAEC;MEETEI MAYEK VOWEL SIGN UU;Mn;0;NSM;;;;;N;;;;; +AAED;MEETEI MAYEK VOWEL SIGN AAI;Mn;0;NSM;;;;;N;;;;; +AAEE;MEETEI MAYEK VOWEL SIGN AU;Mc;0;L;;;;;N;;;;; +AAEF;MEETEI MAYEK VOWEL SIGN AAU;Mc;0;L;;;;;N;;;;; +AAF0;MEETEI MAYEK CHEIKHAN;Po;0;L;;;;;N;;;;; +AAF1;MEETEI MAYEK AHANG KHUDAM;Po;0;L;;;;;N;;;;; +AAF2;MEETEI MAYEK ANJI;Lo;0;L;;;;;N;;;;; +AAF3;MEETEI MAYEK SYLLABLE REPETITION MARK;Lm;0;L;;;;;N;;;;; +AAF4;MEETEI MAYEK WORD REPETITION MARK;Lm;0;L;;;;;N;;;;; +AAF5;MEETEI MAYEK VOWEL SIGN VISARGA;Mc;0;L;;;;;N;;;;; +AAF6;MEETEI MAYEK VIRAMA;Mn;9;NSM;;;;;N;;;;; AB01;ETHIOPIC SYLLABLE TTHU;Lo;0;L;;;;;N;;;;; AB02;ETHIOPIC SYLLABLE TTHI;Lo;0;L;;;;;N;;;;; AB03;ETHIOPIC SYLLABLE TTHAA;Lo;0;L;;;;;N;;;;; @@ -14614,6 +14739,8 @@ FA2A;CJK COMPATIBILITY IDEOGRAPH-FA2A;Lo;0;L;98EF;;;;N;;;;; FA2B;CJK COMPATIBILITY IDEOGRAPH-FA2B;Lo;0;L;98FC;;;;N;;;;; FA2C;CJK COMPATIBILITY IDEOGRAPH-FA2C;Lo;0;L;9928;;;;N;;;;; FA2D;CJK COMPATIBILITY IDEOGRAPH-FA2D;Lo;0;L;9DB4;;;;N;;;;; +FA2E;CJK COMPATIBILITY IDEOGRAPH-FA2E;Lo;0;L;90DE;;;;N;;;;; +FA2F;CJK COMPATIBILITY IDEOGRAPH-FA2F;Lo;0;L;96B7;;;;N;;;;; FA30;CJK COMPATIBILITY 
IDEOGRAPH-FA30;Lo;0;L;4FAE;;;;N;;;;; FA31;CJK COMPATIBILITY IDEOGRAPH-FA31;Lo;0;L;50E7;;;;N;;;;; FA32;CJK COMPATIBILITY IDEOGRAPH-FA32;Lo;0;L;514D;;;;N;;;;; @@ -16126,7 +16253,7 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 100FA;LINEAR B IDEOGRAM VESSEL B305;Lo;0;L;;;;;N;;;;; 10100;AEGEAN WORD SEPARATOR LINE;Po;0;L;;;;;N;;;;; 10101;AEGEAN WORD SEPARATOR DOT;Po;0;ON;;;;;N;;;;; -10102;AEGEAN CHECK MARK;So;0;L;;;;;N;;;;; +10102;AEGEAN CHECK MARK;Po;0;L;;;;;N;;;;; 10107;AEGEAN NUMBER ONE;No;0;L;;;;1;N;;;;; 10108;AEGEAN NUMBER TWO;No;0;L;;;;2;N;;;;; 10109;AEGEAN NUMBER THREE;No;0;L;;;;3;N;;;;; @@ -16845,6 +16972,64 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 10938;LYDIAN LETTER NN;Lo;0;R;;;;;N;;;;; 10939;LYDIAN LETTER C;Lo;0;R;;;;;N;;;;; 1093F;LYDIAN TRIANGULAR MARK;Po;0;R;;;;;N;;;;; +10980;MEROITIC HIEROGLYPHIC LETTER A;Lo;0;R;;;;;N;;;;; +10981;MEROITIC HIEROGLYPHIC LETTER E;Lo;0;R;;;;;N;;;;; +10982;MEROITIC HIEROGLYPHIC LETTER I;Lo;0;R;;;;;N;;;;; +10983;MEROITIC HIEROGLYPHIC LETTER O;Lo;0;R;;;;;N;;;;; +10984;MEROITIC HIEROGLYPHIC LETTER YA;Lo;0;R;;;;;N;;;;; +10985;MEROITIC HIEROGLYPHIC LETTER WA;Lo;0;R;;;;;N;;;;; +10986;MEROITIC HIEROGLYPHIC LETTER BA;Lo;0;R;;;;;N;;;;; +10987;MEROITIC HIEROGLYPHIC LETTER BA-2;Lo;0;R;;;;;N;;;;; +10988;MEROITIC HIEROGLYPHIC LETTER PA;Lo;0;R;;;;;N;;;;; +10989;MEROITIC HIEROGLYPHIC LETTER MA;Lo;0;R;;;;;N;;;;; +1098A;MEROITIC HIEROGLYPHIC LETTER NA;Lo;0;R;;;;;N;;;;; +1098B;MEROITIC HIEROGLYPHIC LETTER NA-2;Lo;0;R;;;;;N;;;;; +1098C;MEROITIC HIEROGLYPHIC LETTER NE;Lo;0;R;;;;;N;;;;; +1098D;MEROITIC HIEROGLYPHIC LETTER NE-2;Lo;0;R;;;;;N;;;;; +1098E;MEROITIC HIEROGLYPHIC LETTER RA;Lo;0;R;;;;;N;;;;; +1098F;MEROITIC HIEROGLYPHIC LETTER RA-2;Lo;0;R;;;;;N;;;;; +10990;MEROITIC HIEROGLYPHIC LETTER LA;Lo;0;R;;;;;N;;;;; +10991;MEROITIC HIEROGLYPHIC LETTER KHA;Lo;0;R;;;;;N;;;;; +10992;MEROITIC HIEROGLYPHIC LETTER HHA;Lo;0;R;;;;;N;;;;; +10993;MEROITIC HIEROGLYPHIC LETTER SA;Lo;0;R;;;;;N;;;;; +10994;MEROITIC HIEROGLYPHIC LETTER 
SA-2;Lo;0;R;;;;;N;;;;; +10995;MEROITIC HIEROGLYPHIC LETTER SE;Lo;0;R;;;;;N;;;;; +10996;MEROITIC HIEROGLYPHIC LETTER KA;Lo;0;R;;;;;N;;;;; +10997;MEROITIC HIEROGLYPHIC LETTER QA;Lo;0;R;;;;;N;;;;; +10998;MEROITIC HIEROGLYPHIC LETTER TA;Lo;0;R;;;;;N;;;;; +10999;MEROITIC HIEROGLYPHIC LETTER TA-2;Lo;0;R;;;;;N;;;;; +1099A;MEROITIC HIEROGLYPHIC LETTER TE;Lo;0;R;;;;;N;;;;; +1099B;MEROITIC HIEROGLYPHIC LETTER TE-2;Lo;0;R;;;;;N;;;;; +1099C;MEROITIC HIEROGLYPHIC LETTER TO;Lo;0;R;;;;;N;;;;; +1099D;MEROITIC HIEROGLYPHIC LETTER DA;Lo;0;R;;;;;N;;;;; +1099E;MEROITIC HIEROGLYPHIC SYMBOL VIDJ;Lo;0;R;;;;;N;;;;; +1099F;MEROITIC HIEROGLYPHIC SYMBOL VIDJ-2;Lo;0;R;;;;;N;;;;; +109A0;MEROITIC CURSIVE LETTER A;Lo;0;R;;;;;N;;;;; +109A1;MEROITIC CURSIVE LETTER E;Lo;0;R;;;;;N;;;;; +109A2;MEROITIC CURSIVE LETTER I;Lo;0;R;;;;;N;;;;; +109A3;MEROITIC CURSIVE LETTER O;Lo;0;R;;;;;N;;;;; +109A4;MEROITIC CURSIVE LETTER YA;Lo;0;R;;;;;N;;;;; +109A5;MEROITIC CURSIVE LETTER WA;Lo;0;R;;;;;N;;;;; +109A6;MEROITIC CURSIVE LETTER BA;Lo;0;R;;;;;N;;;;; +109A7;MEROITIC CURSIVE LETTER PA;Lo;0;R;;;;;N;;;;; +109A8;MEROITIC CURSIVE LETTER MA;Lo;0;R;;;;;N;;;;; +109A9;MEROITIC CURSIVE LETTER NA;Lo;0;R;;;;;N;;;;; +109AA;MEROITIC CURSIVE LETTER NE;Lo;0;R;;;;;N;;;;; +109AB;MEROITIC CURSIVE LETTER RA;Lo;0;R;;;;;N;;;;; +109AC;MEROITIC CURSIVE LETTER LA;Lo;0;R;;;;;N;;;;; +109AD;MEROITIC CURSIVE LETTER KHA;Lo;0;R;;;;;N;;;;; +109AE;MEROITIC CURSIVE LETTER HHA;Lo;0;R;;;;;N;;;;; +109AF;MEROITIC CURSIVE LETTER SA;Lo;0;R;;;;;N;;;;; +109B0;MEROITIC CURSIVE LETTER ARCHAIC SA;Lo;0;R;;;;;N;;;;; +109B1;MEROITIC CURSIVE LETTER SE;Lo;0;R;;;;;N;;;;; +109B2;MEROITIC CURSIVE LETTER KA;Lo;0;R;;;;;N;;;;; +109B3;MEROITIC CURSIVE LETTER QA;Lo;0;R;;;;;N;;;;; +109B4;MEROITIC CURSIVE LETTER TA;Lo;0;R;;;;;N;;;;; +109B5;MEROITIC CURSIVE LETTER TE;Lo;0;R;;;;;N;;;;; +109B6;MEROITIC CURSIVE LETTER TO;Lo;0;R;;;;;N;;;;; +109B7;MEROITIC CURSIVE LETTER DA;Lo;0;R;;;;;N;;;;; +109BE;MEROITIC CURSIVE LOGOGRAM RMT;Lo;0;R;;;;;N;;;;; +109BF;MEROITIC CURSIVE 
LOGOGRAM IMN;Lo;0;R;;;;;N;;;;; 10A00;KHAROSHTHI LETTER A;Lo;0;R;;;;;N;;;;; 10A01;KHAROSHTHI VOWEL SIGN I;Mn;0;NSM;;;;;N;;;;; 10A02;KHAROSHTHI VOWEL SIGN U;Mn;0;NSM;;;;;N;;;;; @@ -17338,6 +17523,257 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 110BF;KAITHI DOUBLE SECTION MARK;Po;0;L;;;;;N;;;;; 110C0;KAITHI DANDA;Po;0;L;;;;;N;;;;; 110C1;KAITHI DOUBLE DANDA;Po;0;L;;;;;N;;;;; +110D0;SORA SOMPENG LETTER SAH;Lo;0;L;;;;;N;;;;; +110D1;SORA SOMPENG LETTER TAH;Lo;0;L;;;;;N;;;;; +110D2;SORA SOMPENG LETTER BAH;Lo;0;L;;;;;N;;;;; +110D3;SORA SOMPENG LETTER CAH;Lo;0;L;;;;;N;;;;; +110D4;SORA SOMPENG LETTER DAH;Lo;0;L;;;;;N;;;;; +110D5;SORA SOMPENG LETTER GAH;Lo;0;L;;;;;N;;;;; +110D6;SORA SOMPENG LETTER MAH;Lo;0;L;;;;;N;;;;; +110D7;SORA SOMPENG LETTER NGAH;Lo;0;L;;;;;N;;;;; +110D8;SORA SOMPENG LETTER LAH;Lo;0;L;;;;;N;;;;; +110D9;SORA SOMPENG LETTER NAH;Lo;0;L;;;;;N;;;;; +110DA;SORA SOMPENG LETTER VAH;Lo;0;L;;;;;N;;;;; +110DB;SORA SOMPENG LETTER PAH;Lo;0;L;;;;;N;;;;; +110DC;SORA SOMPENG LETTER YAH;Lo;0;L;;;;;N;;;;; +110DD;SORA SOMPENG LETTER RAH;Lo;0;L;;;;;N;;;;; +110DE;SORA SOMPENG LETTER HAH;Lo;0;L;;;;;N;;;;; +110DF;SORA SOMPENG LETTER KAH;Lo;0;L;;;;;N;;;;; +110E0;SORA SOMPENG LETTER JAH;Lo;0;L;;;;;N;;;;; +110E1;SORA SOMPENG LETTER NYAH;Lo;0;L;;;;;N;;;;; +110E2;SORA SOMPENG LETTER AH;Lo;0;L;;;;;N;;;;; +110E3;SORA SOMPENG LETTER EEH;Lo;0;L;;;;;N;;;;; +110E4;SORA SOMPENG LETTER IH;Lo;0;L;;;;;N;;;;; +110E5;SORA SOMPENG LETTER UH;Lo;0;L;;;;;N;;;;; +110E6;SORA SOMPENG LETTER OH;Lo;0;L;;;;;N;;;;; +110E7;SORA SOMPENG LETTER EH;Lo;0;L;;;;;N;;;;; +110E8;SORA SOMPENG LETTER MAE;Lo;0;L;;;;;N;;;;; +110F0;SORA SOMPENG DIGIT ZERO;Nd;0;L;;0;0;0;N;;;;; +110F1;SORA SOMPENG DIGIT ONE;Nd;0;L;;1;1;1;N;;;;; +110F2;SORA SOMPENG DIGIT TWO;Nd;0;L;;2;2;2;N;;;;; +110F3;SORA SOMPENG DIGIT THREE;Nd;0;L;;3;3;3;N;;;;; +110F4;SORA SOMPENG DIGIT FOUR;Nd;0;L;;4;4;4;N;;;;; +110F5;SORA SOMPENG DIGIT FIVE;Nd;0;L;;5;5;5;N;;;;; +110F6;SORA SOMPENG DIGIT SIX;Nd;0;L;;6;6;6;N;;;;; +110F7;SORA SOMPENG DIGIT 
SEVEN;Nd;0;L;;7;7;7;N;;;;; +110F8;SORA SOMPENG DIGIT EIGHT;Nd;0;L;;8;8;8;N;;;;; +110F9;SORA SOMPENG DIGIT NINE;Nd;0;L;;9;9;9;N;;;;; +11100;CHAKMA SIGN CANDRABINDU;Mn;230;NSM;;;;;N;;;;; +11101;CHAKMA SIGN ANUSVARA;Mn;230;NSM;;;;;N;;;;; +11102;CHAKMA SIGN VISARGA;Mn;230;NSM;;;;;N;;;;; +11103;CHAKMA LETTER AA;Lo;0;L;;;;;N;;;;; +11104;CHAKMA LETTER I;Lo;0;L;;;;;N;;;;; +11105;CHAKMA LETTER U;Lo;0;L;;;;;N;;;;; +11106;CHAKMA LETTER E;Lo;0;L;;;;;N;;;;; +11107;CHAKMA LETTER KAA;Lo;0;L;;;;;N;;;;; +11108;CHAKMA LETTER KHAA;Lo;0;L;;;;;N;;;;; +11109;CHAKMA LETTER GAA;Lo;0;L;;;;;N;;;;; +1110A;CHAKMA LETTER GHAA;Lo;0;L;;;;;N;;;;; +1110B;CHAKMA LETTER NGAA;Lo;0;L;;;;;N;;;;; +1110C;CHAKMA LETTER CAA;Lo;0;L;;;;;N;;;;; +1110D;CHAKMA LETTER CHAA;Lo;0;L;;;;;N;;;;; +1110E;CHAKMA LETTER JAA;Lo;0;L;;;;;N;;;;; +1110F;CHAKMA LETTER JHAA;Lo;0;L;;;;;N;;;;; +11110;CHAKMA LETTER NYAA;Lo;0;L;;;;;N;;;;; +11111;CHAKMA LETTER TTAA;Lo;0;L;;;;;N;;;;; +11112;CHAKMA LETTER TTHAA;Lo;0;L;;;;;N;;;;; +11113;CHAKMA LETTER DDAA;Lo;0;L;;;;;N;;;;; +11114;CHAKMA LETTER DDHAA;Lo;0;L;;;;;N;;;;; +11115;CHAKMA LETTER NNAA;Lo;0;L;;;;;N;;;;; +11116;CHAKMA LETTER TAA;Lo;0;L;;;;;N;;;;; +11117;CHAKMA LETTER THAA;Lo;0;L;;;;;N;;;;; +11118;CHAKMA LETTER DAA;Lo;0;L;;;;;N;;;;; +11119;CHAKMA LETTER DHAA;Lo;0;L;;;;;N;;;;; +1111A;CHAKMA LETTER NAA;Lo;0;L;;;;;N;;;;; +1111B;CHAKMA LETTER PAA;Lo;0;L;;;;;N;;;;; +1111C;CHAKMA LETTER PHAA;Lo;0;L;;;;;N;;;;; +1111D;CHAKMA LETTER BAA;Lo;0;L;;;;;N;;;;; +1111E;CHAKMA LETTER BHAA;Lo;0;L;;;;;N;;;;; +1111F;CHAKMA LETTER MAA;Lo;0;L;;;;;N;;;;; +11120;CHAKMA LETTER YYAA;Lo;0;L;;;;;N;;;;; +11121;CHAKMA LETTER YAA;Lo;0;L;;;;;N;;;;; +11122;CHAKMA LETTER RAA;Lo;0;L;;;;;N;;;;; +11123;CHAKMA LETTER LAA;Lo;0;L;;;;;N;;;;; +11124;CHAKMA LETTER WAA;Lo;0;L;;;;;N;;;;; +11125;CHAKMA LETTER SAA;Lo;0;L;;;;;N;;;;; +11126;CHAKMA LETTER HAA;Lo;0;L;;;;;N;;;;; +11127;CHAKMA VOWEL SIGN A;Mn;0;NSM;;;;;N;;;;; +11128;CHAKMA VOWEL SIGN I;Mn;0;NSM;;;;;N;;;;; +11129;CHAKMA VOWEL SIGN II;Mn;0;NSM;;;;;N;;;;; +1112A;CHAKMA 
VOWEL SIGN U;Mn;0;NSM;;;;;N;;;;; +1112B;CHAKMA VOWEL SIGN UU;Mn;0;NSM;;;;;N;;;;; +1112C;CHAKMA VOWEL SIGN E;Mc;0;L;;;;;N;;;;; +1112D;CHAKMA VOWEL SIGN AI;Mn;0;NSM;;;;;N;;;;; +1112E;CHAKMA VOWEL SIGN O;Mn;0;NSM;11131 11127;;;;N;;;;; +1112F;CHAKMA VOWEL SIGN AU;Mn;0;NSM;11132 11127;;;;N;;;;; +11130;CHAKMA VOWEL SIGN OI;Mn;0;NSM;;;;;N;;;;; +11131;CHAKMA O MARK;Mn;0;NSM;;;;;N;;;;; +11132;CHAKMA AU MARK;Mn;0;NSM;;;;;N;;;;; +11133;CHAKMA VIRAMA;Mn;9;NSM;;;;;N;;;;; +11134;CHAKMA MAAYYAA;Mn;9;NSM;;;;;N;;;;; +11136;CHAKMA DIGIT ZERO;Nd;0;L;;0;0;0;N;;;;; +11137;CHAKMA DIGIT ONE;Nd;0;L;;1;1;1;N;;;;; +11138;CHAKMA DIGIT TWO;Nd;0;L;;2;2;2;N;;;;; +11139;CHAKMA DIGIT THREE;Nd;0;L;;3;3;3;N;;;;; +1113A;CHAKMA DIGIT FOUR;Nd;0;L;;4;4;4;N;;;;; +1113B;CHAKMA DIGIT FIVE;Nd;0;L;;5;5;5;N;;;;; +1113C;CHAKMA DIGIT SIX;Nd;0;L;;6;6;6;N;;;;; +1113D;CHAKMA DIGIT SEVEN;Nd;0;L;;7;7;7;N;;;;; +1113E;CHAKMA DIGIT EIGHT;Nd;0;L;;8;8;8;N;;;;; +1113F;CHAKMA DIGIT NINE;Nd;0;L;;9;9;9;N;;;;; +11140;CHAKMA SECTION MARK;Po;0;L;;;;;N;;;;; +11141;CHAKMA DANDA;Po;0;L;;;;;N;;;;; +11142;CHAKMA DOUBLE DANDA;Po;0;L;;;;;N;;;;; +11143;CHAKMA QUESTION MARK;Po;0;L;;;;;N;;;;; +11180;SHARADA SIGN CANDRABINDU;Mn;0;NSM;;;;;N;;;;; +11181;SHARADA SIGN ANUSVARA;Mn;0;NSM;;;;;N;;;;; +11182;SHARADA SIGN VISARGA;Mc;0;L;;;;;N;;;;; +11183;SHARADA LETTER A;Lo;0;L;;;;;N;;;;; +11184;SHARADA LETTER AA;Lo;0;L;;;;;N;;;;; +11185;SHARADA LETTER I;Lo;0;L;;;;;N;;;;; +11186;SHARADA LETTER II;Lo;0;L;;;;;N;;;;; +11187;SHARADA LETTER U;Lo;0;L;;;;;N;;;;; +11188;SHARADA LETTER UU;Lo;0;L;;;;;N;;;;; +11189;SHARADA LETTER VOCALIC R;Lo;0;L;;;;;N;;;;; +1118A;SHARADA LETTER VOCALIC RR;Lo;0;L;;;;;N;;;;; +1118B;SHARADA LETTER VOCALIC L;Lo;0;L;;;;;N;;;;; +1118C;SHARADA LETTER VOCALIC LL;Lo;0;L;;;;;N;;;;; +1118D;SHARADA LETTER E;Lo;0;L;;;;;N;;;;; +1118E;SHARADA LETTER AI;Lo;0;L;;;;;N;;;;; +1118F;SHARADA LETTER O;Lo;0;L;;;;;N;;;;; +11190;SHARADA LETTER AU;Lo;0;L;;;;;N;;;;; +11191;SHARADA LETTER KA;Lo;0;L;;;;;N;;;;; +11192;SHARADA LETTER KHA;Lo;0;L;;;;;N;;;;; 
+11193;SHARADA LETTER GA;Lo;0;L;;;;;N;;;;; +11194;SHARADA LETTER GHA;Lo;0;L;;;;;N;;;;; +11195;SHARADA LETTER NGA;Lo;0;L;;;;;N;;;;; +11196;SHARADA LETTER CA;Lo;0;L;;;;;N;;;;; +11197;SHARADA LETTER CHA;Lo;0;L;;;;;N;;;;; +11198;SHARADA LETTER JA;Lo;0;L;;;;;N;;;;; +11199;SHARADA LETTER JHA;Lo;0;L;;;;;N;;;;; +1119A;SHARADA LETTER NYA;Lo;0;L;;;;;N;;;;; +1119B;SHARADA LETTER TTA;Lo;0;L;;;;;N;;;;; +1119C;SHARADA LETTER TTHA;Lo;0;L;;;;;N;;;;; +1119D;SHARADA LETTER DDA;Lo;0;L;;;;;N;;;;; +1119E;SHARADA LETTER DDHA;Lo;0;L;;;;;N;;;;; +1119F;SHARADA LETTER NNA;Lo;0;L;;;;;N;;;;; +111A0;SHARADA LETTER TA;Lo;0;L;;;;;N;;;;; +111A1;SHARADA LETTER THA;Lo;0;L;;;;;N;;;;; +111A2;SHARADA LETTER DA;Lo;0;L;;;;;N;;;;; +111A3;SHARADA LETTER DHA;Lo;0;L;;;;;N;;;;; +111A4;SHARADA LETTER NA;Lo;0;L;;;;;N;;;;; +111A5;SHARADA LETTER PA;Lo;0;L;;;;;N;;;;; +111A6;SHARADA LETTER PHA;Lo;0;L;;;;;N;;;;; +111A7;SHARADA LETTER BA;Lo;0;L;;;;;N;;;;; +111A8;SHARADA LETTER BHA;Lo;0;L;;;;;N;;;;; +111A9;SHARADA LETTER MA;Lo;0;L;;;;;N;;;;; +111AA;SHARADA LETTER YA;Lo;0;L;;;;;N;;;;; +111AB;SHARADA LETTER RA;Lo;0;L;;;;;N;;;;; +111AC;SHARADA LETTER LA;Lo;0;L;;;;;N;;;;; +111AD;SHARADA LETTER LLA;Lo;0;L;;;;;N;;;;; +111AE;SHARADA LETTER VA;Lo;0;L;;;;;N;;;;; +111AF;SHARADA LETTER SHA;Lo;0;L;;;;;N;;;;; +111B0;SHARADA LETTER SSA;Lo;0;L;;;;;N;;;;; +111B1;SHARADA LETTER SA;Lo;0;L;;;;;N;;;;; +111B2;SHARADA LETTER HA;Lo;0;L;;;;;N;;;;; +111B3;SHARADA VOWEL SIGN AA;Mc;0;L;;;;;N;;;;; +111B4;SHARADA VOWEL SIGN I;Mc;0;L;;;;;N;;;;; +111B5;SHARADA VOWEL SIGN II;Mc;0;L;;;;;N;;;;; +111B6;SHARADA VOWEL SIGN U;Mn;0;NSM;;;;;N;;;;; +111B7;SHARADA VOWEL SIGN UU;Mn;0;NSM;;;;;N;;;;; +111B8;SHARADA VOWEL SIGN VOCALIC R;Mn;0;NSM;;;;;N;;;;; +111B9;SHARADA VOWEL SIGN VOCALIC RR;Mn;0;NSM;;;;;N;;;;; +111BA;SHARADA VOWEL SIGN VOCALIC L;Mn;0;NSM;;;;;N;;;;; +111BB;SHARADA VOWEL SIGN VOCALIC LL;Mn;0;NSM;;;;;N;;;;; +111BC;SHARADA VOWEL SIGN E;Mn;0;NSM;;;;;N;;;;; +111BD;SHARADA VOWEL SIGN AI;Mn;0;NSM;;;;;N;;;;; +111BE;SHARADA VOWEL SIGN 
O;Mn;0;NSM;;;;;N;;;;; +111BF;SHARADA VOWEL SIGN AU;Mc;0;L;;;;;N;;;;; +111C0;SHARADA SIGN VIRAMA;Mc;9;L;;;;;N;;;;; +111C1;SHARADA SIGN AVAGRAHA;Lo;0;L;;;;;N;;;;; +111C2;SHARADA SIGN JIHVAMULIYA;Lo;0;L;;;;;N;;;;; +111C3;SHARADA SIGN UPADHMANIYA;Lo;0;L;;;;;N;;;;; +111C4;SHARADA OM;Lo;0;L;;;;;N;;;;; +111C5;SHARADA DANDA;Po;0;L;;;;;N;;;;; +111C6;SHARADA DOUBLE DANDA;Po;0;L;;;;;N;;;;; +111C7;SHARADA ABBREVIATION SIGN;Po;0;L;;;;;N;;;;; +111C8;SHARADA SEPARATOR;Po;0;L;;;;;N;;;;; +111D0;SHARADA DIGIT ZERO;Nd;0;L;;0;0;0;N;;;;; +111D1;SHARADA DIGIT ONE;Nd;0;L;;1;1;1;N;;;;; +111D2;SHARADA DIGIT TWO;Nd;0;L;;2;2;2;N;;;;; +111D3;SHARADA DIGIT THREE;Nd;0;L;;3;3;3;N;;;;; +111D4;SHARADA DIGIT FOUR;Nd;0;L;;4;4;4;N;;;;; +111D5;SHARADA DIGIT FIVE;Nd;0;L;;5;5;5;N;;;;; +111D6;SHARADA DIGIT SIX;Nd;0;L;;6;6;6;N;;;;; +111D7;SHARADA DIGIT SEVEN;Nd;0;L;;7;7;7;N;;;;; +111D8;SHARADA DIGIT EIGHT;Nd;0;L;;8;8;8;N;;;;; +111D9;SHARADA DIGIT NINE;Nd;0;L;;9;9;9;N;;;;; +11680;TAKRI LETTER A;Lo;0;L;;;;;N;;;;; +11681;TAKRI LETTER AA;Lo;0;L;;;;;N;;;;; +11682;TAKRI LETTER I;Lo;0;L;;;;;N;;;;; +11683;TAKRI LETTER II;Lo;0;L;;;;;N;;;;; +11684;TAKRI LETTER U;Lo;0;L;;;;;N;;;;; +11685;TAKRI LETTER UU;Lo;0;L;;;;;N;;;;; +11686;TAKRI LETTER E;Lo;0;L;;;;;N;;;;; +11687;TAKRI LETTER AI;Lo;0;L;;;;;N;;;;; +11688;TAKRI LETTER O;Lo;0;L;;;;;N;;;;; +11689;TAKRI LETTER AU;Lo;0;L;;;;;N;;;;; +1168A;TAKRI LETTER KA;Lo;0;L;;;;;N;;;;; +1168B;TAKRI LETTER KHA;Lo;0;L;;;;;N;;;;; +1168C;TAKRI LETTER GA;Lo;0;L;;;;;N;;;;; +1168D;TAKRI LETTER GHA;Lo;0;L;;;;;N;;;;; +1168E;TAKRI LETTER NGA;Lo;0;L;;;;;N;;;;; +1168F;TAKRI LETTER CA;Lo;0;L;;;;;N;;;;; +11690;TAKRI LETTER CHA;Lo;0;L;;;;;N;;;;; +11691;TAKRI LETTER JA;Lo;0;L;;;;;N;;;;; +11692;TAKRI LETTER JHA;Lo;0;L;;;;;N;;;;; +11693;TAKRI LETTER NYA;Lo;0;L;;;;;N;;;;; +11694;TAKRI LETTER TTA;Lo;0;L;;;;;N;;;;; +11695;TAKRI LETTER TTHA;Lo;0;L;;;;;N;;;;; +11696;TAKRI LETTER DDA;Lo;0;L;;;;;N;;;;; +11697;TAKRI LETTER DDHA;Lo;0;L;;;;;N;;;;; +11698;TAKRI LETTER NNA;Lo;0;L;;;;;N;;;;; +11699;TAKRI LETTER 
TA;Lo;0;L;;;;;N;;;;; +1169A;TAKRI LETTER THA;Lo;0;L;;;;;N;;;;; +1169B;TAKRI LETTER DA;Lo;0;L;;;;;N;;;;; +1169C;TAKRI LETTER DHA;Lo;0;L;;;;;N;;;;; +1169D;TAKRI LETTER NA;Lo;0;L;;;;;N;;;;; +1169E;TAKRI LETTER PA;Lo;0;L;;;;;N;;;;; +1169F;TAKRI LETTER PHA;Lo;0;L;;;;;N;;;;; +116A0;TAKRI LETTER BA;Lo;0;L;;;;;N;;;;; +116A1;TAKRI LETTER BHA;Lo;0;L;;;;;N;;;;; +116A2;TAKRI LETTER MA;Lo;0;L;;;;;N;;;;; +116A3;TAKRI LETTER YA;Lo;0;L;;;;;N;;;;; +116A4;TAKRI LETTER RA;Lo;0;L;;;;;N;;;;; +116A5;TAKRI LETTER LA;Lo;0;L;;;;;N;;;;; +116A6;TAKRI LETTER VA;Lo;0;L;;;;;N;;;;; +116A7;TAKRI LETTER SHA;Lo;0;L;;;;;N;;;;; +116A8;TAKRI LETTER SA;Lo;0;L;;;;;N;;;;; +116A9;TAKRI LETTER HA;Lo;0;L;;;;;N;;;;; +116AA;TAKRI LETTER RRA;Lo;0;L;;;;;N;;;;; +116AB;TAKRI SIGN ANUSVARA;Mn;0;NSM;;;;;N;;;;; +116AC;TAKRI SIGN VISARGA;Mc;0;L;;;;;N;;;;; +116AD;TAKRI VOWEL SIGN AA;Mn;0;NSM;;;;;N;;;;; +116AE;TAKRI VOWEL SIGN I;Mc;0;L;;;;;N;;;;; +116AF;TAKRI VOWEL SIGN II;Mc;0;L;;;;;N;;;;; +116B0;TAKRI VOWEL SIGN U;Mn;0;NSM;;;;;N;;;;; +116B1;TAKRI VOWEL SIGN UU;Mn;0;NSM;;;;;N;;;;; +116B2;TAKRI VOWEL SIGN E;Mn;0;NSM;;;;;N;;;;; +116B3;TAKRI VOWEL SIGN AI;Mn;0;NSM;;;;;N;;;;; +116B4;TAKRI VOWEL SIGN O;Mn;0;NSM;;;;;N;;;;; +116B5;TAKRI VOWEL SIGN AU;Mn;0;NSM;;;;;N;;;;; +116B6;TAKRI SIGN VIRAMA;Mc;9;L;;;;;N;;;;; +116B7;TAKRI SIGN NUKTA;Mn;7;NSM;;;;;N;;;;; +116C0;TAKRI DIGIT ZERO;Nd;0;L;;0;0;0;N;;;;; +116C1;TAKRI DIGIT ONE;Nd;0;L;;1;1;1;N;;;;; +116C2;TAKRI DIGIT TWO;Nd;0;L;;2;2;2;N;;;;; +116C3;TAKRI DIGIT THREE;Nd;0;L;;3;3;3;N;;;;; +116C4;TAKRI DIGIT FOUR;Nd;0;L;;4;4;4;N;;;;; +116C5;TAKRI DIGIT FIVE;Nd;0;L;;5;5;5;N;;;;; +116C6;TAKRI DIGIT SIX;Nd;0;L;;6;6;6;N;;;;; +116C7;TAKRI DIGIT SEVEN;Nd;0;L;;7;7;7;N;;;;; +116C8;TAKRI DIGIT EIGHT;Nd;0;L;;8;8;8;N;;;;; +116C9;TAKRI DIGIT NINE;Nd;0;L;;9;9;9;N;;;;; 12000;CUNEIFORM SIGN A;Lo;0;L;;;;;N;;;;; 12001;CUNEIFORM SIGN A TIMES A;Lo;0;L;;;;;N;;;;; 12002;CUNEIFORM SIGN A TIMES BAD;Lo;0;L;;;;;N;;;;; @@ -19960,6 +20396,139 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 16A36;BAMUM LETTER 
PHASE-F KPA;Lo;0;L;;;;;N;;;;; 16A37;BAMUM LETTER PHASE-F SAMBA;Lo;0;L;;;;;N;;;;; 16A38;BAMUM LETTER PHASE-F VUEQ;Lo;0;L;;;;;N;;;;; +16F00;MIAO LETTER PA;Lo;0;L;;;;;N;;;;; +16F01;MIAO LETTER BA;Lo;0;L;;;;;N;;;;; +16F02;MIAO LETTER YI PA;Lo;0;L;;;;;N;;;;; +16F03;MIAO LETTER PLA;Lo;0;L;;;;;N;;;;; +16F04;MIAO LETTER MA;Lo;0;L;;;;;N;;;;; +16F05;MIAO LETTER MHA;Lo;0;L;;;;;N;;;;; +16F06;MIAO LETTER ARCHAIC MA;Lo;0;L;;;;;N;;;;; +16F07;MIAO LETTER FA;Lo;0;L;;;;;N;;;;; +16F08;MIAO LETTER VA;Lo;0;L;;;;;N;;;;; +16F09;MIAO LETTER VFA;Lo;0;L;;;;;N;;;;; +16F0A;MIAO LETTER TA;Lo;0;L;;;;;N;;;;; +16F0B;MIAO LETTER DA;Lo;0;L;;;;;N;;;;; +16F0C;MIAO LETTER YI TTA;Lo;0;L;;;;;N;;;;; +16F0D;MIAO LETTER YI TA;Lo;0;L;;;;;N;;;;; +16F0E;MIAO LETTER TTA;Lo;0;L;;;;;N;;;;; +16F0F;MIAO LETTER DDA;Lo;0;L;;;;;N;;;;; +16F10;MIAO LETTER NA;Lo;0;L;;;;;N;;;;; +16F11;MIAO LETTER NHA;Lo;0;L;;;;;N;;;;; +16F12;MIAO LETTER YI NNA;Lo;0;L;;;;;N;;;;; +16F13;MIAO LETTER ARCHAIC NA;Lo;0;L;;;;;N;;;;; +16F14;MIAO LETTER NNA;Lo;0;L;;;;;N;;;;; +16F15;MIAO LETTER NNHA;Lo;0;L;;;;;N;;;;; +16F16;MIAO LETTER LA;Lo;0;L;;;;;N;;;;; +16F17;MIAO LETTER LYA;Lo;0;L;;;;;N;;;;; +16F18;MIAO LETTER LHA;Lo;0;L;;;;;N;;;;; +16F19;MIAO LETTER LHYA;Lo;0;L;;;;;N;;;;; +16F1A;MIAO LETTER TLHA;Lo;0;L;;;;;N;;;;; +16F1B;MIAO LETTER DLHA;Lo;0;L;;;;;N;;;;; +16F1C;MIAO LETTER TLHYA;Lo;0;L;;;;;N;;;;; +16F1D;MIAO LETTER DLHYA;Lo;0;L;;;;;N;;;;; +16F1E;MIAO LETTER KA;Lo;0;L;;;;;N;;;;; +16F1F;MIAO LETTER GA;Lo;0;L;;;;;N;;;;; +16F20;MIAO LETTER YI KA;Lo;0;L;;;;;N;;;;; +16F21;MIAO LETTER QA;Lo;0;L;;;;;N;;;;; +16F22;MIAO LETTER QGA;Lo;0;L;;;;;N;;;;; +16F23;MIAO LETTER NGA;Lo;0;L;;;;;N;;;;; +16F24;MIAO LETTER NGHA;Lo;0;L;;;;;N;;;;; +16F25;MIAO LETTER ARCHAIC NGA;Lo;0;L;;;;;N;;;;; +16F26;MIAO LETTER HA;Lo;0;L;;;;;N;;;;; +16F27;MIAO LETTER XA;Lo;0;L;;;;;N;;;;; +16F28;MIAO LETTER GHA;Lo;0;L;;;;;N;;;;; +16F29;MIAO LETTER GHHA;Lo;0;L;;;;;N;;;;; +16F2A;MIAO LETTER TSSA;Lo;0;L;;;;;N;;;;; +16F2B;MIAO LETTER DZZA;Lo;0;L;;;;;N;;;;; +16F2C;MIAO LETTER 
NYA;Lo;0;L;;;;;N;;;;; +16F2D;MIAO LETTER NYHA;Lo;0;L;;;;;N;;;;; +16F2E;MIAO LETTER TSHA;Lo;0;L;;;;;N;;;;; +16F2F;MIAO LETTER DZHA;Lo;0;L;;;;;N;;;;; +16F30;MIAO LETTER YI TSHA;Lo;0;L;;;;;N;;;;; +16F31;MIAO LETTER YI DZHA;Lo;0;L;;;;;N;;;;; +16F32;MIAO LETTER REFORMED TSHA;Lo;0;L;;;;;N;;;;; +16F33;MIAO LETTER SHA;Lo;0;L;;;;;N;;;;; +16F34;MIAO LETTER SSA;Lo;0;L;;;;;N;;;;; +16F35;MIAO LETTER ZHA;Lo;0;L;;;;;N;;;;; +16F36;MIAO LETTER ZSHA;Lo;0;L;;;;;N;;;;; +16F37;MIAO LETTER TSA;Lo;0;L;;;;;N;;;;; +16F38;MIAO LETTER DZA;Lo;0;L;;;;;N;;;;; +16F39;MIAO LETTER YI TSA;Lo;0;L;;;;;N;;;;; +16F3A;MIAO LETTER SA;Lo;0;L;;;;;N;;;;; +16F3B;MIAO LETTER ZA;Lo;0;L;;;;;N;;;;; +16F3C;MIAO LETTER ZSA;Lo;0;L;;;;;N;;;;; +16F3D;MIAO LETTER ZZA;Lo;0;L;;;;;N;;;;; +16F3E;MIAO LETTER ZZSA;Lo;0;L;;;;;N;;;;; +16F3F;MIAO LETTER ARCHAIC ZZA;Lo;0;L;;;;;N;;;;; +16F40;MIAO LETTER ZZYA;Lo;0;L;;;;;N;;;;; +16F41;MIAO LETTER ZZSYA;Lo;0;L;;;;;N;;;;; +16F42;MIAO LETTER WA;Lo;0;L;;;;;N;;;;; +16F43;MIAO LETTER AH;Lo;0;L;;;;;N;;;;; +16F44;MIAO LETTER HHA;Lo;0;L;;;;;N;;;;; +16F50;MIAO LETTER NASALIZATION;Lo;0;L;;;;;N;;;;; +16F51;MIAO SIGN ASPIRATION;Mc;0;L;;;;;N;;;;; +16F52;MIAO SIGN REFORMED VOICING;Mc;0;L;;;;;N;;;;; +16F53;MIAO SIGN REFORMED ASPIRATION;Mc;0;L;;;;;N;;;;; +16F54;MIAO VOWEL SIGN A;Mc;0;L;;;;;N;;;;; +16F55;MIAO VOWEL SIGN AA;Mc;0;L;;;;;N;;;;; +16F56;MIAO VOWEL SIGN AHH;Mc;0;L;;;;;N;;;;; +16F57;MIAO VOWEL SIGN AN;Mc;0;L;;;;;N;;;;; +16F58;MIAO VOWEL SIGN ANG;Mc;0;L;;;;;N;;;;; +16F59;MIAO VOWEL SIGN O;Mc;0;L;;;;;N;;;;; +16F5A;MIAO VOWEL SIGN OO;Mc;0;L;;;;;N;;;;; +16F5B;MIAO VOWEL SIGN WO;Mc;0;L;;;;;N;;;;; +16F5C;MIAO VOWEL SIGN W;Mc;0;L;;;;;N;;;;; +16F5D;MIAO VOWEL SIGN E;Mc;0;L;;;;;N;;;;; +16F5E;MIAO VOWEL SIGN EN;Mc;0;L;;;;;N;;;;; +16F5F;MIAO VOWEL SIGN ENG;Mc;0;L;;;;;N;;;;; +16F60;MIAO VOWEL SIGN OEY;Mc;0;L;;;;;N;;;;; +16F61;MIAO VOWEL SIGN I;Mc;0;L;;;;;N;;;;; +16F62;MIAO VOWEL SIGN IA;Mc;0;L;;;;;N;;;;; +16F63;MIAO VOWEL SIGN IAN;Mc;0;L;;;;;N;;;;; +16F64;MIAO VOWEL SIGN IANG;Mc;0;L;;;;;N;;;;; 
+16F65;MIAO VOWEL SIGN IO;Mc;0;L;;;;;N;;;;; +16F66;MIAO VOWEL SIGN IE;Mc;0;L;;;;;N;;;;; +16F67;MIAO VOWEL SIGN II;Mc;0;L;;;;;N;;;;; +16F68;MIAO VOWEL SIGN IU;Mc;0;L;;;;;N;;;;; +16F69;MIAO VOWEL SIGN ING;Mc;0;L;;;;;N;;;;; +16F6A;MIAO VOWEL SIGN U;Mc;0;L;;;;;N;;;;; +16F6B;MIAO VOWEL SIGN UA;Mc;0;L;;;;;N;;;;; +16F6C;MIAO VOWEL SIGN UAN;Mc;0;L;;;;;N;;;;; +16F6D;MIAO VOWEL SIGN UANG;Mc;0;L;;;;;N;;;;; +16F6E;MIAO VOWEL SIGN UU;Mc;0;L;;;;;N;;;;; +16F6F;MIAO VOWEL SIGN UEI;Mc;0;L;;;;;N;;;;; +16F70;MIAO VOWEL SIGN UNG;Mc;0;L;;;;;N;;;;; +16F71;MIAO VOWEL SIGN Y;Mc;0;L;;;;;N;;;;; +16F72;MIAO VOWEL SIGN YI;Mc;0;L;;;;;N;;;;; +16F73;MIAO VOWEL SIGN AE;Mc;0;L;;;;;N;;;;; +16F74;MIAO VOWEL SIGN AEE;Mc;0;L;;;;;N;;;;; +16F75;MIAO VOWEL SIGN ERR;Mc;0;L;;;;;N;;;;; +16F76;MIAO VOWEL SIGN ROUNDED ERR;Mc;0;L;;;;;N;;;;; +16F77;MIAO VOWEL SIGN ER;Mc;0;L;;;;;N;;;;; +16F78;MIAO VOWEL SIGN ROUNDED ER;Mc;0;L;;;;;N;;;;; +16F79;MIAO VOWEL SIGN AI;Mc;0;L;;;;;N;;;;; +16F7A;MIAO VOWEL SIGN EI;Mc;0;L;;;;;N;;;;; +16F7B;MIAO VOWEL SIGN AU;Mc;0;L;;;;;N;;;;; +16F7C;MIAO VOWEL SIGN OU;Mc;0;L;;;;;N;;;;; +16F7D;MIAO VOWEL SIGN N;Mc;0;L;;;;;N;;;;; +16F7E;MIAO VOWEL SIGN NG;Mc;0;L;;;;;N;;;;; +16F8F;MIAO TONE RIGHT;Mn;0;NSM;;;;;N;;;;; +16F90;MIAO TONE TOP RIGHT;Mn;0;NSM;;;;;N;;;;; +16F91;MIAO TONE ABOVE;Mn;0;NSM;;;;;N;;;;; +16F92;MIAO TONE BELOW;Mn;0;NSM;;;;;N;;;;; +16F93;MIAO LETTER TONE-2;Lm;0;L;;;;;N;;;;; +16F94;MIAO LETTER TONE-3;Lm;0;L;;;;;N;;;;; +16F95;MIAO LETTER TONE-4;Lm;0;L;;;;;N;;;;; +16F96;MIAO LETTER TONE-5;Lm;0;L;;;;;N;;;;; +16F97;MIAO LETTER TONE-6;Lm;0;L;;;;;N;;;;; +16F98;MIAO LETTER TONE-7;Lm;0;L;;;;;N;;;;; +16F99;MIAO LETTER TONE-8;Lm;0;L;;;;;N;;;;; +16F9A;MIAO LETTER REFORMED TONE-1;Lm;0;L;;;;;N;;;;; +16F9B;MIAO LETTER REFORMED TONE-2;Lm;0;L;;;;;N;;;;; +16F9C;MIAO LETTER REFORMED TONE-4;Lm;0;L;;;;;N;;;;; +16F9D;MIAO LETTER REFORMED TONE-5;Lm;0;L;;;;;N;;;;; +16F9E;MIAO LETTER REFORMED TONE-6;Lm;0;L;;;;;N;;;;; +16F9F;MIAO LETTER REFORMED TONE-8;Lm;0;L;;;;;N;;;;; 1B000;KATAKANA LETTER ARCHAIC 
E;Lo;0;L;;;;;N;;;;; 1B001;HIRAGANA LETTER ARCHAIC YE;Lo;0;L;;;;;N;;;;; 1D000;BYZANTINE MUSICAL SYMBOL PSILI;So;0;L;;;;;N;;;;; @@ -21599,6 +22168,149 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 1D7FD;MATHEMATICAL MONOSPACE DIGIT SEVEN;Nd;0;EN;<font> 0037;7;7;7;N;;;;; 1D7FE;MATHEMATICAL MONOSPACE DIGIT EIGHT;Nd;0;EN;<font> 0038;8;8;8;N;;;;; 1D7FF;MATHEMATICAL MONOSPACE DIGIT NINE;Nd;0;EN;<font> 0039;9;9;9;N;;;;; +1EE00;ARABIC MATHEMATICAL ALEF;Lo;0;AL;<font> 0627;;;;N;;;;; +1EE01;ARABIC MATHEMATICAL BEH;Lo;0;AL;<font> 0628;;;;N;;;;; +1EE02;ARABIC MATHEMATICAL JEEM;Lo;0;AL;<font> 062C;;;;N;;;;; +1EE03;ARABIC MATHEMATICAL DAL;Lo;0;AL;<font> 062F;;;;N;;;;; +1EE05;ARABIC MATHEMATICAL WAW;Lo;0;AL;<font> 0648;;;;N;;;;; +1EE06;ARABIC MATHEMATICAL ZAIN;Lo;0;AL;<font> 0632;;;;N;;;;; +1EE07;ARABIC MATHEMATICAL HAH;Lo;0;AL;<font> 062D;;;;N;;;;; +1EE08;ARABIC MATHEMATICAL TAH;Lo;0;AL;<font> 0637;;;;N;;;;; +1EE09;ARABIC MATHEMATICAL YEH;Lo;0;AL;<font> 064A;;;;N;;;;; +1EE0A;ARABIC MATHEMATICAL KAF;Lo;0;AL;<font> 0643;;;;N;;;;; +1EE0B;ARABIC MATHEMATICAL LAM;Lo;0;AL;<font> 0644;;;;N;;;;; +1EE0C;ARABIC MATHEMATICAL MEEM;Lo;0;AL;<font> 0645;;;;N;;;;; +1EE0D;ARABIC MATHEMATICAL NOON;Lo;0;AL;<font> 0646;;;;N;;;;; +1EE0E;ARABIC MATHEMATICAL SEEN;Lo;0;AL;<font> 0633;;;;N;;;;; +1EE0F;ARABIC MATHEMATICAL AIN;Lo;0;AL;<font> 0639;;;;N;;;;; +1EE10;ARABIC MATHEMATICAL FEH;Lo;0;AL;<font> 0641;;;;N;;;;; +1EE11;ARABIC MATHEMATICAL SAD;Lo;0;AL;<font> 0635;;;;N;;;;; +1EE12;ARABIC MATHEMATICAL QAF;Lo;0;AL;<font> 0642;;;;N;;;;; +1EE13;ARABIC MATHEMATICAL REH;Lo;0;AL;<font> 0631;;;;N;;;;; +1EE14;ARABIC MATHEMATICAL SHEEN;Lo;0;AL;<font> 0634;;;;N;;;;; +1EE15;ARABIC MATHEMATICAL TEH;Lo;0;AL;<font> 062A;;;;N;;;;; +1EE16;ARABIC MATHEMATICAL THEH;Lo;0;AL;<font> 062B;;;;N;;;;; +1EE17;ARABIC MATHEMATICAL KHAH;Lo;0;AL;<font> 062E;;;;N;;;;; +1EE18;ARABIC MATHEMATICAL THAL;Lo;0;AL;<font> 0630;;;;N;;;;; +1EE19;ARABIC MATHEMATICAL DAD;Lo;0;AL;<font> 0636;;;;N;;;;; +1EE1A;ARABIC MATHEMATICAL 
ZAH;Lo;0;AL;<font> 0638;;;;N;;;;; +1EE1B;ARABIC MATHEMATICAL GHAIN;Lo;0;AL;<font> 063A;;;;N;;;;; +1EE1C;ARABIC MATHEMATICAL DOTLESS BEH;Lo;0;AL;<font> 066E;;;;N;;;;; +1EE1D;ARABIC MATHEMATICAL DOTLESS NOON;Lo;0;AL;<font> 06BA;;;;N;;;;; +1EE1E;ARABIC MATHEMATICAL DOTLESS FEH;Lo;0;AL;<font> 06A1;;;;N;;;;; +1EE1F;ARABIC MATHEMATICAL DOTLESS QAF;Lo;0;AL;<font> 066F;;;;N;;;;; +1EE21;ARABIC MATHEMATICAL INITIAL BEH;Lo;0;AL;<font> 0628;;;;N;;;;; +1EE22;ARABIC MATHEMATICAL INITIAL JEEM;Lo;0;AL;<font> 062C;;;;N;;;;; +1EE24;ARABIC MATHEMATICAL INITIAL HEH;Lo;0;AL;<font> 0647;;;;N;;;;; +1EE27;ARABIC MATHEMATICAL INITIAL HAH;Lo;0;AL;<font> 062D;;;;N;;;;; +1EE29;ARABIC MATHEMATICAL INITIAL YEH;Lo;0;AL;<font> 064A;;;;N;;;;; +1EE2A;ARABIC MATHEMATICAL INITIAL KAF;Lo;0;AL;<font> 0643;;;;N;;;;; +1EE2B;ARABIC MATHEMATICAL INITIAL LAM;Lo;0;AL;<font> 0644;;;;N;;;;; +1EE2C;ARABIC MATHEMATICAL INITIAL MEEM;Lo;0;AL;<font> 0645;;;;N;;;;; +1EE2D;ARABIC MATHEMATICAL INITIAL NOON;Lo;0;AL;<font> 0646;;;;N;;;;; +1EE2E;ARABIC MATHEMATICAL INITIAL SEEN;Lo;0;AL;<font> 0633;;;;N;;;;; +1EE2F;ARABIC MATHEMATICAL INITIAL AIN;Lo;0;AL;<font> 0639;;;;N;;;;; +1EE30;ARABIC MATHEMATICAL INITIAL FEH;Lo;0;AL;<font> 0641;;;;N;;;;; +1EE31;ARABIC MATHEMATICAL INITIAL SAD;Lo;0;AL;<font> 0635;;;;N;;;;; +1EE32;ARABIC MATHEMATICAL INITIAL QAF;Lo;0;AL;<font> 0642;;;;N;;;;; +1EE34;ARABIC MATHEMATICAL INITIAL SHEEN;Lo;0;AL;<font> 0634;;;;N;;;;; +1EE35;ARABIC MATHEMATICAL INITIAL TEH;Lo;0;AL;<font> 062A;;;;N;;;;; +1EE36;ARABIC MATHEMATICAL INITIAL THEH;Lo;0;AL;<font> 062B;;;;N;;;;; +1EE37;ARABIC MATHEMATICAL INITIAL KHAH;Lo;0;AL;<font> 062E;;;;N;;;;; +1EE39;ARABIC MATHEMATICAL INITIAL DAD;Lo;0;AL;<font> 0636;;;;N;;;;; +1EE3B;ARABIC MATHEMATICAL INITIAL GHAIN;Lo;0;AL;<font> 063A;;;;N;;;;; +1EE42;ARABIC MATHEMATICAL TAILED JEEM;Lo;0;AL;<font> 062C;;;;N;;;;; +1EE47;ARABIC MATHEMATICAL TAILED HAH;Lo;0;AL;<font> 062D;;;;N;;;;; +1EE49;ARABIC MATHEMATICAL TAILED YEH;Lo;0;AL;<font> 064A;;;;N;;;;; +1EE4B;ARABIC MATHEMATICAL 
TAILED LAM;Lo;0;AL;<font> 0644;;;;N;;;;; +1EE4D;ARABIC MATHEMATICAL TAILED NOON;Lo;0;AL;<font> 0646;;;;N;;;;; +1EE4E;ARABIC MATHEMATICAL TAILED SEEN;Lo;0;AL;<font> 0633;;;;N;;;;; +1EE4F;ARABIC MATHEMATICAL TAILED AIN;Lo;0;AL;<font> 0639;;;;N;;;;; +1EE51;ARABIC MATHEMATICAL TAILED SAD;Lo;0;AL;<font> 0635;;;;N;;;;; +1EE52;ARABIC MATHEMATICAL TAILED QAF;Lo;0;AL;<font> 0642;;;;N;;;;; +1EE54;ARABIC MATHEMATICAL TAILED SHEEN;Lo;0;AL;<font> 0634;;;;N;;;;; +1EE57;ARABIC MATHEMATICAL TAILED KHAH;Lo;0;AL;<font> 062E;;;;N;;;;; +1EE59;ARABIC MATHEMATICAL TAILED DAD;Lo;0;AL;<font> 0636;;;;N;;;;; +1EE5B;ARABIC MATHEMATICAL TAILED GHAIN;Lo;0;AL;<font> 063A;;;;N;;;;; +1EE5D;ARABIC MATHEMATICAL TAILED DOTLESS NOON;Lo;0;AL;<font> 06BA;;;;N;;;;; +1EE5F;ARABIC MATHEMATICAL TAILED DOTLESS QAF;Lo;0;AL;<font> 066F;;;;N;;;;; +1EE61;ARABIC MATHEMATICAL STRETCHED BEH;Lo;0;AL;<font> 0628;;;;N;;;;; +1EE62;ARABIC MATHEMATICAL STRETCHED JEEM;Lo;0;AL;<font> 062C;;;;N;;;;; +1EE64;ARABIC MATHEMATICAL STRETCHED HEH;Lo;0;AL;<font> 0647;;;;N;;;;; +1EE67;ARABIC MATHEMATICAL STRETCHED HAH;Lo;0;AL;<font> 062D;;;;N;;;;; +1EE68;ARABIC MATHEMATICAL STRETCHED TAH;Lo;0;AL;<font> 0637;;;;N;;;;; +1EE69;ARABIC MATHEMATICAL STRETCHED YEH;Lo;0;AL;<font> 064A;;;;N;;;;; +1EE6A;ARABIC MATHEMATICAL STRETCHED KAF;Lo;0;AL;<font> 0643;;;;N;;;;; +1EE6C;ARABIC MATHEMATICAL STRETCHED MEEM;Lo;0;AL;<font> 0645;;;;N;;;;; +1EE6D;ARABIC MATHEMATICAL STRETCHED NOON;Lo;0;AL;<font> 0646;;;;N;;;;; +1EE6E;ARABIC MATHEMATICAL STRETCHED SEEN;Lo;0;AL;<font> 0633;;;;N;;;;; +1EE6F;ARABIC MATHEMATICAL STRETCHED AIN;Lo;0;AL;<font> 0639;;;;N;;;;; +1EE70;ARABIC MATHEMATICAL STRETCHED FEH;Lo;0;AL;<font> 0641;;;;N;;;;; +1EE71;ARABIC MATHEMATICAL STRETCHED SAD;Lo;0;AL;<font> 0635;;;;N;;;;; +1EE72;ARABIC MATHEMATICAL STRETCHED QAF;Lo;0;AL;<font> 0642;;;;N;;;;; +1EE74;ARABIC MATHEMATICAL STRETCHED SHEEN;Lo;0;AL;<font> 0634;;;;N;;;;; +1EE75;ARABIC MATHEMATICAL STRETCHED TEH;Lo;0;AL;<font> 062A;;;;N;;;;; +1EE76;ARABIC MATHEMATICAL STRETCHED 
THEH;Lo;0;AL;<font> 062B;;;;N;;;;; +1EE77;ARABIC MATHEMATICAL STRETCHED KHAH;Lo;0;AL;<font> 062E;;;;N;;;;; +1EE79;ARABIC MATHEMATICAL STRETCHED DAD;Lo;0;AL;<font> 0636;;;;N;;;;; +1EE7A;ARABIC MATHEMATICAL STRETCHED ZAH;Lo;0;AL;<font> 0638;;;;N;;;;; +1EE7B;ARABIC MATHEMATICAL STRETCHED GHAIN;Lo;0;AL;<font> 063A;;;;N;;;;; +1EE7C;ARABIC MATHEMATICAL STRETCHED DOTLESS BEH;Lo;0;AL;<font> 066E;;;;N;;;;; +1EE7E;ARABIC MATHEMATICAL STRETCHED DOTLESS FEH;Lo;0;AL;<font> 06A1;;;;N;;;;; +1EE80;ARABIC MATHEMATICAL LOOPED ALEF;Lo;0;AL;<font> 0627;;;;N;;;;; +1EE81;ARABIC MATHEMATICAL LOOPED BEH;Lo;0;AL;<font> 0628;;;;N;;;;; +1EE82;ARABIC MATHEMATICAL LOOPED JEEM;Lo;0;AL;<font> 062C;;;;N;;;;; +1EE83;ARABIC MATHEMATICAL LOOPED DAL;Lo;0;AL;<font> 062F;;;;N;;;;; +1EE84;ARABIC MATHEMATICAL LOOPED HEH;Lo;0;AL;<font> 0647;;;;N;;;;; +1EE85;ARABIC MATHEMATICAL LOOPED WAW;Lo;0;AL;<font> 0648;;;;N;;;;; +1EE86;ARABIC MATHEMATICAL LOOPED ZAIN;Lo;0;AL;<font> 0632;;;;N;;;;; +1EE87;ARABIC MATHEMATICAL LOOPED HAH;Lo;0;AL;<font> 062D;;;;N;;;;; +1EE88;ARABIC MATHEMATICAL LOOPED TAH;Lo;0;AL;<font> 0637;;;;N;;;;; +1EE89;ARABIC MATHEMATICAL LOOPED YEH;Lo;0;AL;<font> 064A;;;;N;;;;; +1EE8B;ARABIC MATHEMATICAL LOOPED LAM;Lo;0;AL;<font> 0644;;;;N;;;;; +1EE8C;ARABIC MATHEMATICAL LOOPED MEEM;Lo;0;AL;<font> 0645;;;;N;;;;; +1EE8D;ARABIC MATHEMATICAL LOOPED NOON;Lo;0;AL;<font> 0646;;;;N;;;;; +1EE8E;ARABIC MATHEMATICAL LOOPED SEEN;Lo;0;AL;<font> 0633;;;;N;;;;; +1EE8F;ARABIC MATHEMATICAL LOOPED AIN;Lo;0;AL;<font> 0639;;;;N;;;;; +1EE90;ARABIC MATHEMATICAL LOOPED FEH;Lo;0;AL;<font> 0641;;;;N;;;;; +1EE91;ARABIC MATHEMATICAL LOOPED SAD;Lo;0;AL;<font> 0635;;;;N;;;;; +1EE92;ARABIC MATHEMATICAL LOOPED QAF;Lo;0;AL;<font> 0642;;;;N;;;;; +1EE93;ARABIC MATHEMATICAL LOOPED REH;Lo;0;AL;<font> 0631;;;;N;;;;; +1EE94;ARABIC MATHEMATICAL LOOPED SHEEN;Lo;0;AL;<font> 0634;;;;N;;;;; +1EE95;ARABIC MATHEMATICAL LOOPED TEH;Lo;0;AL;<font> 062A;;;;N;;;;; +1EE96;ARABIC MATHEMATICAL LOOPED THEH;Lo;0;AL;<font> 062B;;;;N;;;;; +1EE97;ARABIC 
MATHEMATICAL LOOPED KHAH;Lo;0;AL;<font> 062E;;;;N;;;;; +1EE98;ARABIC MATHEMATICAL LOOPED THAL;Lo;0;AL;<font> 0630;;;;N;;;;; +1EE99;ARABIC MATHEMATICAL LOOPED DAD;Lo;0;AL;<font> 0636;;;;N;;;;; +1EE9A;ARABIC MATHEMATICAL LOOPED ZAH;Lo;0;AL;<font> 0638;;;;N;;;;; +1EE9B;ARABIC MATHEMATICAL LOOPED GHAIN;Lo;0;AL;<font> 063A;;;;N;;;;; +1EEA1;ARABIC MATHEMATICAL DOUBLE-STRUCK BEH;Lo;0;AL;<font> 0628;;;;N;;;;; +1EEA2;ARABIC MATHEMATICAL DOUBLE-STRUCK JEEM;Lo;0;AL;<font> 062C;;;;N;;;;; +1EEA3;ARABIC MATHEMATICAL DOUBLE-STRUCK DAL;Lo;0;AL;<font> 062F;;;;N;;;;; +1EEA5;ARABIC MATHEMATICAL DOUBLE-STRUCK WAW;Lo;0;AL;<font> 0648;;;;N;;;;; +1EEA6;ARABIC MATHEMATICAL DOUBLE-STRUCK ZAIN;Lo;0;AL;<font> 0632;;;;N;;;;; +1EEA7;ARABIC MATHEMATICAL DOUBLE-STRUCK HAH;Lo;0;AL;<font> 062D;;;;N;;;;; +1EEA8;ARABIC MATHEMATICAL DOUBLE-STRUCK TAH;Lo;0;AL;<font> 0637;;;;N;;;;; +1EEA9;ARABIC MATHEMATICAL DOUBLE-STRUCK YEH;Lo;0;AL;<font> 064A;;;;N;;;;; +1EEAB;ARABIC MATHEMATICAL DOUBLE-STRUCK LAM;Lo;0;AL;<font> 0644;;;;N;;;;; +1EEAC;ARABIC MATHEMATICAL DOUBLE-STRUCK MEEM;Lo;0;AL;<font> 0645;;;;N;;;;; +1EEAD;ARABIC MATHEMATICAL DOUBLE-STRUCK NOON;Lo;0;AL;<font> 0646;;;;N;;;;; +1EEAE;ARABIC MATHEMATICAL DOUBLE-STRUCK SEEN;Lo;0;AL;<font> 0633;;;;N;;;;; +1EEAF;ARABIC MATHEMATICAL DOUBLE-STRUCK AIN;Lo;0;AL;<font> 0639;;;;N;;;;; +1EEB0;ARABIC MATHEMATICAL DOUBLE-STRUCK FEH;Lo;0;AL;<font> 0641;;;;N;;;;; +1EEB1;ARABIC MATHEMATICAL DOUBLE-STRUCK SAD;Lo;0;AL;<font> 0635;;;;N;;;;; +1EEB2;ARABIC MATHEMATICAL DOUBLE-STRUCK QAF;Lo;0;AL;<font> 0642;;;;N;;;;; +1EEB3;ARABIC MATHEMATICAL DOUBLE-STRUCK REH;Lo;0;AL;<font> 0631;;;;N;;;;; +1EEB4;ARABIC MATHEMATICAL DOUBLE-STRUCK SHEEN;Lo;0;AL;<font> 0634;;;;N;;;;; +1EEB5;ARABIC MATHEMATICAL DOUBLE-STRUCK TEH;Lo;0;AL;<font> 062A;;;;N;;;;; +1EEB6;ARABIC MATHEMATICAL DOUBLE-STRUCK THEH;Lo;0;AL;<font> 062B;;;;N;;;;; +1EEB7;ARABIC MATHEMATICAL DOUBLE-STRUCK KHAH;Lo;0;AL;<font> 062E;;;;N;;;;; +1EEB8;ARABIC MATHEMATICAL DOUBLE-STRUCK THAL;Lo;0;AL;<font> 0630;;;;N;;;;; 
+1EEB9;ARABIC MATHEMATICAL DOUBLE-STRUCK DAD;Lo;0;AL;<font> 0636;;;;N;;;;; +1EEBA;ARABIC MATHEMATICAL DOUBLE-STRUCK ZAH;Lo;0;AL;<font> 0638;;;;N;;;;; +1EEBB;ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN;Lo;0;AL;<font> 063A;;;;N;;;;; +1EEF0;ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL;Sm;0;ON;;;;;N;;;;; +1EEF1;ARABIC MATHEMATICAL OPERATOR HAH WITH DAL;Sm;0;ON;;;;;N;;;;; 1F000;MAHJONG TILE EAST WIND;So;0;ON;;;;;N;;;;; 1F001;MAHJONG TILE SOUTH WIND;So;0;ON;;;;;N;;;;; 1F002;MAHJONG TILE WEST WIND;So;0;ON;;;;;N;;;;; @@ -21902,6 +22614,8 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 1F167;NEGATIVE CIRCLED LATIN CAPITAL LETTER X;So;0;L;;;;;N;;;;; 1F168;NEGATIVE CIRCLED LATIN CAPITAL LETTER Y;So;0;L;;;;;N;;;;; 1F169;NEGATIVE CIRCLED LATIN CAPITAL LETTER Z;So;0;L;;;;;N;;;;; +1F16A;RAISED MC SIGN;So;0;ON;<super> 004D 0043;;;;N;;;;; +1F16B;RAISED MD SIGN;So;0;ON;<super> 004D 0044;;;;N;;;;; 1F170;NEGATIVE SQUARED LATIN CAPITAL LETTER A;So;0;L;;;;;N;;;;; 1F171;NEGATIVE SQUARED LATIN CAPITAL LETTER B;So;0;L;;;;;N;;;;; 1F172;NEGATIVE SQUARED LATIN CAPITAL LETTER C;So;0;L;;;;;N;;;;; @@ -22354,7 +23068,7 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 1F489;SYRINGE;So;0;ON;;;;;N;;;;; 1F48A;PILL;So;0;ON;;;;;N;;;;; 1F48B;KISS MARK;So;0;ON;;;;;N;;;;; -1F48C;LOVE LETTER;So;0;L;;;;;N;;;;; +1F48C;LOVE LETTER;So;0;ON;;;;;N;;;;; 1F48D;RING;So;0;ON;;;;;N;;;;; 1F48E;GEM STONE;So;0;ON;;;;;N;;;;; 1F48F;KISS;So;0;ON;;;;;N;;;;; @@ -22502,7 +23216,7 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 1F521;INPUT SYMBOL FOR LATIN SMALL LETTERS;So;0;ON;;;;;N;;;;; 1F522;INPUT SYMBOL FOR NUMBERS;So;0;ON;;;;;N;;;;; 1F523;INPUT SYMBOL FOR SYMBOLS;So;0;ON;;;;;N;;;;; -1F524;INPUT SYMBOL FOR LATIN LETTERS;So;0;L;;;;;N;;;;; +1F524;INPUT SYMBOL FOR LATIN LETTERS;So;0;ON;;;;;N;;;;; 1F525;FIRE;So;0;ON;;;;;N;;;;; 1F526;ELECTRIC TORCH;So;0;ON;;;;;N;;;;; 1F527;WRENCH;So;0;ON;;;;;N;;;;; @@ -22528,6 +23242,10 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 1F53B;DOWN-POINTING RED TRIANGLE;So;0;ON;;;;;N;;;;; 
1F53C;UP-POINTING SMALL RED TRIANGLE;So;0;ON;;;;;N;;;;; 1F53D;DOWN-POINTING SMALL RED TRIANGLE;So;0;ON;;;;;N;;;;; +1F540;CIRCLED CROSS POMMEE;So;0;ON;;;;;N;;;;; +1F541;CROSS POMMEE WITH HALF-CIRCLE BELOW;So;0;ON;;;;;N;;;;; +1F542;CROSS POMMEE;So;0;ON;;;;;N;;;;; +1F543;NOTCHED LEFT SEMICIRCLE WITH THREE DOTS;So;0;ON;;;;;N;;;;; 1F550;CLOCK FACE ONE OCLOCK;So;0;ON;;;;;N;;;;; 1F551;CLOCK FACE TWO OCLOCK;So;0;ON;;;;;N;;;;; 1F552;CLOCK FACE THREE OCLOCK;So;0;ON;;;;;N;;;;; @@ -22557,6 +23275,7 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 1F5FD;STATUE OF LIBERTY;So;0;ON;;;;;N;;;;; 1F5FE;SILHOUETTE OF JAPAN;So;0;ON;;;;;N;;;;; 1F5FF;MOYAI;So;0;ON;;;;;N;;;;; +1F600;GRINNING FACE;So;0;ON;;;;;N;;;;; 1F601;GRINNING FACE WITH SMILING EYES;So;0;ON;;;;;N;;;;; 1F602;FACE WITH TEARS OF JOY;So;0;ON;;;;;N;;;;; 1F603;SMILING FACE WITH OPEN MOUTH;So;0;ON;;;;;N;;;;; @@ -22573,30 +23292,42 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 1F60E;SMILING FACE WITH SUNGLASSES;So;0;ON;;;;;N;;;;; 1F60F;SMIRKING FACE;So;0;ON;;;;;N;;;;; 1F610;NEUTRAL FACE;So;0;ON;;;;;N;;;;; +1F611;EXPRESSIONLESS FACE;So;0;ON;;;;;N;;;;; 1F612;UNAMUSED FACE;So;0;ON;;;;;N;;;;; 1F613;FACE WITH COLD SWEAT;So;0;ON;;;;;N;;;;; 1F614;PENSIVE FACE;So;0;ON;;;;;N;;;;; +1F615;CONFUSED FACE;So;0;ON;;;;;N;;;;; 1F616;CONFOUNDED FACE;So;0;ON;;;;;N;;;;; +1F617;KISSING FACE;So;0;ON;;;;;N;;;;; 1F618;FACE THROWING A KISS;So;0;ON;;;;;N;;;;; +1F619;KISSING FACE WITH SMILING EYES;So;0;ON;;;;;N;;;;; 1F61A;KISSING FACE WITH CLOSED EYES;So;0;ON;;;;;N;;;;; +1F61B;FACE WITH STUCK-OUT TONGUE;So;0;ON;;;;;N;;;;; 1F61C;FACE WITH STUCK-OUT TONGUE AND WINKING EYE;So;0;ON;;;;;N;;;;; 1F61D;FACE WITH STUCK-OUT TONGUE AND TIGHTLY-CLOSED EYES;So;0;ON;;;;;N;;;;; 1F61E;DISAPPOINTED FACE;So;0;ON;;;;;N;;;;; +1F61F;WORRIED FACE;So;0;ON;;;;;N;;;;; 1F620;ANGRY FACE;So;0;ON;;;;;N;;;;; 1F621;POUTING FACE;So;0;ON;;;;;N;;;;; 1F622;CRYING FACE;So;0;ON;;;;;N;;;;; 1F623;PERSEVERING FACE;So;0;ON;;;;;N;;;;; 1F624;FACE WITH LOOK OF TRIUMPH;So;0;ON;;;;;N;;;;; 
1F625;DISAPPOINTED BUT RELIEVED FACE;So;0;ON;;;;;N;;;;; +1F626;FROWNING FACE WITH OPEN MOUTH;So;0;ON;;;;;N;;;;; +1F627;ANGUISHED FACE;So;0;ON;;;;;N;;;;; 1F628;FEARFUL FACE;So;0;ON;;;;;N;;;;; 1F629;WEARY FACE;So;0;ON;;;;;N;;;;; 1F62A;SLEEPY FACE;So;0;ON;;;;;N;;;;; 1F62B;TIRED FACE;So;0;ON;;;;;N;;;;; +1F62C;GRIMACING FACE;So;0;ON;;;;;N;;;;; 1F62D;LOUDLY CRYING FACE;So;0;ON;;;;;N;;;;; +1F62E;FACE WITH OPEN MOUTH;So;0;ON;;;;;N;;;;; +1F62F;HUSHED FACE;So;0;ON;;;;;N;;;;; 1F630;FACE WITH OPEN MOUTH AND COLD SWEAT;So;0;ON;;;;;N;;;;; 1F631;FACE SCREAMING IN FEAR;So;0;ON;;;;;N;;;;; 1F632;ASTONISHED FACE;So;0;ON;;;;;N;;;;; 1F633;FLUSHED FACE;So;0;ON;;;;;N;;;;; +1F634;SLEEPING FACE;So;0;ON;;;;;N;;;;; 1F635;DIZZY FACE;So;0;ON;;;;;N;;;;; 1F636;FACE WITHOUT MOUTH;So;0;ON;;;;;N;;;;; 1F637;FACE WITH MEDICAL MASK;So;0;ON;;;;;N;;;;; diff --git a/lib/unicore/auxiliary/GCBTest.txt b/lib/unicore/auxiliary/GCBTest.txt index 8f45e56cf6..33b859cbff 100644 --- a/lib/unicore/auxiliary/GCBTest.txt +++ b/lib/unicore/auxiliary/GCBTest.txt @@ -1,8 +1,8 @@ -# GraphemeBreakTest-6.0.0.txt -# Date: 2010-05-18, 00:49:27 GMT [MD] +# GraphemeBreakTest-6.1.0.txt +# Date: 2011-12-07, 17:54:39 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. 
# For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ # @@ -30,8 +30,6 @@ ÷ 0020 × 0308 ÷ 0001 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] ÷ 0020 × 0300 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] ÷ 0020 × 0308 × 0300 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] -÷ 0020 ÷ 0E40 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] -÷ 0020 × 0308 ÷ 0E40 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] ÷ 0020 × 0903 ÷ # ÷ [0.2] SPACE (Other) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] ÷ 0020 × 0308 × 0903 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] ÷ 0020 ÷ 1100 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] @@ -44,6 +42,10 @@ ÷ 0020 × 0308 ÷ AC00 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] ÷ 0020 ÷ AC01 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] ÷ 0020 × 0308 ÷ AC01 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0020 ÷ 0378 ÷ # ÷ [0.2] SPACE (Other) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ 0020 × 0308 ÷ 0378 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ 0020 ÷ D800 ÷ # ÷ [0.2] SPACE (Other) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3] +÷ 0020 × 0308 ÷ D800 ÷ # ÷ [0.2] SPACE (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3] ÷ 000D ÷ 0020 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] SPACE (Other) ÷ [0.3] ÷ 000D ÷ 0308 ÷ 0020 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) 
÷ [999.0] SPACE (Other) ÷ [0.3] ÷ 000D ÷ 000D ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] @@ -54,8 +56,6 @@ ÷ 000D ÷ 0308 ÷ 0001 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] ÷ 000D ÷ 0300 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] ÷ 000D ÷ 0308 × 0300 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] -÷ 000D ÷ 0E40 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] -÷ 000D ÷ 0308 ÷ 0E40 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] ÷ 000D ÷ 0903 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] ÷ 000D ÷ 0308 × 0903 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] ÷ 000D ÷ 1100 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] @@ -68,6 +68,10 @@ ÷ 000D ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] ÷ 000D ÷ AC01 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] ÷ 000D ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 000D ÷ 0378 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <reserved-0378> (Other) ÷ [0.3] +÷ 000D ÷ 0308 ÷ 0378 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ 000D ÷ D800 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] <surrogate-D800> (Control) ÷ [0.3] +÷ 000D ÷ 0308 ÷ D800 ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] 
<surrogate-D800> (Control) ÷ [0.3] ÷ 000A ÷ 0020 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] SPACE (Other) ÷ [0.3] ÷ 000A ÷ 0308 ÷ 0020 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3] ÷ 000A ÷ 000D ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] @@ -78,8 +82,6 @@ ÷ 000A ÷ 0308 ÷ 0001 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] ÷ 000A ÷ 0300 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] ÷ 000A ÷ 0308 × 0300 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] -÷ 000A ÷ 0E40 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] -÷ 000A ÷ 0308 ÷ 0E40 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] ÷ 000A ÷ 0903 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] ÷ 000A ÷ 0308 × 0903 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] ÷ 000A ÷ 1100 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] @@ -92,6 +94,10 @@ ÷ 000A ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] ÷ 000A ÷ AC01 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] ÷ 000A ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 000A ÷ 0378 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <reserved-0378> (Other) ÷ [0.3] +÷ 000A ÷ 0308 ÷ 0378 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ 000A ÷ D800 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] <surrogate-D800> (Control) ÷ [0.3] +÷ 
000A ÷ 0308 ÷ D800 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3] ÷ 0001 ÷ 0020 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] SPACE (Other) ÷ [0.3] ÷ 0001 ÷ 0308 ÷ 0020 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3] ÷ 0001 ÷ 000D ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] @@ -102,8 +108,6 @@ ÷ 0001 ÷ 0308 ÷ 0001 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] ÷ 0001 ÷ 0300 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] ÷ 0001 ÷ 0308 × 0300 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] -÷ 0001 ÷ 0E40 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] -÷ 0001 ÷ 0308 ÷ 0E40 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] ÷ 0001 ÷ 0903 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] ÷ 0001 ÷ 0308 × 0903 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] ÷ 0001 ÷ 1100 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] @@ -116,6 +120,10 @@ ÷ 0001 ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] ÷ 0001 ÷ AC01 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] ÷ 0001 ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0001 ÷ 0378 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] <reserved-0378> (Other) ÷ [0.3] +÷ 0001 ÷ 0308 ÷ 0378 ÷ # 
÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ 0001 ÷ D800 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] <surrogate-D800> (Control) ÷ [0.3] +÷ 0001 ÷ 0308 ÷ D800 ÷ # ÷ [0.2] <START OF HEADING> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3] ÷ 0300 ÷ 0020 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3] ÷ 0300 × 0308 ÷ 0020 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3] ÷ 0300 ÷ 000D ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] @@ -126,8 +134,6 @@ ÷ 0300 × 0308 ÷ 0001 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] ÷ 0300 × 0300 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] ÷ 0300 × 0308 × 0300 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] -÷ 0300 ÷ 0E40 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] -÷ 0300 × 0308 ÷ 0E40 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] ÷ 0300 × 0903 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] ÷ 0300 × 0308 × 0903 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] ÷ 0300 ÷ 1100 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] @@ -140,30 +146,10 @@ ÷ 0300 × 0308 ÷ AC00 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] ÷ 0300 ÷ AC01 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) 
÷ [0.3] ÷ 0300 × 0308 ÷ AC01 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] -÷ 0E40 × 0020 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.2] SPACE (Other) ÷ [0.3] -÷ 0E40 × 0308 ÷ 0020 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3] -÷ 0E40 ÷ 000D ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] -÷ 0E40 × 0308 ÷ 000D ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] -÷ 0E40 ÷ 000A ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] -÷ 0E40 × 0308 ÷ 000A ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] -÷ 0E40 ÷ 0001 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] -÷ 0E40 × 0308 ÷ 0001 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] -÷ 0E40 × 0300 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] -÷ 0E40 × 0308 × 0300 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] -÷ 0E40 × 0E40 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.2] THAI CHARACTER SARA E (Prepend) ÷ [0.3] -÷ 0E40 × 0308 ÷ 0E40 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] -÷ 0E40 × 0903 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] -÷ 0E40 × 0308 × 0903 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] -÷ 0E40 × 1100 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.2] HANGUL CHOSEONG KIYEOK (L) 
÷ [0.3] -÷ 0E40 × 0308 ÷ 1100 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] -÷ 0E40 × 1160 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.2] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] -÷ 0E40 × 0308 ÷ 1160 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] -÷ 0E40 × 11A8 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.2] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] -÷ 0E40 × 0308 ÷ 11A8 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] -÷ 0E40 × AC00 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.2] HANGUL SYLLABLE GA (LV) ÷ [0.3] -÷ 0E40 × 0308 ÷ AC00 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] -÷ 0E40 × AC01 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.2] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] -÷ 0E40 × 0308 ÷ AC01 ÷ # ÷ [0.2] THAI CHARACTER SARA E (Prepend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0300 ÷ 0378 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ 0300 × 0308 ÷ 0378 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ 0300 ÷ D800 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3] +÷ 0300 × 0308 ÷ D800 ÷ # ÷ [0.2] COMBINING GRAVE ACCENT (Extend) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3] ÷ 0903 ÷ 0020 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] SPACE (Other) ÷ [0.3] ÷ 0903 × 0308 ÷ 0020 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3] ÷ 0903 ÷ 000D ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] @@ -174,8 
+160,6 @@ ÷ 0903 × 0308 ÷ 0001 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] ÷ 0903 × 0300 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] ÷ 0903 × 0308 × 0300 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] -÷ 0903 ÷ 0E40 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] -÷ 0903 × 0308 ÷ 0E40 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] ÷ 0903 × 0903 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] ÷ 0903 × 0308 × 0903 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] ÷ 0903 ÷ 1100 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] @@ -188,6 +172,10 @@ ÷ 0903 × 0308 ÷ AC00 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] ÷ 0903 ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] ÷ 0903 × 0308 ÷ AC01 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0903 ÷ 0378 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ 0903 × 0308 ÷ 0378 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ 0903 ÷ D800 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3] +÷ 0903 × 0308 ÷ D800 ÷ # ÷ [0.2] DEVANAGARI SIGN VISARGA (SpacingMark) × [9.0] COMBINING DIAERESIS (Extend) 
÷ [5.0] <surrogate-D800> (Control) ÷ [0.3] ÷ 1100 ÷ 0020 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] SPACE (Other) ÷ [0.3] ÷ 1100 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3] ÷ 1100 ÷ 000D ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] @@ -198,8 +186,6 @@ ÷ 1100 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] ÷ 1100 × 0300 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] ÷ 1100 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] -÷ 1100 ÷ 0E40 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] -÷ 1100 × 0308 ÷ 0E40 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] ÷ 1100 × 0903 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] ÷ 1100 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] ÷ 1100 × 1100 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] @@ -212,6 +198,10 @@ ÷ 1100 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] ÷ 1100 × AC01 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [6.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] ÷ 1100 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 1100 ÷ 0378 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ 1100 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] 
+÷ 1100 ÷ D800 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3] +÷ 1100 × 0308 ÷ D800 ÷ # ÷ [0.2] HANGUL CHOSEONG KIYEOK (L) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3] ÷ 1160 ÷ 0020 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] SPACE (Other) ÷ [0.3] ÷ 1160 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3] ÷ 1160 ÷ 000D ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] @@ -222,8 +212,6 @@ ÷ 1160 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] ÷ 1160 × 0300 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] ÷ 1160 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] -÷ 1160 ÷ 0E40 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] -÷ 1160 × 0308 ÷ 0E40 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] ÷ 1160 × 0903 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] ÷ 1160 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] ÷ 1160 ÷ 1100 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] @@ -236,6 +224,10 @@ ÷ 1160 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] ÷ 1160 ÷ AC01 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] ÷ 1160 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 1160 ÷ 0378 ÷ # ÷ 
[0.2] HANGUL JUNGSEONG FILLER (V) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ 1160 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ 1160 ÷ D800 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3] +÷ 1160 × 0308 ÷ D800 ÷ # ÷ [0.2] HANGUL JUNGSEONG FILLER (V) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3] ÷ 11A8 ÷ 0020 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] SPACE (Other) ÷ [0.3] ÷ 11A8 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3] ÷ 11A8 ÷ 000D ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] @@ -246,8 +238,6 @@ ÷ 11A8 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] ÷ 11A8 × 0300 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] ÷ 11A8 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] -÷ 11A8 ÷ 0E40 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] -÷ 11A8 × 0308 ÷ 0E40 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] ÷ 11A8 × 0903 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] ÷ 11A8 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] ÷ 11A8 ÷ 1100 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] @@ -260,6 +250,10 @@ ÷ 11A8 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] ÷ 11A8 ÷ AC01 ÷ # ÷ [0.2] HANGUL JONGSEONG 
KIYEOK (T) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] ÷ 11A8 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 11A8 ÷ 0378 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ 11A8 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ 11A8 ÷ D800 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3] +÷ 11A8 × 0308 ÷ D800 ÷ # ÷ [0.2] HANGUL JONGSEONG KIYEOK (T) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3] ÷ AC00 ÷ 0020 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] SPACE (Other) ÷ [0.3] ÷ AC00 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3] ÷ AC00 ÷ 000D ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] @@ -270,8 +264,6 @@ ÷ AC00 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] ÷ AC00 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] ÷ AC00 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] -÷ AC00 ÷ 0E40 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] -÷ AC00 × 0308 ÷ 0E40 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] ÷ AC00 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] ÷ AC00 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] ÷ AC00 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] @@ -284,6 +276,10 @@ ÷ AC00 × 
0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] ÷ AC00 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] ÷ AC00 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ AC00 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ AC00 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ AC00 ÷ D800 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3] +÷ AC00 × 0308 ÷ D800 ÷ # ÷ [0.2] HANGUL SYLLABLE GA (LV) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3] ÷ AC01 ÷ 0020 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] SPACE (Other) ÷ [0.3] ÷ AC01 × 0308 ÷ 0020 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3] ÷ AC01 ÷ 000D ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] @@ -294,8 +290,6 @@ ÷ AC01 × 0308 ÷ 0001 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] ÷ AC01 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] ÷ AC01 × 0308 × 0300 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] -÷ AC01 ÷ 0E40 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] -÷ AC01 × 0308 ÷ 0E40 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] THAI CHARACTER SARA E (Prepend) ÷ [0.3] ÷ AC01 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] ÷ AC01 × 0308 × 0903 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] 
DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] ÷ AC01 ÷ 1100 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] @@ -308,4 +302,60 @@ ÷ AC01 × 0308 ÷ AC00 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] ÷ AC01 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] ÷ AC01 × 0308 ÷ AC01 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] -# Lines: 288 +÷ AC01 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ AC01 × 0308 ÷ 0378 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ AC01 ÷ D800 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3] +÷ AC01 × 0308 ÷ D800 ÷ # ÷ [0.2] HANGUL SYLLABLE GAG (LVT) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3] +÷ 0378 ÷ 0020 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 0378 × 0308 ÷ 0020 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ 0378 ÷ 000D ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0378 × 0308 ÷ 000D ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ 0378 ÷ 000A ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0378 × 0308 ÷ 000A ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ 0378 ÷ 0001 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] +÷ 0378 × 0308 ÷ 0001 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] +÷ 0378 × 0300 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING GRAVE 
ACCENT (Extend) ÷ [0.3] +÷ 0378 × 0308 × 0300 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] +÷ 0378 × 0903 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 0378 × 0308 × 0903 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ 0378 ÷ 1100 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 0378 × 0308 ÷ 1100 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ 0378 ÷ 1160 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 0378 × 0308 ÷ 1160 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ 0378 ÷ 11A8 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 0378 × 0308 ÷ 11A8 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ 0378 ÷ AC00 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 0378 × 0308 ÷ AC00 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ 0378 ÷ AC01 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0378 × 0308 ÷ AC01 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ 0378 ÷ 0378 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ 0378 × 0308 ÷ 0378 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ 0378 ÷ D800 ÷ # ÷ [0.2] <reserved-0378> (Other) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3] +÷ 0378 × 0308 ÷ D800 ÷ # ÷ [0.2] <reserved-0378> (Other) × [9.0] COMBINING 
DIAERESIS (Extend) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3] +÷ D800 ÷ 0020 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] SPACE (Other) ÷ [0.3] +÷ D800 ÷ 0308 ÷ 0020 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] SPACE (Other) ÷ [0.3] +÷ D800 ÷ 000D ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ D800 ÷ 0308 ÷ 000D ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3] +÷ D800 ÷ 000A ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ D800 ÷ 0308 ÷ 000A ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <LINE FEED (LF)> (LF) ÷ [0.3] +÷ D800 ÷ 0001 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] <START OF HEADING> (Control) ÷ [0.3] +÷ D800 ÷ 0308 ÷ 0001 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <START OF HEADING> (Control) ÷ [0.3] +÷ D800 ÷ 0300 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] +÷ D800 ÷ 0308 × 0300 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) × [9.0] COMBINING GRAVE ACCENT (Extend) ÷ [0.3] +÷ D800 ÷ 0903 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ D800 ÷ 0308 × 0903 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) × [9.1] DEVANAGARI SIGN VISARGA (SpacingMark) ÷ [0.3] +÷ D800 ÷ 1100 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ D800 ÷ 0308 ÷ 1100 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL CHOSEONG KIYEOK (L) ÷ [0.3] +÷ D800 ÷ 1160 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ D800 ÷ 0308 ÷ 1160 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JUNGSEONG FILLER (V) ÷ [0.3] +÷ D800 ÷ 11A8 ÷ # ÷ 
[0.2] <surrogate-D800> (Control) ÷ [4.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ D800 ÷ 0308 ÷ 11A8 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL JONGSEONG KIYEOK (T) ÷ [0.3] +÷ D800 ÷ AC00 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ D800 ÷ 0308 ÷ AC00 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GA (LV) ÷ [0.3] +÷ D800 ÷ AC01 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ D800 ÷ 0308 ÷ AC01 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] HANGUL SYLLABLE GAG (LVT) ÷ [0.3] +÷ D800 ÷ 0378 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] <reserved-0378> (Other) ÷ [0.3] +÷ D800 ÷ 0308 ÷ 0378 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [999.0] <reserved-0378> (Other) ÷ [0.3] +÷ D800 ÷ D800 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] <surrogate-D800> (Control) ÷ [0.3] +÷ D800 ÷ 0308 ÷ D800 ÷ # ÷ [0.2] <surrogate-D800> (Control) ÷ [4.0] COMBINING DIAERESIS (Extend) ÷ [5.0] <surrogate-D800> (Control) ÷ [0.3] +# Lines: 338 diff --git a/lib/unicore/auxiliary/GraphemeBreakProperty.txt b/lib/unicore/auxiliary/GraphemeBreakProperty.txt index 0432515a00..d3f480da59 100644 --- a/lib/unicore/auxiliary/GraphemeBreakProperty.txt +++ b/lib/unicore/auxiliary/GraphemeBreakProperty.txt @@ -1,8 +1,8 @@ -# GraphemeBreakProperty-6.0.0.txt -# Date: 2010-09-01, 18:48:17 GMT [MD] +# GraphemeBreakProperty-6.1.0.txt +# Date: 2011-12-05, 16:44:15 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. 
# For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ @@ -34,25 +34,31 @@ 000E..001F ; Control # Cc [18] <control-000E>..<control-001F> 007F..009F ; Control # Cc [33] <control-007F>..<control-009F> 00AD ; Control # Cf SOFT HYPHEN -0600..0603 ; Control # Cf [4] ARABIC NUMBER SIGN..ARABIC SIGN SAFHA +0600..0604 ; Control # Cf [5] ARABIC NUMBER SIGN..ARABIC SIGN SAMVAT 06DD ; Control # Cf ARABIC END OF AYAH 070F ; Control # Cf SYRIAC ABBREVIATION MARK -17B4..17B5 ; Control # Cf [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA 200B ; Control # Cf ZERO WIDTH SPACE 200E..200F ; Control # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK 2028 ; Control # Zl LINE SEPARATOR 2029 ; Control # Zp PARAGRAPH SEPARATOR 202A..202E ; Control # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE 2060..2064 ; Control # Cf [5] WORD JOINER..INVISIBLE PLUS +2065..2069 ; Control # Cn [5] <reserved-2065>..<reserved-2069> 206A..206F ; Control # Cf [6] INHIBIT SYMMETRIC SWAPPING..NOMINAL DIGIT SHAPES +D800..DFFF ; Control # Cs [2048] <surrogate-D800>..<surrogate-DFFF> FEFF ; Control # Cf ZERO WIDTH NO-BREAK SPACE +FFF0..FFF8 ; Control # Cn [9] <reserved-FFF0>..<reserved-FFF8> FFF9..FFFB ; Control # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATION TERMINATOR 110BD ; Control # Cf KAITHI NUMBER SIGN 1D173..1D17A ; Control # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE +E0000 ; Control # Cn <reserved-E0000> E0001 ; Control # Cf LANGUAGE TAG +E0002..E001F ; Control # Cn [30] <reserved-E0002>..<reserved-E001F> E0020..E007F ; Control # Cf [96] TAG SPACE..CANCEL TAG +E0080..E00FF ; Control # Cn [128] <reserved-E0080>..<reserved-E00FF> +E01F0..E0FFF ; Control # Cn [3600] <reserved-E01F0>..<reserved-E0FFF> -# Total code points: 203 +# Total code points: 6023 # ================================================ @@ -80,6 +86,7 @@ E0020..E007F ; Control # Cf [96] TAG SPACE..CANCEL TAG 0825..0827 ; Extend 
# Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U 0829..082D ; Extend # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA 0859..085B ; Extend # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK +08E4..08FE ; Extend # Mn [27] ARABIC CURLY FATHA..ARABIC DAMMA WITH DOT 0900..0902 ; Extend # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA 093A ; Extend # Mn DEVANAGARI VOWEL SIGN OE 093C ; Extend # Mn DEVANAGARI SIGN NUKTA @@ -177,6 +184,7 @@ E0020..E007F ; Control # Cf [96] TAG SPACE..CANCEL TAG 1732..1734 ; Extend # Mn [3] HANUNOO VOWEL SIGN I..HANUNOO SIGN PAMUDPOD 1752..1753 ; Extend # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U 1772..1773 ; Extend # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U +17B4..17B5 ; Extend # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA 17B7..17BD ; Extend # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA 17C6 ; Extend # Mn KHMER SIGN NIKAHIT 17C9..17D3 ; Extend # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT @@ -204,6 +212,7 @@ E0020..E007F ; Control # Cf [96] TAG SPACE..CANCEL TAG 1B80..1B81 ; Extend # Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR 1BA2..1BA5 ; Extend # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU 1BA8..1BA9 ; Extend # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG +1BAB ; Extend # Mn SUNDANESE SIGN VIRAMA 1BE6 ; Extend # Mn BATAK SIGN TOMPI 1BE8..1BE9 ; Extend # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE 1BED ; Extend # Mn BATAK VOWEL SIGN KARO O @@ -214,6 +223,7 @@ E0020..E007F ; Control # Cf [96] TAG SPACE..CANCEL TAG 1CD4..1CE0 ; Extend # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA 1CE2..1CE8 ; Extend # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL 1CED ; Extend # Mn VEDIC SIGN TIRYAK +1CF4 ; Extend # Mn VEDIC TONE CANDRA ABOVE 1DC0..1DE6 ; Extend # Mn [39] COMBINING DOTTED GRAVE ACCENT..COMBINING 
LATIN SMALL LETTER Z 1DFC..1DFF ; Extend # Mn [4] COMBINING DOUBLE INVERTED BREVE BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW 200C..200D ; Extend # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER @@ -225,11 +235,13 @@ E0020..E007F ; Control # Cf [96] TAG SPACE..CANCEL TAG 2CEF..2CF1 ; Extend # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS 2D7F ; Extend # Mn TIFINAGH CONSONANT JOINER 2DE0..2DFF ; Extend # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS -302A..302F ; Extend # Mn [6] IDEOGRAPHIC LEVEL TONE MARK..HANGUL DOUBLE DOT TONE MARK +302A..302D ; Extend # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK +302E..302F ; Extend # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK 3099..309A ; Extend # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK A66F ; Extend # Mn COMBINING CYRILLIC VZMET A670..A672 ; Extend # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN -A67C..A67D ; Extend # Mn [2] COMBINING CYRILLIC KAVYKA..COMBINING CYRILLIC PAYEROK +A674..A67D ; Extend # Mn [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK +A69F ; Extend # Mn COMBINING CYRILLIC LETTER IOTIFIED E A6F0..A6F1 ; Extend # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS A802 ; Extend # Mn SYLOTI NAGRI SIGN DVISVARA A806 ; Extend # Mn SYLOTI NAGRI SIGN HASANTA @@ -253,6 +265,8 @@ AAB2..AAB4 ; Extend # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U AAB7..AAB8 ; Extend # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA AABE..AABF ; Extend # Mn [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK AAC1 ; Extend # Mn TAI VIET TONE MAI THO +AAEC..AAED ; Extend # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI +AAF6 ; Extend # Mn MEETEI MAYEK VIRAMA ABE5 ; Extend # Mn MEETEI MAYEK VOWEL SIGN ANAP ABE8 ; Extend # Mn MEETEI MAYEK VOWEL SIGN UNAP ABED ; Extend # Mn MEETEI 
MAYEK APUN IYEK @@ -271,6 +285,16 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 11080..11081 ; Extend # Mn [2] KAITHI SIGN CANDRABINDU..KAITHI SIGN ANUSVARA 110B3..110B6 ; Extend # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI 110B9..110BA ; Extend # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA +11100..11102 ; Extend # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11127..1112B ; Extend # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112D..11134 ; Extend # Mn [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA +11180..11181 ; Extend # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +111B6..111BE ; Extend # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +116AB ; Extend # Mn TAKRI SIGN ANUSVARA +116AD ; Extend # Mn TAKRI VOWEL SIGN AA +116B0..116B5 ; Extend # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +116B7 ; Extend # Mn TAKRI SIGN NUKTA +16F8F..16F92 ; Extend # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW 1D165 ; Extend # Mc MUSICAL SYMBOL COMBINING STEM 1D167..1D169 ; Extend # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 1D16E..1D172 ; Extend # Mc [5] MUSICAL SYMBOL COMBINING FLAG-1..MUSICAL SYMBOL COMBINING FLAG-5 @@ -280,17 +304,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 1D242..1D244 ; Extend # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 1234 - -# ================================================ - -0E40..0E44 ; Prepend # Lo [5] THAI CHARACTER SARA E..THAI CHARACTER SARA AI MAIMALAI -0EC0..0EC4 ; Prepend # Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI -AAB5..AAB6 ; Prepend # Lo [2] TAI VIET VOWEL E..TAI VIET VOWEL O -AAB9 ; Prepend # Lo TAI VIET VOWEL UEA -AABB..AABC ; Prepend # Lo [2] TAI VIET VOWEL AUE..TAI VIET VOWEL AY - -# Total code points: 15 +# Total code points: 1317 # 
================================================ @@ -333,24 +347,14 @@ AABB..AABC ; Prepend # Lo [2] TAI VIET VOWEL AUE..TAI VIET VOWEL AY 0DD0..0DD1 ; SpacingMark # Mc [2] SINHALA VOWEL SIGN KETTI AEDA-PILLA..SINHALA VOWEL SIGN DIGA AEDA-PILLA 0DD8..0DDE ; SpacingMark # Mc [7] SINHALA VOWEL SIGN GAETTA-PILLA..SINHALA VOWEL SIGN KOMBUVA HAA GAYANUKITTA 0DF2..0DF3 ; SpacingMark # Mc [2] SINHALA VOWEL SIGN DIGA GAETTA-PILLA..SINHALA VOWEL SIGN DIGA GAYANUKITTA -0E30 ; SpacingMark # Lo THAI CHARACTER SARA A -0E32..0E33 ; SpacingMark # Lo [2] THAI CHARACTER SARA AA..THAI CHARACTER SARA AM -0E45 ; SpacingMark # Lo THAI CHARACTER LAKKHANGYAO -0EB0 ; SpacingMark # Lo LAO VOWEL SIGN A -0EB2..0EB3 ; SpacingMark # Lo [2] LAO VOWEL SIGN AA..LAO VOWEL SIGN AM +0E33 ; SpacingMark # Lo THAI CHARACTER SARA AM +0EB3 ; SpacingMark # Lo LAO VOWEL SIGN AM 0F3E..0F3F ; SpacingMark # Mc [2] TIBETAN SIGN YAR TSHES..TIBETAN SIGN MAR TSHES 0F7F ; SpacingMark # Mc TIBETAN SIGN RNAM BCAD -102B..102C ; SpacingMark # Mc [2] MYANMAR VOWEL SIGN TALL AA..MYANMAR VOWEL SIGN AA 1031 ; SpacingMark # Mc MYANMAR VOWEL SIGN E -1038 ; SpacingMark # Mc MYANMAR SIGN VISARGA 103B..103C ; SpacingMark # Mc [2] MYANMAR CONSONANT SIGN MEDIAL YA..MYANMAR CONSONANT SIGN MEDIAL RA 1056..1057 ; SpacingMark # Mc [2] MYANMAR VOWEL SIGN VOCALIC R..MYANMAR VOWEL SIGN VOCALIC RR -1062..1064 ; SpacingMark # Mc [3] MYANMAR VOWEL SIGN SGAW KAREN EU..MYANMAR TONE MARK SGAW KAREN KE PHO -1067..106D ; SpacingMark # Mc [7] MYANMAR VOWEL SIGN WESTERN PWO KAREN EU..MYANMAR SIGN WESTERN PWO KAREN TONE-5 -1083..1084 ; SpacingMark # Mc [2] MYANMAR VOWEL SIGN SHAN AA..MYANMAR VOWEL SIGN SHAN E -1087..108C ; SpacingMark # Mc [6] MYANMAR SIGN SHAN TONE-2..MYANMAR SIGN SHAN COUNCIL TONE-3 -108F ; SpacingMark # Mc MYANMAR SIGN RUMAI PALAUNG TONE-5 -109A..109C ; SpacingMark # Mc [3] MYANMAR SIGN KHAMTI TONE-1..MYANMAR VOWEL SIGN AITON A +1084 ; SpacingMark # Mc MYANMAR VOWEL SIGN SHAN E 17B6 ; SpacingMark # Mc KHMER VOWEL SIGN AA 
17BE..17C5 ; SpacingMark # Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU 17C7..17C8 ; SpacingMark # Mc [2] KHMER SIGN REAHMUK..KHMER SIGN YUUKALEAPINTU @@ -358,13 +362,11 @@ AABB..AABC ; Prepend # Lo [2] TAI VIET VOWEL AUE..TAI VIET VOWEL AY 1929..192B ; SpacingMark # Mc [3] LIMBU SUBJOINED LETTER YA..LIMBU SUBJOINED LETTER WA 1930..1931 ; SpacingMark # Mc [2] LIMBU SMALL LETTER KA..LIMBU SMALL LETTER NGA 1933..1938 ; SpacingMark # Mc [6] LIMBU SMALL LETTER TA..LIMBU SMALL LETTER LA -19B0..19C0 ; SpacingMark # Mc [17] NEW TAI LUE VOWEL SIGN VOWEL SHORTENER..NEW TAI LUE VOWEL SIGN IY -19C8..19C9 ; SpacingMark # Mc [2] NEW TAI LUE TONE MARK-1..NEW TAI LUE TONE MARK-2 +19B5..19B7 ; SpacingMark # Mc [3] NEW TAI LUE VOWEL SIGN E..NEW TAI LUE VOWEL SIGN O +19BA ; SpacingMark # Mc NEW TAI LUE VOWEL SIGN AY 1A19..1A1B ; SpacingMark # Mc [3] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN AE 1A55 ; SpacingMark # Mc TAI THAM CONSONANT SIGN MEDIAL RA 1A57 ; SpacingMark # Mc TAI THAM CONSONANT SIGN LA TANG LAI -1A61 ; SpacingMark # Mc TAI THAM VOWEL SIGN A -1A63..1A64 ; SpacingMark # Mc [2] TAI THAM VOWEL SIGN AA..TAI THAM VOWEL SIGN TALL AA 1A6D..1A72 ; SpacingMark # Mc [6] TAI THAM VOWEL SIGN OY..TAI THAM VOWEL SIGN THAM AI 1B04 ; SpacingMark # Mc BALINESE SIGN BISAH 1B35 ; SpacingMark # Mc BALINESE VOWEL SIGN TEDUNG @@ -375,6 +377,7 @@ AABB..AABC ; Prepend # Lo [2] TAI VIET VOWEL AUE..TAI VIET VOWEL AY 1BA1 ; SpacingMark # Mc SUNDANESE CONSONANT SIGN PAMINGKAL 1BA6..1BA7 ; SpacingMark # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG 1BAA ; SpacingMark # Mc SUNDANESE SIGN PAMAAEH +1BAC..1BAD ; SpacingMark # Mc [2] SUNDANESE CONSONANT SIGN PASANGAN MA..SUNDANESE CONSONANT SIGN PASANGAN WA 1BE7 ; SpacingMark # Mc BATAK VOWEL SIGN E 1BEA..1BEC ; SpacingMark # Mc [3] BATAK VOWEL SIGN I..BATAK VOWEL SIGN O 1BEE ; SpacingMark # Mc BATAK VOWEL SIGN U @@ -382,7 +385,7 @@ AABB..AABC ; Prepend # Lo [2] TAI VIET VOWEL AUE..TAI VIET VOWEL AY 1C24..1C2B ; SpacingMark 
# Mc [8] LEPCHA SUBJOINED LETTER YA..LEPCHA VOWEL SIGN UU 1C34..1C35 ; SpacingMark # Mc [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG 1CE1 ; SpacingMark # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA -1CF2 ; SpacingMark # Mc VEDIC SIGN ARDHAVISARGA +1CF2..1CF3 ; SpacingMark # Mc [2] VEDIC SIGN ARDHAVISARGA..VEDIC SIGN ROTATED ARDHAVISARGA A823..A824 ; SpacingMark # Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I A827 ; SpacingMark # Mc SYLOTI NAGRI VOWEL SIGN OO A880..A881 ; SpacingMark # Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA @@ -395,7 +398,9 @@ A9BD..A9C0 ; SpacingMark # Mc [4] JAVANESE CONSONANT SIGN KERET..JAVANESE P AA2F..AA30 ; SpacingMark # Mc [2] CHAM VOWEL SIGN O..CHAM VOWEL SIGN AI AA33..AA34 ; SpacingMark # Mc [2] CHAM CONSONANT SIGN YA..CHAM CONSONANT SIGN RA AA4D ; SpacingMark # Mc CHAM CONSONANT SIGN FINAL H -AA7B ; SpacingMark # Mc MYANMAR SIGN PAO KAREN TONE +AAEB ; SpacingMark # Mc MEETEI MAYEK VOWEL SIGN II +AAEE..AAEF ; SpacingMark # Mc [2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU +AAF5 ; SpacingMark # Mc MEETEI MAYEK VOWEL SIGN VISARGA ABE3..ABE4 ; SpacingMark # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP ABE6..ABE7 ; SpacingMark # Mc [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP ABE9..ABEA ; SpacingMark # Mc [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEETEI MAYEK VOWEL SIGN NUNG @@ -405,10 +410,18 @@ ABEC ; SpacingMark # Mc MEETEI MAYEK LUM IYEK 11082 ; SpacingMark # Mc KAITHI SIGN VISARGA 110B0..110B2 ; SpacingMark # Mc [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II 110B7..110B8 ; SpacingMark # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU +1112C ; SpacingMark # Mc CHAKMA VOWEL SIGN E +11182 ; SpacingMark # Mc SHARADA SIGN VISARGA +111B3..111B5 ; SpacingMark # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II +111BF..111C0 ; SpacingMark # Mc [2] SHARADA VOWEL SIGN AU..SHARADA SIGN VIRAMA +116AC ; SpacingMark # Mc TAKRI SIGN VISARGA 
+116AE..116AF ; SpacingMark # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II +116B6 ; SpacingMark # Mc TAKRI SIGN VIRAMA +16F51..16F7E ; SpacingMark # Mc [46] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN NG 1D166 ; SpacingMark # Mc MUSICAL SYMBOL COMBINING SPRECHGESANG STEM 1D16D ; SpacingMark # Mc MUSICAL SYMBOL COMBINING AUGMENTATION DOT -# Total code points: 275 +# Total code points: 291 # ================================================ diff --git a/lib/unicore/auxiliary/SentenceBreakProperty.txt b/lib/unicore/auxiliary/SentenceBreakProperty.txt index 87cf2a6762..a5eb0b71c0 100644 --- a/lib/unicore/auxiliary/SentenceBreakProperty.txt +++ b/lib/unicore/auxiliary/SentenceBreakProperty.txt @@ -1,8 +1,8 @@ -# SentenceBreakProperty-6.0.0.txt -# Date: 2010-08-19, 00:48:47 GMT [MD] +# SentenceBreakProperty-6.1.0.txt +# Date: 2011-11-27, 05:10:50 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ @@ -53,6 +53,7 @@ 0825..0827 ; Extend # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U 0829..082D ; Extend # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA 0859..085B ; Extend # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK +08E4..08FE ; Extend # Mn [27] ARABIC CURLY FATHA..ARABIC DAMMA WITH DOT 0900..0902 ; Extend # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA 0903 ; Extend # Mc DEVANAGARI SIGN VISARGA 093A ; Extend # Mn DEVANAGARI VOWEL SIGN OE @@ -195,6 +196,7 @@ 1732..1734 ; Extend # Mn [3] HANUNOO VOWEL SIGN I..HANUNOO SIGN PAMUDPOD 1752..1753 ; Extend # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U 1772..1773 ; Extend # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U +17B4..17B5 ; Extend # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA 17B6 ; Extend # Mc KHMER VOWEL SIGN AA 17B7..17BD ; Extend # Mn [7] KHMER 
VOWEL SIGN I..KHMER VOWEL SIGN UA 17BE..17C5 ; Extend # Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU @@ -246,6 +248,8 @@ 1BA6..1BA7 ; Extend # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG 1BA8..1BA9 ; Extend # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG 1BAA ; Extend # Mc SUNDANESE SIGN PAMAAEH +1BAB ; Extend # Mn SUNDANESE SIGN VIRAMA +1BAC..1BAD ; Extend # Mc [2] SUNDANESE CONSONANT SIGN PASANGAN MA..SUNDANESE CONSONANT SIGN PASANGAN WA 1BE6 ; Extend # Mn BATAK SIGN TOMPI 1BE7 ; Extend # Mc BATAK VOWEL SIGN E 1BE8..1BE9 ; Extend # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE @@ -263,7 +267,8 @@ 1CE1 ; Extend # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA 1CE2..1CE8 ; Extend # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL 1CED ; Extend # Mn VEDIC SIGN TIRYAK -1CF2 ; Extend # Mc VEDIC SIGN ARDHAVISARGA +1CF2..1CF3 ; Extend # Mc [2] VEDIC SIGN ARDHAVISARGA..VEDIC SIGN ROTATED ARDHAVISARGA +1CF4 ; Extend # Mn VEDIC TONE CANDRA ABOVE 1DC0..1DE6 ; Extend # Mn [39] COMBINING DOTTED GRAVE ACCENT..COMBINING LATIN SMALL LETTER Z 1DFC..1DFF ; Extend # Mn [4] COMBINING DOUBLE INVERTED BREVE BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW 200C..200D ; Extend # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER @@ -275,11 +280,13 @@ 2CEF..2CF1 ; Extend # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS 2D7F ; Extend # Mn TIFINAGH CONSONANT JOINER 2DE0..2DFF ; Extend # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS -302A..302F ; Extend # Mn [6] IDEOGRAPHIC LEVEL TONE MARK..HANGUL DOUBLE DOT TONE MARK +302A..302D ; Extend # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK +302E..302F ; Extend # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK 3099..309A ; Extend # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK A66F ; Extend # Mn COMBINING 
CYRILLIC VZMET A670..A672 ; Extend # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN -A67C..A67D ; Extend # Mn [2] COMBINING CYRILLIC KAVYKA..COMBINING CYRILLIC PAYEROK +A674..A67D ; Extend # Mn [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK +A69F ; Extend # Mn COMBINING CYRILLIC LETTER IOTIFIED E A6F0..A6F1 ; Extend # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS A802 ; Extend # Mn SYLOTI NAGRI SIGN DVISVARA A806 ; Extend # Mn SYLOTI NAGRI SIGN HASANTA @@ -316,6 +323,11 @@ AAB2..AAB4 ; Extend # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U AAB7..AAB8 ; Extend # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA AABE..AABF ; Extend # Mn [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK AAC1 ; Extend # Mn TAI VIET TONE MAI THO +AAEB ; Extend # Mc MEETEI MAYEK VOWEL SIGN II +AAEC..AAED ; Extend # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI +AAEE..AAEF ; Extend # Mc [2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU +AAF5 ; Extend # Mc MEETEI MAYEK VOWEL SIGN VISARGA +AAF6 ; Extend # Mn MEETEI MAYEK VIRAMA ABE3..ABE4 ; Extend # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP ABE5 ; Extend # Mn MEETEI MAYEK VOWEL SIGN ANAP ABE6..ABE7 ; Extend # Mc [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP @@ -343,6 +355,24 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 110B3..110B6 ; Extend # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI 110B7..110B8 ; Extend # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU 110B9..110BA ; Extend # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA +11100..11102 ; Extend # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11127..1112B ; Extend # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112C ; Extend # Mc CHAKMA VOWEL SIGN E +1112D..11134 ; Extend # Mn [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA +11180..11181 ; Extend # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA 
+11182 ; Extend # Mc SHARADA SIGN VISARGA +111B3..111B5 ; Extend # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II +111B6..111BE ; Extend # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +111BF..111C0 ; Extend # Mc [2] SHARADA VOWEL SIGN AU..SHARADA SIGN VIRAMA +116AB ; Extend # Mn TAKRI SIGN ANUSVARA +116AC ; Extend # Mc TAKRI SIGN VISARGA +116AD ; Extend # Mn TAKRI VOWEL SIGN AA +116AE..116AF ; Extend # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II +116B0..116B5 ; Extend # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +116B6 ; Extend # Mc TAKRI SIGN VIRAMA +116B7 ; Extend # Mn TAKRI SIGN NUKTA +16F51..16F7E ; Extend # Mc [46] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN NG +16F8F..16F92 ; Extend # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW 1D165..1D166 ; Extend # Mc [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM 1D167..1D169 ; Extend # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 1D16D..1D172 ; Extend # Mc [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5 @@ -352,7 +382,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 1D242..1D244 ; Extend # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 1502 +# Total code points: 1649 # ================================================ @@ -365,10 +395,9 @@ E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 # ================================================ 00AD ; Format # Cf SOFT HYPHEN -0600..0603 ; Format # Cf [4] ARABIC NUMBER SIGN..ARABIC SIGN SAFHA +0600..0604 ; Format # Cf [5] ARABIC NUMBER SIGN..ARABIC SIGN SAMVAT 06DD ; Format # Cf ARABIC END OF AYAH 070F ; Format # Cf SYRIAC ABBREVIATION MARK -17B4..17B5 ; Format # Cf [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA 200B ; Format # Cf ZERO WIDTH SPACE 200E..200F ; Format # Cf [2] 
LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK 202A..202E ; Format # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE @@ -381,7 +410,7 @@ FFF9..FFFB ; Format # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANN E0001 ; Format # Cf LANGUAGE TAG E0020..E007F ; Format # Cf [96] TAG SPACE..CANCEL TAG -# Total code points: 138 +# Total code points: 137 # ================================================ @@ -401,9 +430,9 @@ E0020..E007F ; Format # Cf [96] TAG SPACE..CANCEL TAG # ================================================ 0061..007A ; Lower # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z -00AA ; Lower # L& FEMININE ORDINAL INDICATOR +00AA ; Lower # Lo FEMININE ORDINAL INDICATOR 00B5 ; Lower # L& MICRO SIGN -00BA ; Lower # L& MASCULINE ORDINAL INDICATOR +00BA ; Lower # Lo MASCULINE ORDINAL INDICATOR 00DF..00F6 ; Lower # L& [24] LATIN SMALL LETTER SHARP S..LATIN SMALL LETTER O WITH DIAERESIS 00F8..00FF ; Lower # L& [8] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER Y WITH DIAERESIS 0101 ; Lower # L& LATIN SMALL LETTER A WITH MACRON @@ -673,8 +702,8 @@ E0020..E007F ; Format # Cf [96] TAG SPACE..CANCEL TAG 0527 ; Lower # L& CYRILLIC SMALL LETTER SHHA WITH DESCENDER 0561..0587 ; Lower # L& [39] ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LIGATURE ECH YIWN 1D00..1D2B ; Lower # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL -1D2C..1D61 ; Lower # Lm [54] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI -1D62..1D77 ; Lower # L& [22] LATIN SUBSCRIPT SMALL LETTER I..LATIN SMALL LETTER TURNED G +1D2C..1D6A ; Lower # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1D6B..1D77 ; Lower # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G 1D78 ; Lower # Lm MODIFIER LETTER CYRILLIC EN 1D79..1D9A ; Lower # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK 1D9B..1DBF ; Lower # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA @@ -822,7 +851,9 @@ E0020..E007F ; Format # 
Cf [96] TAG SPACE..CANCEL TAG 1FE0..1FE7 ; Lower # L& [8] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI 1FF2..1FF4 ; Lower # L& [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI 1FF6..1FF7 ; Lower # L& [2] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI -2090..2094 ; Lower # Lm [5] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER SCHWA +2071 ; Lower # Lm SUPERSCRIPT LATIN SMALL LETTER I +207F ; Lower # Lm SUPERSCRIPT LATIN SMALL LETTER N +2090..209C ; Lower # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T 210A ; Lower # L& SCRIPT SMALL G 210E..210F ; Lower # L& [2] PLANCK CONSTANT..PLANCK CONSTANT OVER TWO PI 2113 ; Lower # L& SCRIPT SMALL L @@ -843,8 +874,8 @@ E0020..E007F ; Format # Cf [96] TAG SPACE..CANCEL TAG 2C6C ; Lower # L& LATIN SMALL LETTER Z WITH DESCENDER 2C71 ; Lower # L& LATIN SMALL LETTER V WITH RIGHT HOOK 2C73..2C74 ; Lower # L& [2] LATIN SMALL LETTER W WITH HOOK..LATIN SMALL LETTER V WITH CURL -2C76..2C7C ; Lower # L& [7] LATIN SMALL LETTER HALF H..LATIN SUBSCRIPT SMALL LETTER J -2C7D ; Lower # Lm MODIFIER LETTER CAPITAL V +2C76..2C7B ; Lower # L& [6] LATIN SMALL LETTER HALF H..LATIN LETTER SMALL CAPITAL TURNED E +2C7C..2C7D ; Lower # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V 2C81 ; Lower # L& COPTIC SMALL LETTER ALFA 2C83 ; Lower # L& COPTIC SMALL LETTER VIDA 2C85 ; Lower # L& COPTIC SMALL LETTER GAMMA @@ -897,7 +928,10 @@ E0020..E007F ; Format # Cf [96] TAG SPACE..CANCEL TAG 2CE3..2CE4 ; Lower # L& [2] COPTIC SMALL LETTER OLD NUBIAN WAU..COPTIC SYMBOL KAI 2CEC ; Lower # L& COPTIC SMALL LETTER CRYPTOGRAMMIC SHEI 2CEE ; Lower # L& COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CF3 ; Lower # L& COPTIC SMALL LETTER BOHAIRIC KHEI 2D00..2D25 ; Lower # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE +2D27 ; Lower # L& 
GEORGIAN SMALL LETTER YN +2D2D ; Lower # L& GEORGIAN SMALL LETTER AEN A641 ; Lower # L& CYRILLIC SMALL LETTER ZEMLYA A643 ; Lower # L& CYRILLIC SMALL LETTER DZELO A645 ; Lower # L& CYRILLIC SMALL LETTER REVERSED DZE @@ -983,11 +1017,13 @@ A787 ; Lower # L& LATIN SMALL LETTER INSULAR T A78C ; Lower # L& LATIN SMALL LETTER SALTILLO A78E ; Lower # L& LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A791 ; Lower # L& LATIN SMALL LETTER N WITH DESCENDER +A793 ; Lower # L& LATIN SMALL LETTER C WITH BAR A7A1 ; Lower # L& LATIN SMALL LETTER G WITH OBLIQUE STROKE A7A3 ; Lower # L& LATIN SMALL LETTER K WITH OBLIQUE STROKE A7A5 ; Lower # L& LATIN SMALL LETTER N WITH OBLIQUE STROKE A7A7 ; Lower # L& LATIN SMALL LETTER R WITH OBLIQUE STROKE A7A9 ; Lower # L& LATIN SMALL LETTER S WITH OBLIQUE STROKE +A7F8..A7F9 ; Lower # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE A7FA ; Lower # L& LATIN LETTER SMALL CAPITAL TURNED M FB00..FB06 ; Lower # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST FB13..FB17 ; Lower # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH @@ -1022,7 +1058,7 @@ FF41..FF5A ; Lower # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1D7C4..1D7C9 ; Lower # L& [6] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC PI SYMBOL 1D7CB ; Lower # L& MATHEMATICAL BOLD SMALL DIGAMMA -# Total code points: 1917 +# Total code points: 1933 # ================================================ @@ -1294,6 +1330,8 @@ FF41..FF5A ; Lower # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 0526 ; Upper # L& CYRILLIC CAPITAL LETTER SHHA WITH DESCENDER 0531..0556 ; Upper # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH 10A0..10C5 ; Upper # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; Upper # L& GEORGIAN CAPITAL LETTER YN +10CD ; Upper # L& GEORGIAN CAPITAL LETTER AEN 1E00 ; Upper # L& LATIN CAPITAL LETTER A WITH RING BELOW 1E02 ; 
Upper # L& LATIN CAPITAL LETTER B WITH DOT ABOVE 1E04 ; Upper # L& LATIN CAPITAL LETTER B WITH DOT BELOW @@ -1513,6 +1551,7 @@ FF41..FF5A ; Lower # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 2CE2 ; Upper # L& COPTIC CAPITAL LETTER OLD NUBIAN WAU 2CEB ; Upper # L& COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI 2CED ; Upper # L& COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA +2CF2 ; Upper # L& COPTIC CAPITAL LETTER BOHAIRIC KHEI A640 ; Upper # L& CYRILLIC CAPITAL LETTER ZEMLYA A642 ; Upper # L& CYRILLIC CAPITAL LETTER DZELO A644 ; Upper # L& CYRILLIC CAPITAL LETTER REVERSED DZE @@ -1596,11 +1635,13 @@ A786 ; Upper # L& LATIN CAPITAL LETTER INSULAR T A78B ; Upper # L& LATIN CAPITAL LETTER SALTILLO A78D ; Upper # L& LATIN CAPITAL LETTER TURNED H A790 ; Upper # L& LATIN CAPITAL LETTER N WITH DESCENDER +A792 ; Upper # L& LATIN CAPITAL LETTER C WITH BAR A7A0 ; Upper # L& LATIN CAPITAL LETTER G WITH OBLIQUE STROKE A7A2 ; Upper # L& LATIN CAPITAL LETTER K WITH OBLIQUE STROKE A7A4 ; Upper # L& LATIN CAPITAL LETTER N WITH OBLIQUE STROKE A7A6 ; Upper # L& LATIN CAPITAL LETTER R WITH OBLIQUE STROKE A7A8 ; Upper # L& LATIN CAPITAL LETTER S WITH OBLIQUE STROKE +A7AA ; Upper # L& LATIN CAPITAL LETTER H WITH HOOK FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z 10400..10427 ; Upper # L& [40] DESERET CAPITAL LETTER LONG I..DESERET CAPITAL LETTER EW 1D400..1D419 ; Upper # L& [26] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL BOLD CAPITAL Z @@ -1635,7 +1676,7 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT 1D790..1D7A8 ; Upper # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA 1D7CA ; Upper # L& MATHEMATICAL BOLD CAPITAL DIGAMMA -# Total code points: 1509 +# Total code points: 1514 # ================================================ @@ -1673,6 +1714,8 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT 0824 ; OLetter # Lm 
SAMARITAN MODIFIER LETTER SHORT A 0828 ; OLetter # Lm SAMARITAN MODIFIER LETTER I 0840..0858 ; OLetter # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN +08A0 ; OLetter # Lo ARABIC LETTER BEH WITH SMALL V BELOW +08A2..08AC ; OLetter # Lo [11] ARABIC LETTER JEEM WITH TWO DOTS ABOVE..ARABIC LETTER ROHINGYA YEH 0904..0939 ; OLetter # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA 093D ; OLetter # Lo DEVANAGARI SIGN AVAGRAHA 0950 ; OLetter # Lo DEVANAGARI OM @@ -1780,7 +1823,7 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT 0EBD ; OLetter # Lo LAO SEMIVOWEL SIGN NYO 0EC0..0EC4 ; OLetter # Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI 0EC6 ; OLetter # Lm LAO KO LA -0EDC..0EDD ; OLetter # Lo [2] LAO HO NO..LAO HO MO +0EDC..0EDF ; OLetter # Lo [4] LAO HO NO..LAO LETTER KHMU NYO 0F00 ; OLetter # Lo TIBETAN SYLLABLE OM 0F40..0F47 ; OLetter # Lo [8] TIBETAN LETTER KA..TIBETAN LETTER JA 0F49..0F6C ; OLetter # Lo [36] TIBETAN LETTER NYA..TIBETAN LETTER RRA @@ -1796,7 +1839,7 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT 108E ; OLetter # Lo MYANMAR LETTER RUMAI PALAUNG FA 10D0..10FA ; OLetter # Lo [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN 10FC ; OLetter # Lm MODIFIER LETTER GEORGIAN NAR -1100..1248 ; OLetter # Lo [329] HANGUL CHOSEONG KIYEOK..ETHIOPIC SYLLABLE QWA +10FD..1248 ; OLetter # Lo [332] GEORGIAN LETTER AEN..ETHIOPIC SYLLABLE QWA 124A..124D ; OLetter # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE 1250..1256 ; OLetter # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO 1258 ; OLetter # Lo ETHIOPIC SYLLABLE QHWA @@ -1846,20 +1889,18 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT 1B45..1B4B ; OLetter # Lo [7] BALINESE LETTER KAF SASAK..BALINESE LETTER ASYURA SASAK 1B83..1BA0 ; OLetter # Lo [30] SUNDANESE LETTER A..SUNDANESE LETTER HA 1BAE..1BAF ; OLetter # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA -1BC0..1BE5 ; OLetter # Lo [38] BATAK LETTER 
A..BATAK LETTER U +1BBA..1BE5 ; OLetter # Lo [44] SUNDANESE AVAGRAHA..BATAK LETTER U 1C00..1C23 ; OLetter # Lo [36] LEPCHA LETTER KA..LEPCHA LETTER A 1C4D..1C4F ; OLetter # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA 1C5A..1C77 ; OLetter # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH 1C78..1C7D ; OLetter # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD 1CE9..1CEC ; OLetter # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL 1CEE..1CF1 ; OLetter # Lo [4] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ANUSVARA UBHAYATO MUKHA -2071 ; OLetter # Lm SUPERSCRIPT LATIN SMALL LETTER I -207F ; OLetter # Lm SUPERSCRIPT LATIN SMALL LETTER N -2095..209C ; OLetter # Lm [8] LATIN SUBSCRIPT SMALL LETTER H..LATIN SUBSCRIPT SMALL LETTER T +1CF5..1CF6 ; OLetter # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA 2135..2138 ; OLetter # Lo [4] ALEF SYMBOL..DALET SYMBOL 2180..2182 ; OLetter # Nl [3] ROMAN NUMERAL ONE THOUSAND C D..ROMAN NUMERAL TEN THOUSAND 2185..2188 ; OLetter # Nl [4] ROMAN NUMERAL SIX LATE FORM..ROMAN NUMERAL ONE HUNDRED THOUSAND -2D30..2D65 ; OLetter # Lo [54] TIFINAGH LETTER YA..TIFINAGH LETTER YAZZ +2D30..2D67 ; OLetter # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO 2D6F ; OLetter # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK 2D80..2D96 ; OLetter # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE 2DA0..2DA6 ; OLetter # Lo [7] ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO @@ -1890,7 +1931,7 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT 31A0..31BA ; OLetter # Lo [27] BOPOMOFO LETTER BU..BOPOMOFO LETTER ZY 31F0..31FF ; OLetter # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO 3400..4DB5 ; OLetter # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5 -4E00..9FCB ; OLetter # Lo [20940] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCB +4E00..9FCC ; OLetter # Lo [20941] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCC A000..A014 ; OLetter # Lo [21] YI SYLLABLE 
IT..YI SYLLABLE E A015 ; OLetter # Lm YI SYLLABLE WU A016..A48C ; OLetter # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR @@ -1934,6 +1975,9 @@ AAC0 ; OLetter # Lo TAI VIET TONE MAI NUENG AAC2 ; OLetter # Lo TAI VIET TONE MAI SONG AADB..AADC ; OLetter # Lo [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG AADD ; OLetter # Lm TAI VIET SYMBOL SAM +AAE0..AAEA ; OLetter # Lo [11] MEETEI MAYEK LETTER E..MEETEI MAYEK LETTER SSA +AAF2 ; OLetter # Lo MEETEI MAYEK ANJI +AAF3..AAF4 ; OLetter # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK AB01..AB06 ; OLetter # Lo [6] ETHIOPIC SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO AB09..AB0E ; OLetter # Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO AB11..AB16 ; OLetter # Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO @@ -1943,8 +1987,7 @@ ABC0..ABE2 ; OLetter # Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER AC00..D7A3 ; OLetter # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH D7B0..D7C6 ; OLetter # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E D7CB..D7FB ; OLetter # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH -F900..FA2D ; OLetter # Lo [302] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA2D -FA30..FA6D ; OLetter # Lo [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D +F900..FA6D ; OLetter # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D FA70..FAD9 ; OLetter # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 FB1D ; OLetter # Lo HEBREW LETTER YOD WITH HIRIQ FB1F..FB28 ; OLetter # Lo [10] HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBREW LETTER WIDE TAV @@ -1996,6 +2039,8 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1083F..10855 ; OLetter # Lo [23] CYPRIOT SYLLABLE ZO..IMPERIAL ARAMAIC LETTER TAW 10900..10915 ; OLetter # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU 10920..10939 ; OLetter # Lo [26] LYDIAN LETTER A..LYDIAN LETTER 
C +10980..109B7 ; OLetter # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA +109BE..109BF ; OLetter # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN 10A00 ; OLetter # Lo KHAROSHTHI LETTER A 10A10..10A13 ; OLetter # Lo [4] KHAROSHTHI LETTER KA..KHAROSHTHI LETTER GHA 10A15..10A17 ; OLetter # Lo [3] KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA @@ -2007,17 +2052,58 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 10C00..10C48 ; OLetter # Lo [73] OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH 11003..11037 ; OLetter # Lo [53] BRAHMI SIGN JIHVAMULIYA..BRAHMI LETTER OLD TAMIL NNNA 11083..110AF ; OLetter # Lo [45] KAITHI LETTER A..KAITHI LETTER HA +110D0..110E8 ; OLetter # Lo [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE +11103..11126 ; OLetter # Lo [36] CHAKMA LETTER AA..CHAKMA LETTER HAA +11183..111B2 ; OLetter # Lo [48] SHARADA LETTER A..SHARADA LETTER HA +111C1..111C4 ; OLetter # Lo [4] SHARADA SIGN AVAGRAHA..SHARADA OM +11680..116AA ; OLetter # Lo [43] TAKRI LETTER A..TAKRI LETTER RRA 12000..1236E ; OLetter # Lo [879] CUNEIFORM SIGN A..CUNEIFORM SIGN ZUM 12400..12462 ; OLetter # Nl [99] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN OLD ASSYRIAN ONE QUARTER 13000..1342E ; OLetter # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032 16800..16A38 ; OLetter # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ +16F00..16F44 ; OLetter # Lo [69] MIAO LETTER PA..MIAO LETTER HHA +16F50 ; OLetter # Lo MIAO LETTER NASALIZATION +16F93..16F9F ; OLetter # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 1B000..1B001 ; OLetter # Lo [2] KATAKANA LETTER ARCHAIC E..HIRAGANA LETTER ARCHAIC YE +1EE00..1EE03 ; OLetter # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; OLetter # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; OLetter # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM 
+1EE24 ; OLetter # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; OLetter # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; OLetter # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; OLetter # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; OLetter # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; OLetter # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; OLetter # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; OLetter # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; OLetter # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; OLetter # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; OLetter # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; OLetter # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; OLetter # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; OLetter # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; OLetter # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; OLetter # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; OLetter # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; OLetter # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; OLetter # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; OLetter # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; OLetter # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; OLetter # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; OLetter # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; OLetter # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; OLetter # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; OLetter # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; OLetter # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED 
GHAIN +1EEA1..1EEA3 ; OLetter # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; OLetter # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; OLetter # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN 20000..2A6D6 ; OLetter # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6 2A700..2B734 ; OLetter # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734 2B740..2B81D ; OLetter # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D 2F800..2FA1D ; OLetter # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D -# Total code points: 97369 +# Total code points: 97841 # ================================================ @@ -2058,9 +2144,13 @@ AA50..AA59 ; Numeric # Nd [10] CHAM DIGIT ZERO..CHAM DIGIT NINE ABF0..ABF9 ; Numeric # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE 104A0..104A9 ; Numeric # Nd [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE 11066..1106F ; Numeric # Nd [10] BRAHMI DIGIT ZERO..BRAHMI DIGIT NINE +110F0..110F9 ; Numeric # Nd [10] SORA SOMPENG DIGIT ZERO..SORA SOMPENG DIGIT NINE +11136..1113F ; Numeric # Nd [10] CHAKMA DIGIT ZERO..CHAKMA DIGIT NINE +111D0..111D9 ; Numeric # Nd [10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE +116C0..116C9 ; Numeric # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE 1D7CE..1D7FF ; Numeric # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE -# Total code points: 412 +# Total code points: 452 # ================================================ @@ -2109,6 +2199,7 @@ A8CE..A8CF ; STerm # Po [2] SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA A92F ; STerm # Po KAYAH LI SIGN SHYA A9C8..A9C9 ; STerm # Po [2] JAVANESE PADA LINGSA..JAVANESE PADA LUNGSI AA5D..AA5F ; STerm # Po [3] CHAM PUNCTUATION DANDA..CHAM PUNCTUATION TRIPLE DANDA +AAF0..AAF1 ; STerm # Po [2] MEETEI MAYEK CHEIKHAN..MEETEI MAYEK AHANG KHUDAM ABEB ; 
STerm # Po MEETEI MAYEK CHEIKHEI FE56..FE57 ; STerm # Po [2] SMALL QUESTION MARK..SMALL EXCLAMATION MARK FF01 ; STerm # Po FULLWIDTH EXCLAMATION MARK @@ -2117,8 +2208,10 @@ FF61 ; STerm # Po HALFWIDTH IDEOGRAPHIC FULL STOP 10A56..10A57 ; STerm # Po [2] KHAROSHTHI PUNCTUATION DANDA..KHAROSHTHI PUNCTUATION DOUBLE DANDA 11047..11048 ; STerm # Po [2] BRAHMI DANDA..BRAHMI DOUBLE DANDA 110BE..110C1 ; STerm # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA +11141..11143 ; STerm # Po [3] CHAKMA DANDA..CHAKMA QUESTION MARK +111C5..111C6 ; STerm # Po [2] SHARADA DANDA..SHARADA DOUBLE DANDA -# Total code points: 73 +# Total code points: 80 # ================================================ diff --git a/lib/unicore/auxiliary/WordBreakProperty.txt b/lib/unicore/auxiliary/WordBreakProperty.txt index 4a3b6e4ab3..7f3225c6a8 100644 --- a/lib/unicore/auxiliary/WordBreakProperty.txt +++ b/lib/unicore/auxiliary/WordBreakProperty.txt @@ -1,8 +1,8 @@ -# WordBreakProperty-6.0.0.txt -# Date: 2010-08-19, 00:48:48 GMT [MD] +# WordBreakProperty-6.1.0.txt +# Date: 2011-11-27, 05:10:51 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. 
# For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ @@ -62,6 +62,7 @@ 0825..0827 ; Extend # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U 0829..082D ; Extend # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA 0859..085B ; Extend # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK +08E4..08FE ; Extend # Mn [27] ARABIC CURLY FATHA..ARABIC DAMMA WITH DOT 0900..0902 ; Extend # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA 0903 ; Extend # Mc DEVANAGARI SIGN VISARGA 093A ; Extend # Mn DEVANAGARI VOWEL SIGN OE @@ -204,6 +205,7 @@ 1732..1734 ; Extend # Mn [3] HANUNOO VOWEL SIGN I..HANUNOO SIGN PAMUDPOD 1752..1753 ; Extend # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U 1772..1773 ; Extend # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U +17B4..17B5 ; Extend # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA 17B6 ; Extend # Mc KHMER VOWEL SIGN AA 17B7..17BD ; Extend # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA 17BE..17C5 ; Extend # Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU @@ -255,6 +257,8 @@ 1BA6..1BA7 ; Extend # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG 1BA8..1BA9 ; Extend # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG 1BAA ; Extend # Mc SUNDANESE SIGN PAMAAEH +1BAB ; Extend # Mn SUNDANESE SIGN VIRAMA +1BAC..1BAD ; Extend # Mc [2] SUNDANESE CONSONANT SIGN PASANGAN MA..SUNDANESE CONSONANT SIGN PASANGAN WA 1BE6 ; Extend # Mn BATAK SIGN TOMPI 1BE7 ; Extend # Mc BATAK VOWEL SIGN E 1BE8..1BE9 ; Extend # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE @@ -272,7 +276,8 @@ 1CE1 ; Extend # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA 1CE2..1CE8 ; Extend # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL 1CED ; Extend # Mn VEDIC SIGN TIRYAK -1CF2 ; Extend # Mc VEDIC SIGN ARDHAVISARGA +1CF2..1CF3 ; Extend # Mc [2] VEDIC SIGN ARDHAVISARGA..VEDIC SIGN 
ROTATED ARDHAVISARGA +1CF4 ; Extend # Mn VEDIC TONE CANDRA ABOVE 1DC0..1DE6 ; Extend # Mn [39] COMBINING DOTTED GRAVE ACCENT..COMBINING LATIN SMALL LETTER Z 1DFC..1DFF ; Extend # Mn [4] COMBINING DOUBLE INVERTED BREVE BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW 200C..200D ; Extend # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER @@ -284,11 +289,13 @@ 2CEF..2CF1 ; Extend # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS 2D7F ; Extend # Mn TIFINAGH CONSONANT JOINER 2DE0..2DFF ; Extend # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS -302A..302F ; Extend # Mn [6] IDEOGRAPHIC LEVEL TONE MARK..HANGUL DOUBLE DOT TONE MARK +302A..302D ; Extend # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK +302E..302F ; Extend # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK 3099..309A ; Extend # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK A66F ; Extend # Mn COMBINING CYRILLIC VZMET A670..A672 ; Extend # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN -A67C..A67D ; Extend # Mn [2] COMBINING CYRILLIC KAVYKA..COMBINING CYRILLIC PAYEROK +A674..A67D ; Extend # Mn [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK +A69F ; Extend # Mn COMBINING CYRILLIC LETTER IOTIFIED E A6F0..A6F1 ; Extend # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS A802 ; Extend # Mn SYLOTI NAGRI SIGN DVISVARA A806 ; Extend # Mn SYLOTI NAGRI SIGN HASANTA @@ -325,6 +332,11 @@ AAB2..AAB4 ; Extend # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U AAB7..AAB8 ; Extend # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA AABE..AABF ; Extend # Mn [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK AAC1 ; Extend # Mn TAI VIET TONE MAI THO +AAEB ; Extend # Mc MEETEI MAYEK VOWEL SIGN II +AAEC..AAED ; Extend # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI +AAEE..AAEF ; Extend # Mc [2] MEETEI 
MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU +AAF5 ; Extend # Mc MEETEI MAYEK VOWEL SIGN VISARGA +AAF6 ; Extend # Mn MEETEI MAYEK VIRAMA ABE3..ABE4 ; Extend # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP ABE5 ; Extend # Mn MEETEI MAYEK VOWEL SIGN ANAP ABE6..ABE7 ; Extend # Mc [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP @@ -352,6 +364,24 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 110B3..110B6 ; Extend # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI 110B7..110B8 ; Extend # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU 110B9..110BA ; Extend # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA +11100..11102 ; Extend # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11127..1112B ; Extend # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112C ; Extend # Mc CHAKMA VOWEL SIGN E +1112D..11134 ; Extend # Mn [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA +11180..11181 ; Extend # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +11182 ; Extend # Mc SHARADA SIGN VISARGA +111B3..111B5 ; Extend # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II +111B6..111BE ; Extend # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +111BF..111C0 ; Extend # Mc [2] SHARADA VOWEL SIGN AU..SHARADA SIGN VIRAMA +116AB ; Extend # Mn TAKRI SIGN ANUSVARA +116AC ; Extend # Mc TAKRI SIGN VISARGA +116AD ; Extend # Mn TAKRI VOWEL SIGN AA +116AE..116AF ; Extend # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II +116B0..116B5 ; Extend # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +116B6 ; Extend # Mc TAKRI SIGN VIRAMA +116B7 ; Extend # Mn TAKRI SIGN NUKTA +16F51..16F7E ; Extend # Mc [46] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN NG +16F8F..16F92 ; Extend # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW 1D165..1D166 ; Extend # Mc [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM 1D167..1D169 ; Extend # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 1D16D..1D172 ; 
Extend # Mc [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5 @@ -361,15 +391,14 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 1D242..1D244 ; Extend # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 1502 +# Total code points: 1649 # ================================================ 00AD ; Format # Cf SOFT HYPHEN -0600..0603 ; Format # Cf [4] ARABIC NUMBER SIGN..ARABIC SIGN SAFHA +0600..0604 ; Format # Cf [5] ARABIC NUMBER SIGN..ARABIC SIGN SAMVAT 06DD ; Format # Cf ARABIC END OF AYAH 070F ; Format # Cf SYRIAC ABBREVIATION MARK -17B4..17B5 ; Format # Cf [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA 200E..200F ; Format # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK 202A..202E ; Format # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE 2060..2064 ; Format # Cf [5] WORD JOINER..INVISIBLE PLUS @@ -381,7 +410,7 @@ FFF9..FFFB ; Format # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANN E0001 ; Format # Cf LANGUAGE TAG E0020..E007F ; Format # Cf [96] TAG SPACE..CANCEL TAG -# Total code points: 137 +# Total code points: 136 # ================================================ @@ -405,9 +434,9 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK 0041..005A ; ALetter # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z 0061..007A ; ALetter # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z -00AA ; ALetter # L& FEMININE ORDINAL INDICATOR +00AA ; ALetter # Lo FEMININE ORDINAL INDICATOR 00B5 ; ALetter # L& MICRO SIGN -00BA ; ALetter # L& MASCULINE ORDINAL INDICATOR +00BA ; ALetter # Lo MASCULINE ORDINAL INDICATOR 00C0..00D6 ; ALetter # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS 00D8..00F6 ; ALetter # L& [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS 00F8..01BA ; ALetter # L& 
[195] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL @@ -462,6 +491,8 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK 0824 ; ALetter # Lm SAMARITAN MODIFIER LETTER SHORT A 0828 ; ALetter # Lm SAMARITAN MODIFIER LETTER I 0840..0858 ; ALetter # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN +08A0 ; ALetter # Lo ARABIC LETTER BEH WITH SMALL V BELOW +08A2..08AC ; ALetter # Lo [11] ARABIC LETTER JEEM WITH TWO DOTS ABOVE..ARABIC LETTER ROHINGYA YEH 0904..0939 ; ALetter # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA 093D ; ALetter # Lo DEVANAGARI SIGN AVAGRAHA 0950 ; ALetter # Lo DEVANAGARI OM @@ -554,9 +585,11 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK 0F49..0F6C ; ALetter # Lo [36] TIBETAN LETTER NYA..TIBETAN LETTER RRA 0F88..0F8C ; ALetter # Lo [5] TIBETAN SIGN LCE TSA CAN..TIBETAN SIGN INVERTED MCHU CAN 10A0..10C5 ; ALetter # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; ALetter # L& GEORGIAN CAPITAL LETTER YN +10CD ; ALetter # L& GEORGIAN CAPITAL LETTER AEN 10D0..10FA ; ALetter # Lo [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN 10FC ; ALetter # Lm MODIFIER LETTER GEORGIAN NAR -1100..1248 ; ALetter # Lo [329] HANGUL CHOSEONG KIYEOK..ETHIOPIC SYLLABLE QWA +10FD..1248 ; ALetter # Lo [332] GEORGIAN LETTER AEN..ETHIOPIC SYLLABLE QWA 124A..124D ; ALetter # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE 1250..1256 ; ALetter # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO 1258 ; ALetter # Lo ETHIOPIC SYLLABLE QHWA @@ -597,16 +630,17 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK 1B45..1B4B ; ALetter # Lo [7] BALINESE LETTER KAF SASAK..BALINESE LETTER ASYURA SASAK 1B83..1BA0 ; ALetter # Lo [30] SUNDANESE LETTER A..SUNDANESE LETTER HA 1BAE..1BAF ; ALetter # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA -1BC0..1BE5 ; ALetter # Lo [38] BATAK LETTER A..BATAK LETTER U +1BBA..1BE5 ; ALetter # Lo [44] SUNDANESE 
AVAGRAHA..BATAK LETTER U 1C00..1C23 ; ALetter # Lo [36] LEPCHA LETTER KA..LEPCHA LETTER A 1C4D..1C4F ; ALetter # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA 1C5A..1C77 ; ALetter # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH 1C78..1C7D ; ALetter # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD 1CE9..1CEC ; ALetter # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL 1CEE..1CF1 ; ALetter # Lo [4] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ANUSVARA UBHAYATO MUKHA +1CF5..1CF6 ; ALetter # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA 1D00..1D2B ; ALetter # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL -1D2C..1D61 ; ALetter # Lm [54] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI -1D62..1D77 ; ALetter # L& [22] LATIN SUBSCRIPT SMALL LETTER I..LATIN SMALL LETTER TURNED G +1D2C..1D6A ; ALetter # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1D6B..1D77 ; ALetter # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G 1D78 ; ALetter # Lm MODIFIER LETTER CYRILLIC EN 1D79..1D9A ; ALetter # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK 1D9B..1DBF ; ALetter # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA @@ -653,12 +687,15 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK 24B6..24E9 ; ALetter # So [52] CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN SMALL LETTER Z 2C00..2C2E ; ALetter # L& [47] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE 2C30..2C5E ; ALetter # L& [47] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER LATINATE MYSLITE -2C60..2C7C ; ALetter # L& [29] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN SUBSCRIPT SMALL LETTER J -2C7D ; ALetter # Lm MODIFIER LETTER CAPITAL V +2C60..2C7B ; ALetter # L& [28] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN LETTER SMALL CAPITAL TURNED E +2C7C..2C7D ; ALetter # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER 
LETTER CAPITAL V 2C7E..2CE4 ; ALetter # L& [103] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SYMBOL KAI 2CEB..2CEE ; ALetter # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CF2..2CF3 ; ALetter # L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI 2D00..2D25 ; ALetter # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE -2D30..2D65 ; ALetter # Lo [54] TIFINAGH LETTER YA..TIFINAGH LETTER YAZZ +2D27 ; ALetter # L& GEORGIAN SMALL LETTER YN +2D2D ; ALetter # L& GEORGIAN SMALL LETTER AEN +2D30..2D67 ; ALetter # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO 2D6F ; ALetter # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK 2D80..2D96 ; ALetter # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE 2DA0..2DA6 ; ALetter # Lo [7] ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO @@ -697,8 +734,9 @@ A770 ; ALetter # Lm MODIFIER LETTER US A771..A787 ; ALetter # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T A788 ; ALetter # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A78B..A78E ; ALetter # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT -A790..A791 ; ALetter # L& [2] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER N WITH DESCENDER -A7A0..A7A9 ; ALetter # L& [10] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN SMALL LETTER S WITH OBLIQUE STROKE +A790..A793 ; ALetter # L& [4] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER C WITH BAR +A7A0..A7AA ; ALetter # L& [11] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN CAPITAL LETTER H WITH HOOK +A7F8..A7F9 ; ALetter # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE A7FA ; ALetter # L& LATIN LETTER SMALL CAPITAL TURNED M A7FB..A801 ; ALetter # Lo [7] LATIN EPIGRAPHIC LETTER REVERSED F..SYLOTI NAGRI LETTER I A803..A805 ; ALetter # Lo [3] SYLOTI NAGRI LETTER U..SYLOTI NAGRI LETTER O @@ -716,6 +754,9 @@ A9CF ; ALetter # Lm JAVANESE PANGRANGKEP AA00..AA28 ; 
ALetter # Lo [41] CHAM LETTER A..CHAM LETTER HA AA40..AA42 ; ALetter # Lo [3] CHAM LETTER FINAL K..CHAM LETTER FINAL NG AA44..AA4B ; ALetter # Lo [8] CHAM LETTER FINAL CH..CHAM LETTER FINAL SS +AAE0..AAEA ; ALetter # Lo [11] MEETEI MAYEK LETTER E..MEETEI MAYEK LETTER SSA +AAF2 ; ALetter # Lo MEETEI MAYEK ANJI +AAF3..AAF4 ; ALetter # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK AB01..AB06 ; ALetter # Lo [6] ETHIOPIC SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO AB09..AB0E ; ALetter # Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO AB11..AB16 ; ALetter # Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO @@ -777,6 +818,8 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1083F..10855 ; ALetter # Lo [23] CYPRIOT SYLLABLE ZO..IMPERIAL ARAMAIC LETTER TAW 10900..10915 ; ALetter # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU 10920..10939 ; ALetter # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C +10980..109B7 ; ALetter # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA +109BE..109BF ; ALetter # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN 10A00 ; ALetter # Lo KHAROSHTHI LETTER A 10A10..10A13 ; ALetter # Lo [4] KHAROSHTHI LETTER KA..KHAROSHTHI LETTER GHA 10A15..10A17 ; ALetter # Lo [3] KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA @@ -788,10 +831,18 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 10C00..10C48 ; ALetter # Lo [73] OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH 11003..11037 ; ALetter # Lo [53] BRAHMI SIGN JIHVAMULIYA..BRAHMI LETTER OLD TAMIL NNNA 11083..110AF ; ALetter # Lo [45] KAITHI LETTER A..KAITHI LETTER HA +110D0..110E8 ; ALetter # Lo [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE +11103..11126 ; ALetter # Lo [36] CHAKMA LETTER AA..CHAKMA LETTER HAA +11183..111B2 ; ALetter # Lo [48] SHARADA LETTER A..SHARADA LETTER HA +111C1..111C4 ; ALetter # Lo [4] SHARADA SIGN AVAGRAHA..SHARADA OM +11680..116AA ; ALetter 
# Lo [43] TAKRI LETTER A..TAKRI LETTER RRA 12000..1236E ; ALetter # Lo [879] CUNEIFORM SIGN A..CUNEIFORM SIGN ZUM 12400..12462 ; ALetter # Nl [99] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN OLD ASSYRIAN ONE QUARTER 13000..1342E ; ALetter # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032 16800..16A38 ; ALetter # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ +16F00..16F44 ; ALetter # Lo [69] MIAO LETTER PA..MIAO LETTER HHA +16F50 ; ALetter # Lo MIAO LETTER NASALIZATION +16F93..16F9F ; ALetter # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 1D400..1D454 ; ALetter # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G 1D456..1D49C ; ALetter # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A 1D49E..1D49F ; ALetter # L& [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D @@ -822,8 +873,41 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1D78A..1D7A8 ; ALetter # L& [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA 1D7AA..1D7C2 ; ALetter # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA 1D7C4..1D7CB ; ALetter # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA - -# Total code points: 24453 +1EE00..1EE03 ; ALetter # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; ALetter # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; ALetter # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; ALetter # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; ALetter # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; ALetter # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; ALetter # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; ALetter # Lo ARABIC MATHEMATICAL 
INITIAL DAD +1EE3B ; ALetter # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; ALetter # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; ALetter # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; ALetter # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; ALetter # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; ALetter # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; ALetter # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; ALetter # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; ALetter # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; ALetter # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; ALetter # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; ALetter # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; ALetter # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; ALetter # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; ALetter # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; ALetter # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; ALetter # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; ALetter # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; ALetter # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; ALetter # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; ALetter # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; ALetter # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; ALetter # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; ALetter # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; ALetter # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN + +# Total 
code points: 24941 # ================================================ @@ -909,9 +993,13 @@ AA50..AA59 ; Numeric # Nd [10] CHAM DIGIT ZERO..CHAM DIGIT NINE ABF0..ABF9 ; Numeric # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE 104A0..104A9 ; Numeric # Nd [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE 11066..1106F ; Numeric # Nd [10] BRAHMI DIGIT ZERO..BRAHMI DIGIT NINE +110F0..110F9 ; Numeric # Nd [10] SORA SOMPENG DIGIT ZERO..SORA SOMPENG DIGIT NINE +11136..1113F ; Numeric # Nd [10] CHAKMA DIGIT ZERO..CHAKMA DIGIT NINE +111D0..111D9 ; Numeric # Nd [10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE +116C0..116C9 ; Numeric # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE 1D7CE..1D7FF ; Numeric # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE -# Total code points: 411 +# Total code points: 451 # ================================================ diff --git a/lib/unicore/extracted/DBidiClass.txt b/lib/unicore/extracted/DBidiClass.txt index 8720776158..270a87e847 100644 --- a/lib/unicore/extracted/DBidiClass.txt +++ b/lib/unicore/extracted/DBidiClass.txt @@ -1,8 +1,8 @@ -# DerivedBidiClass-6.0.0.txt -# Date: 2010-08-19, 00:48:03 GMT [MD] +# DerivedBidiClass-6.1.0.txt +# Date: 2011-12-11, 18:26:53 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ @@ -13,32 +13,39 @@ # reserved for right-to-left scripts are given either types R or AL. 
# # The unassigned code points that default to AL are in the ranges: -# [\u0600-\u07BF \uFB50-\uFDFF \uFE70-\uFEFF] +# [\u0600-\u07BF \u08A0-\u08FF \uFB50-\uFDCF \uFDF0-\uFDFF \uFE70-\uFEFF \U0001EE00-\U0001EEFF] # # Arabic: U+0600 - U+06FF # Syriac: U+0700 - U+074F # Arabic_Supplement: U+0750 - U+077F # Thaana: U+0780 - U+07BF +# Arabic Extended-A: U+08A0 - U+08FF # Arabic_Presentation_Forms_A: -# U+FB50 - U+FDFF +# U+FB50 - U+FDCF +# U+FDF0 - U+FDFF # Arabic_Presentation_Forms_B: # U+FE70 - U+FEFF -# minus noncharacter code points. +# Arabic Mathematical Alphabetic Symbols: +# U+1EE00 - U+1EEFF # # The unassigned code points that default to R are in the ranges: -# [\u0590-\u05FF \u07C0-\u08FF \uFB1D-\uFB4F \U00010800-\U00010FFF \U0001E800-\U0001EFFF] +# [\u0590-\u05FF \u07C0-\u089F \uFB1D-\uFB4F \U00010800-\U00010FFF \U0001E800-\U0001EDFF \U0001EF00-\U0001EFFF] # # Hebrew: U+0590 - U+05FF # NKo: U+07C0 - U+07FF # Cypriot_Syllabary: U+10800 - U+1083F # Phoenician: U+10900 - U+1091F # Lydian: U+10920 - U+1093F +# Meroitic Hieroglyphs: +# U+10980 - U+1099F +# Meroitic Cursive: U+109A0 - U+109FF # Kharoshthi: U+10A00 - U+10A5F # and any others in the ranges: -# U+0800 - U+08FF, +# U+0800 - U+089F, # U+FB1D - U+FB4F, # U+10840 - U+10FFF, -# U+1E800 - U+1EFFF +# U+1E800 - U+1EDFF, +# U+1EF00 - U+1EFFF # # For all other cases: @@ -53,9 +60,9 @@ 0041..005A ; L # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z 0061..007A ; L # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z -00AA ; L # L& FEMININE ORDINAL INDICATOR +00AA ; L # Lo FEMININE ORDINAL INDICATOR 00B5 ; L # L& MICRO SIGN -00BA ; L # L& MASCULINE ORDINAL INDICATOR +00BA ; L # Lo MASCULINE ORDINAL INDICATOR 00C0..00D6 ; L # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS 00D8..00F6 ; L # L& [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS 00F8..01BA ; L # L& [195] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL @@ -148,6 
+155,7 @@ 0AD0 ; L # Lo GUJARATI OM 0AE0..0AE1 ; L # Lo [2] GUJARATI LETTER VOCALIC RR..GUJARATI LETTER VOCALIC LL 0AE6..0AEF ; L # Nd [10] GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE +0AF0 ; L # Po GUJARATI ABBREVIATION SIGN 0B02..0B03 ; L # Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA 0B05..0B0C ; L # Lo [8] ORIYA LETTER A..ORIYA LETTER VOCALIC L 0B0F..0B10 ; L # Lo [2] ORIYA LETTER E..ORIYA LETTER AI @@ -264,11 +272,13 @@ 0EC0..0EC4 ; L # Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI 0EC6 ; L # Lm LAO KO LA 0ED0..0ED9 ; L # Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE -0EDC..0EDD ; L # Lo [2] LAO HO NO..LAO HO MO +0EDC..0EDF ; L # Lo [4] LAO HO NO..LAO LETTER KHMU NYO 0F00 ; L # Lo TIBETAN SYLLABLE OM 0F01..0F03 ; L # So [3] TIBETAN MARK GTER YIG MGO TRUNCATED A..TIBETAN MARK GTER YIG MGO -UM GTER TSHEG MA 0F04..0F12 ; L # Po [15] TIBETAN MARK INITIAL YIG MGO MDUN MA..TIBETAN MARK RGYA GRAM SHAD -0F13..0F17 ; L # So [5] TIBETAN MARK CARET -DZUD RTAGS ME LONG CAN..TIBETAN ASTROLOGICAL SIGN SGRA GCAN -CHAR RTAGS +0F13 ; L # So TIBETAN MARK CARET -DZUD RTAGS ME LONG CAN +0F14 ; L # Po TIBETAN MARK GTER TSHEG +0F15..0F17 ; L # So [3] TIBETAN LOGOTYPE SIGN CHAD RTAGS..TIBETAN ASTROLOGICAL SIGN SGRA GCAN -CHAR RTAGS 0F1A..0F1F ; L # So [6] TIBETAN SIGN RDEL DKAR GCIG..TIBETAN SIGN RDEL DKAR RDEL NAG 0F20..0F29 ; L # Nd [10] TIBETAN DIGIT ZERO..TIBETAN DIGIT NINE 0F2A..0F33 ; L # No [10] TIBETAN DIGIT HALF ONE..TIBETAN DIGIT HALF ZERO @@ -312,10 +322,12 @@ 109A..109C ; L # Mc [3] MYANMAR SIGN KHAMTI TONE-1..MYANMAR VOWEL SIGN AITON A 109E..109F ; L # So [2] MYANMAR SYMBOL SHAN ONE..MYANMAR SYMBOL SHAN EXCLAMATION 10A0..10C5 ; L # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; L # L& GEORGIAN CAPITAL LETTER YN +10CD ; L # L& GEORGIAN CAPITAL LETTER AEN 10D0..10FA ; L # Lo [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN 10FB ; L # Po GEORGIAN PARAGRAPH SEPARATOR 10FC ; L # Lm MODIFIER LETTER GEORGIAN NAR -1100..1248 ; L # Lo [329] HANGUL CHOSEONG 
KIYEOK..ETHIOPIC SYLLABLE QWA +10FD..1248 ; L # Lo [332] GEORGIAN LETTER AEN..ETHIOPIC SYLLABLE QWA 124A..124D ; L # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE 1250..1256 ; L # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO 1258 ; L # Lo ETHIOPIC SYLLABLE QHWA @@ -331,8 +343,7 @@ 12D8..1310 ; L # Lo [57] ETHIOPIC SYLLABLE ZA..ETHIOPIC SYLLABLE GWA 1312..1315 ; L # Lo [4] ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE 1318..135A ; L # Lo [67] ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA -1360 ; L # So ETHIOPIC SECTION MARK -1361..1368 ; L # Po [8] ETHIOPIC WORDSPACE..ETHIOPIC PARAGRAPH SEPARATOR +1360..1368 ; L # Po [9] ETHIOPIC SECTION MARK..ETHIOPIC PARAGRAPH SEPARATOR 1369..137C ; L # No [20] ETHIOPIC DIGIT ONE..ETHIOPIC NUMBER TEN THOUSAND 1380..138F ; L # Lo [16] ETHIOPIC SYLLABLE SEBATBEIT MWA..ETHIOPIC SYLLABLE PWE 13A0..13F4 ; L # Lo [85] CHEROKEE LETTER A..CHEROKEE LETTER YV @@ -351,7 +362,6 @@ 1760..176C ; L # Lo [13] TAGBANWA LETTER A..TAGBANWA LETTER YA 176E..1770 ; L # Lo [3] TAGBANWA LETTER LA..TAGBANWA LETTER SA 1780..17B3 ; L # Lo [52] KHMER LETTER KA..KHMER INDEPENDENT VOWEL QAU -17B4..17B5 ; L # Cf [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA 17B6 ; L # Mc KHMER VOWEL SIGN AA 17BE..17C5 ; L # Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU 17C7..17C8 ; L # Mc [2] KHMER SIGN REAHMUK..KHMER SIGN YUUKALEAPINTU @@ -411,9 +421,10 @@ 1BA1 ; L # Mc SUNDANESE CONSONANT SIGN PAMINGKAL 1BA6..1BA7 ; L # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG 1BAA ; L # Mc SUNDANESE SIGN PAMAAEH +1BAC..1BAD ; L # Mc [2] SUNDANESE CONSONANT SIGN PASANGAN MA..SUNDANESE CONSONANT SIGN PASANGAN WA 1BAE..1BAF ; L # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA 1BB0..1BB9 ; L # Nd [10] SUNDANESE DIGIT ZERO..SUNDANESE DIGIT NINE -1BC0..1BE5 ; L # Lo [38] BATAK LETTER A..BATAK LETTER U +1BBA..1BE5 ; L # Lo [44] SUNDANESE AVAGRAHA..BATAK LETTER U 1BE7 ; L # Mc BATAK VOWEL SIGN E 1BEA..1BEC ; L # Mc [3] BATAK VOWEL SIGN I..BATAK 
VOWEL SIGN O 1BEE ; L # Mc BATAK VOWEL SIGN U @@ -429,14 +440,16 @@ 1C5A..1C77 ; L # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH 1C78..1C7D ; L # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD 1C7E..1C7F ; L # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD +1CC0..1CC7 ; L # Po [8] SUNDANESE PUNCTUATION BINDU SURYA..SUNDANESE PUNCTUATION BINDU BA SATANGA 1CD3 ; L # Po VEDIC SIGN NIHSHVASA 1CE1 ; L # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA 1CE9..1CEC ; L # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL 1CEE..1CF1 ; L # Lo [4] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ANUSVARA UBHAYATO MUKHA -1CF2 ; L # Mc VEDIC SIGN ARDHAVISARGA +1CF2..1CF3 ; L # Mc [2] VEDIC SIGN ARDHAVISARGA..VEDIC SIGN ROTATED ARDHAVISARGA +1CF5..1CF6 ; L # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA 1D00..1D2B ; L # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL -1D2C..1D61 ; L # Lm [54] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI -1D62..1D77 ; L # L& [22] LATIN SUBSCRIPT SMALL LETTER I..LATIN SMALL LETTER TURNED G +1D2C..1D6A ; L # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1D6B..1D77 ; L # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G 1D78 ; L # Lm MODIFIER LETTER CYRILLIC EN 1D79..1D9A ; L # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK 1D9B..1DBF ; L # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA @@ -489,12 +502,15 @@ 2800..28FF ; L # So [256] BRAILLE PATTERN BLANK..BRAILLE PATTERN DOTS-12345678 2C00..2C2E ; L # L& [47] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE 2C30..2C5E ; L # L& [47] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER LATINATE MYSLITE -2C60..2C7C ; L # L& [29] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN SUBSCRIPT SMALL LETTER J -2C7D ; L # Lm MODIFIER LETTER CAPITAL V +2C60..2C7B ; L # L& [28] LATIN CAPITAL LETTER L WITH DOUBLE 
BAR..LATIN LETTER SMALL CAPITAL TURNED E +2C7C..2C7D ; L # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V 2C7E..2CE4 ; L # L& [103] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SYMBOL KAI 2CEB..2CEE ; L # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CF2..2CF3 ; L # L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI 2D00..2D25 ; L # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE -2D30..2D65 ; L # Lo [54] TIFINAGH LETTER YA..TIFINAGH LETTER YAZZ +2D27 ; L # L& GEORGIAN SMALL LETTER YN +2D2D ; L # L& GEORGIAN SMALL LETTER AEN +2D30..2D67 ; L # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO 2D6F ; L # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK 2D70 ; L # Po TIFINAGH SEPARATOR MARK 2D80..2D96 ; L # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE @@ -510,6 +526,7 @@ 3006 ; L # Lo IDEOGRAPHIC CLOSING MARK 3007 ; L # Nl IDEOGRAPHIC NUMBER ZERO 3021..3029 ; L # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE +302E..302F ; L # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK 3031..3035 ; L # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF 3038..303A ; L # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY 303B ; L # Lm VERTICAL IDEOGRAPHIC ITERATION MARK @@ -529,7 +546,8 @@ 31F0..31FF ; L # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO 3200..321C ; L # So [29] PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED HANGUL CIEUC U 3220..3229 ; L # No [10] PARENTHESIZED IDEOGRAPH ONE..PARENTHESIZED IDEOGRAPH TEN -322A..324F ; L # So [38] PARENTHESIZED IDEOGRAPH MOON..CIRCLED NUMBER EIGHTY ON BLACK SQUARE +322A..3247 ; L # So [30] PARENTHESIZED IDEOGRAPH MOON..CIRCLED IDEOGRAPH KOTO +3248..324F ; L # No [8] CIRCLED NUMBER TEN ON BLACK SQUARE..CIRCLED NUMBER EIGHTY ON BLACK SQUARE 3260..327B ; L # So [28] CIRCLED HANGUL KIYEOK..CIRCLED HANGUL HIEUH A 327F ; L # So KOREAN STANDARD SYMBOL 3280..3289 ; L # No [10] CIRCLED 
IDEOGRAPH ONE..CIRCLED IDEOGRAPH TEN @@ -540,7 +558,7 @@ 337B..33DD ; L # So [99] SQUARE ERA NAME HEISEI..SQUARE WB 33E0..33FE ; L # So [31] IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY ONE..IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY THIRTY-ONE 3400..4DB5 ; L # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5 -4E00..9FCB ; L # Lo [20940] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCB +4E00..9FCC ; L # Lo [20941] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCC A000..A014 ; L # Lo [21] YI SYLLABLE IT..YI SYLLABLE E A015 ; L # Lm YI SYLLABLE WU A016..A48C ; L # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR @@ -563,8 +581,9 @@ A770 ; L # Lm MODIFIER LETTER US A771..A787 ; L # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T A789..A78A ; L # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN A78B..A78E ; L # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT -A790..A791 ; L # L& [2] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER N WITH DESCENDER -A7A0..A7A9 ; L # L& [10] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN SMALL LETTER S WITH OBLIQUE STROKE +A790..A793 ; L # L& [4] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER C WITH BAR +A7A0..A7AA ; L # L& [11] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN CAPITAL LETTER H WITH HOOK +A7F8..A7F9 ; L # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE A7FA ; L # L& LATIN LETTER SMALL CAPITAL TURNED M A7FB..A801 ; L # Lo [7] LATIN EPIGRAPHIC LETTER REVERSED F..SYLOTI NAGRI LETTER I A803..A805 ; L # Lo [3] SYLOTI NAGRI LETTER U..SYLOTI NAGRI LETTER O @@ -622,6 +641,13 @@ AAC2 ; L # Lo TAI VIET TONE MAI SONG AADB..AADC ; L # Lo [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG AADD ; L # Lm TAI VIET SYMBOL SAM AADE..AADF ; L # Po [2] TAI VIET SYMBOL HO HOI..TAI VIET SYMBOL KOI KOI +AAE0..AAEA ; L # Lo [11] MEETEI MAYEK LETTER E..MEETEI MAYEK LETTER SSA +AAEB ; L # Mc MEETEI MAYEK VOWEL SIGN II 
+AAEE..AAEF ; L # Mc [2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU +AAF0..AAF1 ; L # Po [2] MEETEI MAYEK CHEIKHAN..MEETEI MAYEK AHANG KHUDAM +AAF2 ; L # Lo MEETEI MAYEK ANJI +AAF3..AAF4 ; L # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK +AAF5 ; L # Mc MEETEI MAYEK VOWEL SIGN VISARGA AB01..AB06 ; L # Lo [6] ETHIOPIC SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO AB09..AB0E ; L # Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO AB11..AB16 ; L # Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO @@ -638,8 +664,7 @@ AC00..D7A3 ; L # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH D7B0..D7C6 ; L # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E D7CB..D7FB ; L # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH E000..F8FF ; L # Co [6400] <private-use-E000>..<private-use-F8FF> -F900..FA2D ; L # Lo [302] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA2D -FA30..FA6D ; L # Lo [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D +F900..FA6D ; L # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D FA70..FAD9 ; L # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 FB00..FB06 ; L # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST FB13..FB17 ; L # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH @@ -662,7 +687,7 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 10050..1005D ; L # Lo [14] LINEAR B SYMBOL B018..LINEAR B SYMBOL B089 10080..100FA ; L # Lo [123] LINEAR B IDEOGRAM B100 MAN..LINEAR B IDEOGRAM VESSEL B305 10100 ; L # Po AEGEAN WORD SEPARATOR LINE -10102 ; L # So AEGEAN CHECK MARK +10102 ; L # Po AEGEAN CHECK MARK 10107..10133 ; L # No [45] AEGEAN NUMBER ONE..AEGEAN NUMBER NINETY THOUSAND 10137..1013F ; L # So [9] AEGEAN WEIGHT BASE UNIT..AEGEAN MEASURE THIRD SUBUNIT 101D0..101FC ; L # So [45] PHAISTOS DISC SIGN PEDESTRIAN..PHAISTOS DISC SIGN WAVY 
BAND @@ -695,11 +720,33 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 110BB..110BC ; L # Po [2] KAITHI ABBREVIATION SIGN..KAITHI ENUMERATION SIGN 110BD ; L # Cf KAITHI NUMBER SIGN 110BE..110C1 ; L # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA +110D0..110E8 ; L # Lo [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE +110F0..110F9 ; L # Nd [10] SORA SOMPENG DIGIT ZERO..SORA SOMPENG DIGIT NINE +11103..11126 ; L # Lo [36] CHAKMA LETTER AA..CHAKMA LETTER HAA +1112C ; L # Mc CHAKMA VOWEL SIGN E +11136..1113F ; L # Nd [10] CHAKMA DIGIT ZERO..CHAKMA DIGIT NINE +11140..11143 ; L # Po [4] CHAKMA SECTION MARK..CHAKMA QUESTION MARK +11182 ; L # Mc SHARADA SIGN VISARGA +11183..111B2 ; L # Lo [48] SHARADA LETTER A..SHARADA LETTER HA +111B3..111B5 ; L # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II +111BF..111C0 ; L # Mc [2] SHARADA VOWEL SIGN AU..SHARADA SIGN VIRAMA +111C1..111C4 ; L # Lo [4] SHARADA SIGN AVAGRAHA..SHARADA OM +111C5..111C8 ; L # Po [4] SHARADA DANDA..SHARADA SEPARATOR +111D0..111D9 ; L # Nd [10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE +11680..116AA ; L # Lo [43] TAKRI LETTER A..TAKRI LETTER RRA +116AC ; L # Mc TAKRI SIGN VISARGA +116AE..116AF ; L # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II +116B6 ; L # Mc TAKRI SIGN VIRAMA +116C0..116C9 ; L # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE 12000..1236E ; L # Lo [879] CUNEIFORM SIGN A..CUNEIFORM SIGN ZUM 12400..12462 ; L # Nl [99] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN OLD ASSYRIAN ONE QUARTER 12470..12473 ; L # Po [4] CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER..CUNEIFORM PUNCTUATION SIGN DIAGONAL TRICOLON 13000..1342E ; L # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032 16800..16A38 ; L # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ +16F00..16F44 ; L # Lo [69] MIAO LETTER PA..MIAO LETTER HHA +16F50 ; L # Lo MIAO LETTER NASALIZATION +16F51..16F7E ; L # Mc [46] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN NG 
+16F93..16F9F ; L # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 1B000..1B001 ; L # Lo [2] KATAKANA LETTER ARCHAIC E..HIRAGANA LETTER ARCHAIC YE 1D000..1D0F5 ; L # So [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO 1D100..1D126 ; L # So [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2 @@ -753,8 +800,6 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 1F210..1F23A ; L # So [43] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-55B6 1F240..1F248 ; L # So [9] TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557 1F250..1F251 ; L # So [2] CIRCLED IDEOGRAPH ADVANTAGE..CIRCLED IDEOGRAPH ACCEPT -1F48C ; L # So LOVE LETTER -1F524 ; L # So INPUT SYMBOL FOR LATIN LETTERS 20000..2A6D6 ; L # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6 2A700..2B734 ; L # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734 2B740..2B81D ; L # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D @@ -762,8 +807,8 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER F0000..FFFFD ; L # Co [65534] <private-use-F0000>..<private-use-FFFFD> 100000..10FFFD; L # Co [65534] <private-use-100000>..<private-use-10FFFD> -# The above property value applies to 859451 code points not listed here. -# Total code points: 1098619 +# The above property value applies to 858960 code points not listed here. 
+# Total code points: 1098531 # ================================================ @@ -795,7 +840,7 @@ F0000..FFFFD ; L # Co [65534] <private-use-F0000>..<private-use-FFFFD> 0840..0858 ; R # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN 085C..085D ; R # Cn [2] <reserved-085C>..<reserved-085D> 085E ; R # Po MANDAIC PUNCTUATION -085F..08FF ; R # Cn [161] <reserved-085F>..<reserved-08FF> +085F..089F ; R # Cn [65] <reserved-085F>..<reserved-089F> 200F ; R # Cf RIGHT-TO-LEFT MARK FB1D ; R # Lo HEBREW LETTER YOD WITH HIRIQ FB1F..FB28 ; R # Lo [10] HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBREW LETTER WIDE TAV @@ -831,7 +876,11 @@ FB46..FB4F ; R # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATURE AL 10920..10939 ; R # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C 1093A..1093E ; R # Cn [5] <reserved-1093A>..<reserved-1093E> 1093F ; R # Po LYDIAN TRIANGULAR MARK -10940..109FF ; R # Cn [192] <reserved-10940>..<reserved-109FF> +10940..1097F ; R # Cn [64] <reserved-10940>..<reserved-1097F> +10980..109B7 ; R # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA +109B8..109BD ; R # Cn [6] <reserved-109B8>..<reserved-109BD> +109BE..109BF ; R # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN +109C0..109FF ; R # Cn [64] <reserved-109C0>..<reserved-109FF> 10A00 ; R # Lo KHAROSHTHI LETTER A 10A04 ; R # Cn <reserved-10A04> 10A07..10A0B ; R # Cn [5] <reserved-10A07>..<reserved-10A0B> @@ -862,9 +911,10 @@ FB46..FB4F ; R # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATURE AL 10C00..10C48 ; R # Lo [73] OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH 10C49..10E5F ; R # Cn [535] <reserved-10C49>..<reserved-10E5F> 10E7F..10FFF ; R # Cn [385] <reserved-10E7F>..<reserved-10FFF> -1E800..1EFFF ; R # Cn [2048] <reserved-1E800>..<reserved-1EFFF> +1E800..1EDFF ; R # Cn [1536] <reserved-1E800>..<reserved-1EDFF> +1EF00..1EFFF ; R # Cn [256] <reserved-1EF00>..<reserved-1EFFF> -# Total code points: 4438 +# Total code points: 4086 # 
================================================ @@ -911,6 +961,7 @@ FF0D ; ES # Pd FULLWIDTH HYPHEN-MINUS 00A2..00A5 ; ET # Sc [4] CENT SIGN..YEN SIGN 00B0 ; ET # So DEGREE SIGN 00B1 ; ET # Sm PLUS-MINUS SIGN +058F ; ET # Sc ARMENIAN DRAM SIGN 0609..060A ; ET # Po [2] ARABIC-INDIC PER MILLE SIGN..ARABIC-INDIC PER TEN THOUSAND SIGN 066A ; ET # Po ARABIC PERCENT SIGN 09F2..09F3 ; ET # Sc [2] BENGALI RUPEE MARK..BENGALI RUPEE SIGN @@ -934,17 +985,16 @@ FF05 ; ET # Po FULLWIDTH PERCENT SIGN FFE0..FFE1 ; ET # Sc [2] FULLWIDTH CENT SIGN..FULLWIDTH POUND SIGN FFE5..FFE6 ; ET # Sc [2] FULLWIDTH YEN SIGN..FULLWIDTH WON SIGN -# Total code points: 64 +# Total code points: 65 # ================================================ # Bidi_Class=Arabic_Number -0600..0603 ; AN # Cf [4] ARABIC NUMBER SIGN..ARABIC SIGN SAFHA +0600..0604 ; AN # Cf [5] ARABIC NUMBER SIGN..ARABIC SIGN SAMVAT 0660..0669 ; AN # Nd [10] ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE 066B..066C ; AN # Po [2] ARABIC DECIMAL SEPARATOR..ARABIC THOUSANDS SEPARATOR 06DD ; AN # Cf ARABIC END OF AYAH -070F ; AN # Cf SYRIAC ABBREVIATION MARK 10E60..10E7E ; AN # No [31] RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS # Total code points: 49 @@ -1029,7 +1079,8 @@ FF1A ; CS # Po FULLWIDTH COLON 007D ; ON # Pe RIGHT CURLY BRACKET 007E ; ON # Sm TILDE 00A1 ; ON # Po INVERTED EXCLAMATION MARK -00A6..00A7 ; ON # So [2] BROKEN BAR..SECTION SIGN +00A6 ; ON # So BROKEN BAR +00A7 ; ON # Po SECTION SIGN 00A8 ; ON # Sk DIAERESIS 00A9 ; ON # So COPYRIGHT SIGN 00AB ; ON # Pi LEFT-POINTING DOUBLE ANGLE QUOTATION MARK @@ -1037,8 +1088,7 @@ FF1A ; CS # Po FULLWIDTH COLON 00AE ; ON # So REGISTERED SIGN 00AF ; ON # Sk MACRON 00B4 ; ON # Sk ACUTE ACCENT -00B6 ; ON # So PILCROW SIGN -00B7 ; ON # Po MIDDLE DOT +00B6..00B7 ; ON # Po [2] PILCROW SIGN..MIDDLE DOT 00B8 ; ON # Sk CEDILLA 00BB ; ON # Pf RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK 00BC..00BE ; ON # No [3] VULGAR FRACTION ONE QUARTER..VULGAR FRACTION THREE QUARTERS @@ -1206,9 +1256,7 @@ 
FF1A ; CS # Po FULLWIDTH COLON 27C0..27C4 ; ON # Sm [5] THREE DIMENSIONAL ANGLE..OPEN SUPERSET 27C5 ; ON # Ps LEFT S-SHAPED BAG DELIMITER 27C6 ; ON # Pe RIGHT S-SHAPED BAG DELIMITER -27C7..27CA ; ON # Sm [4] OR WITH DOT INSIDE..VERTICAL BAR WITH HORIZONTAL STROKE -27CC ; ON # Sm LONG DIVISION -27CE..27E5 ; ON # Sm [24] SQUARED LOGICAL AND..WHITE SQUARE WITH RIGHTWARDS TICK +27C7..27E5 ; ON # Sm [31] OR WITH DOT INSIDE..WHITE SQUARE WITH RIGHTWARDS TICK 27E6 ; ON # Ps MATHEMATICAL LEFT WHITE SQUARE BRACKET 27E7 ; ON # Pe MATHEMATICAL RIGHT WHITE SQUARE BRACKET 27E8 ; ON # Ps MATHEMATICAL LEFT ANGLE BRACKET @@ -1292,7 +1340,8 @@ FF1A ; CS # Po FULLWIDTH COLON 2E29 ; ON # Pe RIGHT DOUBLE PARENTHESIS 2E2A..2E2E ; ON # Po [5] TWO DOTS OVER ONE DOT PUNCTUATION..REVERSED QUESTION MARK 2E2F ; ON # Lm VERTICAL TILDE -2E30..2E31 ; ON # Po [2] RING POINT..WORD SEPARATOR MIDDLE DOT +2E30..2E39 ; ON # Po [10] RING POINT..TOP HALF SECTION SIGN +2E3A..2E3B ; ON # Pd [2] TWO-EM DASH..THREE-EM DASH 2E80..2E99 ; ON # So [26] CJK RADICAL REPEAT..CJK RADICAL RAP 2E9B..2EF3 ; ON # So [89] CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED TURTLE 2F00..2FD5 ; ON # So [214] KANGXI RADICAL ONE..KANGXI RADICAL FLUTE @@ -1445,12 +1494,14 @@ FFFC..FFFD ; ON # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTE 1D74F ; ON # Sm MATHEMATICAL BOLD ITALIC PARTIAL DIFFERENTIAL 1D789 ; ON # Sm MATHEMATICAL SANS-SERIF BOLD PARTIAL DIFFERENTIAL 1D7C3 ; ON # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL +1EEF0..1EEF1 ; ON # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL 1F000..1F02B ; ON # So [44] MAHJONG TILE EAST WIND..MAHJONG TILE BACK 1F030..1F093 ; ON # So [100] DOMINO TILE HORIZONTAL BACK..DOMINO TILE VERTICAL-06-06 1F0A0..1F0AE ; ON # So [15] PLAYING CARD BACK..PLAYING CARD KING OF SPADES 1F0B1..1F0BE ; ON # So [14] PLAYING CARD ACE OF HEARTS..PLAYING CARD KING OF HEARTS 1F0C1..1F0CF ; ON # So [15] PLAYING CARD ACE OF 
DIAMONDS..PLAYING CARD BLACK JOKER 1F0D1..1F0DF ; ON # So [15] PLAYING CARD ACE OF CLUBS..PLAYING CARD WHITE JOKER +1F16A..1F16B ; ON # So [2] RAISED MC SIGN..RAISED MD SIGN 1F300..1F320 ; ON # So [33] CYCLONE..SHOOTING STAR 1F330..1F335 ; ON # So [6] CHESTNUT..CACTUS 1F337..1F37C ; ON # So [70] TULIP..BABY BOTTLE @@ -1460,29 +1511,17 @@ FFFC..FFFD ; ON # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTE 1F3E0..1F3F0 ; ON # So [17] HOUSE BUILDING..EUROPEAN CASTLE 1F400..1F43E ; ON # So [63] RAT..PAW PRINTS 1F440 ; ON # So EYES -1F442..1F48B ; ON # So [74] EAR..KISS MARK -1F48D..1F4F7 ; ON # So [107] RING..CAMERA +1F442..1F4F7 ; ON # So [182] EAR..CAMERA 1F4F9..1F4FC ; ON # So [4] VIDEO CAMERA..VIDEOCASSETTE -1F500..1F523 ; ON # So [36] TWISTED RIGHTWARDS ARROWS..INPUT SYMBOL FOR SYMBOLS -1F525..1F53D ; ON # So [25] FIRE..DOWN-POINTING SMALL RED TRIANGLE +1F500..1F53D ; ON # So [62] TWISTED RIGHTWARDS ARROWS..DOWN-POINTING SMALL RED TRIANGLE +1F540..1F543 ; ON # So [4] CIRCLED CROSS POMMEE..NOTCHED LEFT SEMICIRCLE WITH THREE DOTS 1F550..1F567 ; ON # So [24] CLOCK FACE ONE OCLOCK..CLOCK FACE TWELVE-THIRTY -1F5FB..1F5FF ; ON # So [5] MOUNT FUJI..MOYAI -1F601..1F610 ; ON # So [16] GRINNING FACE WITH SMILING EYES..NEUTRAL FACE -1F612..1F614 ; ON # So [3] UNAMUSED FACE..PENSIVE FACE -1F616 ; ON # So CONFOUNDED FACE -1F618 ; ON # So FACE THROWING A KISS -1F61A ; ON # So KISSING FACE WITH CLOSED EYES -1F61C..1F61E ; ON # So [3] FACE WITH STUCK-OUT TONGUE AND WINKING EYE..DISAPPOINTED FACE -1F620..1F625 ; ON # So [6] ANGRY FACE..DISAPPOINTED BUT RELIEVED FACE -1F628..1F62B ; ON # So [4] FEARFUL FACE..TIRED FACE -1F62D ; ON # So LOUDLY CRYING FACE -1F630..1F633 ; ON # So [4] FACE WITH OPEN MOUTH AND COLD SWEAT..FLUSHED FACE -1F635..1F640 ; ON # So [12] DIZZY FACE..WEARY CAT FACE +1F5FB..1F640 ; ON # So [70] MOUNT FUJI..WEARY CAT FACE 1F645..1F64F ; ON # So [11] FACE WITH NO GOOD GESTURE..PERSON WITH FOLDED HANDS 1F680..1F6C5 ; ON # So [70] ROCKET..LEFT LUGGAGE 
1F700..1F773 ; ON # So [116] ALCHEMICAL SYMBOL FOR QUINTESSENCE..ALCHEMICAL SYMBOL FOR HALF OUNCE -# Total code points: 4412 +# Total code points: 4447 # ================================================ @@ -1554,6 +1593,7 @@ FFFFE..FFFFF ; BN # Cn [2] <noncharacter-FFFFE>..<noncharacter-FFFFF> 0825..0827 ; NSM # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U 0829..082D ; NSM # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA 0859..085B ; NSM # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK +08E4..08FE ; NSM # Mn [27] ARABIC CURLY FATHA..ARABIC DAMMA WITH DOT 0900..0902 ; NSM # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA 093A ; NSM # Mn DEVANAGARI VOWEL SIGN OE 093C ; NSM # Mn DEVANAGARI SIGN NUKTA @@ -1637,6 +1677,7 @@ FFFFE..FFFFF ; BN # Cn [2] <noncharacter-FFFFE>..<noncharacter-FFFFF> 1732..1734 ; NSM # Mn [3] HANUNOO VOWEL SIGN I..HANUNOO SIGN PAMUDPOD 1752..1753 ; NSM # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U 1772..1773 ; NSM # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U +17B4..17B5 ; NSM # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA 17B7..17BD ; NSM # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA 17C6 ; NSM # Mn KHMER SIGN NIKAHIT 17C9..17D3 ; NSM # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT @@ -1664,6 +1705,7 @@ FFFFE..FFFFF ; BN # Cn [2] <noncharacter-FFFFE>..<noncharacter-FFFFF> 1B80..1B81 ; NSM # Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR 1BA2..1BA5 ; NSM # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU 1BA8..1BA9 ; NSM # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG +1BAB ; NSM # Mn SUNDANESE SIGN VIRAMA 1BE6 ; NSM # Mn BATAK SIGN TOMPI 1BE8..1BE9 ; NSM # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE 1BED ; NSM # Mn BATAK VOWEL SIGN KARO O @@ -1674,6 +1716,7 @@ FFFFE..FFFFF ; BN # Cn [2] <noncharacter-FFFFE>..<noncharacter-FFFFF> 1CD4..1CE0 ; NSM # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE 
SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA 1CE2..1CE8 ; NSM # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL 1CED ; NSM # Mn VEDIC SIGN TIRYAK +1CF4 ; NSM # Mn VEDIC TONE CANDRA ABOVE 1DC0..1DE6 ; NSM # Mn [39] COMBINING DOTTED GRAVE ACCENT..COMBINING LATIN SMALL LETTER Z 1DFC..1DFF ; NSM # Mn [4] COMBINING DOUBLE INVERTED BREVE BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW 20D0..20DC ; NSM # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE @@ -1684,11 +1727,12 @@ FFFFE..FFFFF ; BN # Cn [2] <noncharacter-FFFFE>..<noncharacter-FFFFF> 2CEF..2CF1 ; NSM # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS 2D7F ; NSM # Mn TIFINAGH CONSONANT JOINER 2DE0..2DFF ; NSM # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS -302A..302F ; NSM # Mn [6] IDEOGRAPHIC LEVEL TONE MARK..HANGUL DOUBLE DOT TONE MARK +302A..302D ; NSM # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK 3099..309A ; NSM # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK A66F ; NSM # Mn COMBINING CYRILLIC VZMET A670..A672 ; NSM # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN -A67C..A67D ; NSM # Mn [2] COMBINING CYRILLIC KAVYKA..COMBINING CYRILLIC PAYEROK +A674..A67D ; NSM # Mn [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK +A69F ; NSM # Mn COMBINING CYRILLIC LETTER IOTIFIED E A6F0..A6F1 ; NSM # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS A802 ; NSM # Mn SYLOTI NAGRI SIGN DVISVARA A806 ; NSM # Mn SYLOTI NAGRI SIGN HASANTA @@ -1712,6 +1756,8 @@ AAB2..AAB4 ; NSM # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U AAB7..AAB8 ; NSM # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA AABE..AABF ; NSM # Mn [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK AAC1 ; NSM # Mn TAI VIET TONE MAI THO +AAEC..AAED ; NSM # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK 
VOWEL SIGN AAI +AAF6 ; NSM # Mn MEETEI MAYEK VIRAMA ABE5 ; NSM # Mn MEETEI MAYEK VOWEL SIGN ANAP ABE8 ; NSM # Mn MEETEI MAYEK VOWEL SIGN UNAP ABED ; NSM # Mn MEETEI MAYEK APUN IYEK @@ -1729,6 +1775,16 @@ FE20..FE26 ; NSM # Mn [7] COMBINING LIGATURE LEFT HALF..COMBINING CONJOININ 11080..11081 ; NSM # Mn [2] KAITHI SIGN CANDRABINDU..KAITHI SIGN ANUSVARA 110B3..110B6 ; NSM # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI 110B9..110BA ; NSM # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA +11100..11102 ; NSM # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11127..1112B ; NSM # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112D..11134 ; NSM # Mn [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA +11180..11181 ; NSM # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +111B6..111BE ; NSM # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +116AB ; NSM # Mn TAKRI SIGN ANUSVARA +116AD ; NSM # Mn TAKRI VOWEL SIGN AA +116B0..116B5 ; NSM # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +116B7 ; NSM # Mn TAKRI SIGN NUKTA +16F8F..16F92 ; NSM # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW 1D167..1D169 ; NSM # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 1D17B..1D182 ; NSM # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE 1D185..1D18B ; NSM # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE @@ -1736,13 +1792,13 @@ FE20..FE26 ; NSM # Mn [7] COMBINING LIGATURE LEFT HALF..COMBINING CONJOININ 1D242..1D244 ; NSM # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME E0100..E01EF ; NSM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 1209 +# Total code points: 1290 # ================================================ # Bidi_Class=Arabic_Letter -0604..0605 ; AL # Cn [2] <reserved-0604>..<reserved-0605> +0605 ; AL # Cn <reserved-0605> 0608 ; AL # Sm ARABIC RAY 060B ; AL # Sc AFGHANI SIGN 060D ; AL # Po ARABIC DATE SEPARATOR @@ -1764,12 +1820,18 @@ 
E0100..E01EF ; NSM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 06FF ; AL # Lo ARABIC LETTER HEH WITH INVERTED V 0700..070D ; AL # Po [14] SYRIAC END OF PARAGRAPH..SYRIAC HARKLEAN ASTERISCUS 070E ; AL # Cn <reserved-070E> +070F ; AL # Cf SYRIAC ABBREVIATION MARK 0710 ; AL # Lo SYRIAC LETTER ALAPH 0712..072F ; AL # Lo [30] SYRIAC LETTER BETH..SYRIAC LETTER PERSIAN DHALATH 074B..074C ; AL # Cn [2] <reserved-074B>..<reserved-074C> 074D..07A5 ; AL # Lo [89] SYRIAC LETTER SOGDIAN ZHAIN..THAANA LETTER WAAVU 07B1 ; AL # Lo THAANA LETTER NAA 07B2..07BF ; AL # Cn [14] <reserved-07B2>..<reserved-07BF> +08A0 ; AL # Lo ARABIC LETTER BEH WITH SMALL V BELOW +08A1 ; AL # Cn <reserved-08A1> +08A2..08AC ; AL # Lo [11] ARABIC LETTER JEEM WITH TWO DOTS ABOVE..ARABIC LETTER ROHINGYA YEH +08AD..08E3 ; AL # Cn [55] <reserved-08AD>..<reserved-08E3> +08FF ; AL # Cn <reserved-08FF> FB50..FBB1 ; AL # Lo [98] ARABIC LETTER ALEF WASLA ISOLATED FORM..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM FBB2..FBC1 ; AL # Sk [16] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL SMALL TAH BELOW FBC2..FBD2 ; AL # Cn [17] <reserved-FBC2>..<reserved-FBD2> @@ -1786,8 +1848,75 @@ FE70..FE74 ; AL # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN ISO FE75 ; AL # Cn <reserved-FE75> FE76..FEFC ; AL # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM FEFD..FEFE ; AL # Cn [2] <reserved-FEFD>..<reserved-FEFE> - -# Total code points: 1115 +1EE00..1EE03 ; AL # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE04 ; AL # Cn <reserved-1EE04> +1EE05..1EE1F ; AL # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE20 ; AL # Cn <reserved-1EE20> +1EE21..1EE22 ; AL # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE23 ; AL # Cn <reserved-1EE23> +1EE24 ; AL # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE25..1EE26 ; AL # Cn [2] <reserved-1EE25>..<reserved-1EE26> +1EE27 ; AL # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE28 ; AL # Cn 
<reserved-1EE28> +1EE29..1EE32 ; AL # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE33 ; AL # Cn <reserved-1EE33> +1EE34..1EE37 ; AL # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE38 ; AL # Cn <reserved-1EE38> +1EE39 ; AL # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3A ; AL # Cn <reserved-1EE3A> +1EE3B ; AL # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE3C..1EE41 ; AL # Cn [6] <reserved-1EE3C>..<reserved-1EE41> +1EE42 ; AL # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE43..1EE46 ; AL # Cn [4] <reserved-1EE43>..<reserved-1EE46> +1EE47 ; AL # Lo ARABIC MATHEMATICAL TAILED HAH +1EE48 ; AL # Cn <reserved-1EE48> +1EE49 ; AL # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4A ; AL # Cn <reserved-1EE4A> +1EE4B ; AL # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4C ; AL # Cn <reserved-1EE4C> +1EE4D..1EE4F ; AL # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE50 ; AL # Cn <reserved-1EE50> +1EE51..1EE52 ; AL # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE53 ; AL # Cn <reserved-1EE53> +1EE54 ; AL # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE55..1EE56 ; AL # Cn [2] <reserved-1EE55>..<reserved-1EE56> +1EE57 ; AL # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE58 ; AL # Cn <reserved-1EE58> +1EE59 ; AL # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5A ; AL # Cn <reserved-1EE5A> +1EE5B ; AL # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5C ; AL # Cn <reserved-1EE5C> +1EE5D ; AL # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5E ; AL # Cn <reserved-1EE5E> +1EE5F ; AL # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE60 ; AL # Cn <reserved-1EE60> +1EE61..1EE62 ; AL # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE63 ; AL # Cn <reserved-1EE63> +1EE64 ; AL # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE65..1EE66 ; AL # Cn [2] <reserved-1EE65>..<reserved-1EE66> +1EE67..1EE6A ; AL # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6B ; AL # Cn 
<reserved-1EE6B> +1EE6C..1EE72 ; AL # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE73 ; AL # Cn <reserved-1EE73> +1EE74..1EE77 ; AL # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE78 ; AL # Cn <reserved-1EE78> +1EE79..1EE7C ; AL # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7D ; AL # Cn <reserved-1EE7D> +1EE7E ; AL # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE7F ; AL # Cn <reserved-1EE7F> +1EE80..1EE89 ; AL # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8A ; AL # Cn <reserved-1EE8A> +1EE8B..1EE9B ; AL # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EE9C..1EEA0 ; AL # Cn [5] <reserved-1EE9C>..<reserved-1EEA0> +1EEA1..1EEA3 ; AL # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA4 ; AL # Cn <reserved-1EEA4> +1EEA5..1EEA9 ; AL # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAA ; AL # Cn <reserved-1EEAA> +1EEAB..1EEBB ; AL # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN +1EEBC..1EEEF ; AL # Cn [52] <reserved-1EEBC>..<reserved-1EEEF> +1EEF2..1EEFF ; AL # Cn [14] <reserved-1EEF2>..<reserved-1EEFF> + +# Total code points: 1438 # ================================================ diff --git a/lib/unicore/extracted/DBinaryProperties.txt b/lib/unicore/extracted/DBinaryProperties.txt index c5a10dcf8f..6d23c068a9 100644 --- a/lib/unicore/extracted/DBinaryProperties.txt +++ b/lib/unicore/extracted/DBinaryProperties.txt @@ -1,8 +1,8 @@ -# DerivedBinaryProperties-6.0.0.txt -# Date: 2010-05-18, 00:49:04 GMT [MD] +# DerivedBinaryProperties-6.1.0.txt +# Date: 2011-07-25, 00:54:10 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. 
# For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ @@ -83,7 +83,7 @@ 27C5 ; Bidi_Mirrored # Ps LEFT S-SHAPED BAG DELIMITER 27C6 ; Bidi_Mirrored # Pe RIGHT S-SHAPED BAG DELIMITER 27C8..27C9 ; Bidi_Mirrored # Sm [2] REVERSE SOLIDUS PRECEDING SUBSET..SUPERSET PRECEDING SOLIDUS -27CC ; Bidi_Mirrored # Sm LONG DIVISION +27CB..27CD ; Bidi_Mirrored # Sm [3] MATHEMATICAL RISING DIAGONAL..MATHEMATICAL FALLING DIAGONAL 27D3..27D6 ; Bidi_Mirrored # Sm [4] LOWER RIGHT CORNER WITH DOT..RIGHT OUTER JOIN 27DC..27DE ; Bidi_Mirrored # Sm [3] LEFT MULTIMAP..LONG LEFT TACK 27E2..27E5 ; Bidi_Mirrored # Sm [4] WHITE CONCAVE-SIDED DIAMOND WITH LEFTWARDS TICK..WHITE SQUARE WITH RIGHTWARDS TICK @@ -222,6 +222,6 @@ FF63 ; Bidi_Mirrored # Pe HALFWIDTH RIGHT CORNER BRACKET 1D789 ; Bidi_Mirrored # Sm MATHEMATICAL SANS-SERIF BOLD PARTIAL DIFFERENTIAL 1D7C3 ; Bidi_Mirrored # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL -# Total code points: 543 +# Total code points: 545 # EOF diff --git a/lib/unicore/extracted/DCombiningClass.txt b/lib/unicore/extracted/DCombiningClass.txt index f2695cf61e..33495d2c50 100644 --- a/lib/unicore/extracted/DCombiningClass.txt +++ b/lib/unicore/extracted/DCombiningClass.txt @@ -1,8 +1,8 @@ -# DerivedCombiningClass-6.0.0.txt -# Date: 2010-08-19, 00:48:04 GMT [MD] +# DerivedCombiningClass-6.1.0.txt +# Date: 2011-12-05, 16:44:07 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. 
# For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ @@ -51,10 +51,11 @@ 00A0 ; 0 # Zs NO-BREAK SPACE 00A1 ; 0 # Po INVERTED EXCLAMATION MARK 00A2..00A5 ; 0 # Sc [4] CENT SIGN..YEN SIGN -00A6..00A7 ; 0 # So [2] BROKEN BAR..SECTION SIGN +00A6 ; 0 # So BROKEN BAR +00A7 ; 0 # Po SECTION SIGN 00A8 ; 0 # Sk DIAERESIS 00A9 ; 0 # So COPYRIGHT SIGN -00AA ; 0 # L& FEMININE ORDINAL INDICATOR +00AA ; 0 # Lo FEMININE ORDINAL INDICATOR 00AB ; 0 # Pi LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 00AC ; 0 # Sm NOT SIGN 00AD ; 0 # Cf SOFT HYPHEN @@ -65,11 +66,10 @@ 00B2..00B3 ; 0 # No [2] SUPERSCRIPT TWO..SUPERSCRIPT THREE 00B4 ; 0 # Sk ACUTE ACCENT 00B5 ; 0 # L& MICRO SIGN -00B6 ; 0 # So PILCROW SIGN -00B7 ; 0 # Po MIDDLE DOT +00B6..00B7 ; 0 # Po [2] PILCROW SIGN..MIDDLE DOT 00B8 ; 0 # Sk CEDILLA 00B9 ; 0 # No SUPERSCRIPT ONE -00BA ; 0 # L& MASCULINE ORDINAL INDICATOR +00BA ; 0 # Lo MASCULINE ORDINAL INDICATOR 00BB ; 0 # Pf RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK 00BC..00BE ; 0 # No [3] VULGAR FRACTION ONE QUARTER..VULGAR FRACTION THREE QUARTERS 00BF ; 0 # Po INVERTED QUESTION MARK @@ -120,6 +120,7 @@ 0561..0587 ; 0 # L& [39] ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LIGATURE ECH YIWN 0589 ; 0 # Po ARMENIAN FULL STOP 058A ; 0 # Pd ARMENIAN HYPHEN +058F ; 0 # Sc ARMENIAN DRAM SIGN 05BE ; 0 # Pd HEBREW PUNCTUATION MAQAF 05C0 ; 0 # Po HEBREW PUNCTUATION PASEQ 05C3 ; 0 # Po HEBREW PUNCTUATION SOF PASUQ @@ -127,7 +128,7 @@ 05D0..05EA ; 0 # Lo [27] HEBREW LETTER ALEF..HEBREW LETTER TAV 05F0..05F2 ; 0 # Lo [3] HEBREW LIGATURE YIDDISH DOUBLE VAV..HEBREW LIGATURE YIDDISH DOUBLE YOD 05F3..05F4 ; 0 # Po [2] HEBREW PUNCTUATION GERESH..HEBREW PUNCTUATION GERSHAYIM -0600..0603 ; 0 # Cf [4] ARABIC NUMBER SIGN..ARABIC SIGN SAFHA +0600..0604 ; 0 # Cf [5] ARABIC NUMBER SIGN..ARABIC SIGN SAMVAT 0606..0608 ; 0 # Sm [3] ARABIC-INDIC CUBE ROOT..ARABIC RAY 0609..060A ; 0 # Po [2] ARABIC-INDIC PER MILLE SIGN..ARABIC-INDIC PER TEN 
THOUSAND SIGN 060B ; 0 # Sc AFGHANI SIGN @@ -173,6 +174,8 @@ 0830..083E ; 0 # Po [15] SAMARITAN PUNCTUATION NEQUDAA..SAMARITAN PUNCTUATION ANNAAU 0840..0858 ; 0 # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN 085E ; 0 # Po MANDAIC PUNCTUATION +08A0 ; 0 # Lo ARABIC LETTER BEH WITH SMALL V BELOW +08A2..08AC ; 0 # Lo [11] ARABIC LETTER JEEM WITH TWO DOTS ABOVE..ARABIC LETTER ROHINGYA YEH 0900..0902 ; 0 # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA 0903 ; 0 # Mc DEVANAGARI SIGN VISARGA 0904..0939 ; 0 # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA @@ -255,6 +258,7 @@ 0AE0..0AE1 ; 0 # Lo [2] GUJARATI LETTER VOCALIC RR..GUJARATI LETTER VOCALIC LL 0AE2..0AE3 ; 0 # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL 0AE6..0AEF ; 0 # Nd [10] GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE +0AF0 ; 0 # Po GUJARATI ABBREVIATION SIGN 0AF1 ; 0 # Sc GUJARATI RUPEE SIGN 0B01 ; 0 # Mn ORIYA SIGN CANDRABINDU 0B02..0B03 ; 0 # Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA @@ -402,11 +406,13 @@ 0EC6 ; 0 # Lm LAO KO LA 0ECC..0ECD ; 0 # Mn [2] LAO CANCELLATION MARK..LAO NIGGAHITA 0ED0..0ED9 ; 0 # Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE -0EDC..0EDD ; 0 # Lo [2] LAO HO NO..LAO HO MO +0EDC..0EDF ; 0 # Lo [4] LAO HO NO..LAO LETTER KHMU NYO 0F00 ; 0 # Lo TIBETAN SYLLABLE OM 0F01..0F03 ; 0 # So [3] TIBETAN MARK GTER YIG MGO TRUNCATED A..TIBETAN MARK GTER YIG MGO -UM GTER TSHEG MA 0F04..0F12 ; 0 # Po [15] TIBETAN MARK INITIAL YIG MGO MDUN MA..TIBETAN MARK RGYA GRAM SHAD -0F13..0F17 ; 0 # So [5] TIBETAN MARK CARET -DZUD RTAGS ME LONG CAN..TIBETAN ASTROLOGICAL SIGN SGRA GCAN -CHAR RTAGS +0F13 ; 0 # So TIBETAN MARK CARET -DZUD RTAGS ME LONG CAN +0F14 ; 0 # Po TIBETAN MARK GTER TSHEG +0F15..0F17 ; 0 # So [3] TIBETAN LOGOTYPE SIGN CHAD RTAGS..TIBETAN ASTROLOGICAL SIGN SGRA GCAN -CHAR RTAGS 0F1A..0F1F ; 0 # So [6] TIBETAN SIGN RDEL DKAR GCIG..TIBETAN SIGN RDEL DKAR RDEL NAG 0F20..0F29 ; 0 # Nd [10] TIBETAN DIGIT ZERO..TIBETAN DIGIT NINE 0F2A..0F33 ; 0 
# No [10] TIBETAN DIGIT HALF ONE..TIBETAN DIGIT HALF ZERO @@ -469,10 +475,12 @@ 109D ; 0 # Mn MYANMAR VOWEL SIGN AITON AI 109E..109F ; 0 # So [2] MYANMAR SYMBOL SHAN ONE..MYANMAR SYMBOL SHAN EXCLAMATION 10A0..10C5 ; 0 # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; 0 # L& GEORGIAN CAPITAL LETTER YN +10CD ; 0 # L& GEORGIAN CAPITAL LETTER AEN 10D0..10FA ; 0 # Lo [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN 10FB ; 0 # Po GEORGIAN PARAGRAPH SEPARATOR 10FC ; 0 # Lm MODIFIER LETTER GEORGIAN NAR -1100..1248 ; 0 # Lo [329] HANGUL CHOSEONG KIYEOK..ETHIOPIC SYLLABLE QWA +10FD..1248 ; 0 # Lo [332] GEORGIAN LETTER AEN..ETHIOPIC SYLLABLE QWA 124A..124D ; 0 # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE 1250..1256 ; 0 # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO 1258 ; 0 # Lo ETHIOPIC SYLLABLE QHWA @@ -488,8 +496,7 @@ 12D8..1310 ; 0 # Lo [57] ETHIOPIC SYLLABLE ZA..ETHIOPIC SYLLABLE GWA 1312..1315 ; 0 # Lo [4] ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE 1318..135A ; 0 # Lo [67] ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA -1360 ; 0 # So ETHIOPIC SECTION MARK -1361..1368 ; 0 # Po [8] ETHIOPIC WORDSPACE..ETHIOPIC PARAGRAPH SEPARATOR +1360..1368 ; 0 # Po [9] ETHIOPIC SECTION MARK..ETHIOPIC PARAGRAPH SEPARATOR 1369..137C ; 0 # No [20] ETHIOPIC DIGIT ONE..ETHIOPIC NUMBER TEN THOUSAND 1380..138F ; 0 # Lo [16] ETHIOPIC SYLLABLE SEBATBEIT MWA..ETHIOPIC SYLLABLE PWE 1390..1399 ; 0 # So [10] ETHIOPIC TONAL MARK YIZET..ETHIOPIC TONAL MARK KURT @@ -517,7 +524,7 @@ 176E..1770 ; 0 # Lo [3] TAGBANWA LETTER LA..TAGBANWA LETTER SA 1772..1773 ; 0 # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U 1780..17B3 ; 0 # Lo [52] KHMER LETTER KA..KHMER INDEPENDENT VOWEL QAU -17B4..17B5 ; 0 # Cf [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA +17B4..17B5 ; 0 # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA 17B6 ; 0 # Mc KHMER VOWEL SIGN AA 17B7..17BD ; 0 # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA 17BE..17C5 ; 0 # Mc [8] KHMER VOWEL SIGN 
OE..KHMER VOWEL SIGN AU @@ -605,9 +612,10 @@ 1BA2..1BA5 ; 0 # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU 1BA6..1BA7 ; 0 # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG 1BA8..1BA9 ; 0 # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG +1BAC..1BAD ; 0 # Mc [2] SUNDANESE CONSONANT SIGN PASANGAN MA..SUNDANESE CONSONANT SIGN PASANGAN WA 1BAE..1BAF ; 0 # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA 1BB0..1BB9 ; 0 # Nd [10] SUNDANESE DIGIT ZERO..SUNDANESE DIGIT NINE -1BC0..1BE5 ; 0 # Lo [38] BATAK LETTER A..BATAK LETTER U +1BBA..1BE5 ; 0 # Lo [44] SUNDANESE AVAGRAHA..BATAK LETTER U 1BE7 ; 0 # Mc BATAK VOWEL SIGN E 1BE8..1BE9 ; 0 # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE 1BEA..1BEC ; 0 # Mc [3] BATAK VOWEL SIGN I..BATAK VOWEL SIGN O @@ -627,14 +635,16 @@ 1C5A..1C77 ; 0 # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH 1C78..1C7D ; 0 # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD 1C7E..1C7F ; 0 # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD +1CC0..1CC7 ; 0 # Po [8] SUNDANESE PUNCTUATION BINDU SURYA..SUNDANESE PUNCTUATION BINDU BA SATANGA 1CD3 ; 0 # Po VEDIC SIGN NIHSHVASA 1CE1 ; 0 # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA 1CE9..1CEC ; 0 # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL 1CEE..1CF1 ; 0 # Lo [4] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ANUSVARA UBHAYATO MUKHA -1CF2 ; 0 # Mc VEDIC SIGN ARDHAVISARGA +1CF2..1CF3 ; 0 # Mc [2] VEDIC SIGN ARDHAVISARGA..VEDIC SIGN ROTATED ARDHAVISARGA +1CF5..1CF6 ; 0 # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA 1D00..1D2B ; 0 # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL -1D2C..1D61 ; 0 # Lm [54] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI -1D62..1D77 ; 0 # L& [22] LATIN SUBSCRIPT SMALL LETTER I..LATIN SMALL LETTER TURNED G +1D2C..1D6A ; 0 # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1D6B..1D77 ; 0 # L& 
[13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G 1D78 ; 0 # Lm MODIFIER LETTER CYRILLIC EN 1D79..1D9A ; 0 # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK 1D9B..1DBF ; 0 # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA @@ -815,9 +825,7 @@ 27C0..27C4 ; 0 # Sm [5] THREE DIMENSIONAL ANGLE..OPEN SUPERSET 27C5 ; 0 # Ps LEFT S-SHAPED BAG DELIMITER 27C6 ; 0 # Pe RIGHT S-SHAPED BAG DELIMITER -27C7..27CA ; 0 # Sm [4] OR WITH DOT INSIDE..VERTICAL BAR WITH HORIZONTAL STROKE -27CC ; 0 # Sm LONG DIVISION -27CE..27E5 ; 0 # Sm [24] SQUARED LOGICAL AND..WHITE SQUARE WITH RIGHTWARDS TICK +27C7..27E5 ; 0 # Sm [31] OR WITH DOT INSIDE..WHITE SQUARE WITH RIGHTWARDS TICK 27E6 ; 0 # Ps MATHEMATICAL LEFT WHITE SQUARE BRACKET 27E7 ; 0 # Pe MATHEMATICAL RIGHT WHITE SQUARE BRACKET 27E8 ; 0 # Ps MATHEMATICAL LEFT ANGLE BRACKET @@ -869,16 +877,19 @@ 2B50..2B59 ; 0 # So [10] WHITE MEDIUM STAR..HEAVY CIRCLED SALTIRE 2C00..2C2E ; 0 # L& [47] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE 2C30..2C5E ; 0 # L& [47] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER LATINATE MYSLITE -2C60..2C7C ; 0 # L& [29] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN SUBSCRIPT SMALL LETTER J -2C7D ; 0 # Lm MODIFIER LETTER CAPITAL V +2C60..2C7B ; 0 # L& [28] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN LETTER SMALL CAPITAL TURNED E +2C7C..2C7D ; 0 # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V 2C7E..2CE4 ; 0 # L& [103] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SYMBOL KAI 2CE5..2CEA ; 0 # So [6] COPTIC SYMBOL MI RO..COPTIC SYMBOL SHIMA SIMA 2CEB..2CEE ; 0 # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CF2..2CF3 ; 0 # L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI 2CF9..2CFC ; 0 # Po [4] COPTIC OLD NUBIAN FULL STOP..COPTIC OLD NUBIAN VERSE DIVIDER 2CFD ; 0 # No COPTIC FRACTION ONE HALF 2CFE..2CFF ; 0 # Po [2] COPTIC FULL 
STOP..COPTIC MORPHOLOGICAL DIVIDER 2D00..2D25 ; 0 # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE -2D30..2D65 ; 0 # Lo [54] TIFINAGH LETTER YA..TIFINAGH LETTER YAZZ +2D27 ; 0 # L& GEORGIAN SMALL LETTER YN +2D2D ; 0 # L& GEORGIAN SMALL LETTER AEN +2D30..2D67 ; 0 # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO 2D6F ; 0 # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK 2D70 ; 0 # Po TIFINAGH SEPARATOR MARK 2D80..2D96 ; 0 # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE @@ -921,7 +932,8 @@ 2E29 ; 0 # Pe RIGHT DOUBLE PARENTHESIS 2E2A..2E2E ; 0 # Po [5] TWO DOTS OVER ONE DOT PUNCTUATION..REVERSED QUESTION MARK 2E2F ; 0 # Lm VERTICAL TILDE -2E30..2E31 ; 0 # Po [2] RING POINT..WORD SEPARATOR MIDDLE DOT +2E30..2E39 ; 0 # Po [10] RING POINT..TOP HALF SECTION SIGN +2E3A..2E3B ; 0 # Pd [2] TWO-EM DASH..THREE-EM DASH 2E80..2E99 ; 0 # So [26] CJK RADICAL REPEAT..CJK RADICAL RAP 2E9B..2EF3 ; 0 # So [89] CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED TURTLE 2F00..2FD5 ; 0 # So [214] KANGXI RADICAL ONE..KANGXI RADICAL FLUTE @@ -983,7 +995,9 @@ 31F0..31FF ; 0 # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO 3200..321E ; 0 # So [31] PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED KOREAN CHARACTER O HU 3220..3229 ; 0 # No [10] PARENTHESIZED IDEOGRAPH ONE..PARENTHESIZED IDEOGRAPH TEN -322A..3250 ; 0 # So [39] PARENTHESIZED IDEOGRAPH MOON..PARTNERSHIP SIGN +322A..3247 ; 0 # So [30] PARENTHESIZED IDEOGRAPH MOON..CIRCLED IDEOGRAPH KOTO +3248..324F ; 0 # No [8] CIRCLED NUMBER TEN ON BLACK SQUARE..CIRCLED NUMBER EIGHTY ON BLACK SQUARE +3250 ; 0 # So PARTNERSHIP SIGN 3251..325F ; 0 # No [15] CIRCLED NUMBER TWENTY ONE..CIRCLED NUMBER THIRTY FIVE 3260..327F ; 0 # So [32] CIRCLED HANGUL KIYEOK..KOREAN STANDARD SYMBOL 3280..3289 ; 0 # No [10] CIRCLED IDEOGRAPH ONE..CIRCLED IDEOGRAPH TEN @@ -993,7 +1007,7 @@ 3300..33FF ; 0 # So [256] SQUARE APAATO..SQUARE GAL 3400..4DB5 ; 0 # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5 4DC0..4DFF ; 0 # So [64] 
HEXAGRAM FOR THE CREATIVE HEAVEN..HEXAGRAM FOR BEFORE COMPLETION -4E00..9FCB ; 0 # Lo [20940] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCB +4E00..9FCC ; 0 # Lo [20941] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCC A000..A014 ; 0 # Lo [21] YI SYLLABLE IT..YI SYLLABLE E A015 ; 0 # Lm YI SYLLABLE WU A016..A48C ; 0 # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR @@ -1026,8 +1040,9 @@ A771..A787 ; 0 # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR A788 ; 0 # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A789..A78A ; 0 # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN A78B..A78E ; 0 # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT -A790..A791 ; 0 # L& [2] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER N WITH DESCENDER -A7A0..A7A9 ; 0 # L& [10] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN SMALL LETTER S WITH OBLIQUE STROKE +A790..A793 ; 0 # L& [4] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER C WITH BAR +A7A0..A7AA ; 0 # L& [11] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN CAPITAL LETTER H WITH HOOK +A7F8..A7F9 ; 0 # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE A7FA ; 0 # L& LATIN LETTER SMALL CAPITAL TURNED M A7FB..A801 ; 0 # Lo [7] LATIN EPIGRAPHIC LETTER REVERSED F..SYLOTI NAGRI LETTER I A802 ; 0 # Mn SYLOTI NAGRI SIGN DVISVARA @@ -1102,6 +1117,14 @@ AAC2 ; 0 # Lo TAI VIET TONE MAI SONG AADB..AADC ; 0 # Lo [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG AADD ; 0 # Lm TAI VIET SYMBOL SAM AADE..AADF ; 0 # Po [2] TAI VIET SYMBOL HO HOI..TAI VIET SYMBOL KOI KOI +AAE0..AAEA ; 0 # Lo [11] MEETEI MAYEK LETTER E..MEETEI MAYEK LETTER SSA +AAEB ; 0 # Mc MEETEI MAYEK VOWEL SIGN II +AAEC..AAED ; 0 # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI +AAEE..AAEF ; 0 # Mc [2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU +AAF0..AAF1 ; 0 # Po [2] MEETEI MAYEK CHEIKHAN..MEETEI MAYEK AHANG KHUDAM +AAF2 ; 0 # Lo MEETEI 
MAYEK ANJI +AAF3..AAF4 ; 0 # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK +AAF5 ; 0 # Mc MEETEI MAYEK VOWEL SIGN VISARGA AB01..AB06 ; 0 # Lo [6] ETHIOPIC SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO AB09..AB0E ; 0 # Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO AB11..AB16 ; 0 # Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO @@ -1120,8 +1143,7 @@ AC00..D7A3 ; 0 # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH D7B0..D7C6 ; 0 # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E D7CB..D7FB ; 0 # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH E000..F8FF ; 0 # Co [6400] <private-use-E000>..<private-use-F8FF> -F900..FA2D ; 0 # Lo [302] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA2D -FA30..FA6D ; 0 # Lo [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D +F900..FA6D ; 0 # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D FA70..FAD9 ; 0 # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 FB00..FB06 ; 0 # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST FB13..FB17 ; 0 # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH @@ -1249,8 +1271,7 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 1003F..1004D ; 0 # Lo [15] LINEAR B SYLLABLE B020 ZO..LINEAR B SYLLABLE B091 TWO 10050..1005D ; 0 # Lo [14] LINEAR B SYMBOL B018..LINEAR B SYMBOL B089 10080..100FA ; 0 # Lo [123] LINEAR B IDEOGRAM B100 MAN..LINEAR B IDEOGRAM VESSEL B305 -10100..10101 ; 0 # Po [2] AEGEAN WORD SEPARATOR LINE..AEGEAN WORD SEPARATOR DOT -10102 ; 0 # So AEGEAN CHECK MARK +10100..10102 ; 0 # Po [3] AEGEAN WORD SEPARATOR LINE..AEGEAN CHECK MARK 10107..10133 ; 0 # No [45] AEGEAN NUMBER ONE..AEGEAN NUMBER NINETY THOUSAND 10137..1013F ; 0 # So [9] AEGEAN WEIGHT BASE UNIT..AEGEAN MEASURE THIRD SUBUNIT 10140..10174 ; 0 # Nl [53] GREEK ACROPHONIC ATTIC ONE QUARTER..GREEK ACROPHONIC STRATIAN FIFTY MNAS 
@@ -1289,6 +1310,8 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 1091F ; 0 # Po PHOENICIAN WORD SEPARATOR 10920..10939 ; 0 # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C 1093F ; 0 # Po LYDIAN TRIANGULAR MARK +10980..109B7 ; 0 # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA +109BE..109BF ; 0 # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN 10A00 ; 0 # Lo KHAROSHTHI LETTER A 10A01..10A03 ; 0 # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R 10A05..10A06 ; 0 # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O @@ -1327,11 +1350,40 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 110BB..110BC ; 0 # Po [2] KAITHI ABBREVIATION SIGN..KAITHI ENUMERATION SIGN 110BD ; 0 # Cf KAITHI NUMBER SIGN 110BE..110C1 ; 0 # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA +110D0..110E8 ; 0 # Lo [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE +110F0..110F9 ; 0 # Nd [10] SORA SOMPENG DIGIT ZERO..SORA SOMPENG DIGIT NINE +11103..11126 ; 0 # Lo [36] CHAKMA LETTER AA..CHAKMA LETTER HAA +11127..1112B ; 0 # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112C ; 0 # Mc CHAKMA VOWEL SIGN E +1112D..11132 ; 0 # Mn [6] CHAKMA VOWEL SIGN AI..CHAKMA AU MARK +11136..1113F ; 0 # Nd [10] CHAKMA DIGIT ZERO..CHAKMA DIGIT NINE +11140..11143 ; 0 # Po [4] CHAKMA SECTION MARK..CHAKMA QUESTION MARK +11180..11181 ; 0 # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +11182 ; 0 # Mc SHARADA SIGN VISARGA +11183..111B2 ; 0 # Lo [48] SHARADA LETTER A..SHARADA LETTER HA +111B3..111B5 ; 0 # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II +111B6..111BE ; 0 # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +111BF ; 0 # Mc SHARADA VOWEL SIGN AU +111C1..111C4 ; 0 # Lo [4] SHARADA SIGN AVAGRAHA..SHARADA OM +111C5..111C8 ; 0 # Po [4] SHARADA DANDA..SHARADA SEPARATOR +111D0..111D9 ; 0 # Nd [10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE +11680..116AA ; 0 # Lo [43] TAKRI LETTER A..TAKRI 
LETTER RRA +116AB ; 0 # Mn TAKRI SIGN ANUSVARA +116AC ; 0 # Mc TAKRI SIGN VISARGA +116AD ; 0 # Mn TAKRI VOWEL SIGN AA +116AE..116AF ; 0 # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II +116B0..116B5 ; 0 # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +116C0..116C9 ; 0 # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE 12000..1236E ; 0 # Lo [879] CUNEIFORM SIGN A..CUNEIFORM SIGN ZUM 12400..12462 ; 0 # Nl [99] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN OLD ASSYRIAN ONE QUARTER 12470..12473 ; 0 # Po [4] CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER..CUNEIFORM PUNCTUATION SIGN DIAGONAL TRICOLON 13000..1342E ; 0 # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032 16800..16A38 ; 0 # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ +16F00..16F44 ; 0 # Lo [69] MIAO LETTER PA..MIAO LETTER HHA +16F50 ; 0 # Lo MIAO LETTER NASALIZATION +16F51..16F7E ; 0 # Mc [46] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN NG +16F8F..16F92 ; 0 # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW +16F93..16F9F ; 0 # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 1B000..1B001 ; 0 # Lo [2] KATAKANA LETTER ARCHAIC E..HIRAGANA LETTER ARCHAIC YE 1D000..1D0F5 ; 0 # So [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO 1D100..1D126 ; 0 # So [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2 @@ -1386,6 +1438,40 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 1D7C3 ; 0 # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL 1D7C4..1D7CB ; 0 # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA 1D7CE..1D7FF ; 0 # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE +1EE00..1EE03 ; 0 # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; 0 # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; 0 # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; 0 # Lo 
ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; 0 # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; 0 # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; 0 # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; 0 # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; 0 # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; 0 # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; 0 # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; 0 # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; 0 # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; 0 # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; 0 # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; 0 # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; 0 # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; 0 # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; 0 # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; 0 # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; 0 # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; 0 # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; 0 # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; 0 # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; 0 # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; 0 # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; 0 # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; 0 # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; 0 # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; 0 # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; 0 # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; 0 # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK 
WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; 0 # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN +1EEF0..1EEF1 ; 0 # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL 1F000..1F02B ; 0 # So [44] MAHJONG TILE EAST WIND..MAHJONG TILE BACK 1F030..1F093 ; 0 # So [100] DOMINO TILE HORIZONTAL BACK..DOMINO TILE VERTICAL-06-06 1F0A0..1F0AE ; 0 # So [15] PLAYING CARD BACK..PLAYING CARD KING OF SPADES @@ -1394,7 +1480,7 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 1F0D1..1F0DF ; 0 # So [15] PLAYING CARD ACE OF CLUBS..PLAYING CARD WHITE JOKER 1F100..1F10A ; 0 # No [11] DIGIT ZERO FULL STOP..DIGIT NINE COMMA 1F110..1F12E ; 0 # So [31] PARENTHESIZED LATIN CAPITAL LETTER A..CIRCLED WZ -1F130..1F169 ; 0 # So [58] SQUARED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z +1F130..1F16B ; 0 # So [60] SQUARED LATIN CAPITAL LETTER A..RAISED MD SIGN 1F170..1F19A ; 0 # So [43] NEGATIVE SQUARED LATIN CAPITAL LETTER A..SQUARED VS 1F1E6..1F202 ; 0 # So [29] REGIONAL INDICATOR SYMBOL LETTER A..SQUARED KATAKANA SA 1F210..1F23A ; 0 # So [43] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-55B6 @@ -1412,19 +1498,9 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 1F442..1F4F7 ; 0 # So [182] EAR..CAMERA 1F4F9..1F4FC ; 0 # So [4] VIDEO CAMERA..VIDEOCASSETTE 1F500..1F53D ; 0 # So [62] TWISTED RIGHTWARDS ARROWS..DOWN-POINTING SMALL RED TRIANGLE +1F540..1F543 ; 0 # So [4] CIRCLED CROSS POMMEE..NOTCHED LEFT SEMICIRCLE WITH THREE DOTS 1F550..1F567 ; 0 # So [24] CLOCK FACE ONE OCLOCK..CLOCK FACE TWELVE-THIRTY -1F5FB..1F5FF ; 0 # So [5] MOUNT FUJI..MOYAI -1F601..1F610 ; 0 # So [16] GRINNING FACE WITH SMILING EYES..NEUTRAL FACE -1F612..1F614 ; 0 # So [3] UNAMUSED FACE..PENSIVE FACE -1F616 ; 0 # So CONFOUNDED FACE -1F618 ; 0 # So FACE THROWING A KISS -1F61A ; 0 # So KISSING FACE WITH CLOSED EYES 
-1F61C..1F61E ; 0 # So [3] FACE WITH STUCK-OUT TONGUE AND WINKING EYE..DISAPPOINTED FACE -1F620..1F625 ; 0 # So [6] ANGRY FACE..DISAPPOINTED BUT RELIEVED FACE -1F628..1F62B ; 0 # So [4] FEARFUL FACE..TIRED FACE -1F62D ; 0 # So LOUDLY CRYING FACE -1F630..1F633 ; 0 # So [4] FACE WITH OPEN MOUTH AND COLD SWEAT..FLUSHED FACE -1F635..1F640 ; 0 # So [12] DIZZY FACE..WEARY CAT FACE +1F5FB..1F640 ; 0 # So [70] MOUNT FUJI..WEARY CAT FACE 1F645..1F64F ; 0 # So [11] FACE WITH NO GOOD GESTURE..PERSON WITH FOLDED HANDS 1F680..1F6C5 ; 0 # So [70] ROCKET..LEFT LUGGAGE 1F700..1F773 ; 0 # So [116] ALCHEMICAL SYMBOL FOR QUINTESSENCE..ALCHEMICAL SYMBOL FOR HALF OUNCE @@ -1438,8 +1514,8 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] <private-use-F0000>..<private-use-FFFFD> 100000..10FFFD; 0 # Co [65534] <private-use-100000>..<private-use-10FFFD> -# The above property value applies to 867195 code points not listed here. -# Total code points: 1113506 +# The above property value applies to 866463 code points not listed here. 
+# Total code points: 1113459 # ================================================ @@ -1473,8 +1549,9 @@ F0000..FFFFD ; 0 # Co [65534] <private-use-F0000>..<private-use-FFFFD> 1C37 ; 7 # Mn LEPCHA SIGN NUKTA A9B3 ; 7 # Mn JAVANESE SIGN CECAK TELU 110BA ; 7 # Mn KAITHI SIGN NUKTA +116B7 ; 7 # Mn TAKRI SIGN NUKTA -# Total code points: 12 +# Total code points: 13 # ================================================ @@ -1507,22 +1584,27 @@ A9B3 ; 7 # Mn JAVANESE SIGN CECAK TELU 1A60 ; 9 # Mn TAI THAM SIGN SAKOT 1B44 ; 9 # Mc BALINESE ADEG ADEG 1BAA ; 9 # Mc SUNDANESE SIGN PAMAAEH +1BAB ; 9 # Mn SUNDANESE SIGN VIRAMA 1BF2..1BF3 ; 9 # Mc [2] BATAK PANGOLAT..BATAK PANONGONAN 2D7F ; 9 # Mn TIFINAGH CONSONANT JOINER A806 ; 9 # Mn SYLOTI NAGRI SIGN HASANTA A8C4 ; 9 # Mn SAURASHTRA SIGN VIRAMA A953 ; 9 # Mc REJANG VIRAMA A9C0 ; 9 # Mc JAVANESE PANGKON +AAF6 ; 9 # Mn MEETEI MAYEK VIRAMA ABED ; 9 # Mn MEETEI MAYEK APUN IYEK 10A3F ; 9 # Mn KHAROSHTHI VIRAMA 11046 ; 9 # Mn BRAHMI VIRAMA 110B9 ; 9 # Mn KAITHI SIGN VIRAMA +11133..11134 ; 9 # Mn [2] CHAKMA VIRAMA..CHAKMA MAAYYAA +111C0 ; 9 # Mc SHARADA SIGN VIRAMA +116B6 ; 9 # Mc TAKRI SIGN VIRAMA -# Total code points: 31 +# Total code points: 37 # ================================================ -# Canonical_Combining_Class=10 +# Canonical_Combining_Class=CCC10 05B0 ; 10 # Mn HEBREW POINT SHEVA @@ -1530,7 +1612,7 @@ ABED ; 9 # Mn MEETEI MAYEK APUN IYEK # ================================================ -# Canonical_Combining_Class=11 +# Canonical_Combining_Class=CCC11 05B1 ; 11 # Mn HEBREW POINT HATAF SEGOL @@ -1538,7 +1620,7 @@ ABED ; 9 # Mn MEETEI MAYEK APUN IYEK # ================================================ -# Canonical_Combining_Class=12 +# Canonical_Combining_Class=CCC12 05B2 ; 12 # Mn HEBREW POINT HATAF PATAH @@ -1546,7 +1628,7 @@ ABED ; 9 # Mn MEETEI MAYEK APUN IYEK # ================================================ -# Canonical_Combining_Class=13 +# Canonical_Combining_Class=CCC13 05B3 ; 13 # Mn HEBREW POINT HATAF QAMATS 
@@ -1554,7 +1636,7 @@ ABED ; 9 # Mn MEETEI MAYEK APUN IYEK # ================================================ -# Canonical_Combining_Class=14 +# Canonical_Combining_Class=CCC14 05B4 ; 14 # Mn HEBREW POINT HIRIQ @@ -1562,7 +1644,7 @@ ABED ; 9 # Mn MEETEI MAYEK APUN IYEK # ================================================ -# Canonical_Combining_Class=15 +# Canonical_Combining_Class=CCC15 05B5 ; 15 # Mn HEBREW POINT TSERE @@ -1570,7 +1652,7 @@ ABED ; 9 # Mn MEETEI MAYEK APUN IYEK # ================================================ -# Canonical_Combining_Class=16 +# Canonical_Combining_Class=CCC16 05B6 ; 16 # Mn HEBREW POINT SEGOL @@ -1578,7 +1660,7 @@ ABED ; 9 # Mn MEETEI MAYEK APUN IYEK # ================================================ -# Canonical_Combining_Class=17 +# Canonical_Combining_Class=CCC17 05B7 ; 17 # Mn HEBREW POINT PATAH @@ -1586,7 +1668,7 @@ ABED ; 9 # Mn MEETEI MAYEK APUN IYEK # ================================================ -# Canonical_Combining_Class=18 +# Canonical_Combining_Class=CCC18 05B8 ; 18 # Mn HEBREW POINT QAMATS 05C7 ; 18 # Mn HEBREW POINT QAMATS QATAN @@ -1595,7 +1677,7 @@ ABED ; 9 # Mn MEETEI MAYEK APUN IYEK # ================================================ -# Canonical_Combining_Class=19 +# Canonical_Combining_Class=CCC19 05B9..05BA ; 19 # Mn [2] HEBREW POINT HOLAM..HEBREW POINT HOLAM HASER FOR VAV @@ -1603,7 +1685,7 @@ ABED ; 9 # Mn MEETEI MAYEK APUN IYEK # ================================================ -# Canonical_Combining_Class=20 +# Canonical_Combining_Class=CCC20 05BB ; 20 # Mn HEBREW POINT QUBUTS @@ -1611,7 +1693,7 @@ ABED ; 9 # Mn MEETEI MAYEK APUN IYEK # ================================================ -# Canonical_Combining_Class=21 +# Canonical_Combining_Class=CCC21 05BC ; 21 # Mn HEBREW POINT DAGESH OR MAPIQ @@ -1619,7 +1701,7 @@ ABED ; 9 # Mn MEETEI MAYEK APUN IYEK # ================================================ -# Canonical_Combining_Class=22 +# Canonical_Combining_Class=CCC22 05BD ; 22 # Mn HEBREW POINT METEG @@ 
-1627,7 +1709,7 @@ ABED ; 9 # Mn MEETEI MAYEK APUN IYEK # ================================================ -# Canonical_Combining_Class=23 +# Canonical_Combining_Class=CCC23 05BF ; 23 # Mn HEBREW POINT RAFE @@ -1635,7 +1717,7 @@ ABED ; 9 # Mn MEETEI MAYEK APUN IYEK # ================================================ -# Canonical_Combining_Class=24 +# Canonical_Combining_Class=CCC24 05C1 ; 24 # Mn HEBREW POINT SHIN DOT @@ -1643,7 +1725,7 @@ ABED ; 9 # Mn MEETEI MAYEK APUN IYEK # ================================================ -# Canonical_Combining_Class=25 +# Canonical_Combining_Class=CCC25 05C2 ; 25 # Mn HEBREW POINT SIN DOT @@ -1651,7 +1733,7 @@ ABED ; 9 # Mn MEETEI MAYEK APUN IYEK # ================================================ -# Canonical_Combining_Class=26 +# Canonical_Combining_Class=CCC26 FB1E ; 26 # Mn HEBREW POINT JUDEO-SPANISH VARIKA @@ -1659,31 +1741,34 @@ FB1E ; 26 # Mn HEBREW POINT JUDEO-SPANISH VARIKA # ================================================ -# Canonical_Combining_Class=27 +# Canonical_Combining_Class=CCC27 064B ; 27 # Mn ARABIC FATHATAN +08F0 ; 27 # Mn ARABIC OPEN FATHATAN -# Total code points: 1 +# Total code points: 2 # ================================================ -# Canonical_Combining_Class=28 +# Canonical_Combining_Class=CCC28 064C ; 28 # Mn ARABIC DAMMATAN +08F1 ; 28 # Mn ARABIC OPEN DAMMATAN -# Total code points: 1 +# Total code points: 2 # ================================================ -# Canonical_Combining_Class=29 +# Canonical_Combining_Class=CCC29 064D ; 29 # Mn ARABIC KASRATAN +08F2 ; 29 # Mn ARABIC OPEN KASRATAN -# Total code points: 1 +# Total code points: 2 # ================================================ -# Canonical_Combining_Class=30 +# Canonical_Combining_Class=CCC30 0618 ; 30 # Mn ARABIC SMALL FATHA 064E ; 30 # Mn ARABIC FATHA @@ -1692,7 +1777,7 @@ FB1E ; 26 # Mn HEBREW POINT JUDEO-SPANISH VARIKA # ================================================ -# Canonical_Combining_Class=31 +# 
Canonical_Combining_Class=CCC31 0619 ; 31 # Mn ARABIC SMALL DAMMA 064F ; 31 # Mn ARABIC DAMMA @@ -1701,7 +1786,7 @@ FB1E ; 26 # Mn HEBREW POINT JUDEO-SPANISH VARIKA # ================================================ -# Canonical_Combining_Class=32 +# Canonical_Combining_Class=CCC32 061A ; 32 # Mn ARABIC SMALL KASRA 0650 ; 32 # Mn ARABIC KASRA @@ -1710,7 +1795,7 @@ FB1E ; 26 # Mn HEBREW POINT JUDEO-SPANISH VARIKA # ================================================ -# Canonical_Combining_Class=33 +# Canonical_Combining_Class=CCC33 0651 ; 33 # Mn ARABIC SHADDA @@ -1718,7 +1803,7 @@ FB1E ; 26 # Mn HEBREW POINT JUDEO-SPANISH VARIKA # ================================================ -# Canonical_Combining_Class=34 +# Canonical_Combining_Class=CCC34 0652 ; 34 # Mn ARABIC SUKUN @@ -1726,7 +1811,7 @@ FB1E ; 26 # Mn HEBREW POINT JUDEO-SPANISH VARIKA # ================================================ -# Canonical_Combining_Class=35 +# Canonical_Combining_Class=CCC35 0670 ; 35 # Mn ARABIC LETTER SUPERSCRIPT ALEF @@ -1734,7 +1819,7 @@ FB1E ; 26 # Mn HEBREW POINT JUDEO-SPANISH VARIKA # ================================================ -# Canonical_Combining_Class=36 +# Canonical_Combining_Class=CCC36 0711 ; 36 # Mn SYRIAC LETTER SUPERSCRIPT ALAPH @@ -1742,7 +1827,7 @@ FB1E ; 26 # Mn HEBREW POINT JUDEO-SPANISH VARIKA # ================================================ -# Canonical_Combining_Class=84 +# Canonical_Combining_Class=CCC84 0C55 ; 84 # Mn TELUGU LENGTH MARK @@ -1750,7 +1835,7 @@ FB1E ; 26 # Mn HEBREW POINT JUDEO-SPANISH VARIKA # ================================================ -# Canonical_Combining_Class=91 +# Canonical_Combining_Class=CCC91 0C56 ; 91 # Mn TELUGU AI LENGTH MARK @@ -1758,7 +1843,7 @@ FB1E ; 26 # Mn HEBREW POINT JUDEO-SPANISH VARIKA # ================================================ -# Canonical_Combining_Class=103 +# Canonical_Combining_Class=CCC103 0E38..0E39 ; 103 # Mn [2] THAI CHARACTER SARA U..THAI CHARACTER SARA UU @@ -1766,7 +1851,7 @@ FB1E ; 26 # Mn 
HEBREW POINT JUDEO-SPANISH VARIKA # ================================================ -# Canonical_Combining_Class=107 +# Canonical_Combining_Class=CCC107 0E48..0E4B ; 107 # Mn [4] THAI CHARACTER MAI EK..THAI CHARACTER MAI CHATTAWA @@ -1774,7 +1859,7 @@ FB1E ; 26 # Mn HEBREW POINT JUDEO-SPANISH VARIKA # ================================================ -# Canonical_Combining_Class=118 +# Canonical_Combining_Class=CCC118 0EB8..0EB9 ; 118 # Mn [2] LAO VOWEL SIGN U..LAO VOWEL SIGN UU @@ -1782,7 +1867,7 @@ FB1E ; 26 # Mn HEBREW POINT JUDEO-SPANISH VARIKA # ================================================ -# Canonical_Combining_Class=122 +# Canonical_Combining_Class=CCC122 0EC8..0ECB ; 122 # Mn [4] LAO TONE MAI EK..LAO TONE MAI CATAWA @@ -1790,7 +1875,7 @@ FB1E ; 26 # Mn HEBREW POINT JUDEO-SPANISH VARIKA # ================================================ -# Canonical_Combining_Class=129 +# Canonical_Combining_Class=CCC129 0F71 ; 129 # Mn TIBETAN VOWEL SIGN AA @@ -1798,7 +1883,7 @@ FB1E ; 26 # Mn HEBREW POINT JUDEO-SPANISH VARIKA # ================================================ -# Canonical_Combining_Class=130 +# Canonical_Combining_Class=CCC130 0F72 ; 130 # Mn TIBETAN VOWEL SIGN I 0F7A..0F7D ; 130 # Mn [4] TIBETAN VOWEL SIGN E..TIBETAN VOWEL SIGN OO @@ -1808,7 +1893,7 @@ FB1E ; 26 # Mn HEBREW POINT JUDEO-SPANISH VARIKA # ================================================ -# Canonical_Combining_Class=132 +# Canonical_Combining_Class=CCC133 0F74 ; 132 # Mn TIBETAN VOWEL SIGN U @@ -1887,6 +1972,11 @@ FB1E ; 26 # Mn HEBREW POINT JUDEO-SPANISH VARIKA 0748 ; 220 # Mn SYRIAC OBLIQUE LINE BELOW 07F2 ; 220 # Mn NKO COMBINING NASALIZATION MARK 0859..085B ; 220 # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK +08E6 ; 220 # Mn ARABIC CURLY KASRA +08E9 ; 220 # Mn ARABIC CURLY KASRATAN +08ED..08EF ; 220 # Mn [3] ARABIC TONE ONE DOT BELOW..ARABIC TONE LOOP BELOW +08F6 ; 220 # Mn ARABIC KASRA WITH DOT BELOW +08F9..08FA ; 220 # Mn [2] ARABIC LEFT ARROWHEAD BELOW..ARABIC RIGHT 
ARROWHEAD BELOW 0952 ; 220 # Mn DEVANAGARI STRESS SIGN ANUDATTA 0F18..0F19 ; 220 # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS 0F35 ; 220 # Mn TIBETAN MARK NGAS BZUNG NYI ZLA @@ -1915,7 +2005,7 @@ AAB4 ; 220 # Mn TAI VIET VOWEL U 1D17B..1D182 ; 220 # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE 1D18A..1D18B ; 220 # Mn [2] MUSICAL SYMBOL COMBINING DOUBLE TONGUE..MUSICAL SYMBOL COMBINING TRIPLE TONGUE -# Total code points: 121 +# Total code points: 129 # ================================================ @@ -1932,7 +2022,7 @@ AAB4 ; 220 # Mn TAI VIET VOWEL U # Canonical_Combining_Class=Left -302E..302F ; 224 # Mn [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK +302E..302F ; 224 # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK # Total code points: 2 @@ -1999,6 +2089,12 @@ AAB4 ; 220 # Mn TAI VIET VOWEL U 081B..0823 ; 230 # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A 0825..0827 ; 230 # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U 0829..082D ; 230 # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA +08E4..08E5 ; 230 # Mn [2] ARABIC CURLY FATHA..ARABIC CURLY DAMMA +08E7..08E8 ; 230 # Mn [2] ARABIC CURLY FATHATAN..ARABIC CURLY DAMMATAN +08EA..08EC ; 230 # Mn [3] ARABIC TONE ONE DOT ABOVE..ARABIC TONE LOOP ABOVE +08F3..08F5 ; 230 # Mn [3] ARABIC SMALL HIGH WAW..ARABIC FATHA WITH DOT ABOVE +08F7..08F8 ; 230 # Mn [2] ARABIC LEFT ARROWHEAD ABOVE..ARABIC RIGHT ARROWHEAD ABOVE +08FB..08FE ; 230 # Mn [4] ARABIC DOUBLE RIGHT ARROWHEAD ABOVE..ARABIC DAMMA WITH DOT 0951 ; 230 # Mn DEVANAGARI STRESS SIGN UDATTA 0953..0954 ; 230 # Mn [2] DEVANAGARI GRAVE ACCENT..DEVANAGARI ACUTE ACCENT 0F82..0F83 ; 230 # Mn [2] TIBETAN SIGN NYI ZLA NAA DA..TIBETAN SIGN SNA LDAN @@ -2013,6 +2109,7 @@ AAB4 ; 220 # Mn TAI VIET VOWEL U 1CD0..1CD2 ; 230 # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA 1CDA..1CDB ; 230 # Mn [2] VEDIC TONE DOUBLE SVARITA..VEDIC TONE TRIPLE 
SVARITA 1CE0 ; 230 # Mn VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA +1CF4 ; 230 # Mn VEDIC TONE CANDRA ABOVE 1DC0..1DC1 ; 230 # Mn [2] COMBINING DOTTED GRAVE ACCENT..COMBINING DOTTED ACUTE ACCENT 1DC3..1DC9 ; 230 # Mn [7] COMBINING SUSPENSION MARK..COMBINING ACUTE-GRAVE-ACUTE 1DCB..1DCC ; 230 # Mn [2] COMBINING BREVE-MACRON..COMBINING MACRON-BREVE @@ -2028,7 +2125,8 @@ AAB4 ; 220 # Mn TAI VIET VOWEL U 2CEF..2CF1 ; 230 # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS 2DE0..2DFF ; 230 # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS A66F ; 230 # Mn COMBINING CYRILLIC VZMET -A67C..A67D ; 230 # Mn [2] COMBINING CYRILLIC KAVYKA..COMBINING CYRILLIC PAYEROK +A674..A67D ; 230 # Mn [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK +A69F ; 230 # Mn COMBINING CYRILLIC LETTER IOTIFIED E A6F0..A6F1 ; 230 # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS A8E0..A8F1 ; 230 # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA AAB0 ; 230 # Mn TAI VIET MAI KANG @@ -2039,11 +2137,12 @@ AAC1 ; 230 # Mn TAI VIET TONE MAI THO FE20..FE26 ; 230 # Mn [7] COMBINING LIGATURE LEFT HALF..COMBINING CONJOINING MACRON 10A0F ; 230 # Mn KHAROSHTHI SIGN VISARGA 10A38 ; 230 # Mn KHAROSHTHI SIGN BAR ABOVE +11100..11102 ; 230 # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA 1D185..1D189 ; 230 # Mn [5] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING BEND 1D1AA..1D1AD ; 230 # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO 1D242..1D244 ; 230 # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME -# Total code points: 320 +# Total code points: 349 # ================================================ diff --git a/lib/unicore/extracted/DDecompositionType.txt b/lib/unicore/extracted/DDecompositionType.txt index ed03af79aa..7a40724f0c 100644 --- a/lib/unicore/extracted/DDecompositionType.txt +++ 
b/lib/unicore/extracted/DDecompositionType.txt @@ -1,8 +1,8 @@ -# DerivedDecompositionType-6.0.0.txt -# Date: 2010-05-18, 00:49:11 GMT [MD] +# DerivedDecompositionType-6.1.0.txt +# Date: 2011-07-25, 00:54:13 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ @@ -238,8 +238,7 @@ FA15..FA1E ; Canonical # Lo [10] CJK COMPATIBILITY IDEOGRAPH-FA15..CJK COMPA FA20 ; Canonical # Lo CJK COMPATIBILITY IDEOGRAPH-FA20 FA22 ; Canonical # Lo CJK COMPATIBILITY IDEOGRAPH-FA22 FA25..FA26 ; Canonical # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA25..CJK COMPATIBILITY IDEOGRAPH-FA26 -FA2A..FA2D ; Canonical # Lo [4] CJK COMPATIBILITY IDEOGRAPH-FA2A..CJK COMPATIBILITY IDEOGRAPH-FA2D -FA30..FA6D ; Canonical # Lo [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D +FA2A..FA6D ; Canonical # Lo [68] CJK COMPATIBILITY IDEOGRAPH-FA2A..CJK COMPATIBILITY IDEOGRAPH-FA6D FA70..FAD9 ; Canonical # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 FB1D ; Canonical # Lo HEBREW LETTER YOD WITH HIRIQ FB1F ; Canonical # Lo HEBREW LIGATURE YIDDISH YOD YOD PATAH @@ -252,11 +251,12 @@ FB46..FB4E ; Canonical # Lo [9] HEBREW LETTER TSADI WITH DAGESH..HEBREW LET 1109A ; Canonical # Lo KAITHI LETTER DDDHA 1109C ; Canonical # Lo KAITHI LETTER RHA 110AB ; Canonical # Lo KAITHI LETTER VA +1112E..1112F ; Canonical # Mn [2] CHAKMA VOWEL SIGN O..CHAKMA VOWEL SIGN AU 1D15E..1D164 ; Canonical # So [7] MUSICAL SYMBOL HALF NOTE..MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE 1D1BB..1D1C0 ; Canonical # So [6] MUSICAL SYMBOL MINIMA..MUSICAL SYMBOL FUSA BLACK 2F800..2FA1D ; Canonical # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D -# Total code points: 13221 +# Total code points: 13225 # ================================================ @@ -400,8 +400,41 @@ 
FB29 ; Font # Sm HEBREW LETTER ALTERNATIVE PLUS SIGN 1D7C3 ; Font # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL 1D7C4..1D7CB ; Font # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA 1D7CE..1D7FF ; Font # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE - -# Total code points: 1043 +1EE00..1EE03 ; Font # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; Font # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; Font # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; Font # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; Font # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; Font # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; Font # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; Font # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; Font # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; Font # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; Font # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; Font # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; Font # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; Font # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; Font # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; Font # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; Font # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; Font # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; Font # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; Font # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; Font # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; Font # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; Font # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; Font # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF 
+1EE6C..1EE72 ; Font # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; Font # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; Font # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; Font # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; Font # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; Font # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; Font # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; Font # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; Font # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN + +# Total code points: 1184 # ================================================ @@ -793,10 +826,10 @@ FEFB ; Isolated # Lo ARABIC LIGATURE LAM WITH ALEF ISOLATED FORM # ================================================ -00AA ; Super # L& FEMININE ORDINAL INDICATOR +00AA ; Super # Lo FEMININE ORDINAL INDICATOR 00B2..00B3 ; Super # No [2] SUPERSCRIPT TWO..SUPERSCRIPT THREE 00B9 ; Super # No SUPERSCRIPT ONE -00BA ; Super # L& MASCULINE ORDINAL INDICATOR +00BA ; Super # Lo MASCULINE ORDINAL INDICATOR 02B0..02B8 ; Super # Lm [9] MODIFIER LETTER SMALL H..MODIFIER LETTER SMALL Y 02E0..02E4 ; Super # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP 10FC ; Super # Lm MODIFIER LETTER GEORGIAN NAR @@ -820,18 +853,20 @@ FEFB ; Isolated # Lo ARABIC LIGATURE LAM WITH ALEF ISOLATED FORM 3192..3195 ; Super # No [4] IDEOGRAPHIC ANNOTATION ONE MARK..IDEOGRAPHIC ANNOTATION FOUR MARK 3196..319F ; Super # So [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK A770 ; Super # Lm MODIFIER LETTER US +A7F8..A7F9 ; Super # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER 
LETTER SMALL LIGATURE OE +1F16A..1F16B ; Super # So [2] RAISED MC SIGN..RAISED MD SIGN -# Total code points: 142 +# Total code points: 146 # ================================================ -1D62..1D6A ; Sub # L& [9] LATIN SUBSCRIPT SMALL LETTER I..GREEK SUBSCRIPT SMALL LETTER CHI +1D62..1D6A ; Sub # Lm [9] LATIN SUBSCRIPT SMALL LETTER I..GREEK SUBSCRIPT SMALL LETTER CHI 2080..2089 ; Sub # No [10] SUBSCRIPT ZERO..SUBSCRIPT NINE 208A..208C ; Sub # Sm [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN 208D ; Sub # Ps SUBSCRIPT LEFT PARENTHESIS 208E ; Sub # Pe SUBSCRIPT RIGHT PARENTHESIS 2090..209C ; Sub # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T -2C7C ; Sub # L& LATIN SUBSCRIPT SMALL LETTER J +2C7C ; Sub # Lm LATIN SUBSCRIPT SMALL LETTER J # Total code points: 38 diff --git a/lib/unicore/extracted/DEastAsianWidth.txt b/lib/unicore/extracted/DEastAsianWidth.txt index 778bde9f66..f55967a245 100644 --- a/lib/unicore/extracted/DEastAsianWidth.txt +++ b/lib/unicore/extracted/DEastAsianWidth.txt @@ -1,8 +1,8 @@ -# DerivedEastAsianWidth-6.0.0.txt -# Date: 2010-08-19, 00:48:08 GMT [MD] +# DerivedEastAsianWidth-6.1.0.txt +# Date: 2011-11-27, 05:10:22 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. 
# For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ @@ -118,6 +118,7 @@ 0561..0587 ; N # L& [39] ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LIGATURE ECH YIWN 0589 ; N # Po ARMENIAN FULL STOP 058A ; N # Pd ARMENIAN HYPHEN +058F ; N # Sc ARMENIAN DRAM SIGN 0591..05BD ; N # Mn [45] HEBREW ACCENT ETNAHTA..HEBREW POINT METEG 05BE ; N # Pd HEBREW PUNCTUATION MAQAF 05BF ; N # Mn HEBREW POINT RAFE @@ -130,7 +131,7 @@ 05D0..05EA ; N # Lo [27] HEBREW LETTER ALEF..HEBREW LETTER TAV 05F0..05F2 ; N # Lo [3] HEBREW LIGATURE YIDDISH DOUBLE VAV..HEBREW LIGATURE YIDDISH DOUBLE YOD 05F3..05F4 ; N # Po [2] HEBREW PUNCTUATION GERESH..HEBREW PUNCTUATION GERSHAYIM -0600..0603 ; N # Cf [4] ARABIC NUMBER SIGN..ARABIC SIGN SAFHA +0600..0604 ; N # Cf [5] ARABIC NUMBER SIGN..ARABIC SIGN SAMVAT 0606..0608 ; N # Sm [3] ARABIC-INDIC CUBE ROOT..ARABIC RAY 0609..060A ; N # Po [2] ARABIC-INDIC PER MILLE SIGN..ARABIC-INDIC PER TEN THOUSAND SIGN 060B ; N # Sc AFGHANI SIGN @@ -191,6 +192,9 @@ 0840..0858 ; N # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN 0859..085B ; N # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK 085E ; N # Po MANDAIC PUNCTUATION +08A0 ; N # Lo ARABIC LETTER BEH WITH SMALL V BELOW +08A2..08AC ; N # Lo [11] ARABIC LETTER JEEM WITH TWO DOTS ABOVE..ARABIC LETTER ROHINGYA YEH +08E4..08FE ; N # Mn [27] ARABIC CURLY FATHA..ARABIC DAMMA WITH DOT 0900..0902 ; N # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA 0903 ; N # Mc DEVANAGARI SIGN VISARGA 0904..0939 ; N # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA @@ -280,6 +284,7 @@ 0AE0..0AE1 ; N # Lo [2] GUJARATI LETTER VOCALIC RR..GUJARATI LETTER VOCALIC LL 0AE2..0AE3 ; N # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL 0AE6..0AEF ; N # Nd [10] GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE +0AF0 ; N # Po GUJARATI ABBREVIATION SIGN 0AF1 ; N # Sc GUJARATI RUPEE SIGN 0B01 ; N # Mn ORIYA SIGN CANDRABINDU 
0B02..0B03 ; N # Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA @@ -433,11 +438,13 @@ 0EC6 ; N # Lm LAO KO LA 0EC8..0ECD ; N # Mn [6] LAO TONE MAI EK..LAO NIGGAHITA 0ED0..0ED9 ; N # Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE -0EDC..0EDD ; N # Lo [2] LAO HO NO..LAO HO MO +0EDC..0EDF ; N # Lo [4] LAO HO NO..LAO LETTER KHMU NYO 0F00 ; N # Lo TIBETAN SYLLABLE OM 0F01..0F03 ; N # So [3] TIBETAN MARK GTER YIG MGO TRUNCATED A..TIBETAN MARK GTER YIG MGO -UM GTER TSHEG MA 0F04..0F12 ; N # Po [15] TIBETAN MARK INITIAL YIG MGO MDUN MA..TIBETAN MARK RGYA GRAM SHAD -0F13..0F17 ; N # So [5] TIBETAN MARK CARET -DZUD RTAGS ME LONG CAN..TIBETAN ASTROLOGICAL SIGN SGRA GCAN -CHAR RTAGS +0F13 ; N # So TIBETAN MARK CARET -DZUD RTAGS ME LONG CAN +0F14 ; N # Po TIBETAN MARK GTER TSHEG +0F15..0F17 ; N # So [3] TIBETAN LOGOTYPE SIGN CHAD RTAGS..TIBETAN ASTROLOGICAL SIGN SGRA GCAN -CHAR RTAGS 0F18..0F19 ; N # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS 0F1A..0F1F ; N # So [6] TIBETAN SIGN RDEL DKAR GCIG..TIBETAN SIGN RDEL DKAR RDEL NAG 0F20..0F29 ; N # Nd [10] TIBETAN DIGIT ZERO..TIBETAN DIGIT NINE @@ -506,9 +513,12 @@ 109D ; N # Mn MYANMAR VOWEL SIGN AITON AI 109E..109F ; N # So [2] MYANMAR SYMBOL SHAN ONE..MYANMAR SYMBOL SHAN EXCLAMATION 10A0..10C5 ; N # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; N # L& GEORGIAN CAPITAL LETTER YN +10CD ; N # L& GEORGIAN CAPITAL LETTER AEN 10D0..10FA ; N # Lo [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN 10FB ; N # Po GEORGIAN PARAGRAPH SEPARATOR 10FC ; N # Lm MODIFIER LETTER GEORGIAN NAR +10FD..10FF ; N # Lo [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN 1160..11A2 ; N # Lo [67] HANGUL JUNGSEONG FILLER..HANGUL JUNGSEONG SSANGARAEA 11A8..11F9 ; N # Lo [82] HANGUL JONGSEONG KIYEOK..HANGUL JONGSEONG YEORINHIEUH 1200..1248 ; N # Lo [73] ETHIOPIC SYLLABLE HA..ETHIOPIC SYLLABLE QWA @@ -528,8 +538,7 @@ 1312..1315 ; N # Lo [4] ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE 1318..135A ; N # Lo [67] 
ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA 135D..135F ; N # Mn [3] ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK..ETHIOPIC COMBINING GEMINATION MARK -1360 ; N # So ETHIOPIC SECTION MARK -1361..1368 ; N # Po [8] ETHIOPIC WORDSPACE..ETHIOPIC PARAGRAPH SEPARATOR +1360..1368 ; N # Po [9] ETHIOPIC SECTION MARK..ETHIOPIC PARAGRAPH SEPARATOR 1369..137C ; N # No [20] ETHIOPIC DIGIT ONE..ETHIOPIC NUMBER TEN THOUSAND 1380..138F ; N # Lo [16] ETHIOPIC SYLLABLE SEBATBEIT MWA..ETHIOPIC SYLLABLE PWE 1390..1399 ; N # So [10] ETHIOPIC TONAL MARK YIZET..ETHIOPIC TONAL MARK KURT @@ -557,7 +566,7 @@ 176E..1770 ; N # Lo [3] TAGBANWA LETTER LA..TAGBANWA LETTER SA 1772..1773 ; N # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U 1780..17B3 ; N # Lo [52] KHMER LETTER KA..KHMER INDEPENDENT VOWEL QAU -17B4..17B5 ; N # Cf [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA +17B4..17B5 ; N # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA 17B6 ; N # Mc KHMER VOWEL SIGN AA 17B7..17BD ; N # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA 17BE..17C5 ; N # Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU @@ -653,9 +662,11 @@ 1BA6..1BA7 ; N # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG 1BA8..1BA9 ; N # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG 1BAA ; N # Mc SUNDANESE SIGN PAMAAEH +1BAB ; N # Mn SUNDANESE SIGN VIRAMA +1BAC..1BAD ; N # Mc [2] SUNDANESE CONSONANT SIGN PASANGAN MA..SUNDANESE CONSONANT SIGN PASANGAN WA 1BAE..1BAF ; N # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA 1BB0..1BB9 ; N # Nd [10] SUNDANESE DIGIT ZERO..SUNDANESE DIGIT NINE -1BC0..1BE5 ; N # Lo [38] BATAK LETTER A..BATAK LETTER U +1BBA..1BE5 ; N # Lo [44] SUNDANESE AVAGRAHA..BATAK LETTER U 1BE6 ; N # Mn BATAK SIGN TOMPI 1BE7 ; N # Mc BATAK VOWEL SIGN E 1BE8..1BE9 ; N # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE @@ -677,6 +688,7 @@ 1C5A..1C77 ; N # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH 1C78..1C7D ; N # Lm [6] OL CHIKI MU TTUDDAG..OL 
CHIKI AHAD 1C7E..1C7F ; N # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD +1CC0..1CC7 ; N # Po [8] SUNDANESE PUNCTUATION BINDU SURYA..SUNDANESE PUNCTUATION BINDU BA SATANGA 1CD0..1CD2 ; N # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA 1CD3 ; N # Po VEDIC SIGN NIHSHVASA 1CD4..1CE0 ; N # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA @@ -685,10 +697,12 @@ 1CE9..1CEC ; N # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL 1CED ; N # Mn VEDIC SIGN TIRYAK 1CEE..1CF1 ; N # Lo [4] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ANUSVARA UBHAYATO MUKHA -1CF2 ; N # Mc VEDIC SIGN ARDHAVISARGA +1CF2..1CF3 ; N # Mc [2] VEDIC SIGN ARDHAVISARGA..VEDIC SIGN ROTATED ARDHAVISARGA +1CF4 ; N # Mn VEDIC TONE CANDRA ABOVE +1CF5..1CF6 ; N # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA 1D00..1D2B ; N # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL -1D2C..1D61 ; N # Lm [54] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI -1D62..1D77 ; N # L& [22] LATIN SUBSCRIPT SMALL LETTER I..LATIN SMALL LETTER TURNED G +1D2C..1D6A ; N # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1D6B..1D77 ; N # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G 1D78 ; N # Lm MODIFIER LETTER CYRILLIC EN 1D79..1D9A ; N # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK 1D9B..1DBF ; N # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA @@ -930,9 +944,7 @@ 27C0..27C4 ; N # Sm [5] THREE DIMENSIONAL ANGLE..OPEN SUPERSET 27C5 ; N # Ps LEFT S-SHAPED BAG DELIMITER 27C6 ; N # Pe RIGHT S-SHAPED BAG DELIMITER -27C7..27CA ; N # Sm [4] OR WITH DOT INSIDE..VERTICAL BAR WITH HORIZONTAL STROKE -27CC ; N # Sm LONG DIVISION -27CE..27E5 ; N # Sm [24] SQUARED LOGICAL AND..WHITE SQUARE WITH RIGHTWARDS TICK +27C7..27E5 ; N # Sm [31] OR WITH DOT INSIDE..WHITE SQUARE WITH RIGHTWARDS TICK 27EE ; N # Ps 
MATHEMATICAL LEFT FLATTENED PARENTHESIS 27EF ; N # Pe MATHEMATICAL RIGHT FLATTENED PARENTHESIS 27F0..27FF ; N # Sm [16] UPWARDS QUADRUPLE ARROW..LONG RIGHTWARDS SQUIGGLE ARROW @@ -974,17 +986,20 @@ 2B50..2B54 ; N # So [5] WHITE MEDIUM STAR..WHITE RIGHT-POINTING PENTAGON 2C00..2C2E ; N # L& [47] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE 2C30..2C5E ; N # L& [47] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER LATINATE MYSLITE -2C60..2C7C ; N # L& [29] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN SUBSCRIPT SMALL LETTER J -2C7D ; N # Lm MODIFIER LETTER CAPITAL V +2C60..2C7B ; N # L& [28] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN LETTER SMALL CAPITAL TURNED E +2C7C..2C7D ; N # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V 2C7E..2CE4 ; N # L& [103] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SYMBOL KAI 2CE5..2CEA ; N # So [6] COPTIC SYMBOL MI RO..COPTIC SYMBOL SHIMA SIMA 2CEB..2CEE ; N # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA 2CEF..2CF1 ; N # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS +2CF2..2CF3 ; N # L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI 2CF9..2CFC ; N # Po [4] COPTIC OLD NUBIAN FULL STOP..COPTIC OLD NUBIAN VERSE DIVIDER 2CFD ; N # No COPTIC FRACTION ONE HALF 2CFE..2CFF ; N # Po [2] COPTIC FULL STOP..COPTIC MORPHOLOGICAL DIVIDER 2D00..2D25 ; N # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE -2D30..2D65 ; N # Lo [54] TIFINAGH LETTER YA..TIFINAGH LETTER YAZZ +2D27 ; N # L& GEORGIAN SMALL LETTER YN +2D2D ; N # L& GEORGIAN SMALL LETTER AEN +2D30..2D67 ; N # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO 2D6F ; N # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK 2D70 ; N # Po TIFINAGH SEPARATOR MARK 2D7F ; N # Mn TIFINAGH CONSONANT JOINER @@ -1029,7 +1044,8 @@ 2E29 ; N # Pe RIGHT DOUBLE PARENTHESIS 2E2A..2E2E ; N # Po [5] TWO DOTS OVER ONE DOT PUNCTUATION..REVERSED QUESTION MARK 2E2F ; 
N # Lm VERTICAL TILDE -2E30..2E31 ; N # Po [2] RING POINT..WORD SEPARATOR MIDDLE DOT +2E30..2E39 ; N # Po [10] RING POINT..TOP HALF SECTION SIGN +2E3A..2E3B ; N # Pd [2] TWO-EM DASH..THREE-EM DASH 303F ; N # So IDEOGRAPHIC HALF FILL SPACE 4DC0..4DFF ; N # So [64] HEXAGRAM FOR THE CREATIVE HEAVEN..HEXAGRAM FOR BEFORE COMPLETION A4D0..A4F7 ; N # Lo [40] LISU LETTER BA..LISU LETTER OE @@ -1046,10 +1062,11 @@ A66E ; N # Lo CYRILLIC LETTER MULTIOCULAR O A66F ; N # Mn COMBINING CYRILLIC VZMET A670..A672 ; N # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN A673 ; N # Po SLAVONIC ASTERISK -A67C..A67D ; N # Mn [2] COMBINING CYRILLIC KAVYKA..COMBINING CYRILLIC PAYEROK +A674..A67D ; N # Mn [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK A67E ; N # Po CYRILLIC KAVYKA A67F ; N # Lm CYRILLIC PAYEROK A680..A697 ; N # L& [24] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER SHWE +A69F ; N # Mn COMBINING CYRILLIC LETTER IOTIFIED E A6A0..A6E5 ; N # Lo [70] BAMUM LETTER A..BAMUM LETTER KI A6E6..A6EF ; N # Nl [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM A6F0..A6F1 ; N # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS @@ -1063,8 +1080,9 @@ A771..A787 ; N # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR A788 ; N # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A789..A78A ; N # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN A78B..A78E ; N # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT -A790..A791 ; N # L& [2] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER N WITH DESCENDER -A7A0..A7A9 ; N # L& [10] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN SMALL LETTER S WITH OBLIQUE STROKE +A790..A793 ; N # L& [4] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER C WITH BAR +A7A0..A7AA ; N # L& [11] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN CAPITAL LETTER H WITH HOOK +A7F8..A7F9 ; N # Lm [2] MODIFIER LETTER CAPITAL H WITH 
STROKE..MODIFIER LETTER SMALL LIGATURE OE A7FA ; N # L& LATIN LETTER SMALL CAPITAL TURNED M A7FB..A801 ; N # Lo [7] LATIN EPIGRAPHIC LETTER REVERSED F..SYLOTI NAGRI LETTER I A802 ; N # Mn SYLOTI NAGRI SIGN DVISVARA @@ -1147,6 +1165,15 @@ AAC2 ; N # Lo TAI VIET TONE MAI SONG AADB..AADC ; N # Lo [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG AADD ; N # Lm TAI VIET SYMBOL SAM AADE..AADF ; N # Po [2] TAI VIET SYMBOL HO HOI..TAI VIET SYMBOL KOI KOI +AAE0..AAEA ; N # Lo [11] MEETEI MAYEK LETTER E..MEETEI MAYEK LETTER SSA +AAEB ; N # Mc MEETEI MAYEK VOWEL SIGN II +AAEC..AAED ; N # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI +AAEE..AAEF ; N # Mc [2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU +AAF0..AAF1 ; N # Po [2] MEETEI MAYEK CHEIKHAN..MEETEI MAYEK AHANG KHUDAM +AAF2 ; N # Lo MEETEI MAYEK ANJI +AAF3..AAF4 ; N # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK +AAF5 ; N # Mc MEETEI MAYEK VOWEL SIGN VISARGA +AAF6 ; N # Mn MEETEI MAYEK VIRAMA AB01..AB06 ; N # Lo [6] ETHIOPIC SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO AB09..AB0E ; N # Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO AB11..AB16 ; N # Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO @@ -1196,8 +1223,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 1003F..1004D ; N # Lo [15] LINEAR B SYLLABLE B020 ZO..LINEAR B SYLLABLE B091 TWO 10050..1005D ; N # Lo [14] LINEAR B SYMBOL B018..LINEAR B SYMBOL B089 10080..100FA ; N # Lo [123] LINEAR B IDEOGRAM B100 MAN..LINEAR B IDEOGRAM VESSEL B305 -10100..10101 ; N # Po [2] AEGEAN WORD SEPARATOR LINE..AEGEAN WORD SEPARATOR DOT -10102 ; N # So AEGEAN CHECK MARK +10100..10102 ; N # Po [3] AEGEAN WORD SEPARATOR LINE..AEGEAN CHECK MARK 10107..10133 ; N # No [45] AEGEAN NUMBER ONE..AEGEAN NUMBER NINETY THOUSAND 10137..1013F ; N # So [9] AEGEAN WEIGHT BASE UNIT..AEGEAN MEASURE THIRD SUBUNIT 10140..10174 ; N # Nl [53] GREEK ACROPHONIC ATTIC ONE QUARTER..GREEK ACROPHONIC STRATIAN FIFTY MNAS @@ -1237,6 +1263,8 @@ FFFC ; N 
# So OBJECT REPLACEMENT CHARACTER 1091F ; N # Po PHOENICIAN WORD SEPARATOR 10920..10939 ; N # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C 1093F ; N # Po LYDIAN TRIANGULAR MARK +10980..109B7 ; N # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA +109BE..109BF ; N # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN 10A00 ; N # Lo KHAROSHTHI LETTER A 10A01..10A03 ; N # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R 10A05..10A06 ; N # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O @@ -1277,11 +1305,43 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 110BB..110BC ; N # Po [2] KAITHI ABBREVIATION SIGN..KAITHI ENUMERATION SIGN 110BD ; N # Cf KAITHI NUMBER SIGN 110BE..110C1 ; N # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA +110D0..110E8 ; N # Lo [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE +110F0..110F9 ; N # Nd [10] SORA SOMPENG DIGIT ZERO..SORA SOMPENG DIGIT NINE +11100..11102 ; N # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11103..11126 ; N # Lo [36] CHAKMA LETTER AA..CHAKMA LETTER HAA +11127..1112B ; N # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112C ; N # Mc CHAKMA VOWEL SIGN E +1112D..11134 ; N # Mn [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA +11136..1113F ; N # Nd [10] CHAKMA DIGIT ZERO..CHAKMA DIGIT NINE +11140..11143 ; N # Po [4] CHAKMA SECTION MARK..CHAKMA QUESTION MARK +11180..11181 ; N # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +11182 ; N # Mc SHARADA SIGN VISARGA +11183..111B2 ; N # Lo [48] SHARADA LETTER A..SHARADA LETTER HA +111B3..111B5 ; N # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II +111B6..111BE ; N # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +111BF..111C0 ; N # Mc [2] SHARADA VOWEL SIGN AU..SHARADA SIGN VIRAMA +111C1..111C4 ; N # Lo [4] SHARADA SIGN AVAGRAHA..SHARADA OM +111C5..111C8 ; N # Po [4] SHARADA DANDA..SHARADA SEPARATOR +111D0..111D9 ; N # Nd [10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE +11680..116AA ; N # Lo [43] TAKRI LETTER 
A..TAKRI LETTER RRA +116AB ; N # Mn TAKRI SIGN ANUSVARA +116AC ; N # Mc TAKRI SIGN VISARGA +116AD ; N # Mn TAKRI VOWEL SIGN AA +116AE..116AF ; N # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II +116B0..116B5 ; N # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +116B6 ; N # Mc TAKRI SIGN VIRAMA +116B7 ; N # Mn TAKRI SIGN NUKTA +116C0..116C9 ; N # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE 12000..1236E ; N # Lo [879] CUNEIFORM SIGN A..CUNEIFORM SIGN ZUM 12400..12462 ; N # Nl [99] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN OLD ASSYRIAN ONE QUARTER 12470..12473 ; N # Po [4] CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER..CUNEIFORM PUNCTUATION SIGN DIAGONAL TRICOLON 13000..1342E ; N # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032 16800..16A38 ; N # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ +16F00..16F44 ; N # Lo [69] MIAO LETTER PA..MIAO LETTER HHA +16F50 ; N # Lo MIAO LETTER NASALIZATION +16F51..16F7E ; N # Mc [46] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN NG +16F8F..16F92 ; N # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW +16F93..16F9F ; N # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 1D000..1D0F5 ; N # So [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO 1D100..1D126 ; N # So [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2 1D129..1D164 ; N # So [60] MUSICAL SYMBOL MULTIPLE MEASURE REST..MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE @@ -1342,6 +1402,40 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 1D7C3 ; N # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL 1D7C4..1D7CB ; N # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA 1D7CE..1D7FF ; N # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE +1EE00..1EE03 ; N # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; N # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; N # Lo [2] ARABIC 
MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; N # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; N # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; N # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; N # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; N # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; N # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; N # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; N # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; N # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; N # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; N # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; N # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; N # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; N # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; N # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; N # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; N # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; N # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; N # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; N # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; N # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; N # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; N # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; N # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; N # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; N # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; N # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; N # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL 
DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; N # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; N # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN +1EEF0..1EEF1 ; N # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL 1F000..1F02B ; N # So [44] MAHJONG TILE EAST WIND..MAHJONG TILE BACK 1F030..1F093 ; N # So [100] DOMINO TILE HORIZONTAL BACK..DOMINO TILE VERTICAL-06-06 1F0A0..1F0AE ; N # So [15] PLAYING CARD BACK..PLAYING CARD KING OF SPADES @@ -1349,6 +1443,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 1F0C1..1F0CF ; N # So [15] PLAYING CARD ACE OF DIAMONDS..PLAYING CARD BLACK JOKER 1F0D1..1F0DF ; N # So [15] PLAYING CARD ACE OF CLUBS..PLAYING CARD WHITE JOKER 1F12E ; N # So CIRCLED WZ +1F16A..1F16B ; N # So [2] RAISED MC SIGN..RAISED MD SIGN 1F1E6..1F1FF ; N # So [26] REGIONAL INDICATOR SYMBOL LETTER A..REGIONAL INDICATOR SYMBOL LETTER Z 1F300..1F320 ; N # So [33] CYCLONE..SHOOTING STAR 1F330..1F335 ; N # So [6] CHESTNUT..CACTUS @@ -1362,26 +1457,16 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 1F442..1F4F7 ; N # So [182] EAR..CAMERA 1F4F9..1F4FC ; N # So [4] VIDEO CAMERA..VIDEOCASSETTE 1F500..1F53D ; N # So [62] TWISTED RIGHTWARDS ARROWS..DOWN-POINTING SMALL RED TRIANGLE +1F540..1F543 ; N # So [4] CIRCLED CROSS POMMEE..NOTCHED LEFT SEMICIRCLE WITH THREE DOTS 1F550..1F567 ; N # So [24] CLOCK FACE ONE OCLOCK..CLOCK FACE TWELVE-THIRTY -1F5FB..1F5FF ; N # So [5] MOUNT FUJI..MOYAI -1F601..1F610 ; N # So [16] GRINNING FACE WITH SMILING EYES..NEUTRAL FACE -1F612..1F614 ; N # So [3] UNAMUSED FACE..PENSIVE FACE -1F616 ; N # So CONFOUNDED FACE -1F618 ; N # So FACE THROWING A KISS -1F61A ; N # So KISSING FACE WITH CLOSED EYES -1F61C..1F61E ; N # So [3] FACE WITH STUCK-OUT TONGUE AND WINKING EYE..DISAPPOINTED FACE -1F620..1F625 ; N # So [6] ANGRY FACE..DISAPPOINTED BUT RELIEVED FACE -1F628..1F62B ; N # So [4] FEARFUL FACE..TIRED FACE 
-1F62D ; N # So LOUDLY CRYING FACE -1F630..1F633 ; N # So [4] FACE WITH OPEN MOUTH AND COLD SWEAT..FLUSHED FACE -1F635..1F640 ; N # So [12] DIZZY FACE..WEARY CAT FACE +1F5FB..1F640 ; N # So [70] MOUNT FUJI..WEARY CAT FACE 1F645..1F64F ; N # So [11] FACE WITH NO GOOD GESTURE..PERSON WITH FOLDED HANDS 1F680..1F6C5 ; N # So [70] ROCKET..LEFT LUGGAGE 1F700..1F773 ; N # So [116] ALCHEMICAL SYMBOL FOR QUINTESSENCE..ALCHEMICAL SYMBOL FOR HALF OUNCE E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 783647 code points not listed here. +# The above property value applies to 782918 code points not listed here. # Total code points: 801811 # ================================================ @@ -1390,20 +1475,19 @@ E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG 00A1 ; A # Po INVERTED EXCLAMATION MARK 00A4 ; A # Sc CURRENCY SIGN -00A7 ; A # So SECTION SIGN +00A7 ; A # Po SECTION SIGN 00A8 ; A # Sk DIAERESIS -00AA ; A # L& FEMININE ORDINAL INDICATOR +00AA ; A # Lo FEMININE ORDINAL INDICATOR 00AD ; A # Cf SOFT HYPHEN 00AE ; A # So REGISTERED SIGN 00B0 ; A # So DEGREE SIGN 00B1 ; A # Sm PLUS-MINUS SIGN 00B2..00B3 ; A # No [2] SUPERSCRIPT TWO..SUPERSCRIPT THREE 00B4 ; A # Sk ACUTE ACCENT -00B6 ; A # So PILCROW SIGN -00B7 ; A # Po MIDDLE DOT +00B6..00B7 ; A # Po [2] PILCROW SIGN..MIDDLE DOT 00B8 ; A # Sk CEDILLA 00B9 ; A # No SUPERSCRIPT ONE -00BA ; A # L& MASCULINE ORDINAL INDICATOR +00BA ; A # Lo MASCULINE ORDINAL INDICATOR 00BC..00BE ; A # No [3] VULGAR FRACTION ONE QUARTER..VULGAR FRACTION THREE QUARTERS 00BF ; A # Po INVERTED QUESTION MARK 00C6 ; A # L& LATIN CAPITAL LETTER AE @@ -1570,7 +1654,7 @@ E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG 2757 ; A # So HEAVY EXCLAMATION MARK SYMBOL 2776..277F ; A # No [10] DINGBAT NEGATIVE CIRCLED DIGIT ONE..DINGBAT NEGATIVE CIRCLED NUMBER TEN 2B55..2B59 ; A # So [5] HEAVY LARGE CIRCLE..HEAVY CIRCLED SALTIRE -3248..324F ; A # So [8] CIRCLED NUMBER TEN ON BLACK 
SQUARE..CIRCLED NUMBER EIGHTY ON BLACK SQUARE +3248..324F ; A # No [8] CIRCLED NUMBER TEN ON BLACK SQUARE..CIRCLED NUMBER EIGHTY ON BLACK SQUARE E000..F8FF ; A # Co [6400] <private-use-E000>..<private-use-F8FF> FE00..FE0F ; A # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16 FFFD ; A # So REPLACEMENT CHARACTER @@ -1650,7 +1734,8 @@ FFED..FFEE ; H # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 301E..301F ; W # Pe [2] DOUBLE PRIME QUOTATION MARK..LOW DOUBLE PRIME QUOTATION MARK 3020 ; W # So POSTAL MARK FACE 3021..3029 ; W # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE -302A..302F ; W # Mn [6] IDEOGRAPHIC LEVEL TONE MARK..HANGUL DOUBLE DOT TONE MARK +302A..302D ; W # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK +302E..302F ; W # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK 3030 ; W # Pd WAVY DASH 3031..3035 ; W # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF 3036..3037 ; W # So [2] CIRCLED POSTAL MARK..IDEOGRAPHIC TELEGRAPH LINE FEED SEPARATOR SYMBOL @@ -1690,8 +1775,8 @@ FFED..FFEE ; H # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 3300..33FF ; W # So [256] SQUARE APAATO..SQUARE GAL 3400..4DB5 ; W # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5 4DB6..4DBF ; W # Cn [10] <reserved-4DB6>..<reserved-4DBF> -4E00..9FCB ; W # Lo [20940] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCB -9FCC..9FFF ; W # Cn [52] <reserved-9FCC>..<reserved-9FFF> +4E00..9FCC ; W # Lo [20941] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCC +9FCD..9FFF ; W # Cn [51] <reserved-9FCD>..<reserved-9FFF> A000..A014 ; W # Lo [21] YI SYLLABLE IT..YI SYLLABLE E A015 ; W # Lm YI SYLLABLE WU A016..A48C ; W # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR @@ -1700,9 +1785,7 @@ A960..A97C ; W # Lo [29] HANGUL CHOSEONG TIKEUT-MIEUM..HANGUL CHOSEONG SSANG AC00..D7A3 ; W # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH D7B0..D7C6 ; W # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG 
ARAEA-E D7CB..D7FB ; W # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH -F900..FA2D ; W # Lo [302] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA2D -FA2E..FA2F ; W # Cn [2] <reserved-FA2E>..<reserved-FA2F> -FA30..FA6D ; W # Lo [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D +F900..FA6D ; W # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D FA6E..FA6F ; W # Cn [2] <reserved-FA6E>..<reserved-FA6F> FA70..FAD9 ; W # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 FADA..FAFF ; W # Cn [38] <reserved-FADA>..<reserved-FAFF> diff --git a/lib/unicore/extracted/DGeneralCategory.txt b/lib/unicore/extracted/DGeneralCategory.txt index ee2bbb7bbd..12a346f753 100644 --- a/lib/unicore/extracted/DGeneralCategory.txt +++ b/lib/unicore/extracted/DGeneralCategory.txt @@ -1,8 +1,8 @@ -# DerivedGeneralCategory-6.0.0.txt -# Date: 2010-08-19, 00:48:09 GMT [MD] +# DerivedGeneralCategory-6.1.0.txt +# Date: 2011-11-27, 05:10:22 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. 
# For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ @@ -23,11 +23,12 @@ 0557..0558 ; Cn # [2] <reserved-0557>..<reserved-0558> 0560 ; Cn # <reserved-0560> 0588 ; Cn # <reserved-0588> -058B..0590 ; Cn # [6] <reserved-058B>..<reserved-0590> +058B..058E ; Cn # [4] <reserved-058B>..<reserved-058E> +0590 ; Cn # <reserved-0590> 05C8..05CF ; Cn # [8] <reserved-05C8>..<reserved-05CF> 05EB..05EF ; Cn # [5] <reserved-05EB>..<reserved-05EF> 05F5..05FF ; Cn # [11] <reserved-05F5>..<reserved-05FF> -0604..0605 ; Cn # [2] <reserved-0604>..<reserved-0605> +0605 ; Cn # <reserved-0605> 061C..061D ; Cn # [2] <reserved-061C>..<reserved-061D> 070E ; Cn # <reserved-070E> 074B..074C ; Cn # [2] <reserved-074B>..<reserved-074C> @@ -36,7 +37,10 @@ 082E..082F ; Cn # [2] <reserved-082E>..<reserved-082F> 083F ; Cn # <reserved-083F> 085C..085D ; Cn # [2] <reserved-085C>..<reserved-085D> -085F..08FF ; Cn # [161] <reserved-085F>..<reserved-08FF> +085F..089F ; Cn # [65] <reserved-085F>..<reserved-089F> +08A1 ; Cn # <reserved-08A1> +08AD..08E3 ; Cn # [55] <reserved-08AD>..<reserved-08E3> +08FF ; Cn # <reserved-08FF> 0978 ; Cn # <reserved-0978> 0980 ; Cn # <reserved-0980> 0984 ; Cn # <reserved-0984> @@ -81,7 +85,6 @@ 0ACE..0ACF ; Cn # [2] <reserved-0ACE>..<reserved-0ACF> 0AD1..0ADF ; Cn # [15] <reserved-0AD1>..<reserved-0ADF> 0AE4..0AE5 ; Cn # [2] <reserved-0AE4>..<reserved-0AE5> -0AF0 ; Cn # <reserved-0AF0> 0AF2..0B00 ; Cn # [15] <reserved-0AF2>..<reserved-0B00> 0B04 ; Cn # <reserved-0B04> 0B0D..0B0E ; Cn # [2] <reserved-0B0D>..<reserved-0B0E> @@ -182,15 +185,16 @@ 0EC7 ; Cn # <reserved-0EC7> 0ECE..0ECF ; Cn # [2] <reserved-0ECE>..<reserved-0ECF> 0EDA..0EDB ; Cn # [2] <reserved-0EDA>..<reserved-0EDB> -0EDE..0EFF ; Cn # [34] <reserved-0EDE>..<reserved-0EFF> +0EE0..0EFF ; Cn # [32] <reserved-0EE0>..<reserved-0EFF> 0F48 ; Cn # <reserved-0F48> 0F6D..0F70 ; Cn # [4] <reserved-0F6D>..<reserved-0F70> 0F98 ; Cn # <reserved-0F98> 
0FBD ; Cn # <reserved-0FBD> 0FCD ; Cn # <reserved-0FCD> 0FDB..0FFF ; Cn # [37] <reserved-0FDB>..<reserved-0FFF> -10C6..10CF ; Cn # [10] <reserved-10C6>..<reserved-10CF> -10FD..10FF ; Cn # [3] <reserved-10FD>..<reserved-10FF> +10C6 ; Cn # <reserved-10C6> +10C8..10CC ; Cn # [5] <reserved-10C8>..<reserved-10CC> +10CE..10CF ; Cn # [2] <reserved-10CE>..<reserved-10CF> 1249 ; Cn # <reserved-1249> 124E..124F ; Cn # [2] <reserved-124E>..<reserved-124F> 1257 ; Cn # <reserved-1257> @@ -244,13 +248,12 @@ 1AAE..1AFF ; Cn # [82] <reserved-1AAE>..<reserved-1AFF> 1B4C..1B4F ; Cn # [4] <reserved-1B4C>..<reserved-1B4F> 1B7D..1B7F ; Cn # [3] <reserved-1B7D>..<reserved-1B7F> -1BAB..1BAD ; Cn # [3] <reserved-1BAB>..<reserved-1BAD> -1BBA..1BBF ; Cn # [6] <reserved-1BBA>..<reserved-1BBF> 1BF4..1BFB ; Cn # [8] <reserved-1BF4>..<reserved-1BFB> 1C38..1C3A ; Cn # [3] <reserved-1C38>..<reserved-1C3A> 1C4A..1C4C ; Cn # [3] <reserved-1C4A>..<reserved-1C4C> -1C80..1CCF ; Cn # [80] <reserved-1C80>..<reserved-1CCF> -1CF3..1CFF ; Cn # [13] <reserved-1CF3>..<reserved-1CFF> +1C80..1CBF ; Cn # [64] <reserved-1C80>..<reserved-1CBF> +1CC8..1CCF ; Cn # [8] <reserved-1CC8>..<reserved-1CCF> +1CF7..1CFF ; Cn # [9] <reserved-1CF7>..<reserved-1CFF> 1DE7..1DFB ; Cn # [21] <reserved-1DE7>..<reserved-1DFB> 1F16..1F17 ; Cn # [2] <reserved-1F16>..<reserved-1F17> 1F1E..1F1F ; Cn # [2] <reserved-1F1E>..<reserved-1F1F> @@ -279,15 +282,15 @@ 2427..243F ; Cn # [25] <reserved-2427>..<reserved-243F> 244B..245F ; Cn # [21] <reserved-244B>..<reserved-245F> 2700 ; Cn # <reserved-2700> -27CB ; Cn # <reserved-27CB> -27CD ; Cn # <reserved-27CD> 2B4D..2B4F ; Cn # [3] <reserved-2B4D>..<reserved-2B4F> 2B5A..2BFF ; Cn # [166] <reserved-2B5A>..<reserved-2BFF> 2C2F ; Cn # <reserved-2C2F> 2C5F ; Cn # <reserved-2C5F> -2CF2..2CF8 ; Cn # [7] <reserved-2CF2>..<reserved-2CF8> -2D26..2D2F ; Cn # [10] <reserved-2D26>..<reserved-2D2F> -2D66..2D6E ; Cn # [9] <reserved-2D66>..<reserved-2D6E> +2CF4..2CF8 ; Cn # [5] 
<reserved-2CF4>..<reserved-2CF8> +2D26 ; Cn # <reserved-2D26> +2D28..2D2C ; Cn # [5] <reserved-2D28>..<reserved-2D2C> +2D2E..2D2F ; Cn # [2] <reserved-2D2E>..<reserved-2D2F> +2D68..2D6E ; Cn # [7] <reserved-2D68>..<reserved-2D6E> 2D71..2D7E ; Cn # [14] <reserved-2D71>..<reserved-2D7E> 2D97..2D9F ; Cn # [9] <reserved-2D97>..<reserved-2D9F> 2DA7 ; Cn # <reserved-2DA7> @@ -298,7 +301,7 @@ 2DCF ; Cn # <reserved-2DCF> 2DD7 ; Cn # <reserved-2DD7> 2DDF ; Cn # <reserved-2DDF> -2E32..2E7F ; Cn # [78] <reserved-2E32>..<reserved-2E7F> +2E3C..2E7F ; Cn # [68] <reserved-2E3C>..<reserved-2E7F> 2E9A ; Cn # <reserved-2E9A> 2EF4..2EFF ; Cn # [12] <reserved-2EF4>..<reserved-2EFF> 2FD6..2FEF ; Cn # [26] <reserved-2FD6>..<reserved-2FEF> @@ -313,16 +316,15 @@ 321F ; Cn # <reserved-321F> 32FF ; Cn # <reserved-32FF> 4DB6..4DBF ; Cn # [10] <reserved-4DB6>..<reserved-4DBF> -9FCC..9FFF ; Cn # [52] <reserved-9FCC>..<reserved-9FFF> +9FCD..9FFF ; Cn # [51] <reserved-9FCD>..<reserved-9FFF> A48D..A48F ; Cn # [3] <reserved-A48D>..<reserved-A48F> A4C7..A4CF ; Cn # [9] <reserved-A4C7>..<reserved-A4CF> A62C..A63F ; Cn # [20] <reserved-A62C>..<reserved-A63F> -A674..A67B ; Cn # [8] <reserved-A674>..<reserved-A67B> -A698..A69F ; Cn # [8] <reserved-A698>..<reserved-A69F> +A698..A69E ; Cn # [7] <reserved-A698>..<reserved-A69E> A6F8..A6FF ; Cn # [8] <reserved-A6F8>..<reserved-A6FF> A78F ; Cn # <reserved-A78F> -A792..A79F ; Cn # [14] <reserved-A792>..<reserved-A79F> -A7AA..A7F9 ; Cn # [80] <reserved-A7AA>..<reserved-A7F9> +A794..A79F ; Cn # [12] <reserved-A794>..<reserved-A79F> +A7AB..A7F7 ; Cn # [77] <reserved-A7AB>..<reserved-A7F7> A82C..A82F ; Cn # [4] <reserved-A82C>..<reserved-A82F> A83A..A83F ; Cn # [6] <reserved-A83A>..<reserved-A83F> A878..A87F ; Cn # [8] <reserved-A878>..<reserved-A87F> @@ -339,7 +341,7 @@ AA4E..AA4F ; Cn # [2] <reserved-AA4E>..<reserved-AA4F> AA5A..AA5B ; Cn # [2] <reserved-AA5A>..<reserved-AA5B> AA7C..AA7F ; Cn # [4] <reserved-AA7C>..<reserved-AA7F> AAC3..AADA ; Cn # [24] 
<reserved-AAC3>..<reserved-AADA> -AAE0..AB00 ; Cn # [33] <reserved-AAE0>..<reserved-AB00> +AAF7..AB00 ; Cn # [10] <reserved-AAF7>..<reserved-AB00> AB07..AB08 ; Cn # [2] <reserved-AB07>..<reserved-AB08> AB0F..AB10 ; Cn # [2] <reserved-AB0F>..<reserved-AB10> AB17..AB1F ; Cn # [9] <reserved-AB17>..<reserved-AB1F> @@ -350,7 +352,6 @@ ABFA..ABFF ; Cn # [6] <reserved-ABFA>..<reserved-ABFF> D7A4..D7AF ; Cn # [12] <reserved-D7A4>..<reserved-D7AF> D7C7..D7CA ; Cn # [4] <reserved-D7C7>..<reserved-D7CA> D7FC..D7FF ; Cn # [4] <reserved-D7FC>..<reserved-D7FF> -FA2E..FA2F ; Cn # [2] <reserved-FA2E>..<reserved-FA2F> FA6E..FA6F ; Cn # [2] <reserved-FA6E>..<reserved-FA6F> FADA..FAFF ; Cn # [38] <reserved-FADA>..<reserved-FAFF> FB07..FB12 ; Cn # [12] <reserved-FB07>..<reserved-FB12> @@ -412,7 +413,9 @@ FFFE..FFFF ; Cn # [2] <noncharacter-FFFE>..<noncharacter-FFFF> 10860..108FF ; Cn # [160] <reserved-10860>..<reserved-108FF> 1091C..1091E ; Cn # [3] <reserved-1091C>..<reserved-1091E> 1093A..1093E ; Cn # [5] <reserved-1093A>..<reserved-1093E> -10940..109FF ; Cn # [192] <reserved-10940>..<reserved-109FF> +10940..1097F ; Cn # [64] <reserved-10940>..<reserved-1097F> +109B8..109BD ; Cn # [6] <reserved-109B8>..<reserved-109BD> +109C0..109FF ; Cn # [64] <reserved-109C0>..<reserved-109FF> 10A04 ; Cn # <reserved-10A04> 10A07..10A0B ; Cn # [5] <reserved-10A07>..<reserved-10A0B> 10A14 ; Cn # <reserved-10A14> @@ -430,12 +433,23 @@ FFFE..FFFF ; Cn # [2] <noncharacter-FFFE>..<noncharacter-FFFF> 10E7F..10FFF ; Cn # [385] <reserved-10E7F>..<reserved-10FFF> 1104E..11051 ; Cn # [4] <reserved-1104E>..<reserved-11051> 11070..1107F ; Cn # [16] <reserved-11070>..<reserved-1107F> -110C2..11FFF ; Cn # [3902] <reserved-110C2>..<reserved-11FFF> +110C2..110CF ; Cn # [14] <reserved-110C2>..<reserved-110CF> +110E9..110EF ; Cn # [7] <reserved-110E9>..<reserved-110EF> +110FA..110FF ; Cn # [6] <reserved-110FA>..<reserved-110FF> +11135 ; Cn # <reserved-11135> +11144..1117F ; Cn # [60] 
<reserved-11144>..<reserved-1117F> +111C9..111CF ; Cn # [7] <reserved-111C9>..<reserved-111CF> +111DA..1167F ; Cn # [1190] <reserved-111DA>..<reserved-1167F> +116B8..116BF ; Cn # [8] <reserved-116B8>..<reserved-116BF> +116CA..11FFF ; Cn # [2358] <reserved-116CA>..<reserved-11FFF> 1236F..123FF ; Cn # [145] <reserved-1236F>..<reserved-123FF> 12463..1246F ; Cn # [13] <reserved-12463>..<reserved-1246F> 12474..12FFF ; Cn # [2956] <reserved-12474>..<reserved-12FFF> 1342F..167FF ; Cn # [13265] <reserved-1342F>..<reserved-167FF> -16A39..1AFFF ; Cn # [17863] <reserved-16A39>..<reserved-1AFFF> +16A39..16EFF ; Cn # [1223] <reserved-16A39>..<reserved-16EFF> +16F45..16F4F ; Cn # [11] <reserved-16F45>..<reserved-16F4F> +16F7F..16F8E ; Cn # [16] <reserved-16F7F>..<reserved-16F8E> +16FA0..1AFFF ; Cn # [16480] <reserved-16FA0>..<reserved-1AFFF> 1B002..1CFFF ; Cn # [8190] <reserved-1B002>..<reserved-1CFFF> 1D0F6..1D0FF ; Cn # [10] <reserved-1D0F6>..<reserved-1D0FF> 1D127..1D128 ; Cn # [2] <reserved-1D127>..<reserved-1D128> @@ -463,7 +477,41 @@ FFFE..FFFF ; Cn # [2] <noncharacter-FFFE>..<noncharacter-FFFF> 1D551 ; Cn # <reserved-1D551> 1D6A6..1D6A7 ; Cn # [2] <reserved-1D6A6>..<reserved-1D6A7> 1D7CC..1D7CD ; Cn # [2] <reserved-1D7CC>..<reserved-1D7CD> -1D800..1EFFF ; Cn # [6144] <reserved-1D800>..<reserved-1EFFF> +1D800..1EDFF ; Cn # [5632] <reserved-1D800>..<reserved-1EDFF> +1EE04 ; Cn # <reserved-1EE04> +1EE20 ; Cn # <reserved-1EE20> +1EE23 ; Cn # <reserved-1EE23> +1EE25..1EE26 ; Cn # [2] <reserved-1EE25>..<reserved-1EE26> +1EE28 ; Cn # <reserved-1EE28> +1EE33 ; Cn # <reserved-1EE33> +1EE38 ; Cn # <reserved-1EE38> +1EE3A ; Cn # <reserved-1EE3A> +1EE3C..1EE41 ; Cn # [6] <reserved-1EE3C>..<reserved-1EE41> +1EE43..1EE46 ; Cn # [4] <reserved-1EE43>..<reserved-1EE46> +1EE48 ; Cn # <reserved-1EE48> +1EE4A ; Cn # <reserved-1EE4A> +1EE4C ; Cn # <reserved-1EE4C> +1EE50 ; Cn # <reserved-1EE50> +1EE53 ; Cn # <reserved-1EE53> +1EE55..1EE56 ; Cn # [2] <reserved-1EE55>..<reserved-1EE56> +1EE58 ; 
Cn # <reserved-1EE58> +1EE5A ; Cn # <reserved-1EE5A> +1EE5C ; Cn # <reserved-1EE5C> +1EE5E ; Cn # <reserved-1EE5E> +1EE60 ; Cn # <reserved-1EE60> +1EE63 ; Cn # <reserved-1EE63> +1EE65..1EE66 ; Cn # [2] <reserved-1EE65>..<reserved-1EE66> +1EE6B ; Cn # <reserved-1EE6B> +1EE73 ; Cn # <reserved-1EE73> +1EE78 ; Cn # <reserved-1EE78> +1EE7D ; Cn # <reserved-1EE7D> +1EE7F ; Cn # <reserved-1EE7F> +1EE8A ; Cn # <reserved-1EE8A> +1EE9C..1EEA0 ; Cn # [5] <reserved-1EE9C>..<reserved-1EEA0> +1EEA4 ; Cn # <reserved-1EEA4> +1EEAA ; Cn # <reserved-1EEAA> +1EEBC..1EEEF ; Cn # [52] <reserved-1EEBC>..<reserved-1EEEF> +1EEF2..1EFFF ; Cn # [270] <reserved-1EEF2>..<reserved-1EFFF> 1F02C..1F02F ; Cn # [4] <reserved-1F02C>..<reserved-1F02F> 1F094..1F09F ; Cn # [12] <reserved-1F094>..<reserved-1F09F> 1F0AF..1F0B0 ; Cn # [2] <reserved-1F0AF>..<reserved-1F0B0> @@ -472,7 +520,7 @@ FFFE..FFFF ; Cn # [2] <noncharacter-FFFE>..<noncharacter-FFFF> 1F0E0..1F0FF ; Cn # [32] <reserved-1F0E0>..<reserved-1F0FF> 1F10B..1F10F ; Cn # [5] <reserved-1F10B>..<reserved-1F10F> 1F12F ; Cn # <reserved-1F12F> -1F16A..1F16F ; Cn # [6] <reserved-1F16A>..<reserved-1F16F> +1F16C..1F16F ; Cn # [4] <reserved-1F16C>..<reserved-1F16F> 1F19B..1F1E5 ; Cn # [75] <reserved-1F19B>..<reserved-1F1E5> 1F203..1F20F ; Cn # [13] <reserved-1F203>..<reserved-1F20F> 1F23B..1F23F ; Cn # [5] <reserved-1F23B>..<reserved-1F23F> @@ -489,19 +537,9 @@ FFFE..FFFF ; Cn # [2] <noncharacter-FFFE>..<noncharacter-FFFF> 1F441 ; Cn # <reserved-1F441> 1F4F8 ; Cn # <reserved-1F4F8> 1F4FD..1F4FF ; Cn # [3] <reserved-1F4FD>..<reserved-1F4FF> -1F53E..1F54F ; Cn # [18] <reserved-1F53E>..<reserved-1F54F> +1F53E..1F53F ; Cn # [2] <reserved-1F53E>..<reserved-1F53F> +1F544..1F54F ; Cn # [12] <reserved-1F544>..<reserved-1F54F> 1F568..1F5FA ; Cn # [147] <reserved-1F568>..<reserved-1F5FA> -1F600 ; Cn # <reserved-1F600> -1F611 ; Cn # <reserved-1F611> -1F615 ; Cn # <reserved-1F615> -1F617 ; Cn # <reserved-1F617> -1F619 ; Cn # <reserved-1F619> -1F61B ; Cn # 
<reserved-1F61B> -1F61F ; Cn # <reserved-1F61F> -1F626..1F627 ; Cn # [2] <reserved-1F626>..<reserved-1F627> -1F62C ; Cn # <reserved-1F62C> -1F62E..1F62F ; Cn # [2] <reserved-1F62E>..<reserved-1F62F> -1F634 ; Cn # <reserved-1F634> 1F641..1F644 ; Cn # [4] <reserved-1F641>..<reserved-1F644> 1F650..1F67F ; Cn # [48] <reserved-1F650>..<reserved-1F67F> 1F6C6..1F6FF ; Cn # [58] <reserved-1F6C6>..<reserved-1F6FF> @@ -516,7 +554,7 @@ E01F0..EFFFF ; Cn # [65040] <reserved-E01F0>..<noncharacter-EFFFF> FFFFE..FFFFF ; Cn # [2] <noncharacter-FFFFE>..<noncharacter-FFFFF> 10FFFE..10FFFF; Cn # [2] <noncharacter-10FFFE>..<noncharacter-10FFFF> -# Total code points: 865147 +# Total code points: 864415 # ================================================ @@ -790,6 +828,8 @@ FFFFE..FFFFF ; Cn # [2] <noncharacter-FFFFE>..<noncharacter-FFFFF> 0526 ; Lu # CYRILLIC CAPITAL LETTER SHHA WITH DESCENDER 0531..0556 ; Lu # [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH 10A0..10C5 ; Lu # [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; Lu # GEORGIAN CAPITAL LETTER YN +10CD ; Lu # GEORGIAN CAPITAL LETTER AEN 1E00 ; Lu # LATIN CAPITAL LETTER A WITH RING BELOW 1E02 ; Lu # LATIN CAPITAL LETTER B WITH DOT ABOVE 1E04 ; Lu # LATIN CAPITAL LETTER B WITH DOT BELOW @@ -1004,6 +1044,7 @@ FFFFE..FFFFF ; Cn # [2] <noncharacter-FFFFE>..<noncharacter-FFFFF> 2CE2 ; Lu # COPTIC CAPITAL LETTER OLD NUBIAN WAU 2CEB ; Lu # COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI 2CED ; Lu # COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA +2CF2 ; Lu # COPTIC CAPITAL LETTER BOHAIRIC KHEI A640 ; Lu # CYRILLIC CAPITAL LETTER ZEMLYA A642 ; Lu # CYRILLIC CAPITAL LETTER DZELO A644 ; Lu # CYRILLIC CAPITAL LETTER REVERSED DZE @@ -1087,11 +1128,13 @@ A786 ; Lu # LATIN CAPITAL LETTER INSULAR T A78B ; Lu # LATIN CAPITAL LETTER SALTILLO A78D ; Lu # LATIN CAPITAL LETTER TURNED H A790 ; Lu # LATIN CAPITAL LETTER N WITH DESCENDER +A792 ; Lu # LATIN CAPITAL LETTER C WITH BAR A7A0 ; Lu # LATIN CAPITAL LETTER G WITH OBLIQUE 
STROKE A7A2 ; Lu # LATIN CAPITAL LETTER K WITH OBLIQUE STROKE A7A4 ; Lu # LATIN CAPITAL LETTER N WITH OBLIQUE STROKE A7A6 ; Lu # LATIN CAPITAL LETTER R WITH OBLIQUE STROKE A7A8 ; Lu # LATIN CAPITAL LETTER S WITH OBLIQUE STROKE +A7AA ; Lu # LATIN CAPITAL LETTER H WITH HOOK FF21..FF3A ; Lu # [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z 10400..10427 ; Lu # [40] DESERET CAPITAL LETTER LONG I..DESERET CAPITAL LETTER EW 1D400..1D419 ; Lu # [26] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL BOLD CAPITAL Z @@ -1126,16 +1169,14 @@ FF21..FF3A ; Lu # [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAP 1D790..1D7A8 ; Lu # [25] MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA 1D7CA ; Lu # MATHEMATICAL BOLD CAPITAL DIGAMMA -# Total code points: 1436 +# Total code points: 1441 # ================================================ # General_Category=Lowercase_Letter 0061..007A ; Ll # [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z -00AA ; Ll # FEMININE ORDINAL INDICATOR 00B5 ; Ll # MICRO SIGN -00BA ; Ll # MASCULINE ORDINAL INDICATOR 00DF..00F6 ; Ll # [24] LATIN SMALL LETTER SHARP S..LATIN SMALL LETTER O WITH DIAERESIS 00F8..00FF ; Ll # [8] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER Y WITH DIAERESIS 0101 ; Ll # LATIN SMALL LETTER A WITH MACRON @@ -1401,7 +1442,7 @@ FF21..FF3A ; Lu # [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAP 0527 ; Ll # CYRILLIC SMALL LETTER SHHA WITH DESCENDER 0561..0587 ; Ll # [39] ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LIGATURE ECH YIWN 1D00..1D2B ; Ll # [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL -1D62..1D77 ; Ll # [22] LATIN SUBSCRIPT SMALL LETTER I..LATIN SMALL LETTER TURNED G +1D6B..1D77 ; Ll # [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G 1D79..1D9A ; Ll # [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK 1E01 ; Ll # LATIN SMALL LETTER A WITH RING BELOW 1E03 ; Ll # LATIN SMALL LETTER B WITH 
DOT ABOVE @@ -1565,7 +1606,7 @@ FF21..FF3A ; Lu # [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAP 2C6C ; Ll # LATIN SMALL LETTER Z WITH DESCENDER 2C71 ; Ll # LATIN SMALL LETTER V WITH RIGHT HOOK 2C73..2C74 ; Ll # [2] LATIN SMALL LETTER W WITH HOOK..LATIN SMALL LETTER V WITH CURL -2C76..2C7C ; Ll # [7] LATIN SMALL LETTER HALF H..LATIN SUBSCRIPT SMALL LETTER J +2C76..2C7B ; Ll # [6] LATIN SMALL LETTER HALF H..LATIN LETTER SMALL CAPITAL TURNED E 2C81 ; Ll # COPTIC SMALL LETTER ALFA 2C83 ; Ll # COPTIC SMALL LETTER VIDA 2C85 ; Ll # COPTIC SMALL LETTER GAMMA @@ -1618,7 +1659,10 @@ FF21..FF3A ; Lu # [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAP 2CE3..2CE4 ; Ll # [2] COPTIC SMALL LETTER OLD NUBIAN WAU..COPTIC SYMBOL KAI 2CEC ; Ll # COPTIC SMALL LETTER CRYPTOGRAMMIC SHEI 2CEE ; Ll # COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CF3 ; Ll # COPTIC SMALL LETTER BOHAIRIC KHEI 2D00..2D25 ; Ll # [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE +2D27 ; Ll # GEORGIAN SMALL LETTER YN +2D2D ; Ll # GEORGIAN SMALL LETTER AEN A641 ; Ll # CYRILLIC SMALL LETTER ZEMLYA A643 ; Ll # CYRILLIC SMALL LETTER DZELO A645 ; Ll # CYRILLIC SMALL LETTER REVERSED DZE @@ -1703,6 +1747,7 @@ A787 ; Ll # LATIN SMALL LETTER INSULAR T A78C ; Ll # LATIN SMALL LETTER SALTILLO A78E ; Ll # LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT A791 ; Ll # LATIN SMALL LETTER N WITH DESCENDER +A793 ; Ll # LATIN SMALL LETTER C WITH BAR A7A1 ; Ll # LATIN SMALL LETTER G WITH OBLIQUE STROKE A7A3 ; Ll # LATIN SMALL LETTER K WITH OBLIQUE STROKE A7A5 ; Ll # LATIN SMALL LETTER N WITH OBLIQUE STROKE @@ -1742,7 +1787,7 @@ FF41..FF5A ; Ll # [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL 1D7C4..1D7C9 ; Ll # [6] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC PI SYMBOL 1D7CB ; Ll # MATHEMATICAL BOLD SMALL DIGAMMA -# Total code points: 1759 +# Total code points: 1751 # ================================================ @@ -1788,13 +1833,13 @@ FF41..FF5A ; 
Ll # [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL 1843 ; Lm # MONGOLIAN LETTER TODO LONG VOWEL SIGN 1AA7 ; Lm # TAI THAM SIGN MAI YAMOK 1C78..1C7D ; Lm # [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD -1D2C..1D61 ; Lm # [54] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI +1D2C..1D6A ; Lm # [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI 1D78 ; Lm # MODIFIER LETTER CYRILLIC EN 1D9B..1DBF ; Lm # [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA 2071 ; Lm # SUPERSCRIPT LATIN SMALL LETTER I 207F ; Lm # SUPERSCRIPT LATIN SMALL LETTER N 2090..209C ; Lm # [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T -2C7D ; Lm # MODIFIER LETTER CAPITAL V +2C7C..2C7D ; Lm # [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V 2D6F ; Lm # TIFINAGH MODIFIER LETTER LABIALIZATION MARK 2E2F ; Lm # VERTICAL TILDE 3005 ; Lm # IDEOGRAPHIC ITERATION MARK @@ -1809,18 +1854,23 @@ A67F ; Lm # CYRILLIC PAYEROK A717..A71F ; Lm # [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK A770 ; Lm # MODIFIER LETTER US A788 ; Lm # MODIFIER LETTER LOW CIRCUMFLEX ACCENT +A7F8..A7F9 ; Lm # [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE A9CF ; Lm # JAVANESE PANGRANGKEP AA70 ; Lm # MYANMAR MODIFIER LETTER KHAMTI REDUPLICATION AADD ; Lm # TAI VIET SYMBOL SAM +AAF3..AAF4 ; Lm # [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK FF70 ; Lm # HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK +16F93..16F9F ; Lm # [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 -# Total code points: 210 +# Total code points: 237 # ================================================ # General_Category=Other_Letter +00AA ; Lo # FEMININE ORDINAL INDICATOR +00BA ; Lo # MASCULINE ORDINAL INDICATOR 01BB ; Lo # LATIN LETTER TWO WITH STROKE 01C0..01C3 ; Lo # [4] LATIN LETTER DENTAL 
CLICK..LATIN LETTER RETROFLEX CLICK 0294 ; Lo # LATIN LETTER GLOTTAL STOP @@ -1841,6 +1891,8 @@ FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK 07CA..07EA ; Lo # [33] NKO LETTER A..NKO LETTER JONA RA 0800..0815 ; Lo # [22] SAMARITAN LETTER ALAF..SAMARITAN LETTER TAAF 0840..0858 ; Lo # [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN +08A0 ; Lo # ARABIC LETTER BEH WITH SMALL V BELOW +08A2..08AC ; Lo # [11] ARABIC LETTER JEEM WITH TWO DOTS ABOVE..ARABIC LETTER ROHINGYA YEH 0904..0939 ; Lo # [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA 093D ; Lo # DEVANAGARI SIGN AVAGRAHA 0950 ; Lo # DEVANAGARI OM @@ -1945,7 +1997,7 @@ FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK 0EB2..0EB3 ; Lo # [2] LAO VOWEL SIGN AA..LAO VOWEL SIGN AM 0EBD ; Lo # LAO SEMIVOWEL SIGN NYO 0EC0..0EC4 ; Lo # [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI -0EDC..0EDD ; Lo # [2] LAO HO NO..LAO HO MO +0EDC..0EDF ; Lo # [4] LAO HO NO..LAO LETTER KHMU NYO 0F00 ; Lo # TIBETAN SYLLABLE OM 0F40..0F47 ; Lo # [8] TIBETAN LETTER KA..TIBETAN LETTER JA 0F49..0F6C ; Lo # [36] TIBETAN LETTER NYA..TIBETAN LETTER RRA @@ -1960,7 +2012,7 @@ FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK 1075..1081 ; Lo # [13] MYANMAR LETTER SHAN KA..MYANMAR LETTER SHAN HA 108E ; Lo # MYANMAR LETTER RUMAI PALAUNG FA 10D0..10FA ; Lo # [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN -1100..1248 ; Lo # [329] HANGUL CHOSEONG KIYEOK..ETHIOPIC SYLLABLE QWA +10FD..1248 ; Lo # [332] GEORGIAN LETTER AEN..ETHIOPIC SYLLABLE QWA 124A..124D ; Lo # [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE 1250..1256 ; Lo # [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO 1258 ; Lo # ETHIOPIC SYLLABLE QHWA @@ -2006,14 +2058,15 @@ FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK 1B45..1B4B ; Lo # [7] BALINESE LETTER KAF SASAK..BALINESE LETTER ASYURA SASAK 1B83..1BA0 ; Lo # [30] SUNDANESE LETTER A..SUNDANESE LETTER HA 1BAE..1BAF ; Lo # [2] SUNDANESE LETTER 
KHA..SUNDANESE LETTER SYA -1BC0..1BE5 ; Lo # [38] BATAK LETTER A..BATAK LETTER U +1BBA..1BE5 ; Lo # [44] SUNDANESE AVAGRAHA..BATAK LETTER U 1C00..1C23 ; Lo # [36] LEPCHA LETTER KA..LEPCHA LETTER A 1C4D..1C4F ; Lo # [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA 1C5A..1C77 ; Lo # [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH 1CE9..1CEC ; Lo # [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL 1CEE..1CF1 ; Lo # [4] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ANUSVARA UBHAYATO MUKHA +1CF5..1CF6 ; Lo # [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA 2135..2138 ; Lo # [4] ALEF SYMBOL..DALET SYMBOL -2D30..2D65 ; Lo # [54] TIFINAGH LETTER YA..TIFINAGH LETTER YAZZ +2D30..2D67 ; Lo # [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO 2D80..2D96 ; Lo # [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE 2DA0..2DA6 ; Lo # [7] ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO 2DA8..2DAE ; Lo # [7] ETHIOPIC SYLLABLE CCA..ETHIOPIC SYLLABLE CCO @@ -2034,7 +2087,7 @@ FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK 31A0..31BA ; Lo # [27] BOPOMOFO LETTER BU..BOPOMOFO LETTER ZY 31F0..31FF ; Lo # [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO 3400..4DB5 ; Lo # [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5 -4E00..9FCB ; Lo # [20940] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCB +4E00..9FCC ; Lo # [20941] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCC A000..A014 ; Lo # [21] YI SYLLABLE IT..YI SYLLABLE E A016..A48C ; Lo # [1143] YI SYLLABLE BIT..YI SYLLABLE YYR A4D0..A4F7 ; Lo # [40] LISU LETTER BA..LISU LETTER OE @@ -2068,6 +2121,8 @@ AAB9..AABD ; Lo # [5] TAI VIET VOWEL UEA..TAI VIET VOWEL AN AAC0 ; Lo # TAI VIET TONE MAI NUENG AAC2 ; Lo # TAI VIET TONE MAI SONG AADB..AADC ; Lo # [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG +AAE0..AAEA ; Lo # [11] MEETEI MAYEK LETTER E..MEETEI MAYEK LETTER SSA +AAF2 ; Lo # MEETEI MAYEK ANJI AB01..AB06 ; Lo # [6] ETHIOPIC SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO AB09..AB0E 
; Lo # [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO AB11..AB16 ; Lo # [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO @@ -2077,8 +2132,7 @@ ABC0..ABE2 ; Lo # [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM AC00..D7A3 ; Lo # [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH D7B0..D7C6 ; Lo # [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E D7CB..D7FB ; Lo # [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH -F900..FA2D ; Lo # [302] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA2D -FA30..FA6D ; Lo # [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D +F900..FA6D ; Lo # [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D FA70..FAD9 ; Lo # [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 FB1D ; Lo # HEBREW LETTER YOD WITH HIRIQ FB1F..FB28 ; Lo # [10] HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBREW LETTER WIDE TAV @@ -2125,6 +2179,8 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 1083F..10855 ; Lo # [23] CYPRIOT SYLLABLE ZO..IMPERIAL ARAMAIC LETTER TAW 10900..10915 ; Lo # [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU 10920..10939 ; Lo # [26] LYDIAN LETTER A..LYDIAN LETTER C +10980..109B7 ; Lo # [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA +109BE..109BF ; Lo # [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN 10A00 ; Lo # KHAROSHTHI LETTER A 10A10..10A13 ; Lo # [4] KHAROSHTHI LETTER KA..KHAROSHTHI LETTER GHA 10A15..10A17 ; Lo # [3] KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA @@ -2136,16 +2192,56 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 10C00..10C48 ; Lo # [73] OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH 11003..11037 ; Lo # [53] BRAHMI SIGN JIHVAMULIYA..BRAHMI LETTER OLD TAMIL NNNA 11083..110AF ; Lo # [45] KAITHI LETTER A..KAITHI LETTER HA +110D0..110E8 ; Lo # [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE +11103..11126 ; Lo # 
[36] CHAKMA LETTER AA..CHAKMA LETTER HAA +11183..111B2 ; Lo # [48] SHARADA LETTER A..SHARADA LETTER HA +111C1..111C4 ; Lo # [4] SHARADA SIGN AVAGRAHA..SHARADA OM +11680..116AA ; Lo # [43] TAKRI LETTER A..TAKRI LETTER RRA 12000..1236E ; Lo # [879] CUNEIFORM SIGN A..CUNEIFORM SIGN ZUM 13000..1342E ; Lo # [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032 16800..16A38 ; Lo # [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ +16F00..16F44 ; Lo # [69] MIAO LETTER PA..MIAO LETTER HHA +16F50 ; Lo # MIAO LETTER NASALIZATION 1B000..1B001 ; Lo # [2] KATAKANA LETTER ARCHAIC E..HIRAGANA LETTER ARCHAIC YE +1EE00..1EE03 ; Lo # [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; Lo # [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; Lo # [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; Lo # ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; Lo # ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; Lo # [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; Lo # [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; Lo # ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; Lo # ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; Lo # ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; Lo # ARABIC MATHEMATICAL TAILED HAH +1EE49 ; Lo # ARABIC MATHEMATICAL TAILED YEH +1EE4B ; Lo # ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; Lo # [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; Lo # [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; Lo # ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; Lo # ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; Lo # ARABIC MATHEMATICAL TAILED DAD +1EE5B ; Lo # ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; Lo # ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; Lo # ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; Lo # [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; Lo 
# ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; Lo # [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; Lo # [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; Lo # [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; Lo # [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; Lo # ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; Lo # [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; Lo # [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; Lo # [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; Lo # [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; Lo # [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN 20000..2A6D6 ; Lo # [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6 2A700..2B734 ; Lo # [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734 2B740..2B81D ; Lo # [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D 2F800..2FA1D ; Lo # [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D -# Total code points: 97084 +# Total code points: 97553 # ================================================ @@ -2174,6 +2270,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 0825..0827 ; Mn # [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U 0829..082D ; Mn # [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA 0859..085B ; Mn # [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK +08E4..08FE ; Mn # [27] ARABIC CURLY FATHA..ARABIC DAMMA WITH DOT 0900..0902 ; Mn # [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA 093A ; Mn # DEVANAGARI VOWEL SIGN OE 093C ; Mn # DEVANAGARI SIGN NUKTA @@ -2259,6 +2356,7 @@ FFDA..FFDC ; 
Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 1732..1734 ; Mn # [3] HANUNOO VOWEL SIGN I..HANUNOO SIGN PAMUDPOD 1752..1753 ; Mn # [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U 1772..1773 ; Mn # [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U +17B4..17B5 ; Mn # [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA 17B7..17BD ; Mn # [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA 17C6 ; Mn # KHMER SIGN NIKAHIT 17C9..17D3 ; Mn # [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT @@ -2286,6 +2384,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 1B80..1B81 ; Mn # [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR 1BA2..1BA5 ; Mn # [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU 1BA8..1BA9 ; Mn # [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG +1BAB ; Mn # SUNDANESE SIGN VIRAMA 1BE6 ; Mn # BATAK SIGN TOMPI 1BE8..1BE9 ; Mn # [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE 1BED ; Mn # BATAK VOWEL SIGN KARO O @@ -2296,6 +2395,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 1CD4..1CE0 ; Mn # [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA 1CE2..1CE8 ; Mn # [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL 1CED ; Mn # VEDIC SIGN TIRYAK +1CF4 ; Mn # VEDIC TONE CANDRA ABOVE 1DC0..1DE6 ; Mn # [39] COMBINING DOTTED GRAVE ACCENT..COMBINING LATIN SMALL LETTER Z 1DFC..1DFF ; Mn # [4] COMBINING DOUBLE INVERTED BREVE BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW 20D0..20DC ; Mn # [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE @@ -2304,10 +2404,11 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 2CEF..2CF1 ; Mn # [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS 2D7F ; Mn # TIFINAGH CONSONANT JOINER 2DE0..2DFF ; Mn # [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS -302A..302F ; Mn # [6] IDEOGRAPHIC LEVEL 
TONE MARK..HANGUL DOUBLE DOT TONE MARK +302A..302D ; Mn # [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK 3099..309A ; Mn # [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK A66F ; Mn # COMBINING CYRILLIC VZMET -A67C..A67D ; Mn # [2] COMBINING CYRILLIC KAVYKA..COMBINING CYRILLIC PAYEROK +A674..A67D ; Mn # [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK +A69F ; Mn # COMBINING CYRILLIC LETTER IOTIFIED E A6F0..A6F1 ; Mn # [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS A802 ; Mn # SYLOTI NAGRI SIGN DVISVARA A806 ; Mn # SYLOTI NAGRI SIGN HASANTA @@ -2331,6 +2432,8 @@ AAB2..AAB4 ; Mn # [3] TAI VIET VOWEL I..TAI VIET VOWEL U AAB7..AAB8 ; Mn # [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA AABE..AABF ; Mn # [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK AAC1 ; Mn # TAI VIET TONE MAI THO +AAEC..AAED ; Mn # [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI +AAF6 ; Mn # MEETEI MAYEK VIRAMA ABE5 ; Mn # MEETEI MAYEK VOWEL SIGN ANAP ABE8 ; Mn # MEETEI MAYEK VOWEL SIGN UNAP ABED ; Mn # MEETEI MAYEK APUN IYEK @@ -2348,6 +2451,16 @@ FE20..FE26 ; Mn # [7] COMBINING LIGATURE LEFT HALF..COMBINING CONJOINING MA 11080..11081 ; Mn # [2] KAITHI SIGN CANDRABINDU..KAITHI SIGN ANUSVARA 110B3..110B6 ; Mn # [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI 110B9..110BA ; Mn # [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA +11100..11102 ; Mn # [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11127..1112B ; Mn # [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112D..11134 ; Mn # [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA +11180..11181 ; Mn # [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +111B6..111BE ; Mn # [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +116AB ; Mn # TAKRI SIGN ANUSVARA +116AD ; Mn # TAKRI VOWEL SIGN AA +116B0..116B5 ; Mn # [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +116B7 ; Mn # TAKRI SIGN NUKTA +16F8F..16F92 ; Mn # [4] MIAO TONE RIGHT..MIAO TONE BELOW 1D167..1D169 ; Mn # 
[3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 1D17B..1D182 ; Mn # [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE 1D185..1D18B ; Mn # [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE @@ -2355,7 +2468,7 @@ FE20..FE26 ; Mn # [7] COMBINING LIGATURE LEFT HALF..COMBINING CONJOINING MA 1D242..1D244 ; Mn # [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME E0100..E01EF ; Mn # [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 1199 +# Total code points: 1280 # ================================================ @@ -2453,6 +2566,7 @@ A670..A672 ; Me # [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRIL 1BA1 ; Mc # SUNDANESE CONSONANT SIGN PAMINGKAL 1BA6..1BA7 ; Mc # [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG 1BAA ; Mc # SUNDANESE SIGN PAMAAEH +1BAC..1BAD ; Mc # [2] SUNDANESE CONSONANT SIGN PASANGAN MA..SUNDANESE CONSONANT SIGN PASANGAN WA 1BE7 ; Mc # BATAK VOWEL SIGN E 1BEA..1BEC ; Mc # [3] BATAK VOWEL SIGN I..BATAK VOWEL SIGN O 1BEE ; Mc # BATAK VOWEL SIGN U @@ -2460,7 +2574,8 @@ A670..A672 ; Me # [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRIL 1C24..1C2B ; Mc # [8] LEPCHA SUBJOINED LETTER YA..LEPCHA VOWEL SIGN UU 1C34..1C35 ; Mc # [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG 1CE1 ; Mc # VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA -1CF2 ; Mc # VEDIC SIGN ARDHAVISARGA +1CF2..1CF3 ; Mc # [2] VEDIC SIGN ARDHAVISARGA..VEDIC SIGN ROTATED ARDHAVISARGA +302E..302F ; Mc # [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK A823..A824 ; Mc # [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I A827 ; Mc # SYLOTI NAGRI VOWEL SIGN OO A880..A881 ; Mc # [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA @@ -2474,6 +2589,9 @@ AA2F..AA30 ; Mc # [2] CHAM VOWEL SIGN O..CHAM VOWEL SIGN AI AA33..AA34 ; Mc # [2] CHAM CONSONANT SIGN YA..CHAM CONSONANT SIGN RA AA4D ; Mc # CHAM CONSONANT SIGN FINAL H AA7B ; Mc # 
MYANMAR SIGN PAO KAREN TONE +AAEB ; Mc # MEETEI MAYEK VOWEL SIGN II +AAEE..AAEF ; Mc # [2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU +AAF5 ; Mc # MEETEI MAYEK VOWEL SIGN VISARGA ABE3..ABE4 ; Mc # [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP ABE6..ABE7 ; Mc # [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP ABE9..ABEA ; Mc # [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEETEI MAYEK VOWEL SIGN NUNG @@ -2483,10 +2601,18 @@ ABEC ; Mc # MEETEI MAYEK LUM IYEK 11082 ; Mc # KAITHI SIGN VISARGA 110B0..110B2 ; Mc # [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II 110B7..110B8 ; Mc # [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU +1112C ; Mc # CHAKMA VOWEL SIGN E +11182 ; Mc # SHARADA SIGN VISARGA +111B3..111B5 ; Mc # [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II +111BF..111C0 ; Mc # [2] SHARADA VOWEL SIGN AU..SHARADA SIGN VIRAMA +116AC ; Mc # TAKRI SIGN VISARGA +116AE..116AF ; Mc # [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II +116B6 ; Mc # TAKRI SIGN VIRAMA +16F51..16F7E ; Mc # [46] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN NG 1D165..1D166 ; Mc # [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM 1D16D..1D172 ; Mc # [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5 -# Total code points: 287 +# Total code points: 353 # ================================================ @@ -2529,9 +2655,13 @@ ABF0..ABF9 ; Nd # [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE FF10..FF19 ; Nd # [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE 104A0..104A9 ; Nd # [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE 11066..1106F ; Nd # [10] BRAHMI DIGIT ZERO..BRAHMI DIGIT NINE +110F0..110F9 ; Nd # [10] SORA SOMPENG DIGIT ZERO..SORA SOMPENG DIGIT NINE +11136..1113F ; Nd # [10] CHAKMA DIGIT ZERO..CHAKMA DIGIT NINE +111D0..111D9 ; Nd # [10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE +116C0..116C9 ; Nd # [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE 1D7CE..1D7FF ; Nd # [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE 
DIGIT NINE -# Total code points: 420 +# Total code points: 460 # ================================================ @@ -2579,6 +2709,7 @@ A6E6..A6EF ; Nl # [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM 2CFD ; No # COPTIC FRACTION ONE HALF 3192..3195 ; No # [4] IDEOGRAPHIC ANNOTATION ONE MARK..IDEOGRAPHIC ANNOTATION FOUR MARK 3220..3229 ; No # [10] PARENTHESIZED IDEOGRAPH ONE..PARENTHESIZED IDEOGRAPH TEN +3248..324F ; No # [8] CIRCLED NUMBER TEN ON BLACK SQUARE..CIRCLED NUMBER EIGHTY ON BLACK SQUARE 3251..325F ; No # [15] CIRCLED NUMBER TWENTY ONE..CIRCLED NUMBER THIRTY FIVE 3280..3289 ; No # [10] CIRCLED IDEOGRAPH ONE..CIRCLED IDEOGRAPH TEN 32B1..32BF ; No # [15] CIRCLED NUMBER THIRTY SIX..CIRCLED NUMBER FIFTY @@ -2598,7 +2729,7 @@ A830..A835 ; No # [6] NORTH INDIC FRACTION ONE QUARTER..NORTH INDIC FRACTIO 1D360..1D371 ; No # [18] COUNTING ROD UNIT DIGIT ONE..COUNTING ROD TENS DIGIT NINE 1F100..1F10A ; No # [11] DIGIT ZERO FULL STOP..DIGIT NINE COMMA -# Total code points: 456 +# Total code points: 464 # ================================================ @@ -2645,10 +2776,9 @@ A830..A835 ; No # [6] NORTH INDIC FRACTION ONE QUARTER..NORTH INDIC FRACTIO # General_Category=Format 00AD ; Cf # SOFT HYPHEN -0600..0603 ; Cf # [4] ARABIC NUMBER SIGN..ARABIC SIGN SAFHA +0600..0604 ; Cf # [5] ARABIC NUMBER SIGN..ARABIC SIGN SAMVAT 06DD ; Cf # ARABIC END OF AYAH 070F ; Cf # SYRIAC ABBREVIATION MARK -17B4..17B5 ; Cf # [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA 200B..200F ; Cf # [5] ZERO WIDTH SPACE..RIGHT-TO-LEFT MARK 202A..202E ; Cf # [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE 2060..2064 ; Cf # [5] WORD JOINER..INVISIBLE PLUS @@ -2660,7 +2790,7 @@ FFF9..FFFB ; Cf # [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATION E0001 ; Cf # LANGUAGE TAG E0020..E007F ; Cf # [96] TAG SPACE..CANCEL TAG -# Total code points: 140 +# Total code points: 139 # ================================================ @@ -2692,6 +2822,7 @@ D800..DFFF ; Cs # [2048] 
<surrogate-D800>..<surrogate-DFFF> 2010..2015 ; Pd # [6] HYPHEN..HORIZONTAL BAR 2E17 ; Pd # DOUBLE OBLIQUE HYPHEN 2E1A ; Pd # HYPHEN WITH DIAERESIS +2E3A..2E3B ; Pd # [2] TWO-EM DASH..THREE-EM DASH 301C ; Pd # WAVE DASH 3030 ; Pd # WAVY DASH 30A0 ; Pd # KATAKANA-HIRAGANA DOUBLE HYPHEN @@ -2700,7 +2831,7 @@ FE58 ; Pd # SMALL EM DASH FE63 ; Pd # SMALL HYPHEN-MINUS FF0D ; Pd # FULLWIDTH HYPHEN-MINUS -# Total code points: 21 +# Total code points: 23 # ================================================ @@ -2884,7 +3015,8 @@ FF3F ; Pc # FULLWIDTH LOW LINE 003F..0040 ; Po # [2] QUESTION MARK..COMMERCIAL AT 005C ; Po # REVERSE SOLIDUS 00A1 ; Po # INVERTED EXCLAMATION MARK -00B7 ; Po # MIDDLE DOT +00A7 ; Po # SECTION SIGN +00B6..00B7 ; Po # [2] PILCROW SIGN..MIDDLE DOT 00BF ; Po # INVERTED QUESTION MARK 037E ; Po # GREEK QUESTION MARK 0387 ; Po # GREEK ANO TELEIA @@ -2906,16 +3038,18 @@ FF3F ; Pc # FULLWIDTH LOW LINE 085E ; Po # MANDAIC PUNCTUATION 0964..0965 ; Po # [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA 0970 ; Po # DEVANAGARI ABBREVIATION SIGN +0AF0 ; Po # GUJARATI ABBREVIATION SIGN 0DF4 ; Po # SINHALA PUNCTUATION KUNDDALIYA 0E4F ; Po # THAI CHARACTER FONGMAN 0E5A..0E5B ; Po # [2] THAI CHARACTER ANGKHANKHU..THAI CHARACTER KHOMUT 0F04..0F12 ; Po # [15] TIBETAN MARK INITIAL YIG MGO MDUN MA..TIBETAN MARK RGYA GRAM SHAD +0F14 ; Po # TIBETAN MARK GTER TSHEG 0F85 ; Po # TIBETAN MARK PALUTA 0FD0..0FD4 ; Po # [5] TIBETAN MARK BSKA- SHOG GI MGO RGYAN..TIBETAN MARK CLOSING BRDA RNYING YIG MGO SGAB MA 0FD9..0FDA ; Po # [2] TIBETAN MARK LEADING MCHAN RTAGS..TIBETAN MARK TRAILING MCHAN RTAGS 104A..104F ; Po # [6] MYANMAR SIGN LITTLE SECTION..MYANMAR SYMBOL GENITIVE 10FB ; Po # GEORGIAN PARAGRAPH SEPARATOR -1361..1368 ; Po # [8] ETHIOPIC WORDSPACE..ETHIOPIC PARAGRAPH SEPARATOR +1360..1368 ; Po # [9] ETHIOPIC SECTION MARK..ETHIOPIC PARAGRAPH SEPARATOR 166D..166E ; Po # [2] CANADIAN SYLLABICS CHI SIGN..CANADIAN SYLLABICS FULL STOP 16EB..16ED ; Po # [3] RUNIC SINGLE PUNCTUATION..RUNIC 
CROSS PUNCTUATION 1735..1736 ; Po # [2] PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DOUBLE PUNCTUATION @@ -2931,6 +3065,7 @@ FF3F ; Pc # FULLWIDTH LOW LINE 1BFC..1BFF ; Po # [4] BATAK SYMBOL BINDU NA METEK..BATAK SYMBOL BINDU PANGOLAT 1C3B..1C3F ; Po # [5] LEPCHA PUNCTUATION TA-ROL..LEPCHA PUNCTUATION TSHOOK 1C7E..1C7F ; Po # [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD +1CC0..1CC7 ; Po # [8] SUNDANESE PUNCTUATION BINDU SURYA..SUNDANESE PUNCTUATION BINDU BA SATANGA 1CD3 ; Po # VEDIC SIGN NIHSHVASA 2016..2017 ; Po # [2] DOUBLE VERTICAL LINE..DOUBLE LOW LINE 2020..2027 ; Po # [8] DAGGER..HYPHENATION POINT @@ -2951,7 +3086,7 @@ FF3F ; Pc # FULLWIDTH LOW LINE 2E1B ; Po # TILDE WITH RING ABOVE 2E1E..2E1F ; Po # [2] TILDE WITH DOT ABOVE..TILDE WITH DOT BELOW 2E2A..2E2E ; Po # [5] TWO DOTS OVER ONE DOT PUNCTUATION..REVERSED QUESTION MARK -2E30..2E31 ; Po # [2] RING POINT..WORD SEPARATOR MIDDLE DOT +2E30..2E39 ; Po # [10] RING POINT..TOP HALF SECTION SIGN 3001..3003 ; Po # [3] IDEOGRAPHIC COMMA..DITTO MARK 303D ; Po # PART ALTERNATION MARK 30FB ; Po # KATAKANA MIDDLE DOT @@ -2969,6 +3104,7 @@ A9C1..A9CD ; Po # [13] JAVANESE LEFT RERENGGAN..JAVANESE TURNED PADA PISELEH A9DE..A9DF ; Po # [2] JAVANESE PADA TIRTA TUMETES..JAVANESE PADA ISEN-ISEN AA5C..AA5F ; Po # [4] CHAM PUNCTUATION SPIRAL..CHAM PUNCTUATION TRIPLE DANDA AADE..AADF ; Po # [2] TAI VIET SYMBOL HO HOI..TAI VIET SYMBOL KOI KOI +AAF0..AAF1 ; Po # [2] MEETEI MAYEK CHEIKHAN..MEETEI MAYEK AHANG KHUDAM ABEB ; Po # MEETEI MAYEK CHEIKHEI FE10..FE16 ; Po # [7] PRESENTATION FORM FOR VERTICAL COMMA..PRESENTATION FORM FOR VERTICAL QUESTION MARK FE19 ; Po # PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS @@ -2990,7 +3126,7 @@ FF1F..FF20 ; Po # [2] FULLWIDTH QUESTION MARK..FULLWIDTH COMMERCIAL AT FF3C ; Po # FULLWIDTH REVERSE SOLIDUS FF61 ; Po # HALFWIDTH IDEOGRAPHIC FULL STOP FF64..FF65 ; Po # [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDLE DOT -10100..10101 ; Po # [2] AEGEAN WORD SEPARATOR 
LINE..AEGEAN WORD SEPARATOR DOT +10100..10102 ; Po # [3] AEGEAN WORD SEPARATOR LINE..AEGEAN CHECK MARK 1039F ; Po # UGARITIC WORD DIVIDER 103D0 ; Po # OLD PERSIAN WORD DIVIDER 10857 ; Po # IMPERIAL ARAMAIC SECTION SIGN @@ -3002,9 +3138,11 @@ FF64..FF65 ; Po # [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDL 11047..1104D ; Po # [7] BRAHMI DANDA..BRAHMI PUNCTUATION LOTUS 110BB..110BC ; Po # [2] KAITHI ABBREVIATION SIGN..KAITHI ENUMERATION SIGN 110BE..110C1 ; Po # [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA +11140..11143 ; Po # [4] CHAKMA SECTION MARK..CHAKMA QUESTION MARK +111C5..111C8 ; Po # [4] SHARADA DANDA..SHARADA SEPARATOR 12470..12473 ; Po # [4] CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER..CUNEIFORM PUNCTUATION SIGN DIAGONAL TRICOLON -# Total code points: 402 +# Total code points: 434 # ================================================ @@ -3047,9 +3185,7 @@ FF64..FF65 ; Po # [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDL 25F8..25FF ; Sm # [8] UPPER LEFT TRIANGLE..LOWER RIGHT TRIANGLE 266F ; Sm # MUSIC SHARP SIGN 27C0..27C4 ; Sm # [5] THREE DIMENSIONAL ANGLE..OPEN SUPERSET -27C7..27CA ; Sm # [4] OR WITH DOT INSIDE..VERTICAL BAR WITH HORIZONTAL STROKE -27CC ; Sm # LONG DIVISION -27CE..27E5 ; Sm # [24] SQUARED LOGICAL AND..WHITE SQUARE WITH RIGHTWARDS TICK +27C7..27E5 ; Sm # [31] OR WITH DOT INSIDE..WHITE SQUARE WITH RIGHTWARDS TICK 27F0..27FF ; Sm # [16] UPWARDS QUADRUPLE ARROW..LONG RIGHTWARDS SQUIGGLE ARROW 2900..2982 ; Sm # [131] RIGHTWARDS TWO-HEADED ARROW WITH VERTICAL STROKE..Z NOTATION TYPE COLON 2999..29D7 ; Sm # [63] DOTTED FENCE..BLACK HOURGLASS @@ -3076,8 +3212,9 @@ FFE9..FFEC ; Sm # [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS ARROW 1D789 ; Sm # MATHEMATICAL SANS-SERIF BOLD PARTIAL DIFFERENTIAL 1D7A9 ; Sm # MATHEMATICAL SANS-SERIF BOLD ITALIC NABLA 1D7C3 ; Sm # MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL +1EEF0..1EEF1 ; Sm # [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC 
MATHEMATICAL OPERATOR HAH WITH DAL -# Total code points: 948 +# Total code points: 952 # ================================================ @@ -3085,6 +3222,7 @@ FFE9..FFEC ; Sm # [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS ARROW 0024 ; Sc # DOLLAR SIGN 00A2..00A5 ; Sc # [4] CENT SIGN..YEN SIGN +058F ; Sc # ARMENIAN DRAM SIGN 060B ; Sc # AFGHANI SIGN 09F2..09F3 ; Sc # [2] BENGALI RUPEE MARK..BENGALI RUPEE SIGN 09FB ; Sc # BENGALI GANDA MARK @@ -3100,7 +3238,7 @@ FF04 ; Sc # FULLWIDTH DOLLAR SIGN FFE0..FFE1 ; Sc # [2] FULLWIDTH CENT SIGN..FULLWIDTH POUND SIGN FFE5..FFE6 ; Sc # [2] FULLWIDTH YEN SIGN..FULLWIDTH WON SIGN -# Total code points: 47 +# Total code points: 48 # ================================================ @@ -3140,11 +3278,10 @@ FFE3 ; Sk # FULLWIDTH MACRON # General_Category=Other_Symbol -00A6..00A7 ; So # [2] BROKEN BAR..SECTION SIGN +00A6 ; So # BROKEN BAR 00A9 ; So # COPYRIGHT SIGN 00AE ; So # REGISTERED SIGN 00B0 ; So # DEGREE SIGN -00B6 ; So # PILCROW SIGN 0482 ; So # CYRILLIC THOUSANDS SIGN 060E..060F ; So # [2] ARABIC POETIC VERSE SIGN..ARABIC SIGN MISRA 06DE ; So # ARABIC START OF RUB EL HIZB @@ -3158,7 +3295,8 @@ FFE3 ; Sk # FULLWIDTH MACRON 0C7F ; So # TELUGU SIGN TUUMU 0D79 ; So # MALAYALAM DATE MARK 0F01..0F03 ; So # [3] TIBETAN MARK GTER YIG MGO TRUNCATED A..TIBETAN MARK GTER YIG MGO -UM GTER TSHEG MA -0F13..0F17 ; So # [5] TIBETAN MARK CARET -DZUD RTAGS ME LONG CAN..TIBETAN ASTROLOGICAL SIGN SGRA GCAN -CHAR RTAGS +0F13 ; So # TIBETAN MARK CARET -DZUD RTAGS ME LONG CAN +0F15..0F17 ; So # [3] TIBETAN LOGOTYPE SIGN CHAD RTAGS..TIBETAN ASTROLOGICAL SIGN SGRA GCAN -CHAR RTAGS 0F1A..0F1F ; So # [6] TIBETAN SIGN RDEL DKAR GCIG..TIBETAN SIGN RDEL DKAR RDEL NAG 0F34 ; So # TIBETAN MARK BSDUS RTAGS 0F36 ; So # TIBETAN MARK CARET -DZUD RTAGS BZHI MIG CAN @@ -3168,7 +3306,6 @@ FFE3 ; Sk # FULLWIDTH MACRON 0FCE..0FCF ; So # [2] TIBETAN SIGN RDEL NAG RDEL DKAR..TIBETAN SIGN RDEL NAG GSUM 0FD5..0FD8 ; So # [4] RIGHT-FACING SVASTI SIGN..LEFT-FACING 
SVASTI SIGN WITH DOTS 109E..109F ; So # [2] MYANMAR SYMBOL SHAN ONE..MYANMAR SYMBOL SHAN EXCLAMATION -1360 ; So # ETHIOPIC SECTION MARK 1390..1399 ; So # [10] ETHIOPIC TONAL MARK YIZET..ETHIOPIC TONAL MARK KURT 1940 ; So # LIMBU SIGN LOO 19DE..19FF ; So # [34] NEW TAI LUE SIGN LAE..KHMER SYMBOL DAP-PRAM ROC @@ -3232,7 +3369,8 @@ FFE3 ; Sk # FULLWIDTH MACRON 3196..319F ; So # [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK 31C0..31E3 ; So # [36] CJK STROKE T..CJK STROKE Q 3200..321E ; So # [31] PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED KOREAN CHARACTER O HU -322A..3250 ; So # [39] PARENTHESIZED IDEOGRAPH MOON..PARTNERSHIP SIGN +322A..3247 ; So # [30] PARENTHESIZED IDEOGRAPH MOON..CIRCLED IDEOGRAPH KOTO +3250 ; So # PARTNERSHIP SIGN 3260..327F ; So # [32] CIRCLED HANGUL KIYEOK..KOREAN STANDARD SYMBOL 328A..32B0 ; So # [39] CIRCLED IDEOGRAPH MOON..CIRCLED IDEOGRAPH NIGHT 32C0..32FE ; So # [63] IDEOGRAPHIC TELEGRAPH SYMBOL FOR JANUARY..CIRCLED KATAKANA WO @@ -3248,7 +3386,6 @@ FFE4 ; So # FULLWIDTH BROKEN BAR FFE8 ; So # HALFWIDTH FORMS LIGHT VERTICAL FFED..FFEE ; So # [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE FFFC..FFFD ; So # [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER -10102 ; So # AEGEAN CHECK MARK 10137..1013F ; So # [9] AEGEAN WEIGHT BASE UNIT..AEGEAN MEASURE THIRD SUBUNIT 10179..10189 ; So # [17] GREEK YEAR SIGN..GREEK TRYBLION BASE SIGN 10190..1019B ; So # [12] ROMAN SEXTANS SIGN..ROMAN CENTURIAL SIGN @@ -3270,7 +3407,7 @@ FFFC..FFFD ; So # [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 1F0C1..1F0CF ; So # [15] PLAYING CARD ACE OF DIAMONDS..PLAYING CARD BLACK JOKER 1F0D1..1F0DF ; So # [15] PLAYING CARD ACE OF CLUBS..PLAYING CARD WHITE JOKER 1F110..1F12E ; So # [31] PARENTHESIZED LATIN CAPITAL LETTER A..CIRCLED WZ -1F130..1F169 ; So # [58] SQUARED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z +1F130..1F16B ; So # [60] SQUARED LATIN CAPITAL LETTER A..RAISED MD SIGN 1F170..1F19A ; So # [43] 
NEGATIVE SQUARED LATIN CAPITAL LETTER A..SQUARED VS 1F1E6..1F202 ; So # [29] REGIONAL INDICATOR SYMBOL LETTER A..SQUARED KATAKANA SA 1F210..1F23A ; So # [43] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-55B6 @@ -3288,24 +3425,14 @@ FFFC..FFFD ; So # [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 1F442..1F4F7 ; So # [182] EAR..CAMERA 1F4F9..1F4FC ; So # [4] VIDEO CAMERA..VIDEOCASSETTE 1F500..1F53D ; So # [62] TWISTED RIGHTWARDS ARROWS..DOWN-POINTING SMALL RED TRIANGLE +1F540..1F543 ; So # [4] CIRCLED CROSS POMMEE..NOTCHED LEFT SEMICIRCLE WITH THREE DOTS 1F550..1F567 ; So # [24] CLOCK FACE ONE OCLOCK..CLOCK FACE TWELVE-THIRTY -1F5FB..1F5FF ; So # [5] MOUNT FUJI..MOYAI -1F601..1F610 ; So # [16] GRINNING FACE WITH SMILING EYES..NEUTRAL FACE -1F612..1F614 ; So # [3] UNAMUSED FACE..PENSIVE FACE -1F616 ; So # CONFOUNDED FACE -1F618 ; So # FACE THROWING A KISS -1F61A ; So # KISSING FACE WITH CLOSED EYES -1F61C..1F61E ; So # [3] FACE WITH STUCK-OUT TONGUE AND WINKING EYE..DISAPPOINTED FACE -1F620..1F625 ; So # [6] ANGRY FACE..DISAPPOINTED BUT RELIEVED FACE -1F628..1F62B ; So # [4] FEARFUL FACE..TIRED FACE -1F62D ; So # LOUDLY CRYING FACE -1F630..1F633 ; So # [4] FACE WITH OPEN MOUTH AND COLD SWEAT..FLUSHED FACE -1F635..1F640 ; So # [12] DIZZY FACE..WEARY CAT FACE +1F5FB..1F640 ; So # [70] MOUNT FUJI..WEARY CAT FACE 1F645..1F64F ; So # [11] FACE WITH NO GOOD GESTURE..PERSON WITH FOLDED HANDS 1F680..1F6C5 ; So # [70] ROCKET..LEFT LUGGAGE 1F700..1F773 ; So # [116] ALCHEMICAL SYMBOL FOR QUINTESSENCE..ALCHEMICAL SYMBOL FOR HALF OUNCE -# Total code points: 4398 +# Total code points: 4404 # ================================================ diff --git a/lib/unicore/extracted/DJoinGroup.txt b/lib/unicore/extracted/DJoinGroup.txt index 5958abbb84..bf3f10c8eb 100644 --- a/lib/unicore/extracted/DJoinGroup.txt +++ b/lib/unicore/extracted/DJoinGroup.txt @@ -1,8 +1,8 @@ -# DerivedJoiningGroup-6.0.0.txt -# Date: 2010-07-17, 22:46:14 GMT [MD] +# 
DerivedJoiningGroup-6.1.0.txt +# Date: 2011-07-25, 00:54:14 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ @@ -48,8 +48,9 @@ 066E ; Beh # Lo ARABIC LETTER DOTLESS BEH 0679..0680 ; Beh # Lo [8] ARABIC LETTER TTEH..ARABIC LETTER BEHEH 0750..0756 ; Beh # Lo [7] ARABIC LETTER BEH WITH THREE DOTS HORIZONTALLY BELOW..ARABIC LETTER BEH WITH SMALL V +08A0 ; Beh # Lo ARABIC LETTER BEH WITH SMALL V BELOW -# Total code points: 19 +# Total code points: 20 # ================================================ @@ -86,8 +87,9 @@ 0641 ; Feh # Lo ARABIC LETTER FEH 06A1..06A6 ; Feh # Lo [6] ARABIC LETTER DOTLESS FEH..ARABIC LETTER PEHEH 0760..0761 ; Feh # Lo [2] ARABIC LETTER FEH WITH TWO DOTS BELOW..ARABIC LETTER FEH WITH THREE DOTS POINTING UPWARDS BELOW +08A4 ; Feh # Lo ARABIC LETTER FEH WITH DOT BELOW AND THREE DOTS ABOVE -# Total code points: 9 +# Total code points: 10 # ================================================ @@ -121,8 +123,9 @@ 076E..076F ; Hah # Lo [2] ARABIC LETTER HAH WITH SMALL ARABIC LETTER TAH BELOW..ARABIC LETTER HAH WITH SMALL ARABIC LETTER TAH AND TWO DOTS 0772 ; Hah # Lo ARABIC LETTER HAH WITH SMALL ARABIC LETTER TAH ABOVE 077C ; Hah # Lo ARABIC LETTER HAH WITH EXTENDED ARABIC-INDIC DIGIT FOUR BELOW +08A2 ; Hah # Lo ARABIC LETTER JEEM WITH TWO DOTS ABOVE -# Total code points: 17 +# Total code points: 18 # ================================================ @@ -180,8 +183,9 @@ 0644 ; Lam # Lo ARABIC LETTER LAM 06B5..06B8 ; Lam # Lo [4] ARABIC LETTER LAM WITH SMALL V..ARABIC LETTER LAM WITH THREE DOTS BELOW 076A ; Lam # Lo ARABIC LETTER LAM WITH BAR +08A6 ; Lam # Lo ARABIC LETTER LAM WITH DOUBLE BAR -# Total code points: 6 +# Total code points: 7 # ================================================ @@ -193,8 +197,9 @@ 0645 ; Meem # Lo ARABIC LETTER MEEM 0765..0766 ; 
Meem # Lo [2] ARABIC LETTER MEEM WITH DOT ABOVE..ARABIC LETTER MEEM WITH DOT BELOW +08A7 ; Meem # Lo ARABIC LETTER MEEM WITH THREE DOTS ABOVE -# Total code points: 3 +# Total code points: 4 # ================================================ @@ -227,8 +232,9 @@ 0642 ; Qaf # Lo ARABIC LETTER QAF 066F ; Qaf # Lo ARABIC LETTER DOTLESS QAF 06A7..06A8 ; Qaf # Lo [2] ARABIC LETTER QAF WITH DOT ABOVE..ARABIC LETTER QAF WITH THREE DOTS ABOVE +08A5 ; Qaf # Lo ARABIC LETTER QAF WITH DOT BELOW -# Total code points: 4 +# Total code points: 5 # ================================================ @@ -244,8 +250,9 @@ 075B ; Reh # Lo ARABIC LETTER REH WITH STROKE 076B..076C ; Reh # Lo [2] ARABIC LETTER REH WITH TWO DOTS VERTICALLY ABOVE..ARABIC LETTER REH WITH HAMZA ABOVE 0771 ; Reh # Lo ARABIC LETTER REH WITH SMALL ARABIC LETTER TAH AND TWO DOTS +08AA ; Reh # Lo ARABIC LETTER REH WITH LOOP -# Total code points: 16 +# Total code points: 17 # ================================================ @@ -301,8 +308,9 @@ 0637..0638 ; Tah # Lo [2] ARABIC LETTER TAH..ARABIC LETTER ZAH 069F ; Tah # Lo ARABIC LETTER TAH WITH THREE DOTS ABOVE +08A3 ; Tah # Lo ARABIC LETTER TAH WITH TWO DOTS ABOVE -# Total code points: 3 +# Total code points: 4 # ================================================ @@ -332,8 +340,9 @@ 06C4..06CB ; Waw # Lo [8] ARABIC LETTER WAW WITH RING..ARABIC LETTER VE 06CF ; Waw # Lo ARABIC LETTER WAW WITH DOT ABOVE 0778..0779 ; Waw # Lo [2] ARABIC LETTER WAW WITH EXTENDED ARABIC-INDIC DIGIT TWO ABOVE..ARABIC LETTER WAW WITH EXTENDED ARABIC-INDIC DIGIT THREE ABOVE +08AB ; Waw # Lo ARABIC LETTER WAW WITH DOT WITHIN -# Total code points: 15 +# Total code points: 16 # ================================================ @@ -349,8 +358,9 @@ 0678 ; Yeh # Lo ARABIC LETTER HIGH HAMZA YEH 06D0..06D1 ; Yeh # Lo [2] ARABIC LETTER E..ARABIC LETTER YEH WITH THREE DOTS BELOW 0777 ; Yeh # Lo ARABIC LETTER FARSI YEH WITH EXTENDED ARABIC-INDIC DIGIT FOUR BELOW +08A8..08A9 ; Yeh # Lo [2] ARABIC LETTER YEH 
WITH TWO DOTS BELOW AND HAMZA ABOVE..ARABIC LETTER YEH WITH TWO DOTS BELOW AND DOT ABOVE -# Total code points: 8 +# Total code points: 10 # ================================================ @@ -421,4 +431,10 @@ # Total code points: 1 +# ================================================ + +08AC ; Rohingya_Yeh # Lo ARABIC LETTER ROHINGYA YEH + +# Total code points: 1 + # EOF diff --git a/lib/unicore/extracted/DJoinType.txt b/lib/unicore/extracted/DJoinType.txt index 32272c7894..f9d7c7af9c 100644 --- a/lib/unicore/extracted/DJoinType.txt +++ b/lib/unicore/extracted/DJoinType.txt @@ -1,8 +1,8 @@ -# DerivedJoiningType-6.0.0.txt -# Date: 2010-08-19, 00:48:10 GMT [MD] +# DerivedJoiningType-6.1.0.txt +# Date: 2011-11-27, 05:10:23 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ @@ -58,8 +58,15 @@ 0775..0777 ; D # Lo [3] ARABIC LETTER FARSI YEH WITH EXTENDED ARABIC-INDIC DIGIT TWO ABOVE..ARABIC LETTER FARSI YEH WITH EXTENDED ARABIC-INDIC DIGIT FOUR BELOW 077A..077F ; D # Lo [6] ARABIC LETTER YEH BARREE WITH EXTENDED ARABIC-INDIC DIGIT TWO ABOVE..ARABIC LETTER KAF WITH TWO DOTS ABOVE 07CA..07EA ; D # Lo [33] NKO LETTER A..NKO LETTER JONA RA +0841..0845 ; D # Lo [5] MANDAIC LETTER AB..MANDAIC LETTER USHENNA +0847..0848 ; D # Lo [2] MANDAIC LETTER IT..MANDAIC LETTER ATT +084A..084E ; D # Lo [5] MANDAIC LETTER AK..MANDAIC LETTER AS +0850..0853 ; D # Lo [4] MANDAIC LETTER AP..MANDAIC LETTER AR +0855 ; D # Lo MANDAIC LETTER AT +08A0 ; D # Lo ARABIC LETTER BEH WITH SMALL V BELOW +08A2..08A9 ; D # Lo [8] ARABIC LETTER JEEM WITH TWO DOTS ABOVE..ARABIC LETTER YEH WITH TWO DOTS BELOW AND DOT ABOVE -# Total code points: 189 +# Total code points: 215 # ================================================ @@ -93,8 +100,14 @@ 0771 ; R # Lo ARABIC LETTER REH WITH SMALL ARABIC LETTER TAH AND TWO DOTS 
0773..0774 ; R # Lo [2] ARABIC LETTER ALEF WITH EXTENDED ARABIC-INDIC DIGIT TWO ABOVE..ARABIC LETTER ALEF WITH EXTENDED ARABIC-INDIC DIGIT THREE ABOVE 0778..0779 ; R # Lo [2] ARABIC LETTER WAW WITH EXTENDED ARABIC-INDIC DIGIT TWO ABOVE..ARABIC LETTER WAW WITH EXTENDED ARABIC-INDIC DIGIT THREE ABOVE +0840 ; R # Lo MANDAIC LETTER HALQA +0846 ; R # Lo MANDAIC LETTER AZ +0849 ; R # Lo MANDAIC LETTER AKSA +084F ; R # Lo MANDAIC LETTER IN +0854 ; R # Lo MANDAIC LETTER ASH +08AA..08AC ; R # Lo [3] ARABIC LETTER REH WITH LOOP..ARABIC LETTER ROHINGYA YEH -# Total code points: 74 +# Total code points: 82 # ================================================ @@ -126,6 +139,7 @@ 0825..0827 ; T # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U 0829..082D ; T # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA 0859..085B ; T # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK +08E4..08FE ; T # Mn [27] ARABIC CURLY FATHA..ARABIC DAMMA WITH DOT 0900..0902 ; T # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA 093A ; T # Mn DEVANAGARI VOWEL SIGN OE 093C ; T # Mn DEVANAGARI SIGN NUKTA @@ -211,7 +225,7 @@ 1732..1734 ; T # Mn [3] HANUNOO VOWEL SIGN I..HANUNOO SIGN PAMUDPOD 1752..1753 ; T # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U 1772..1773 ; T # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U -17B4..17B5 ; T # Cf [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA +17B4..17B5 ; T # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA 17B7..17BD ; T # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA 17C6 ; T # Mn KHMER SIGN NIKAHIT 17C9..17D3 ; T # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT @@ -239,6 +253,7 @@ 1B80..1B81 ; T # Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR 1BA2..1BA5 ; T # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU 1BA8..1BA9 ; T # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG +1BAB ; T # Mn SUNDANESE SIGN VIRAMA 1BE6 ; T # Mn BATAK SIGN 
TOMPI 1BE8..1BE9 ; T # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE 1BED ; T # Mn BATAK VOWEL SIGN KARO O @@ -249,6 +264,7 @@ 1CD4..1CE0 ; T # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA 1CE2..1CE8 ; T # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL 1CED ; T # Mn VEDIC SIGN TIRYAK +1CF4 ; T # Mn VEDIC TONE CANDRA ABOVE 1DC0..1DE6 ; T # Mn [39] COMBINING DOTTED GRAVE ACCENT..COMBINING LATIN SMALL LETTER Z 1DFC..1DFF ; T # Mn [4] COMBINING DOUBLE INVERTED BREVE BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW 200B ; T # Cf ZERO WIDTH SPACE @@ -264,11 +280,12 @@ 2CEF..2CF1 ; T # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS 2D7F ; T # Mn TIFINAGH CONSONANT JOINER 2DE0..2DFF ; T # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS -302A..302F ; T # Mn [6] IDEOGRAPHIC LEVEL TONE MARK..HANGUL DOUBLE DOT TONE MARK +302A..302D ; T # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK 3099..309A ; T # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK A66F ; T # Mn COMBINING CYRILLIC VZMET A670..A672 ; T # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN -A67C..A67D ; T # Mn [2] COMBINING CYRILLIC KAVYKA..COMBINING CYRILLIC PAYEROK +A674..A67D ; T # Mn [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK +A69F ; T # Mn COMBINING CYRILLIC LETTER IOTIFIED E A6F0..A6F1 ; T # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS A802 ; T # Mn SYLOTI NAGRI SIGN DVISVARA A806 ; T # Mn SYLOTI NAGRI SIGN HASANTA @@ -292,6 +309,8 @@ AAB2..AAB4 ; T # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U AAB7..AAB8 ; T # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA AABE..AABF ; T # Mn [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK AAC1 ; T # Mn TAI VIET TONE MAI THO +AAEC..AAED ; T # Mn [2] MEETEI MAYEK VOWEL SIGN 
UU..MEETEI MAYEK VOWEL SIGN AAI +AAF6 ; T # Mn MEETEI MAYEK VIRAMA ABE5 ; T # Mn MEETEI MAYEK VOWEL SIGN ANAP ABE8 ; T # Mn MEETEI MAYEK VOWEL SIGN UNAP ABED ; T # Mn MEETEI MAYEK APUN IYEK @@ -312,6 +331,16 @@ FFF9..FFFB ; T # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATI 110B3..110B6 ; T # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI 110B9..110BA ; T # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA 110BD ; T # Cf KAITHI NUMBER SIGN +11100..11102 ; T # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11127..1112B ; T # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112D..11134 ; T # Mn [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA +11180..11181 ; T # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +111B6..111BE ; T # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +116AB ; T # Mn TAKRI SIGN ANUSVARA +116AD ; T # Mn TAKRI VOWEL SIGN AA +116B0..116B5 ; T # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +116B7 ; T # Mn TAKRI SIGN NUKTA +16F8F..16F92 ; T # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW 1D167..1D169 ; T # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 1D173..1D17A ; T # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE 1D17B..1D182 ; T # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE @@ -322,6 +351,6 @@ E0001 ; T # Cf LANGUAGE TAG E0020..E007F ; T # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; T # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 1344 +# Total code points: 1423 # EOF diff --git a/lib/unicore/extracted/DLineBreak.txt b/lib/unicore/extracted/DLineBreak.txt index 296b31d203..c2bae071d5 100644 --- a/lib/unicore/extracted/DLineBreak.txt +++ b/lib/unicore/extracted/DLineBreak.txt @@ -1,8 +1,8 @@ -# DerivedLineBreak-6.0.0.txt -# Date: 2010-08-19, 00:48:10 GMT [MD] +# DerivedLineBreak-6.1.0.txt +# Date: 2011-11-27, 05:10:24 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. 
+# Copyright (c) 1991-2011 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ @@ -23,8 +23,8 @@ E000..F8FF ; XX # Co [6400] <private-use-E000>..<private-use-F8FF> F0000..FFFFD ; XX # Co [65534] <private-use-F0000>..<private-use-FFFFD> 100000..10FFFD; XX # Co [65534] <private-use-100000>..<private-use-10FFFD> -# The above property value applies to 781599 code points not listed here. -# Total code points: 919067 +# The above property value applies to 780870 code points not listed here. +# Total code points: 918338 # ================================================ @@ -263,43 +263,18 @@ FF64 ; CL # Po HALFWIDTH IDEOGRAPHIC COMMA 301C ; NS # Pd WAVE DASH 303B ; NS # Lm VERTICAL IDEOGRAPHIC ITERATION MARK 303C ; NS # Lo MASU MARK -3041 ; NS # Lo HIRAGANA LETTER SMALL A -3043 ; NS # Lo HIRAGANA LETTER SMALL I -3045 ; NS # Lo HIRAGANA LETTER SMALL U -3047 ; NS # Lo HIRAGANA LETTER SMALL E -3049 ; NS # Lo HIRAGANA LETTER SMALL O -3063 ; NS # Lo HIRAGANA LETTER SMALL TU -3083 ; NS # Lo HIRAGANA LETTER SMALL YA -3085 ; NS # Lo HIRAGANA LETTER SMALL YU -3087 ; NS # Lo HIRAGANA LETTER SMALL YO -308E ; NS # Lo HIRAGANA LETTER SMALL WA -3095..3096 ; NS # Lo [2] HIRAGANA LETTER SMALL KA..HIRAGANA LETTER SMALL KE 309B..309C ; NS # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK 309D..309E ; NS # Lm [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK 30A0 ; NS # Pd KATAKANA-HIRAGANA DOUBLE HYPHEN -30A1 ; NS # Lo KATAKANA LETTER SMALL A -30A3 ; NS # Lo KATAKANA LETTER SMALL I -30A5 ; NS # Lo KATAKANA LETTER SMALL U -30A7 ; NS # Lo KATAKANA LETTER SMALL E -30A9 ; NS # Lo KATAKANA LETTER SMALL O -30C3 ; NS # Lo KATAKANA LETTER SMALL TU -30E3 ; NS # Lo KATAKANA LETTER SMALL YA -30E5 ; NS # Lo KATAKANA LETTER SMALL YU -30E7 ; NS # Lo KATAKANA LETTER SMALL YO -30EE ; NS # Lo KATAKANA LETTER SMALL WA -30F5..30F6 ; NS # Lo [2] KATAKANA LETTER SMALL 
KA..KATAKANA LETTER SMALL KE 30FB ; NS # Po KATAKANA MIDDLE DOT -30FC..30FE ; NS # Lm [3] KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA VOICED ITERATION MARK -31F0..31FF ; NS # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO +30FD..30FE ; NS # Lm [2] KATAKANA ITERATION MARK..KATAKANA VOICED ITERATION MARK A015 ; NS # Lm YI SYLLABLE WU FE54..FE55 ; NS # Po [2] SMALL SEMICOLON..SMALL COLON FF1A..FF1B ; NS # Po [2] FULLWIDTH COLON..FULLWIDTH SEMICOLON FF65 ; NS # Po HALFWIDTH KATAKANA MIDDLE DOT -FF67..FF6F ; NS # Lo [9] HALFWIDTH KATAKANA LETTER SMALL A..HALFWIDTH KATAKANA LETTER SMALL TU -FF70 ; NS # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK FF9E..FF9F ; NS # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK -# Total code points: 77 +# Total code points: 26 # ================================================ @@ -313,7 +288,7 @@ FF9E..FF9F ; NS # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KA 06D4 ; EX # Po ARABIC FULL STOP 07F9 ; EX # Po NKO EXCLAMATION MARK 0F0D..0F11 ; EX # Po [5] TIBETAN MARK SHAD..TIBETAN MARK RIN CHEN SPUNGS SHAD -0F14 ; EX # So TIBETAN MARK GTER TSHEG +0F14 ; EX # Po TIBETAN MARK GTER TSHEG 1802..1803 ; EX # Po [2] MONGOLIAN COMMA..MONGOLIAN FULL STOP 1808..1809 ; EX # Po [2] MONGOLIAN MANCHU COMMA..MONGOLIAN MANCHU FULL STOP 1944..1945 ; EX # Po [2] LIMBU EXCLAMATION MARK..LIMBU QUESTION MARK @@ -364,6 +339,7 @@ FE13..FE14 ; IS # Po [2] PRESENTATION FORM FOR VERTICAL COLON..PRESENTATION 005C ; PR # Po REVERSE SOLIDUS 00A3..00A5 ; PR # Sc [3] POUND SIGN..YEN SIGN 00B1 ; PR # Sm PLUS-MINUS SIGN +058F ; PR # Sc ARMENIAN DRAM SIGN 09FB ; PR # Sc BENGALI GANDA MARK 0AF1 ; PR # Sc GUJARATI RUPEE SIGN 0BF9 ; PR # Sc TAMIL RUPEE SIGN @@ -379,7 +355,7 @@ FF04 ; PR # Sc FULLWIDTH DOLLAR SIGN FFE1 ; PR # Sc FULLWIDTH POUND SIGN FFE5..FFE6 ; PR # Sc [2] FULLWIDTH YEN SIGN..FULLWIDTH WON SIGN -# Total code points: 44 +# Total code points: 45 # 
================================================ @@ -448,9 +424,13 @@ AA50..AA59 ; NU # Nd [10] CHAM DIGIT ZERO..CHAM DIGIT NINE ABF0..ABF9 ; NU # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE 104A0..104A9 ; NU # Nd [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE 11066..1106F ; NU # Nd [10] BRAHMI DIGIT ZERO..BRAHMI DIGIT NINE +110F0..110F9 ; NU # Nd [10] SORA SOMPENG DIGIT ZERO..SORA SOMPENG DIGIT NINE +11136..1113F ; NU # Nd [10] CHAKMA DIGIT ZERO..CHAKMA DIGIT NINE +111D0..111D9 ; NU # Nd [10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE +116C0..116C9 ; NU # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE 1D7CE..1D7FF ; NU # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE -# Total code points: 412 +# Total code points: 452 # ================================================ @@ -519,10 +499,8 @@ ABF0..ABF9 ; NU # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE 0561..0587 ; AL # L& [39] ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LIGATURE ECH YIWN 05C0 ; AL # Po HEBREW PUNCTUATION PASEQ 05C3 ; AL # Po HEBREW PUNCTUATION SOF PASUQ -05D0..05EA ; AL # Lo [27] HEBREW LETTER ALEF..HEBREW LETTER TAV -05F0..05F2 ; AL # Lo [3] HEBREW LIGATURE YIDDISH DOUBLE VAV..HEBREW LIGATURE YIDDISH DOUBLE YOD 05F3..05F4 ; AL # Po [2] HEBREW PUNCTUATION GERESH..HEBREW PUNCTUATION GERSHAYIM -0600..0603 ; AL # Cf [4] ARABIC NUMBER SIGN..ARABIC SIGN SAFHA +0600..0604 ; AL # Cf [5] ARABIC NUMBER SIGN..ARABIC SIGN SAMVAT 0606..0608 ; AL # Sm [3] ARABIC-INDIC CUBE ROOT..ARABIC RAY 060E..060F ; AL # So [2] ARABIC POETIC VERSE SIGN..ARABIC SIGN MISRA 0620..063F ; AL # Lo [32] ARABIC LETTER KASHMIRI YEH..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE @@ -558,6 +536,8 @@ ABF0..ABF9 ; NU # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE 0830..083E ; AL # Po [15] SAMARITAN PUNCTUATION NEQUDAA..SAMARITAN PUNCTUATION ANNAAU 0840..0858 ; AL # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN 085E ; AL # Po MANDAIC PUNCTUATION +08A0 ; AL # Lo ARABIC LETTER BEH WITH 
SMALL V BELOW +08A2..08AC ; AL # Lo [11] ARABIC LETTER JEEM WITH TWO DOTS ABOVE..ARABIC LETTER ROHINGYA YEH 0904..0939 ; AL # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA 093D ; AL # Lo DEVANAGARI SIGN AVAGRAHA 0950 ; AL # Lo DEVANAGARI OM @@ -598,6 +578,7 @@ ABF0..ABF9 ; NU # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE 0ABD ; AL # Lo GUJARATI SIGN AVAGRAHA 0AD0 ; AL # Lo GUJARATI OM 0AE0..0AE1 ; AL # Lo [2] GUJARATI LETTER VOCALIC RR..GUJARATI LETTER VOCALIC LL +0AF0 ; AL # Po GUJARATI ABBREVIATION SIGN 0B05..0B0C ; AL # Lo [8] ORIYA LETTER A..ORIYA LETTER VOCALIC L 0B0F..0B10 ; AL # Lo [2] ORIYA LETTER E..ORIYA LETTER AI 0B13..0B28 ; AL # Lo [22] ORIYA LETTER O..ORIYA LETTER NA @@ -676,9 +657,12 @@ ABF0..ABF9 ; NU # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE 0FD5..0FD8 ; AL # So [4] RIGHT-FACING SVASTI SIGN..LEFT-FACING SVASTI SIGN WITH DOTS 104C..104F ; AL # Po [4] MYANMAR SYMBOL LOCATIVE..MYANMAR SYMBOL GENITIVE 10A0..10C5 ; AL # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; AL # L& GEORGIAN CAPITAL LETTER YN +10CD ; AL # L& GEORGIAN CAPITAL LETTER AEN 10D0..10FA ; AL # Lo [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN 10FB ; AL # Po GEORGIAN PARAGRAPH SEPARATOR 10FC ; AL # Lm MODIFIER LETTER GEORGIAN NAR +10FD..10FF ; AL # Lo [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN 1200..1248 ; AL # Lo [73] ETHIOPIC SYLLABLE HA..ETHIOPIC SYLLABLE QWA 124A..124D ; AL # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE 1250..1256 ; AL # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO @@ -695,7 +679,7 @@ ABF0..ABF9 ; NU # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE 12D8..1310 ; AL # Lo [57] ETHIOPIC SYLLABLE ZA..ETHIOPIC SYLLABLE GWA 1312..1315 ; AL # Lo [4] ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE 1318..135A ; AL # Lo [67] ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA -1360 ; AL # So ETHIOPIC SECTION MARK +1360 ; AL # Po ETHIOPIC SECTION MARK 1362..1368 ; AL # Po [7] ETHIOPIC FULL 
STOP..ETHIOPIC PARAGRAPH SEPARATOR 1369..137C ; AL # No [20] ETHIOPIC DIGIT ONE..ETHIOPIC NUMBER TEN THOUSAND 1380..138F ; AL # Lo [16] ETHIOPIC SYLLABLE SEBATBEIT MWA..ETHIOPIC SYLLABLE PWE @@ -736,18 +720,20 @@ ABF0..ABF9 ; NU # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE 1B74..1B7C ; AL # So [9] BALINESE MUSICAL SYMBOL RIGHT-HAND OPEN DUG..BALINESE MUSICAL SYMBOL LEFT-HAND OPEN PING 1B83..1BA0 ; AL # Lo [30] SUNDANESE LETTER A..SUNDANESE LETTER HA 1BAE..1BAF ; AL # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA -1BC0..1BE5 ; AL # Lo [38] BATAK LETTER A..BATAK LETTER U +1BBA..1BE5 ; AL # Lo [44] SUNDANESE AVAGRAHA..BATAK LETTER U 1BFC..1BFF ; AL # Po [4] BATAK SYMBOL BINDU NA METEK..BATAK SYMBOL BINDU PANGOLAT 1C00..1C23 ; AL # Lo [36] LEPCHA LETTER KA..LEPCHA LETTER A 1C4D..1C4F ; AL # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA 1C5A..1C77 ; AL # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH 1C78..1C7D ; AL # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD +1CC0..1CC7 ; AL # Po [8] SUNDANESE PUNCTUATION BINDU SURYA..SUNDANESE PUNCTUATION BINDU BA SATANGA 1CD3 ; AL # Po VEDIC SIGN NIHSHVASA 1CE9..1CEC ; AL # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL 1CEE..1CF1 ; AL # Lo [4] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ANUSVARA UBHAYATO MUKHA +1CF5..1CF6 ; AL # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA 1D00..1D2B ; AL # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL -1D2C..1D61 ; AL # Lm [54] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI -1D62..1D77 ; AL # L& [22] LATIN SUBSCRIPT SMALL LETTER I..LATIN SMALL LETTER TURNED G +1D2C..1D6A ; AL # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1D6B..1D77 ; AL # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G 1D78 ; AL # Lm MODIFIER LETTER CYRILLIC EN 1D79..1D9A ; AL # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK 1D9B..1DBF ; AL # Lm [37] MODIFIER LETTER 
SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA @@ -941,9 +927,7 @@ ABF0..ABF9 ; NU # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE 2764..2767 ; AL # So [4] HEAVY BLACK HEART..ROTATED FLORAL HEART BULLET 2794..27BF ; AL # So [44] HEAVY WIDE-HEADED RIGHTWARDS ARROW..DOUBLE CURLY LOOP 27C0..27C4 ; AL # Sm [5] THREE DIMENSIONAL ANGLE..OPEN SUPERSET -27C7..27CA ; AL # Sm [4] OR WITH DOT INSIDE..VERTICAL BAR WITH HORIZONTAL STROKE -27CC ; AL # Sm LONG DIVISION -27CE..27E5 ; AL # Sm [24] SQUARED LOGICAL AND..WHITE SQUARE WITH RIGHTWARDS TICK +27C7..27E5 ; AL # Sm [31] OR WITH DOT INSIDE..WHITE SQUARE WITH RIGHTWARDS TICK 27F0..27FF ; AL # Sm [16] UPWARDS QUADRUPLE ARROW..LONG RIGHTWARDS SQUIGGLE ARROW 2800..28FF ; AL # So [256] BRAILLE PATTERN BLANK..BRAILLE PATTERN DOTS-12345678 2900..2982 ; AL # Sm [131] RIGHTWARDS TWO-HEADED ARROW WITH VERTICAL STROKE..Z NOTATION TYPE COLON @@ -957,14 +941,17 @@ ABF0..ABF9 ; NU # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE 2B50..2B54 ; AL # So [5] WHITE MEDIUM STAR..WHITE RIGHT-POINTING PENTAGON 2C00..2C2E ; AL # L& [47] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE 2C30..2C5E ; AL # L& [47] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER LATINATE MYSLITE -2C60..2C7C ; AL # L& [29] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN SUBSCRIPT SMALL LETTER J -2C7D ; AL # Lm MODIFIER LETTER CAPITAL V +2C60..2C7B ; AL # L& [28] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN LETTER SMALL CAPITAL TURNED E +2C7C..2C7D ; AL # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V 2C7E..2CE4 ; AL # L& [103] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SYMBOL KAI 2CE5..2CEA ; AL # So [6] COPTIC SYMBOL MI RO..COPTIC SYMBOL SHIMA SIMA 2CEB..2CEE ; AL # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CF2..2CF3 ; AL # L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI 2CFD ; AL # No COPTIC FRACTION ONE HALF 
2D00..2D25 ; AL # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE -2D30..2D65 ; AL # Lo [54] TIFINAGH LETTER YA..TIFINAGH LETTER YAZZ +2D27 ; AL # L& GEORGIAN SMALL LETTER YN +2D2D ; AL # L& GEORGIAN SMALL LETTER AEN +2D30..2D67 ; AL # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO 2D6F ; AL # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK 2D80..2D96 ; AL # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE 2DA0..2DA6 ; AL # Lo [7] ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO @@ -980,6 +967,8 @@ ABF0..ABF9 ; NU # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE 2E1B ; AL # Po TILDE WITH RING ABOVE 2E1E..2E1F ; AL # Po [2] TILDE WITH DOT ABOVE..TILDE WITH DOT BELOW 2E2F ; AL # Lm VERTICAL TILDE +2E32 ; AL # Po TURNED COMMA +2E35..2E39 ; AL # Po [5] TURNED SEMICOLON..TOP HALF SECTION SIGN 4DC0..4DFF ; AL # So [64] HEXAGRAM FOR THE CREATIVE HEAVEN..HEXAGRAM FOR BEFORE COMPLETION A4D0..A4F7 ; AL # Lo [40] LISU LETTER BA..LISU LETTER OE A4F8..A4FD ; AL # Lm [6] LISU LETTER TONE MYA TI..LISU LETTER TONE MYA JEU @@ -1005,8 +994,9 @@ A771..A787 ; AL # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR A788 ; AL # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT A789..A78A ; AL # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN A78B..A78E ; AL # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT -A790..A791 ; AL # L& [2] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER N WITH DESCENDER -A7A0..A7A9 ; AL # L& [10] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN SMALL LETTER S WITH OBLIQUE STROKE +A790..A793 ; AL # L& [4] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER C WITH BAR +A7A0..A7AA ; AL # L& [11] LATIN CAPITAL LETTER G WITH OBLIQUE STROKE..LATIN CAPITAL LETTER H WITH HOOK +A7F8..A7F9 ; AL # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE A7FA ; AL # L& LATIN LETTER SMALL CAPITAL TURNED M A7FB..A801 ; AL # Lo [7] LATIN EPIGRAPHIC LETTER 
REVERSED F..SYLOTI NAGRI LETTER I A803..A805 ; AL # Lo [3] SYLOTI NAGRI LETTER U..SYLOTI NAGRI LETTER O @@ -1033,6 +1023,9 @@ AA00..AA28 ; AL # Lo [41] CHAM LETTER A..CHAM LETTER HA AA40..AA42 ; AL # Lo [3] CHAM LETTER FINAL K..CHAM LETTER FINAL NG AA44..AA4B ; AL # Lo [8] CHAM LETTER FINAL CH..CHAM LETTER FINAL SS AA5C ; AL # Po CHAM PUNCTUATION SPIRAL +AAE0..AAEA ; AL # Lo [11] MEETEI MAYEK LETTER E..MEETEI MAYEK LETTER SSA +AAF2 ; AL # Lo MEETEI MAYEK ANJI +AAF3..AAF4 ; AL # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK AB01..AB06 ; AL # Lo [6] ETHIOPIC SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO AB09..AB0E ; AL # Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO AB11..AB16 ; AL # Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO @@ -1041,15 +1034,8 @@ AB28..AB2E ; AL # Lo [7] ETHIOPIC SYLLABLE BBA..ETHIOPIC SYLLABLE BBO ABC0..ABE2 ; AL # Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM FB00..FB06 ; AL # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST FB13..FB17 ; AL # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH -FB1D ; AL # Lo HEBREW LETTER YOD WITH HIRIQ -FB1F..FB28 ; AL # Lo [10] HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBREW LETTER WIDE TAV FB29 ; AL # Sm HEBREW LETTER ALTERNATIVE PLUS SIGN -FB2A..FB36 ; AL # Lo [13] HEBREW LETTER SHIN WITH SHIN DOT..HEBREW LETTER ZAYIN WITH DAGESH -FB38..FB3C ; AL # Lo [5] HEBREW LETTER TET WITH DAGESH..HEBREW LETTER LAMED WITH DAGESH -FB3E ; AL # Lo HEBREW LETTER MEM WITH DAGESH -FB40..FB41 ; AL # Lo [2] HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER SAMEKH WITH DAGESH -FB43..FB44 ; AL # Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH -FB46..FBB1 ; AL # Lo [108] HEBREW LETTER TSADI WITH DAGESH..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM +FB50..FBB1 ; AL # Lo [98] ARABIC LETTER ALEF WASLA ISOLATED FORM..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM FBB2..FBC1 ; AL # Sk [16] ARABIC SYMBOL DOT ABOVE..ARABIC 
SYMBOL SMALL TAH BELOW FBD3..FD3D ; AL # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM FD50..FD8F ; AL # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM @@ -1108,6 +1094,8 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 10916..1091B ; AL # No [6] PHOENICIAN NUMBER ONE..PHOENICIAN NUMBER THREE 10920..10939 ; AL # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C 1093F ; AL # Po LYDIAN TRIANGULAR MARK +10980..109B7 ; AL # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA +109BE..109BF ; AL # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN 10A00 ; AL # Lo KHAROSHTHI LETTER A 10A10..10A13 ; AL # Lo [4] KHAROSHTHI LETTER KA..KHAROSHTHI LETTER GHA 10A15..10A17 ; AL # Lo [3] KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA @@ -1130,6 +1118,12 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 11083..110AF ; AL # Lo [45] KAITHI LETTER A..KAITHI LETTER HA 110BB..110BC ; AL # Po [2] KAITHI ABBREVIATION SIGN..KAITHI ENUMERATION SIGN 110BD ; AL # Cf KAITHI NUMBER SIGN +110D0..110E8 ; AL # Lo [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE +11103..11126 ; AL # Lo [36] CHAKMA LETTER AA..CHAKMA LETTER HAA +11183..111B2 ; AL # Lo [48] SHARADA LETTER A..SHARADA LETTER HA +111C1..111C4 ; AL # Lo [4] SHARADA SIGN AVAGRAHA..SHARADA OM +111C7 ; AL # Po SHARADA ABBREVIATION SIGN +11680..116AA ; AL # Lo [43] TAKRI LETTER A..TAKRI LETTER RRA 12000..1236E ; AL # Lo [879] CUNEIFORM SIGN A..CUNEIFORM SIGN ZUM 12400..12462 ; AL # Nl [99] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN OLD ASSYRIAN ONE QUARTER 13000..13257 ; AL # Lo [600] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH O006 @@ -1138,6 +1132,9 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1328A..13378 ; AL # Lo [239] EGYPTIAN HIEROGLYPH O037..EGYPTIAN HIEROGLYPH V011 1337C..1342E ; AL # Lo [179] EGYPTIAN 
HIEROGLYPH V012..EGYPTIAN HIEROGLYPH AA032 16800..16A38 ; AL # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ +16F00..16F44 ; AL # Lo [69] MIAO LETTER PA..MIAO LETTER HHA +16F50 ; AL # Lo MIAO LETTER NASALIZATION +16F93..16F9F ; AL # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 1D000..1D0F5 ; AL # So [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO 1D100..1D126 ; AL # So [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2 1D129..1D164 ; AL # So [60] MUSICAL SYMBOL MULTIPLE MEASURE REST..MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE @@ -1189,6 +1186,40 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1D7AA..1D7C2 ; AL # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA 1D7C3 ; AL # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL 1D7C4..1D7CB ; AL # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA +1EE00..1EE03 ; AL # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; AL # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; AL # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; AL # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; AL # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; AL # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; AL # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; AL # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; AL # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; AL # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; AL # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; AL # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; AL # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; AL # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; AL # Lo [2] ARABIC MATHEMATICAL 
TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; AL # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; AL # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; AL # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; AL # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; AL # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; AL # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; AL # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; AL # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; AL # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; AL # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; AL # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; AL # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; AL # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; AL # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; AL # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; AL # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; AL # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; AL # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN +1EEF0..1EEF1 ; AL # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL 1F000..1F02B ; AL # So [44] MAHJONG TILE EAST WIND..MAHJONG TILE BACK 1F030..1F093 ; AL # So [100] DOMINO TILE HORIZONTAL BACK..DOMINO TILE VERTICAL-06-06 1F0A0..1F0AE ; AL # So [15] PLAYING CARD BACK..PLAYING CARD KING OF SPADES @@ -1196,6 +1227,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1F0C1..1F0CF ; AL # So [15] PLAYING CARD ACE OF DIAMONDS..PLAYING CARD BLACK JOKER 
1F0D1..1F0DF ; AL # So [15] PLAYING CARD ACE OF CLUBS..PLAYING CARD WHITE JOKER 1F12E ; AL # So CIRCLED WZ +1F16A..1F16B ; AL # So [2] RAISED MC SIGN..RAISED MD SIGN 1F1E6..1F1FF ; AL # So [26] REGIONAL INDICATOR SYMBOL LETTER A..REGIONAL INDICATOR SYMBOL LETTER Z 1F300..1F320 ; AL # So [33] CYCLONE..SHOOTING STAR 1F330..1F335 ; AL # So [6] CHESTNUT..CACTUS @@ -1209,24 +1241,14 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1F442..1F4F7 ; AL # So [182] EAR..CAMERA 1F4F9..1F4FC ; AL # So [4] VIDEO CAMERA..VIDEOCASSETTE 1F500..1F53D ; AL # So [62] TWISTED RIGHTWARDS ARROWS..DOWN-POINTING SMALL RED TRIANGLE +1F540..1F543 ; AL # So [4] CIRCLED CROSS POMMEE..NOTCHED LEFT SEMICIRCLE WITH THREE DOTS 1F550..1F567 ; AL # So [24] CLOCK FACE ONE OCLOCK..CLOCK FACE TWELVE-THIRTY -1F5FB..1F5FF ; AL # So [5] MOUNT FUJI..MOYAI -1F601..1F610 ; AL # So [16] GRINNING FACE WITH SMILING EYES..NEUTRAL FACE -1F612..1F614 ; AL # So [3] UNAMUSED FACE..PENSIVE FACE -1F616 ; AL # So CONFOUNDED FACE -1F618 ; AL # So FACE THROWING A KISS -1F61A ; AL # So KISSING FACE WITH CLOSED EYES -1F61C..1F61E ; AL # So [3] FACE WITH STUCK-OUT TONGUE AND WINKING EYE..DISAPPOINTED FACE -1F620..1F625 ; AL # So [6] ANGRY FACE..DISAPPOINTED BUT RELIEVED FACE -1F628..1F62B ; AL # So [4] FEARFUL FACE..TIRED FACE -1F62D ; AL # So LOUDLY CRYING FACE -1F630..1F633 ; AL # So [4] FACE WITH OPEN MOUTH AND COLD SWEAT..FLUSHED FACE -1F635..1F640 ; AL # So [12] DIZZY FACE..WEARY CAT FACE +1F5FB..1F640 ; AL # So [70] MOUNT FUJI..WEARY CAT FACE 1F645..1F64F ; AL # So [11] FACE WITH NO GOOD GESTURE..PERSON WITH FOLDED HANDS 1F680..1F6C5 ; AL # So [70] ROCKET..LEFT LUGGAGE 1F700..1F773 ; AL # So [116] ALCHEMICAL SYMBOL FOR QUINTESSENCE..ALCHEMICAL SYMBOL FOR HALF OUNCE -# Total code points: 15797 +# Total code points: 16251 # ================================================ @@ -1293,14 +1315,12 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 3300..33FF ; ID # So [256] 
SQUARE APAATO..SQUARE GAL 3400..4DB5 ; ID # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5 4DB6..4DBF ; ID # Cn [10] <reserved-4DB6>..<reserved-4DBF> -4E00..9FCB ; ID # Lo [20940] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCB -9FCC..9FFF ; ID # Cn [52] <reserved-9FCC>..<reserved-9FFF> +4E00..9FCC ; ID # Lo [20941] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCC +9FCD..9FFF ; ID # Cn [51] <reserved-9FCD>..<reserved-9FFF> A000..A014 ; ID # Lo [21] YI SYLLABLE IT..YI SYLLABLE E A016..A48C ; ID # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR A490..A4C6 ; ID # So [55] YI RADICAL QOT..YI RADICAL KE -F900..FA2D ; ID # Lo [302] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA2D -FA2E..FA2F ; ID # Cn [2] <reserved-FA2E>..<reserved-FA2F> -FA30..FA6D ; ID # Lo [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D +F900..FA6D ; ID # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D FA6E..FA6F ; ID # Cn [2] <reserved-FA6E>..<reserved-FA6F> FA70..FAD9 ; ID # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 FADA..FAFF ; ID # Cn [38] <reserved-FADA>..<reserved-FAFF> @@ -1406,6 +1426,7 @@ FE19 ; IN # Po PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS 0825..0827 ; CM # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U 0829..082D ; CM # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA 0859..085B ; CM # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK +08E4..08FE ; CM # Mn [27] ARABIC CURLY FATHA..ARABIC DAMMA WITH DOT 0900..0902 ; CM # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA 0903 ; CM # Mc DEVANAGARI SIGN VISARGA 093A ; CM # Mn DEVANAGARI VOWEL SIGN OE @@ -1549,6 +1570,8 @@ FE19 ; IN # Po PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS 1BA6..1BA7 ; CM # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG 1BA8..1BA9 ; CM # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN 
PANEULEUNG 1BAA ; CM # Mc SUNDANESE SIGN PAMAAEH +1BAB ; CM # Mn SUNDANESE SIGN VIRAMA +1BAC..1BAD ; CM # Mc [2] SUNDANESE CONSONANT SIGN PASANGAN MA..SUNDANESE CONSONANT SIGN PASANGAN WA 1BE6 ; CM # Mn BATAK SIGN TOMPI 1BE7 ; CM # Mc BATAK VOWEL SIGN E 1BE8..1BE9 ; CM # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE @@ -1566,7 +1589,8 @@ FE19 ; IN # Po PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS 1CE1 ; CM # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA 1CE2..1CE8 ; CM # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL 1CED ; CM # Mn VEDIC SIGN TIRYAK -1CF2 ; CM # Mc VEDIC SIGN ARDHAVISARGA +1CF2..1CF3 ; CM # Mc [2] VEDIC SIGN ARDHAVISARGA..VEDIC SIGN ROTATED ARDHAVISARGA +1CF4 ; CM # Mn VEDIC TONE CANDRA ABOVE 1DC0..1DE6 ; CM # Mn [39] COMBINING DOTTED GRAVE ACCENT..COMBINING LATIN SMALL LETTER Z 1DFC..1DFF ; CM # Mn [4] COMBINING DOUBLE INVERTED BREVE BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW 200C..200F ; CM # Cf [4] ZERO WIDTH NON-JOINER..RIGHT-TO-LEFT MARK @@ -1580,11 +1604,13 @@ FE19 ; IN # Po PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS 2CEF..2CF1 ; CM # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS 2D7F ; CM # Mn TIFINAGH CONSONANT JOINER 2DE0..2DFF ; CM # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS -302A..302F ; CM # Mn [6] IDEOGRAPHIC LEVEL TONE MARK..HANGUL DOUBLE DOT TONE MARK +302A..302D ; CM # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK +302E..302F ; CM # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK 3099..309A ; CM # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK A66F ; CM # Mn COMBINING CYRILLIC VZMET A670..A672 ; CM # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN -A67C..A67D ; CM # Mn [2] COMBINING CYRILLIC KAVYKA..COMBINING CYRILLIC PAYEROK +A674..A67D ; CM # Mn [10] COMBINING CYRILLIC LETTER 
UKRAINIAN IE..COMBINING CYRILLIC PAYEROK +A69F ; CM # Mn COMBINING CYRILLIC LETTER IOTIFIED E A6F0..A6F1 ; CM # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS A802 ; CM # Mn SYLOTI NAGRI SIGN DVISVARA A806 ; CM # Mn SYLOTI NAGRI SIGN HASANTA @@ -1615,6 +1641,11 @@ AA35..AA36 ; CM # Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONANT SIGN WA AA43 ; CM # Mn CHAM CONSONANT SIGN FINAL NG AA4C ; CM # Mn CHAM CONSONANT SIGN FINAL M AA4D ; CM # Mc CHAM CONSONANT SIGN FINAL H +AAEB ; CM # Mc MEETEI MAYEK VOWEL SIGN II +AAEC..AAED ; CM # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI +AAEE..AAEF ; CM # Mc [2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU +AAF5 ; CM # Mc MEETEI MAYEK VOWEL SIGN VISARGA +AAF6 ; CM # Mn MEETEI MAYEK VIRAMA ABE3..ABE4 ; CM # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP ABE5 ; CM # Mn MEETEI MAYEK VOWEL SIGN ANAP ABE6..ABE7 ; CM # Mc [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP @@ -1642,6 +1673,24 @@ FFF9..FFFB ; CM # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTAT 110B3..110B6 ; CM # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI 110B7..110B8 ; CM # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU 110B9..110BA ; CM # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA +11100..11102 ; CM # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11127..1112B ; CM # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112C ; CM # Mc CHAKMA VOWEL SIGN E +1112D..11134 ; CM # Mn [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA +11180..11181 ; CM # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +11182 ; CM # Mc SHARADA SIGN VISARGA +111B3..111B5 ; CM # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II +111B6..111BE ; CM # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +111BF..111C0 ; CM # Mc [2] SHARADA VOWEL SIGN AU..SHARADA SIGN VIRAMA +116AB ; CM # Mn TAKRI SIGN ANUSVARA +116AC ; CM # Mc TAKRI SIGN VISARGA +116AD ; CM # Mn TAKRI VOWEL SIGN AA +116AE..116AF ; CM 
# Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II +116B0..116B5 ; CM # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +116B6 ; CM # Mc TAKRI SIGN VIRAMA +116B7 ; CM # Mn TAKRI SIGN NUKTA +16F51..16F7E ; CM # Mc [46] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN NG +16F8F..16F92 ; CM # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW 1D165..1D166 ; CM # Mc [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM 1D167..1D169 ; CM # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 1D16D..1D172 ; CM # Mc [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5 @@ -1654,7 +1703,7 @@ E0001 ; CM # Cf LANGUAGE TAG E0020..E007F ; CM # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; CM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 1483 +# Total code points: 1628 # ================================================ @@ -1724,6 +1773,7 @@ A874..A875 ; BB # Po [2] PHAGS-PA SINGLE HEAD MARK..PHAGS-PA DOUBLE HEAD MA 2E19 ; BA # Po PALM BRANCH 2E2A..2E2D ; BA # Po [4] TWO DOTS OVER ONE DOT PUNCTUATION..FIVE DOT MARK 2E30..2E31 ; BA # Po [2] RING POINT..WORD SEPARATOR MIDDLE DOT +2E33..2E34 ; BA # Po [2] RAISED DOT..RAISED COMMA A4FE..A4FF ; BA # Po [2] LISU PUNCTUATION COMMA..LISU PUNCTUATION FULL STOP A60D ; BA # Po VAI COMMA A60F ; BA # Po VAI QUESTION MARK @@ -1732,9 +1782,9 @@ A8CE..A8CF ; BA # Po [2] SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA A92E..A92F ; BA # Po [2] KAYAH LI SIGN CWI..KAYAH LI SIGN SHYA A9C7..A9C9 ; BA # Po [3] JAVANESE PADA PANGKAT..JAVANESE PADA LUNGSI AA5D..AA5F ; BA # Po [3] CHAM PUNCTUATION DANDA..CHAM PUNCTUATION TRIPLE DANDA +AAF0..AAF1 ; BA # Po [2] MEETEI MAYEK CHEIKHAN..MEETEI MAYEK AHANG KHUDAM ABEB ; BA # Po MEETEI MAYEK CHEIKHEI -10100..10101 ; BA # Po [2] AEGEAN WORD SEPARATOR LINE..AEGEAN WORD SEPARATOR DOT -10102 ; BA # So AEGEAN CHECK MARK +10100..10102 ; BA # Po [3] AEGEAN WORD SEPARATOR LINE..AEGEAN CHECK MARK 1039F ; BA # Po UGARITIC WORD DIVIDER 103D0 ; BA # 
Po OLD PERSIAN WORD DIVIDER 10857 ; BA # Po IMPERIAL ARAMAIC SECTION SIGN @@ -1743,9 +1793,12 @@ ABEB ; BA # Po MEETEI MAYEK CHEIKHEI 10B39..10B3F ; BA # Po [7] AVESTAN ABBREVIATION MARK..LARGE ONE RING OVER TWO RINGS PUNCTUATION 11047..11048 ; BA # Po [2] BRAHMI DANDA..BRAHMI DOUBLE DANDA 110BE..110C1 ; BA # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA +11140..11143 ; BA # Po [4] CHAKMA SECTION MARK..CHAKMA QUESTION MARK +111C5..111C6 ; BA # Po [2] SHARADA DANDA..SHARADA DOUBLE DANDA +111C8 ; BA # Po SHARADA SEPARATOR 12470..12473 ; BA # Po [4] CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER..CUNEIFORM PUNCTUATION SIGN DIAGONAL TRICOLON -# Total code points: 140 +# Total code points: 151 # ================================================ @@ -1820,7 +1873,7 @@ FFFC ; CB # So OBJECT REPLACEMENT CHARACTER 0EC0..0EC4 ; SA # Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI 0EC6 ; SA # Lm LAO KO LA 0EC8..0ECD ; SA # Mn [6] LAO TONE MAI EK..LAO NIGGAHITA -0EDC..0EDD ; SA # Lo [2] LAO HO NO..LAO HO MO +0EDC..0EDF ; SA # Lo [4] LAO HO NO..LAO LETTER KHMU NYO 1000..102A ; SA # Lo [43] MYANMAR LETTER KA..MYANMAR LETTER AU 102B..102C ; SA # Mc [2] MYANMAR VOWEL SIGN TALL AA..MYANMAR VOWEL SIGN AA 102D..1030 ; SA # Mn [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU @@ -1854,7 +1907,7 @@ FFFC ; CB # So OBJECT REPLACEMENT CHARACTER 109D ; SA # Mn MYANMAR VOWEL SIGN AITON AI 109E..109F ; SA # So [2] MYANMAR SYMBOL SHAN ONE..MYANMAR SYMBOL SHAN EXCLAMATION 1780..17B3 ; SA # Lo [52] KHMER LETTER KA..KHMER INDEPENDENT VOWEL QAU -17B4..17B5 ; SA # Cf [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA +17B4..17B5 ; SA # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA 17B6 ; SA # Mc KHMER VOWEL SIGN AA 17B7..17BD ; SA # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA 17BE..17C5 ; SA # Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU @@ -1908,21 +1961,20 @@ AADB..AADC ; SA # Lo [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG AADD ; SA # Lm TAI VIET SYMBOL SAM AADE..AADF ; SA # 
Po [2] TAI VIET SYMBOL HO HOI..TAI VIET SYMBOL KOI KOI -# Total code points: 663 +# Total code points: 665 # ================================================ # Line_Break=Ambiguous -00A7 ; AI # So SECTION SIGN +00A7 ; AI # Po SECTION SIGN 00A8 ; AI # Sk DIAERESIS -00AA ; AI # L& FEMININE ORDINAL INDICATOR +00AA ; AI # Lo FEMININE ORDINAL INDICATOR 00B2..00B3 ; AI # No [2] SUPERSCRIPT TWO..SUPERSCRIPT THREE -00B6 ; AI # So PILCROW SIGN -00B7 ; AI # Po MIDDLE DOT +00B6..00B7 ; AI # Po [2] PILCROW SIGN..MIDDLE DOT 00B8 ; AI # Sk CEDILLA 00B9 ; AI # No SUPERSCRIPT ONE -00BA ; AI # L& MASCULINE ORDINAL INDICATOR +00BA ; AI # Lo MASCULINE ORDINAL INDICATOR 00BC..00BE ; AI # No [3] VULGAR FRACTION ONE QUARTER..VULGAR FRACTION THREE QUARTERS 00D7 ; AI # Sm MULTIPLICATION SIGN 00F7 ; AI # Sm DIVISION SIGN @@ -2024,7 +2076,7 @@ AADE..AADF ; SA # Po [2] TAI VIET SYMBOL HO HOI..TAI VIET SYMBOL KOI KOI 2757 ; AI # So HEAVY EXCLAMATION MARK SYMBOL 2776..2793 ; AI # No [30] DINGBAT NEGATIVE CIRCLED DIGIT ONE..DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN 2B55..2B59 ; AI # So [5] HEAVY LARGE CIRCLE..HEAVY CIRCLED SALTIRE -3248..324F ; AI # So [8] CIRCLED NUMBER TEN ON BLACK SQUARE..CIRCLED NUMBER EIGHTY ON BLACK SQUARE +3248..324F ; AI # No [8] CIRCLED NUMBER TEN ON BLACK SQUARE..CIRCLED NUMBER EIGHTY ON BLACK SQUARE FFFD ; AI # So REPLACEMENT CHARACTER 1F100..1F10A ; AI # No [11] DIGIT ZERO FULL STOP..DIGIT NINE COMMA 1F110..1F12D ; AI # So [30] PARENTHESIZED LATIN CAPITAL LETTER A..CIRCLED CD @@ -2038,8 +2090,9 @@ FFFD ; AI # So REPLACEMENT CHARACTER # Line_Break=Break_Both 2014 ; B2 # Pd EM DASH +2E3A..2E3B ; B2 # Pd [2] TWO-EM DASH..THREE-EM DASH -# Total code points: 1 +# Total code points: 3 # ================================================ @@ -2922,4 +2975,54 @@ D789..D7A3 ; H3 # Lo [27] HANGUL SYLLABLE HIG..HANGUL SYLLABLE HIH # Total code points: 2 +# ================================================ + +# Line_Break=Hebrew_Letter + +05D0..05EA ; HL # Lo [27] HEBREW 
LETTER ALEF..HEBREW LETTER TAV +05F0..05F2 ; HL # Lo [3] HEBREW LIGATURE YIDDISH DOUBLE VAV..HEBREW LIGATURE YIDDISH DOUBLE YOD +FB1D ; HL # Lo HEBREW LETTER YOD WITH HIRIQ +FB1F..FB28 ; HL # Lo [10] HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBREW LETTER WIDE TAV +FB2A..FB36 ; HL # Lo [13] HEBREW LETTER SHIN WITH SHIN DOT..HEBREW LETTER ZAYIN WITH DAGESH +FB38..FB3C ; HL # Lo [5] HEBREW LETTER TET WITH DAGESH..HEBREW LETTER LAMED WITH DAGESH +FB3E ; HL # Lo HEBREW LETTER MEM WITH DAGESH +FB40..FB41 ; HL # Lo [2] HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER SAMEKH WITH DAGESH +FB43..FB44 ; HL # Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH +FB46..FB4F ; HL # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATURE ALEF LAMED + +# Total code points: 74 + +# ================================================ + +# Line_Break=Conditional_Japanese_Starter + +3041 ; CJ # Lo HIRAGANA LETTER SMALL A +3043 ; CJ # Lo HIRAGANA LETTER SMALL I +3045 ; CJ # Lo HIRAGANA LETTER SMALL U +3047 ; CJ # Lo HIRAGANA LETTER SMALL E +3049 ; CJ # Lo HIRAGANA LETTER SMALL O +3063 ; CJ # Lo HIRAGANA LETTER SMALL TU +3083 ; CJ # Lo HIRAGANA LETTER SMALL YA +3085 ; CJ # Lo HIRAGANA LETTER SMALL YU +3087 ; CJ # Lo HIRAGANA LETTER SMALL YO +308E ; CJ # Lo HIRAGANA LETTER SMALL WA +3095..3096 ; CJ # Lo [2] HIRAGANA LETTER SMALL KA..HIRAGANA LETTER SMALL KE +30A1 ; CJ # Lo KATAKANA LETTER SMALL A +30A3 ; CJ # Lo KATAKANA LETTER SMALL I +30A5 ; CJ # Lo KATAKANA LETTER SMALL U +30A7 ; CJ # Lo KATAKANA LETTER SMALL E +30A9 ; CJ # Lo KATAKANA LETTER SMALL O +30C3 ; CJ # Lo KATAKANA LETTER SMALL TU +30E3 ; CJ # Lo KATAKANA LETTER SMALL YA +30E5 ; CJ # Lo KATAKANA LETTER SMALL YU +30E7 ; CJ # Lo KATAKANA LETTER SMALL YO +30EE ; CJ # Lo KATAKANA LETTER SMALL WA +30F5..30F6 ; CJ # Lo [2] KATAKANA LETTER SMALL KA..KATAKANA LETTER SMALL KE +30FC ; CJ # Lm KATAKANA-HIRAGANA PROLONGED SOUND MARK +31F0..31FF ; CJ # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO +FF67..FF6F ; CJ 
# Lo [9] HALFWIDTH KATAKANA LETTER SMALL A..HALFWIDTH KATAKANA LETTER SMALL TU +FF70 ; CJ # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK + +# Total code points: 51 + # EOF diff --git a/lib/unicore/extracted/DNumType.txt b/lib/unicore/extracted/DNumType.txt index e1595fa29c..92866603e7 100644 --- a/lib/unicore/extracted/DNumType.txt +++ b/lib/unicore/extracted/DNumType.txt @@ -1,14 +1,22 @@ -# DerivedNumericType-6.0.0.txt -# Date: 2010-08-19, 00:48:13 GMT [MD] +# DerivedNumericType-6.1.0.txt +# Date: 2011-08-23, 00:47:14 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ # ================================================ -# Numeric Type (from UnicodeData.txt, field 6/7/8 plus Unihan Database: see UAX #44: http://www.unicode.org/reports/tr44/) +# Derived Property: Numeric_Type +# The values are based on fields 6-8 of UnicodeData.txt, plus the fields +# kAccountingNumeric, kOtherNumeric, kPrimaryNumeric in the Unicode Han Database (Unihan). +# The derivations for these values are as follows. +# Numeric_Type=Decimal: When there is a value in field 6. +# Numeric_Type=Digit: When there is a value in field 7, but not in field 6. +# Numeric_Type=Numeric: When there are values for kAccountingNumeric, kOtherNumeric, kPrimaryNumeric, +# or there is a value in field 8, but not in field 7. +# Numeric_Type=None: Otherwise # All code points not explicitly listed for Numeric_Type # have the value None. 
@@ -45,6 +53,7 @@ 3038..303A ; Numeric # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY 3192..3195 ; Numeric # No [4] IDEOGRAPHIC ANNOTATION ONE MARK..IDEOGRAPHIC ANNOTATION FOUR MARK 3220..3229 ; Numeric # No [10] PARENTHESIZED IDEOGRAPH ONE..PARENTHESIZED IDEOGRAPH TEN +3248..324F ; Numeric # No [8] CIRCLED NUMBER TEN ON BLACK SQUARE..CIRCLED NUMBER EIGHTY ON BLACK SQUARE 3251..325F ; Numeric # No [15] CIRCLED NUMBER TWENTY ONE..CIRCLED NUMBER THIRTY FIVE 3280..3289 ; Numeric # No [10] CIRCLED IDEOGRAPH ONE..CIRCLED IDEOGRAPH TEN 32B1..32BF ; Numeric # No [15] CIRCLED NUMBER THIRTY SIX..CIRCLED NUMBER FIFTY @@ -143,7 +152,7 @@ F9FD ; Numeric # Lo CJK COMPATIBILITY IDEOGRAPH-F9FD 2626D ; Numeric # Lo CJK UNIFIED IDEOGRAPH-2626D 2F890 ; Numeric # Lo CJK COMPATIBILITY IDEOGRAPH-2F890 -# Total code points: 629 +# Total code points: 637 # ================================================ @@ -209,8 +218,12 @@ ABF0..ABF9 ; Decimal # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT N FF10..FF19 ; Decimal # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE 104A0..104A9 ; Decimal # Nd [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE 11066..1106F ; Decimal # Nd [10] BRAHMI DIGIT ZERO..BRAHMI DIGIT NINE +110F0..110F9 ; Decimal # Nd [10] SORA SOMPENG DIGIT ZERO..SORA SOMPENG DIGIT NINE +11136..1113F ; Decimal # Nd [10] CHAKMA DIGIT ZERO..CHAKMA DIGIT NINE +111D0..111D9 ; Decimal # Nd [10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE +116C0..116C9 ; Decimal # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE 1D7CE..1D7FF ; Decimal # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE -# Total code points: 420 +# Total code points: 460 # EOF diff --git a/lib/unicore/extracted/DNumValues.txt b/lib/unicore/extracted/DNumValues.txt index 654bb86b67..02d408eb4b 100644 --- a/lib/unicore/extracted/DNumValues.txt +++ b/lib/unicore/extracted/DNumValues.txt @@ -1,19 +1,28 @@ -# DerivedNumericValues-6.0.0.txt -# Date: 2010-08-19, 00:48:14 GMT [MD] +# 
DerivedNumericValues-6.1.0.txt +# Date: 2011-08-19, 17:58:36 GMT [MD] # # Unicode Character Database -# Copyright (c) 1991-2010 Unicode, Inc. +# Copyright (c) 1991-2011 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ # ================================================ -# Numeric Values (from UnicodeData.txt, field 6/7/8) +# Derived Property: Numeric_Value +# The values are based on field 8 of UnicodeData.txt, plus the fields +# kAccountingNumeric, kOtherNumeric, kPrimaryNumeric in the Unicode Han Database (Unihan). +# The derivations for these values are as follows. +# Numeric_Value = the value of kAccountingNumeric, kOtherNumeric, or kPrimaryNumeric, if they exist; otherwise +# Numeric_Value = the value of field 8, if it exists; otherwise +# Numeric_Value = NaN +# # WARNING: Certain values, such as 0.16666667, are repeating fractions # Although they are only printed with a limited number of decimal places # in this file, they should be expressed to the limits of the precision # available when used. +# # The third field is empty; it used to be a copy of the numeric type. +# # A fourth field was added to this extracted data as of # Unicode 5.1.0, expressing the same numeric value either as # a whole integer where possible or as a rational fraction, e.g. "1/6". 
@@ -77,6 +86,10 @@ FF10 ; 0.0 ; ; 0 # Nd FULLWIDTH DIGIT ZERO 1018A ; 0.0 ; ; 0 # No GREEK ZERO SIGN 104A0 ; 0.0 ; ; 0 # Nd OSMANYA DIGIT ZERO 11066 ; 0.0 ; ; 0 # Nd BRAHMI DIGIT ZERO +110F0 ; 0.0 ; ; 0 # Nd SORA SOMPENG DIGIT ZERO +11136 ; 0.0 ; ; 0 # Nd CHAKMA DIGIT ZERO +111D0 ; 0.0 ; ; 0 # Nd SHARADA DIGIT ZERO +116C0 ; 0.0 ; ; 0 # Nd TAKRI DIGIT ZERO 1D7CE ; 0.0 ; ; 0 # Nd MATHEMATICAL BOLD DIGIT ZERO 1D7D8 ; 0.0 ; ; 0 # Nd MATHEMATICAL DOUBLE-STRUCK DIGIT ZERO 1D7E2 ; 0.0 ; ; 0 # Nd MATHEMATICAL SANS-SERIF DIGIT ZERO @@ -84,7 +97,7 @@ FF10 ; 0.0 ; ; 0 # Nd FULLWIDTH DIGIT ZERO 1D7F6 ; 0.0 ; ; 0 # Nd MATHEMATICAL MONOSPACE DIGIT ZERO 1F100..1F101 ; 0.0 ; ; 0 # No [2] DIGIT ZERO FULL STOP..DIGIT ZERO COMMA -# Total code points: 56 +# Total code points: 60 # ================================================ @@ -323,6 +336,10 @@ FF11 ; 1.0 ; ; 1 # Nd FULLWIDTH DIGIT ONE 10E60 ; 1.0 ; ; 1 # No RUMI DIGIT ONE 11052 ; 1.0 ; ; 1 # No BRAHMI NUMBER ONE 11067 ; 1.0 ; ; 1 # Nd BRAHMI DIGIT ONE +110F1 ; 1.0 ; ; 1 # Nd SORA SOMPENG DIGIT ONE +11137 ; 1.0 ; ; 1 # Nd CHAKMA DIGIT ONE +111D1 ; 1.0 ; ; 1 # Nd SHARADA DIGIT ONE +116C1 ; 1.0 ; ; 1 # Nd TAKRI DIGIT ONE 12415 ; 1.0 ; ; 1 # Nl CUNEIFORM NUMERIC SIGN ONE GESH2 1241E ; 1.0 ; ; 1 # Nl CUNEIFORM NUMERIC SIGN ONE GESHU 1242C ; 1.0 ; ; 1 # Nl CUNEIFORM NUMERIC SIGN ONE SHARU @@ -338,7 +355,7 @@ FF11 ; 1.0 ; ; 1 # Nd FULLWIDTH DIGIT ONE 1F102 ; 1.0 ; ; 1 # No DIGIT ONE COMMA 2092A ; 1.0 ; ; 1 # Lo CJK UNIFIED IDEOGRAPH-2092A -# Total code points: 93 +# Total code points: 97 # ================================================ @@ -424,6 +441,10 @@ FF12 ; 2.0 ; ; 2 # Nd FULLWIDTH DIGIT TWO 10E61 ; 2.0 ; ; 2 # No RUMI DIGIT TWO 11053 ; 2.0 ; ; 2 # No BRAHMI NUMBER TWO 11068 ; 2.0 ; ; 2 # Nd BRAHMI DIGIT TWO +110F2 ; 2.0 ; ; 2 # Nd SORA SOMPENG DIGIT TWO +11138 ; 2.0 ; ; 2 # Nd CHAKMA DIGIT TWO +111D2 ; 2.0 ; ; 2 # Nd SHARADA DIGIT TWO +116C2 ; 2.0 ; ; 2 # Nd TAKRI DIGIT TWO 12400 ; 2.0 ; ; 2 # Nl CUNEIFORM NUMERIC SIGN TWO ASH 
12416 ; 2.0 ; ; 2 # Nl CUNEIFORM NUMERIC SIGN TWO GESH2 1241F ; 2.0 ; ; 2 # Nl CUNEIFORM NUMERIC SIGN TWO GESHU @@ -442,7 +463,7 @@ FF12 ; 2.0 ; ; 2 # Nd FULLWIDTH DIGIT TWO 1F103 ; 2.0 ; ; 2 # No DIGIT TWO COMMA 22390 ; 2.0 ; ; 2 # Lo CJK UNIFIED IDEOGRAPH-22390 -# Total code points: 96 +# Total code points: 100 # ================================================ @@ -522,6 +543,10 @@ FF13 ; 3.0 ; ; 3 # Nd FULLWIDTH DIGIT THREE 10E62 ; 3.0 ; ; 3 # No RUMI DIGIT THREE 11054 ; 3.0 ; ; 3 # No BRAHMI NUMBER THREE 11069 ; 3.0 ; ; 3 # Nd BRAHMI DIGIT THREE +110F3 ; 3.0 ; ; 3 # Nd SORA SOMPENG DIGIT THREE +11139 ; 3.0 ; ; 3 # Nd CHAKMA DIGIT THREE +111D3 ; 3.0 ; ; 3 # Nd SHARADA DIGIT THREE +116C3 ; 3.0 ; ; 3 # Nd TAKRI DIGIT THREE 12401 ; 3.0 ; ; 3 # Nl CUNEIFORM NUMERIC SIGN THREE ASH 12408 ; 3.0 ; ; 3 # Nl CUNEIFORM NUMERIC SIGN THREE DISH 12417 ; 3.0 ; ; 3 # Nl CUNEIFORM NUMERIC SIGN THREE GESH2 @@ -544,7 +569,7 @@ FF13 ; 3.0 ; ; 3 # Nd FULLWIDTH DIGIT THREE 22998 ; 3.0 ; ; 3 # Lo CJK UNIFIED IDEOGRAPH-22998 23B1B ; 3.0 ; ; 3 # Lo CJK UNIFIED IDEOGRAPH-23B1B -# Total code points: 98 +# Total code points: 102 # ================================================ @@ -618,6 +643,10 @@ FF14 ; 4.0 ; ; 4 # Nd FULLWIDTH DIGIT FOUR 10E63 ; 4.0 ; ; 4 # No RUMI DIGIT FOUR 11055 ; 4.0 ; ; 4 # No BRAHMI NUMBER FOUR 1106A ; 4.0 ; ; 4 # Nd BRAHMI DIGIT FOUR +110F4 ; 4.0 ; ; 4 # Nd SORA SOMPENG DIGIT FOUR +1113A ; 4.0 ; ; 4 # Nd CHAKMA DIGIT FOUR +111D4 ; 4.0 ; ; 4 # Nd SHARADA DIGIT FOUR +116C4 ; 4.0 ; ; 4 # Nd TAKRI DIGIT FOUR 12402 ; 4.0 ; ; 4 # Nl CUNEIFORM NUMERIC SIGN FOUR ASH 12409 ; 4.0 ; ; 4 # Nl CUNEIFORM NUMERIC SIGN FOUR DISH 1240F ; 4.0 ; ; 4 # Nl CUNEIFORM NUMERIC SIGN FOUR U @@ -640,7 +669,7 @@ FF14 ; 4.0 ; ; 4 # Nd FULLWIDTH DIGIT FOUR 200E2 ; 4.0 ; ; 4 # Lo CJK UNIFIED IDEOGRAPH-200E2 2626D ; 4.0 ; ; 4 # Lo CJK UNIFIED IDEOGRAPH-2626D -# Total code points: 89 +# Total code points: 93 # ================================================ @@ -717,6 +746,10 @@ FF15 ; 5.0 ; ; 
5 # Nd FULLWIDTH DIGIT FIVE 10E64 ; 5.0 ; ; 5 # No RUMI DIGIT FIVE 11056 ; 5.0 ; ; 5 # No BRAHMI NUMBER FIVE 1106B ; 5.0 ; ; 5 # Nd BRAHMI DIGIT FIVE +110F5 ; 5.0 ; ; 5 # Nd SORA SOMPENG DIGIT FIVE +1113B ; 5.0 ; ; 5 # Nd CHAKMA DIGIT FIVE +111D5 ; 5.0 ; ; 5 # Nd SHARADA DIGIT FIVE +116C5 ; 5.0 ; ; 5 # Nd TAKRI DIGIT FIVE 12403 ; 5.0 ; ; 5 # Nl CUNEIFORM NUMERIC SIGN FIVE ASH 1240A ; 5.0 ; ; 5 # Nl CUNEIFORM NUMERIC SIGN FIVE DISH 12410 ; 5.0 ; ; 5 # Nl CUNEIFORM NUMERIC SIGN FIVE U @@ -736,7 +769,7 @@ FF15 ; 5.0 ; ; 5 # Nd FULLWIDTH DIGIT FIVE 1F106 ; 5.0 ; ; 5 # No DIGIT FIVE COMMA 20121 ; 5.0 ; ; 5 # Lo CJK UNIFIED IDEOGRAPH-20121 -# Total code points: 86 +# Total code points: 90 # ================================================ @@ -809,6 +842,10 @@ FF16 ; 6.0 ; ; 6 # Nd FULLWIDTH DIGIT SIX 10E65 ; 6.0 ; ; 6 # No RUMI DIGIT SIX 11057 ; 6.0 ; ; 6 # No BRAHMI NUMBER SIX 1106C ; 6.0 ; ; 6 # Nd BRAHMI DIGIT SIX +110F6 ; 6.0 ; ; 6 # Nd SORA SOMPENG DIGIT SIX +1113C ; 6.0 ; ; 6 # Nd CHAKMA DIGIT SIX +111D6 ; 6.0 ; ; 6 # Nd SHARADA DIGIT SIX +116C6 ; 6.0 ; ; 6 # Nd TAKRI DIGIT SIX 12404 ; 6.0 ; ; 6 # Nl CUNEIFORM NUMERIC SIGN SIX ASH 1240B ; 6.0 ; ; 6 # Nl CUNEIFORM NUMERIC SIGN SIX DISH 12411 ; 6.0 ; ; 6 # Nl CUNEIFORM NUMERIC SIGN SIX U @@ -825,7 +862,7 @@ FF16 ; 6.0 ; ; 6 # Nd FULLWIDTH DIGIT SIX 1F107 ; 6.0 ; ; 6 # No DIGIT SIX COMMA 20AEA ; 6.0 ; ; 6 # Lo CJK UNIFIED IDEOGRAPH-20AEA -# Total code points: 78 +# Total code points: 82 # ================================================ @@ -896,6 +933,10 @@ FF17 ; 7.0 ; ; 7 # Nd FULLWIDTH DIGIT SEVEN 10E66 ; 7.0 ; ; 7 # No RUMI DIGIT SEVEN 11058 ; 7.0 ; ; 7 # No BRAHMI NUMBER SEVEN 1106D ; 7.0 ; ; 7 # Nd BRAHMI DIGIT SEVEN +110F7 ; 7.0 ; ; 7 # Nd SORA SOMPENG DIGIT SEVEN +1113D ; 7.0 ; ; 7 # Nd CHAKMA DIGIT SEVEN +111D7 ; 7.0 ; ; 7 # Nd SHARADA DIGIT SEVEN +116C7 ; 7.0 ; ; 7 # Nd TAKRI DIGIT SEVEN 12405 ; 7.0 ; ; 7 # Nl CUNEIFORM NUMERIC SIGN SEVEN ASH 1240C ; 7.0 ; ; 7 # Nl CUNEIFORM NUMERIC SIGN SEVEN DISH 12412 ; 
7.0 ; ; 7 # Nl CUNEIFORM NUMERIC SIGN SEVEN U @@ -911,7 +952,7 @@ FF17 ; 7.0 ; ; 7 # Nd FULLWIDTH DIGIT SEVEN 1F108 ; 7.0 ; ; 7 # No DIGIT SEVEN COMMA 20001 ; 7.0 ; ; 7 # Lo CJK UNIFIED IDEOGRAPH-20001 -# Total code points: 77 +# Total code points: 81 # ================================================ @@ -980,6 +1021,10 @@ FF18 ; 8.0 ; ; 8 # Nd FULLWIDTH DIGIT EIGHT 10E67 ; 8.0 ; ; 8 # No RUMI DIGIT EIGHT 11059 ; 8.0 ; ; 8 # No BRAHMI NUMBER EIGHT 1106E ; 8.0 ; ; 8 # Nd BRAHMI DIGIT EIGHT +110F8 ; 8.0 ; ; 8 # Nd SORA SOMPENG DIGIT EIGHT +1113E ; 8.0 ; ; 8 # Nd CHAKMA DIGIT EIGHT +111D8 ; 8.0 ; ; 8 # Nd SHARADA DIGIT EIGHT +116C8 ; 8.0 ; ; 8 # Nd TAKRI DIGIT EIGHT 12406 ; 8.0 ; ; 8 # Nl CUNEIFORM NUMERIC SIGN EIGHT ASH 1240D ; 8.0 ; ; 8 # Nl CUNEIFORM NUMERIC SIGN EIGHT DISH 12413 ; 8.0 ; ; 8 # Nl CUNEIFORM NUMERIC SIGN EIGHT U @@ -994,7 +1039,7 @@ FF18 ; 8.0 ; ; 8 # Nd FULLWIDTH DIGIT EIGHT 1D7FE ; 8.0 ; ; 8 # Nd MATHEMATICAL MONOSPACE DIGIT EIGHT 1F109 ; 8.0 ; ; 8 # No DIGIT EIGHT COMMA -# Total code points: 73 +# Total code points: 77 # ================================================ @@ -1064,6 +1109,10 @@ FF19 ; 9.0 ; ; 9 # Nd FULLWIDTH DIGIT NINE 10E68 ; 9.0 ; ; 9 # No RUMI DIGIT NINE 1105A ; 9.0 ; ; 9 # No BRAHMI NUMBER NINE 1106F ; 9.0 ; ; 9 # Nd BRAHMI DIGIT NINE +110F9 ; 9.0 ; ; 9 # Nd SORA SOMPENG DIGIT NINE +1113F ; 9.0 ; ; 9 # Nd CHAKMA DIGIT NINE +111D9 ; 9.0 ; ; 9 # Nd SHARADA DIGIT NINE +116C9 ; 9.0 ; ; 9 # Nd TAKRI DIGIT NINE 12407 ; 9.0 ; ; 9 # Nl CUNEIFORM NUMERIC SIGN NINE ASH 1240E ; 9.0 ; ; 9 # Nl CUNEIFORM NUMERIC SIGN NINE DISH 12414 ; 9.0 ; ; 9 # Nl CUNEIFORM NUMERIC SIGN NINE U @@ -1079,7 +1128,7 @@ FF19 ; 9.0 ; ; 9 # Nd FULLWIDTH DIGIT NINE 1F10A ; 9.0 ; ; 9 # No DIGIT NINE COMMA 2F890 ; 9.0 ; ; 9 # Lo CJK COMPATIBILITY IDEOGRAPH-2F890 -# Total code points: 77 +# Total code points: 81 # ================================================ @@ -1097,6 +1146,7 @@ FF19 ; 9.0 ; ; 9 # Nd FULLWIDTH DIGIT NINE 2793 ; 10.0 ; ; 10 # No DINGBAT NEGATIVE 
CIRCLED SANS-SERIF NUMBER TEN 3038 ; 10.0 ; ; 10 # Nl HANGZHOU NUMERAL TEN 3229 ; 10.0 ; ; 10 # No PARENTHESIZED IDEOGRAPH TEN +3248 ; 10.0 ; ; 10 # No CIRCLED NUMBER TEN ON BLACK SQUARE 3289 ; 10.0 ; ; 10 # No CIRCLED IDEOGRAPH TEN 4EC0 ; 10.0 ; ; 10 # Lo CJK UNIFIED IDEOGRAPH-4EC0 5341 ; 10.0 ; ; 10 # Lo CJK UNIFIED IDEOGRAPH-5341 @@ -1119,7 +1169,7 @@ F9FD ; 10.0 ; ; 10 # Lo CJK COMPATIBILITY IDEOGRAPH-F9FD 1105B ; 10.0 ; ; 10 # No BRAHMI NUMBER TEN 1D369 ; 10.0 ; ; 10 # No COUNTING ROD TENS DIGIT ONE -# Total code points: 39 +# Total code points: 40 # ================================================ @@ -1218,6 +1268,7 @@ F9FD ; 10.0 ; ; 10 # Lo CJK COMPATIBILITY IDEOGRAPH-F9FD 249B ; 20.0 ; ; 20 # No NUMBER TWENTY FULL STOP 24F4 ; 20.0 ; ; 20 # No NEGATIVE CIRCLED NUMBER TWENTY 3039 ; 20.0 ; ; 20 # Nl HANGZHOU NUMERAL TWENTY +3249 ; 20.0 ; ; 20 # No CIRCLED NUMBER TWENTY ON BLACK SQUARE 5344 ; 20.0 ; ; 20 # Lo CJK UNIFIED IDEOGRAPH-5344 5EFF ; 20.0 ; ; 20 # Lo CJK UNIFIED IDEOGRAPH-5EFF 10111 ; 20.0 ; ; 20 # No AEGEAN NUMBER TWENTY @@ -1231,7 +1282,7 @@ F9FD ; 10.0 ; ; 10 # Lo CJK COMPATIBILITY IDEOGRAPH-F9FD 1105C ; 20.0 ; ; 20 # No BRAHMI NUMBER TWENTY 1D36A ; 20.0 ; ; 20 # No COUNTING ROD TENS DIGIT TWO -# Total code points: 18 +# Total code points: 19 # ================================================ @@ -1291,6 +1342,7 @@ F9FD ; 10.0 ; ; 10 # Lo CJK COMPATIBILITY IDEOGRAPH-F9FD 1374 ; 30.0 ; ; 30 # No ETHIOPIC NUMBER THIRTY 303A ; 30.0 ; ; 30 # Nl HANGZHOU NUMERAL THIRTY +324A ; 30.0 ; ; 30 # No CIRCLED NUMBER THIRTY ON BLACK SQUARE 325A ; 30.0 ; ; 30 # No CIRCLED NUMBER THIRTY 5345 ; 30.0 ; ; 30 # Lo CJK UNIFIED IDEOGRAPH-5345 10112 ; 30.0 ; ; 30 # No AEGEAN NUMBER THIRTY @@ -1300,7 +1352,7 @@ F9FD ; 10.0 ; ; 10 # Lo CJK COMPATIBILITY IDEOGRAPH-F9FD 1D36B ; 30.0 ; ; 30 # No COUNTING ROD TENS DIGIT THREE 20983 ; 30.0 ; ; 30 # Lo CJK UNIFIED IDEOGRAPH-20983 -# Total code points: 10 +# Total code points: 11 # ================================================ 
@@ -1359,6 +1411,7 @@ F9FD ; 10.0 ; ; 10 # Lo CJK COMPATIBILITY IDEOGRAPH-F9FD # ================================================ 1375 ; 40.0 ; ; 40 # No ETHIOPIC NUMBER FORTY +324B ; 40.0 ; ; 40 # No CIRCLED NUMBER FORTY ON BLACK SQUARE 32B5 ; 40.0 ; ; 40 # No CIRCLED NUMBER FORTY 534C ; 40.0 ; ; 40 # Lo CJK UNIFIED IDEOGRAPH-534C 10113 ; 40.0 ; ; 40 # No AEGEAN NUMBER FORTY @@ -1368,7 +1421,7 @@ F9FD ; 10.0 ; ; 10 # Lo CJK COMPATIBILITY IDEOGRAPH-F9FD 2098C ; 40.0 ; ; 40 # Lo CJK UNIFIED IDEOGRAPH-2098C 2099C ; 40.0 ; ; 40 # Lo CJK UNIFIED IDEOGRAPH-2099C -# Total code points: 9 +# Total code points: 10 # ================================================ @@ -1430,6 +1483,7 @@ F9FD ; 10.0 ; ; 10 # Lo CJK COMPATIBILITY IDEOGRAPH-F9FD 216C ; 50.0 ; ; 50 # Nl ROMAN NUMERAL FIFTY 217C ; 50.0 ; ; 50 # Nl SMALL ROMAN NUMERAL FIFTY 2186 ; 50.0 ; ; 50 # Nl ROMAN NUMERAL FIFTY EARLY FORM +324C ; 50.0 ; ; 50 # No CIRCLED NUMBER FIFTY ON BLACK SQUARE 32BF ; 50.0 ; ; 50 # No CIRCLED NUMBER FIFTY 10114 ; 50.0 ; ; 50 # No AEGEAN NUMBER FIFTY 10144 ; 50.0 ; ; 50 # Nl GREEK ACROPHONIC ATTIC FIFTY @@ -1443,37 +1497,40 @@ F9FD ; 10.0 ; ; 10 # Lo CJK COMPATIBILITY IDEOGRAPH-F9FD 1105F ; 50.0 ; ; 50 # No BRAHMI NUMBER FIFTY 1D36D ; 50.0 ; ; 50 # No COUNTING ROD TENS DIGIT FIVE -# Total code points: 19 +# Total code points: 20 # ================================================ 1377 ; 60.0 ; ; 60 # No ETHIOPIC NUMBER SIXTY +324D ; 60.0 ; ; 60 # No CIRCLED NUMBER SIXTY ON BLACK SQUARE 10115 ; 60.0 ; ; 60 # No AEGEAN NUMBER SIXTY 10E6E ; 60.0 ; ; 60 # No RUMI NUMBER SIXTY 11060 ; 60.0 ; ; 60 # No BRAHMI NUMBER SIXTY 1D36E ; 60.0 ; ; 60 # No COUNTING ROD TENS DIGIT SIX -# Total code points: 5 +# Total code points: 6 # ================================================ 1378 ; 70.0 ; ; 70 # No ETHIOPIC NUMBER SEVENTY +324E ; 70.0 ; ; 70 # No CIRCLED NUMBER SEVENTY ON BLACK SQUARE 10116 ; 70.0 ; ; 70 # No AEGEAN NUMBER SEVENTY 10E6F ; 70.0 ; ; 70 # No RUMI NUMBER SEVENTY 11061 ; 70.0 ; ; 70 # 
No BRAHMI NUMBER SEVENTY 1D36F ; 70.0 ; ; 70 # No COUNTING ROD TENS DIGIT SEVEN -# Total code points: 5 +# Total code points: 6 # ================================================ 1379 ; 80.0 ; ; 80 # No ETHIOPIC NUMBER EIGHTY +324F ; 80.0 ; ; 80 # No CIRCLED NUMBER EIGHTY ON BLACK SQUARE 10117 ; 80.0 ; ; 80 # No AEGEAN NUMBER EIGHTY 10E70 ; 80.0 ; ; 80 # No RUMI NUMBER EIGHTY 11062 ; 80.0 ; ; 80 # No BRAHMI NUMBER EIGHTY 1D370 ; 80.0 ; ; 80 # No COUNTING ROD TENS DIGIT EIGHT -# Total code points: 5 +# Total code points: 6 # ================================================ diff --git a/lib/unicore/mktables b/lib/unicore/mktables index 7824fd4986..2aaaa56b4f 100644 --- a/lib/unicore/mktables +++ b/lib/unicore/mktables @@ -8596,15 +8596,6 @@ sub finish_property_setup { } } - # This entry is still missing as of 6.0, perhaps because no short name for - # it. - if (-e 'NameAliases.txt') { - my $aliases = property_ref('Name_Alias'); - if (! defined $aliases) { - $aliases = Property->new('Name_Alias'); - } - } - # These are used so much, that we set globals for them. $gc = property_ref('General_Category'); $block = property_ref('Block'); @@ -12313,11 +12304,14 @@ sub compile_perl() { $perl_charname->add_duplicate($abbreviations{$value}, $value, Replace => $MULTIPLE_AFTER); } $alias_sentence = <<END; -The Name_Alias property adds duplicate code point entries with a corrected -name. The original (less correct, but still valid) name will be physically -last. +The Name_Alias property adds duplicate code point entries that are +alternatives to the original name. If an addition is a corrected +name, it will be physically first in the table. The original (less correct, +but still valid) name will be next; then any alternatives, in no particular +order; and finally any abbreviations, again in no particular order. 
END } + my $comment; if (@composition <= 2) { # Always at least 2 $comment = join " and ", @composition; @@ -12329,8 +12323,8 @@ END $perl_charname->add_comment(join_lines( <<END This file is for charnames.pm. It is the union of the $comment properties. -Unicode_1_Name entries are used only for otherwise nameless code -points. +Unicode_1_Name entries are used only for nameless code points in the Name +property. $alias_sentence This file doesn't include the algorithmically determinable names. For those, use 'unicore/Name.pm' diff --git a/lib/unicore/version b/lib/unicore/version index 09b254e90c..dfda3e0b4f 100644 --- a/lib/unicore/version +++ b/lib/unicore/version @@ -1 +1 @@ -6.0.0 +6.1.0 diff --git a/pod/perldelta.pod b/pod/perldelta.pod index fdf0a0529b..215be08d49 100644 --- a/pod/perldelta.pod +++ b/pod/perldelta.pod @@ -66,6 +66,70 @@ Full details are in L<perlfunc/fc>. The C<_> character in subroutine prototypes is now allowed before C<@> or C<%>. +=head1 Supports (I<almost>) Unicode 6.1 + +Besides the addition of whole new scripts, and new characters in +existing scripts, this new version of Unicode, as always, makes some +changes to existing characters. One change that may trip up some +applications is that the General Category of two characters in the +Latin-1 range, PILCROW SIGN and SECTION SIGN, has been changed from +Other_Symbol to Other_Punctuation. The same change has been made for +a character in each of Tibetan, Ethiopic, and Aegean. +The code points U+3248..U+324F (CIRCLED NUMBER TEN ON BLACK SQUARE +through CIRCLED NUMBER EIGHTY ON BLACK SQUARE) have had their General +Category changed from Other_Symbol to Other_Numeric. The Line Break +property has changes for Hebrew and Japanese; and as a consequence of +other changes in 6.1, the Perl regular expression construct C<\X> now +works differently for some characters in Thai and Lao. 
+ +New aliases (synonyms) have been defined for many property values; +these, along with the previously existing ones, are all cross indexed in +L<perluniprops>. + +The return value of C<charnames::viacode> is affected by other changes. +One of these is that the preferred name (which is what C<viacode> +returns) for the character at U+2118 has been changed from SCRIPT CAPITAL P +to WEIERSTRASS ELLIPTIC FUNCTION. But most of these changes are the +fallout of the mistake Unicode 6.0 made in naming a character used in +Japanese cell phones to be "BELL", which conflicts with the long +standing industry use of (and Unicode's recommendation to use) that name +to mean the ASCII control character at U+0007. As a result, that name +has been deprecated in Perl since v5.14; and any use of it will raise a +warning message (unless turned off). The name "ALERT" is now the +preferred name for this code point, with "BEL" being an acceptable short +form. The name for the new cell phone character, at code point U+1F514, +remains undefined in this version of Perl (hence we don't quite +implement all of Unicode 6.1), but starting in v5.18, BELL will mean +this character, and not U+0007. + +Unicode has taken steps to make sure that this sort of mistake does not +happen again. The Standard now includes all the generally accepted +names and abbreviations for control characters, whereas previously it +didn't. This means that all the names that Perl had previously +deprecated (except BELL) are no longer deprecated, such as FILE +SEPARATOR. Also, the names for four rarely used characters are subtly +different (a hyphen instead of a space) than before: + + Code point Old Name New Name + U+008E SINGLE-SHIFT 2 SINGLE-SHIFT-2 + U+008F SINGLE-SHIFT 3 SINGLE-SHIFT-3 + U+0091 PRIVATE USE 1 PRIVATE USE-1 + U+0092 PRIVATE USE 2 PRIVATE USE-2 + +Perl will accept either name as input, but C<charnames::viacode> now +returns the new name. 
+ +Additional name abbreviations are accepted: +SP for SPACE; +TAB for CHARACTER TABULATION; +NEW LINE, END OF LINE, NL, and EOL for LINE FEED; +LOCKING-SHIFT ONE for SHIFT OUT; +LOCKING-SHIFT ZERO for SHIFT IN; +and ZWNBSP for ZERO WIDTH NO-BREAK SPACE. + +More details on this version of Unicode are provided in +L<http://www.unicode.org/versions/Unicode6.1.0/>. + =head1 Security XXX Any security-related notices go here. In particular, any security @@ -103,6 +167,10 @@ core typemap: T_DATAUNIT and T_CALLBACK. If you are, against all odds, a user of these, please see the instructions on how to regain them in L<perlxstypemap>. +=head2 Unicode 6.1 has incompatibilities with Unicode 6.0 + +These are detailed in L</Supports (almost) Unicode 6.1> above. + =head1 Deprecations XXX Any deprecated features, syntax, modules etc. should be listed here. diff --git a/t/re/pat_advanced.t b/t/re/pat_advanced.t index 4d88190a5e..43db3d4334 100644 --- a/t/re/pat_advanced.t +++ b/t/re/pat_advanced.t @@ -1743,7 +1743,7 @@ EOP my @isPunct = grep {/[[:punct:]]/ != /\p{IsPunct}/} map {chr} 0x80 .. 0xff; - is(join ('', @isPunct), "\xa1\xab\xb7\xbb\xbf", # ¡ « · » ¿ + is(join ('', @isPunct), "\xa1\xa7\xab\xb6\xb7\xbb\xbf", # ¡ § « ¶ · » ¿ 'IsPunct disagrees with [:punct:] outside ASCII'); my @isPunctLatin1 = eval q { |