diff options
author | Andrew John Hughes <gnu_andrew@member.fsf.org> | 2005-06-11 14:27:18 +0000 |
---|---|---|
committer | Andrew John Hughes <gnu_andrew@member.fsf.org> | 2005-06-11 14:27:18 +0000 |
commit | 95575d51bc09195aa99112bd434833571c9e011c (patch) | |
tree | 1b59fc447a63dcfd8ebbd45d9230f355f9ea6b96 | |
parent | 88913bd165b7043de1b4e2eec5e37465321b8bb5 (diff) | |
download | classpath-95575d51bc09195aa99112bd434833571c9e011c.tar.gz |
2005-06-11 Andrew John Hughes <gnu_andrew@member.fsf.org>
* doc/unicode/Blocks-4.0.0.txt:
New block listings for Unicode 4.0.0.
* java/lang/Character.java:
Added new fields for 1.5, both in Character and UnicodeBlock.
(UnicodeBlock.of(int)): Implemented.
(UnicodeBlock.forName(String)): Implemented.
* java/lang/annotation/ElementType.java:
Corrected serial UID.
* java/lang/annotation/RetentionPolicy.java:
Likewise.
* scripts/unicode-blocks.pl:
Updated to handle Unicode 4.0.0.
-rw-r--r-- | ChangeLog | 15 | ||||
-rw-r--r-- | doc/unicode/Blocks-4.0.0.txt | 133 | ||||
-rw-r--r-- | java/lang/Character.java | 1254 | ||||
-rw-r--r-- | java/lang/annotation/ElementType.java | 2 | ||||
-rw-r--r-- | java/lang/annotation/RetentionPolicy.java | 2 | ||||
-rwxr-xr-x | scripts/unicode-blocks.pl | 230 |
6 files changed, 1328 insertions, 308 deletions
@@ -1,3 +1,18 @@ +2005-06-11 Andrew John Hughes <gnu_andrew@member.fsf.org> + + * doc/unicode/Blocks-4.0.0.txt: + New block listings for Unicode 4.0.0. + * java/lang/Character.java: + Added new fields for 1.5, both in Character and UnicodeBlock. + (UnicodeBlock.of(int)): Implemented. + (UnicodeBlock.forName(String)): Implemented. + * java/lang/annotation/ElementType.java: + Corrected serial UID. + * java/lang/annotation/RetentionPolicy.java: + Likewise. + * scripts/unicode-blocks.pl + Updated to handle Unicode 4.0.0. + 2005-06-09 Andrew John Hughes <gnu_andrew@member.fsf.org> * java/lang/Class.java: diff --git a/doc/unicode/Blocks-4.0.0.txt b/doc/unicode/Blocks-4.0.0.txt new file mode 100644 index 000000000..6dc2bd2fe --- /dev/null +++ b/doc/unicode/Blocks-4.0.0.txt @@ -0,0 +1,133 @@ +# Blocks-4.0.0.txt +# Correlated with Unicode 4.0 +# Note: The casing of block names is not normative. +# For example, "Basic Latin" and "BASIC LATIN" are equivalent. +# +# Code points not explicitly listed in this file are given the value No_Block. 
+# +# Start Code..End Code; Block Name +0000..007F; Basic Latin +0080..00FF; Latin-1 Supplement +0100..017F; Latin Extended-A +0180..024F; Latin Extended-B +0250..02AF; IPA Extensions +02B0..02FF; Spacing Modifier Letters +0300..036F; Combining Diacritical Marks +0370..03FF; Greek and Coptic +0400..04FF; Cyrillic +0500..052F; Cyrillic Supplementary +0530..058F; Armenian +0590..05FF; Hebrew +0600..06FF; Arabic +0700..074F; Syriac +0780..07BF; Thaana +0900..097F; Devanagari +0980..09FF; Bengali +0A00..0A7F; Gurmukhi +0A80..0AFF; Gujarati +0B00..0B7F; Oriya +0B80..0BFF; Tamil +0C00..0C7F; Telugu +0C80..0CFF; Kannada +0D00..0D7F; Malayalam +0D80..0DFF; Sinhala +0E00..0E7F; Thai +0E80..0EFF; Lao +0F00..0FFF; Tibetan +1000..109F; Myanmar +10A0..10FF; Georgian +1100..11FF; Hangul Jamo +1200..137F; Ethiopic +13A0..13FF; Cherokee +1400..167F; Unified Canadian Aboriginal Syllabics +1680..169F; Ogham +16A0..16FF; Runic +1700..171F; Tagalog +1720..173F; Hanunoo +1740..175F; Buhid +1760..177F; Tagbanwa +1780..17FF; Khmer +1800..18AF; Mongolian +1900..194F; Limbu +1950..197F; Tai Le +19E0..19FF; Khmer Symbols +1D00..1D7F; Phonetic Extensions +1E00..1EFF; Latin Extended Additional +1F00..1FFF; Greek Extended +2000..206F; General Punctuation +2070..209F; Superscripts and Subscripts +20A0..20CF; Currency Symbols +20D0..20FF; Combining Diacritical Marks for Symbols +2100..214F; Letterlike Symbols +2150..218F; Number Forms +2190..21FF; Arrows +2200..22FF; Mathematical Operators +2300..23FF; Miscellaneous Technical +2400..243F; Control Pictures +2440..245F; Optical Character Recognition +2460..24FF; Enclosed Alphanumerics +2500..257F; Box Drawing +2580..259F; Block Elements +25A0..25FF; Geometric Shapes +2600..26FF; Miscellaneous Symbols +2700..27BF; Dingbats +27C0..27EF; Miscellaneous Mathematical Symbols-A +27F0..27FF; Supplemental Arrows-A +2800..28FF; Braille Patterns +2900..297F; Supplemental Arrows-B +2980..29FF; Miscellaneous Mathematical Symbols-B +2A00..2AFF; Supplemental 
Mathematical Operators +2B00..2BFF; Miscellaneous Symbols and Arrows +2E80..2EFF; CJK Radicals Supplement +2F00..2FDF; Kangxi Radicals +2FF0..2FFF; Ideographic Description Characters +3000..303F; CJK Symbols and Punctuation +3040..309F; Hiragana +30A0..30FF; Katakana +3100..312F; Bopomofo +3130..318F; Hangul Compatibility Jamo +3190..319F; Kanbun +31A0..31BF; Bopomofo Extended +31F0..31FF; Katakana Phonetic Extensions +3200..32FF; Enclosed CJK Letters and Months +3300..33FF; CJK Compatibility +3400..4DBF; CJK Unified Ideographs Extension A +4DC0..4DFF; Yijing Hexagram Symbols +4E00..9FFF; CJK Unified Ideographs +A000..A48F; Yi Syllables +A490..A4CF; Yi Radicals +AC00..D7AF; Hangul Syllables +D800..DB7F; High Surrogates +DB80..DBFF; High Private Use Surrogates +DC00..DFFF; Low Surrogates +E000..F8FF; Private Use Area +F900..FAFF; CJK Compatibility Ideographs +FB00..FB4F; Alphabetic Presentation Forms +FB50..FDFF; Arabic Presentation Forms-A +FE00..FE0F; Variation Selectors +FE20..FE2F; Combining Half Marks +FE30..FE4F; CJK Compatibility Forms +FE50..FE6F; Small Form Variants +FE70..FEFF; Arabic Presentation Forms-B +FF00..FFEF; Halfwidth and Fullwidth Forms +FFF0..FFFF; Specials +10000..1007F; Linear B Syllabary +10080..100FF; Linear B Ideograms +10100..1013F; Aegean Numbers +10300..1032F; Old Italic +10330..1034F; Gothic +10380..1039F; Ugaritic +10400..1044F; Deseret +10450..1047F; Shavian +10480..104AF; Osmanya +10800..1083F; Cypriot Syllabary +1D000..1D0FF; Byzantine Musical Symbols +1D100..1D1FF; Musical Symbols +1D300..1D35F; Tai Xuan Jing Symbols +1D400..1D7FF; Mathematical Alphanumeric Symbols +20000..2A6DF; CJK Unified Ideographs Extension B +2F800..2FA1F; CJK Compatibility Ideographs Supplement +E0000..E007F; Tags +E0100..E01EF; Variation Selectors Supplement +F0000..FFFFF; Supplementary Private Use Area-A +100000..10FFFF; Supplementary Private Use Area-B diff --git a/java/lang/Character.java b/java/lang/Character.java index 089590645..d8aef71c2 100644 --- 
a/java/lang/Character.java +++ b/java/lang/Character.java @@ -41,6 +41,8 @@ package java.lang; import gnu.java.lang.CharData; import java.io.Serializable; +import java.text.Collator; +import java.util.Locale; /** * Wrapper class for the primitive char data type. In addition, this class @@ -134,10 +136,10 @@ public final class Character implements Serializable, Comparable<Character> * is in at most one of these blocks. * * This inner class was generated automatically from - * <code>doc/unicode/Block-3.txt</code>, by some perl scripts. + * <code>doc/unicode/Blocks-4.0.0.txt</code>, by some perl scripts. * This Unicode definition file can be found on the * <a href="http://www.unicode.org">http://www.unicode.org</a> website. - * JDK 1.4 uses Unicode version 3.0.0. + * JDK 1.5 uses Unicode version 4.0.0. * * @author scripts/unicode-blocks.pl (written by Eric Blake) * @since 1.2 @@ -145,10 +147,16 @@ public final class Character implements Serializable, Comparable<Character> public static final class UnicodeBlock extends Subset { /** The start of the subset. */ - private final char start; + private final int start; /** The end of the subset. */ - private final char end; + private final int end; + + /** The canonical name of the block according to the Unicode standard. */ + private final String canonicalName; + + /** Enumeration for the <code>forName()</code> method */ + private enum NameType { CANONICAL, NO_SPACES, CONSTANT; }; /** * Constructor for strictly defined blocks. @@ -156,25 +164,46 @@ public final class Character implements Serializable, Comparable<Character> * @param start the start character of the range * @param end the end character of the range * @param name the block name + * @param canonicalName the name of the block as defined in the Unicode + * standard. 
*/ - private UnicodeBlock(char start, char end, String name) + private UnicodeBlock(int start, int end, String name, + String canonicalName) { super(name); this.start = start; this.end = end; + this.canonicalName = canonicalName; } /** * Returns the Unicode character block which a character belongs to. + * <strong>Note</strong>: This method does not support the use of + * supplementary characters. For such support, <code>of(int)</code> + * should be used instead. * * @param ch the character to look up * @return the set it belongs to, or null if it is not in one */ public static UnicodeBlock of(char ch) { - // Special case, since SPECIALS contains two ranges. - if (ch == '\uFEFF') - return SPECIALS; + return of((int) ch); + } + + /** + * Returns the Unicode character block which a code point belongs to. + * + * @param codePoint the character to look up + * @return the set it belongs to, or null if it is not in one. + * @throws IllegalArgumentException if the specified code point is + * invalid. + * @since 1.5 + */ + public static UnicodeBlock of(int codePoint) + { + if (codePoint > MAX_CODE_POINT) + throw new IllegalArgumentException("The supplied integer value is " + + "too large to be a codepoint."); // Simple binary search for the correct block. int low = 0; int hi = sets.length - 1; @@ -182,9 +211,9 @@ public final class Character implements Serializable, Comparable<Character> { int mid = (low + hi) >> 1; UnicodeBlock b = sets[mid]; - if (ch < b.start) + if (codePoint < b.start) hi = mid - 1; - else if (ch > b.end) + else if (codePoint > b.end) low = mid + 1; else return b; @@ -193,703 +222,1293 @@ public final class Character implements Serializable, Comparable<Character> } /** + * <p> + * Returns the <code>UnicodeBlock</code> with the given name, as defined + * by the Unicode standard. 
The version of Unicode in use is defined by + * the <code>Character</code> class, and the names are given in the + * <code>Blocks-<version>.txt</code> file corresponding to that version. + * The name may be specified in one of three ways: + * </p> + * <ol> + * <li>The canonical, human-readable name used by the Unicode standard. + * This is the name with all spaces and hyphens retained. For example, + * `Basic Latin' retrieves the block, UnicodeBlock.BASIC_LATIN.</li> + * <li>The canonical name with all spaces removed e.g. `BasicLatin'.</li> + * <li>The name used for the constants specified by this class, which + * is the canonical name with all spaces and hyphens replaced with + * underscores e.g. `BASIC_LATIN'</li> + * </ol> + * <p> + * The names are compared case-insensitively using the case comparison + * associated with the U.S. English locale. The method recognises the + * previous names used for blocks as well as the current ones. At + * present, this simply means that the deprecated `SURROGATES_AREA' + * will be recognised by this method (the <code>of()</code> methods + * only return one of the three new surrogate blocks). + * </p> + * + * @param blockName the name of the block to look up. + * @return the specified block. + * @throws NullPointerException if the <code>blockName</code> is + * <code>null</code>. + * @throws IllegalArgumentException if the name does not match any Unicode + * block. 
+ * @since 1.5 + */ + public static final UnicodeBlock forName(String blockName) + { + NameType type; + if (blockName.indexOf(' ') != -1) + type = NameType.CANONICAL; + else if (blockName.indexOf('_') != -1) + type = NameType.CONSTANT; + else + type = NameType.NO_SPACES; + Collator usCollator = Collator.getInstance(Locale.US); + usCollator.setStrength(Collator.PRIMARY); + /* Special case for deprecated blocks not in sets */ + switch (type) + { + case CANONICAL: + if (usCollator.compare(blockName, "Surrogates Area") == 0) + return SURROGATES_AREA; + break; + case NO_SPACES: + if (usCollator.compare(blockName, "SurrogatesArea") == 0) + return SURROGATES_AREA; + break; + case CONSTANT: + if (usCollator.compare(blockName, "SURROGATES_AREA") == 0) + return SURROGATES_AREA; + break; + } + /* Other cases */ + switch (type) + { + case CANONICAL: + for (UnicodeBlock block : sets) + if (usCollator.compare(blockName, block.canonicalName) == 0) + return block; + break; + case NO_SPACES: + for (UnicodeBlock block : sets) + { + String nsName = block.canonicalName.replaceAll(" ",""); + if (usCollator.compare(blockName, nsName) == 0) + return block; + } + break; + case CONSTANT: + for (UnicodeBlock block : sets) + if (usCollator.compare(blockName, block.toString()) == 0) + return block; + break; + } + throw new IllegalArgumentException("No Unicode block found for " + + blockName + "."); + } + + /** * Basic Latin. - * '\u0000' - '\u007F'. + * 0x0000 - 0x007F. */ public static final UnicodeBlock BASIC_LATIN - = new UnicodeBlock('\u0000', '\u007F', - "BASIC_LATIN"); + = new UnicodeBlock(0x0000, 0x007F, + "BASIC_LATIN", + "Basic Latin"); /** * Latin-1 Supplement. - * '\u0080' - '\u00FF'. + * 0x0080 - 0x00FF. */ public static final UnicodeBlock LATIN_1_SUPPLEMENT - = new UnicodeBlock('\u0080', '\u00FF', - "LATIN_1_SUPPLEMENT"); + = new UnicodeBlock(0x0080, 0x00FF, + "LATIN_1_SUPPLEMENT", + "Latin-1 Supplement"); /** * Latin Extended-A. - * '\u0100' - '\u017F'. + * 0x0100 - 0x017F. 
*/ public static final UnicodeBlock LATIN_EXTENDED_A - = new UnicodeBlock('\u0100', '\u017F', - "LATIN_EXTENDED_A"); + = new UnicodeBlock(0x0100, 0x017F, + "LATIN_EXTENDED_A", + "Latin Extended-A"); /** * Latin Extended-B. - * '\u0180' - '\u024F'. + * 0x0180 - 0x024F. */ public static final UnicodeBlock LATIN_EXTENDED_B - = new UnicodeBlock('\u0180', '\u024F', - "LATIN_EXTENDED_B"); + = new UnicodeBlock(0x0180, 0x024F, + "LATIN_EXTENDED_B", + "Latin Extended-B"); /** * IPA Extensions. - * '\u0250' - '\u02AF'. + * 0x0250 - 0x02AF. */ public static final UnicodeBlock IPA_EXTENSIONS - = new UnicodeBlock('\u0250', '\u02AF', - "IPA_EXTENSIONS"); + = new UnicodeBlock(0x0250, 0x02AF, + "IPA_EXTENSIONS", + "IPA Extensions"); /** * Spacing Modifier Letters. - * '\u02B0' - '\u02FF'. + * 0x02B0 - 0x02FF. */ public static final UnicodeBlock SPACING_MODIFIER_LETTERS - = new UnicodeBlock('\u02B0', '\u02FF', - "SPACING_MODIFIER_LETTERS"); + = new UnicodeBlock(0x02B0, 0x02FF, + "SPACING_MODIFIER_LETTERS", + "Spacing Modifier Letters"); /** * Combining Diacritical Marks. - * '\u0300' - '\u036F'. + * 0x0300 - 0x036F. */ public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS - = new UnicodeBlock('\u0300', '\u036F', - "COMBINING_DIACRITICAL_MARKS"); + = new UnicodeBlock(0x0300, 0x036F, + "COMBINING_DIACRITICAL_MARKS", + "Combining Diacritical Marks"); /** * Greek. - * '\u0370' - '\u03FF'. + * 0x0370 - 0x03FF. */ public static final UnicodeBlock GREEK - = new UnicodeBlock('\u0370', '\u03FF', - "GREEK"); + = new UnicodeBlock(0x0370, 0x03FF, + "GREEK", + "Greek"); /** * Cyrillic. - * '\u0400' - '\u04FF'. + * 0x0400 - 0x04FF. */ public static final UnicodeBlock CYRILLIC - = new UnicodeBlock('\u0400', '\u04FF', - "CYRILLIC"); + = new UnicodeBlock(0x0400, 0x04FF, + "CYRILLIC", + "Cyrillic"); + + /** + * Cyrillic Supplementary. + * 0x0500 - 0x052F. 
+ * @since 1.5 + */ + public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY + = new UnicodeBlock(0x0500, 0x052F, + "CYRILLIC_SUPPLEMENTARY", + "Cyrillic Supplementary"); /** * Armenian. - * '\u0530' - '\u058F'. + * 0x0530 - 0x058F. */ public static final UnicodeBlock ARMENIAN - = new UnicodeBlock('\u0530', '\u058F', - "ARMENIAN"); + = new UnicodeBlock(0x0530, 0x058F, + "ARMENIAN", + "Armenian"); /** * Hebrew. - * '\u0590' - '\u05FF'. + * 0x0590 - 0x05FF. */ public static final UnicodeBlock HEBREW - = new UnicodeBlock('\u0590', '\u05FF', - "HEBREW"); + = new UnicodeBlock(0x0590, 0x05FF, + "HEBREW", + "Hebrew"); /** * Arabic. - * '\u0600' - '\u06FF'. + * 0x0600 - 0x06FF. */ public static final UnicodeBlock ARABIC - = new UnicodeBlock('\u0600', '\u06FF', - "ARABIC"); + = new UnicodeBlock(0x0600, 0x06FF, + "ARABIC", + "Arabic"); /** * Syriac. - * '\u0700' - '\u074F'. + * 0x0700 - 0x074F. * @since 1.4 */ public static final UnicodeBlock SYRIAC - = new UnicodeBlock('\u0700', '\u074F', - "SYRIAC"); + = new UnicodeBlock(0x0700, 0x074F, + "SYRIAC", + "Syriac"); /** * Thaana. - * '\u0780' - '\u07BF'. + * 0x0780 - 0x07BF. * @since 1.4 */ public static final UnicodeBlock THAANA - = new UnicodeBlock('\u0780', '\u07BF', - "THAANA"); + = new UnicodeBlock(0x0780, 0x07BF, + "THAANA", + "Thaana"); /** * Devanagari. - * '\u0900' - '\u097F'. + * 0x0900 - 0x097F. */ public static final UnicodeBlock DEVANAGARI - = new UnicodeBlock('\u0900', '\u097F', - "DEVANAGARI"); + = new UnicodeBlock(0x0900, 0x097F, + "DEVANAGARI", + "Devanagari"); /** * Bengali. - * '\u0980' - '\u09FF'. + * 0x0980 - 0x09FF. */ public static final UnicodeBlock BENGALI - = new UnicodeBlock('\u0980', '\u09FF', - "BENGALI"); + = new UnicodeBlock(0x0980, 0x09FF, + "BENGALI", + "Bengali"); /** * Gurmukhi. - * '\u0A00' - '\u0A7F'. + * 0x0A00 - 0x0A7F. 
*/ public static final UnicodeBlock GURMUKHI - = new UnicodeBlock('\u0A00', '\u0A7F', - "GURMUKHI"); + = new UnicodeBlock(0x0A00, 0x0A7F, + "GURMUKHI", + "Gurmukhi"); /** * Gujarati. - * '\u0A80' - '\u0AFF'. + * 0x0A80 - 0x0AFF. */ public static final UnicodeBlock GUJARATI - = new UnicodeBlock('\u0A80', '\u0AFF', - "GUJARATI"); + = new UnicodeBlock(0x0A80, 0x0AFF, + "GUJARATI", + "Gujarati"); /** * Oriya. - * '\u0B00' - '\u0B7F'. + * 0x0B00 - 0x0B7F. */ public static final UnicodeBlock ORIYA - = new UnicodeBlock('\u0B00', '\u0B7F', - "ORIYA"); + = new UnicodeBlock(0x0B00, 0x0B7F, + "ORIYA", + "Oriya"); /** * Tamil. - * '\u0B80' - '\u0BFF'. + * 0x0B80 - 0x0BFF. */ public static final UnicodeBlock TAMIL - = new UnicodeBlock('\u0B80', '\u0BFF', - "TAMIL"); + = new UnicodeBlock(0x0B80, 0x0BFF, + "TAMIL", + "Tamil"); /** * Telugu. - * '\u0C00' - '\u0C7F'. + * 0x0C00 - 0x0C7F. */ public static final UnicodeBlock TELUGU - = new UnicodeBlock('\u0C00', '\u0C7F', - "TELUGU"); + = new UnicodeBlock(0x0C00, 0x0C7F, + "TELUGU", + "Telugu"); /** * Kannada. - * '\u0C80' - '\u0CFF'. + * 0x0C80 - 0x0CFF. */ public static final UnicodeBlock KANNADA - = new UnicodeBlock('\u0C80', '\u0CFF', - "KANNADA"); + = new UnicodeBlock(0x0C80, 0x0CFF, + "KANNADA", + "Kannada"); /** * Malayalam. - * '\u0D00' - '\u0D7F'. + * 0x0D00 - 0x0D7F. */ public static final UnicodeBlock MALAYALAM - = new UnicodeBlock('\u0D00', '\u0D7F', - "MALAYALAM"); + = new UnicodeBlock(0x0D00, 0x0D7F, + "MALAYALAM", + "Malayalam"); /** * Sinhala. - * '\u0D80' - '\u0DFF'. + * 0x0D80 - 0x0DFF. * @since 1.4 */ public static final UnicodeBlock SINHALA - = new UnicodeBlock('\u0D80', '\u0DFF', - "SINHALA"); + = new UnicodeBlock(0x0D80, 0x0DFF, + "SINHALA", + "Sinhala"); /** * Thai. - * '\u0E00' - '\u0E7F'. + * 0x0E00 - 0x0E7F. */ public static final UnicodeBlock THAI - = new UnicodeBlock('\u0E00', '\u0E7F', - "THAI"); + = new UnicodeBlock(0x0E00, 0x0E7F, + "THAI", + "Thai"); /** * Lao. - * '\u0E80' - '\u0EFF'. 
+ * 0x0E80 - 0x0EFF. */ public static final UnicodeBlock LAO - = new UnicodeBlock('\u0E80', '\u0EFF', - "LAO"); + = new UnicodeBlock(0x0E80, 0x0EFF, + "LAO", + "Lao"); /** * Tibetan. - * '\u0F00' - '\u0FFF'. + * 0x0F00 - 0x0FFF. */ public static final UnicodeBlock TIBETAN - = new UnicodeBlock('\u0F00', '\u0FFF', - "TIBETAN"); + = new UnicodeBlock(0x0F00, 0x0FFF, + "TIBETAN", + "Tibetan"); /** * Myanmar. - * '\u1000' - '\u109F'. + * 0x1000 - 0x109F. * @since 1.4 */ public static final UnicodeBlock MYANMAR - = new UnicodeBlock('\u1000', '\u109F', - "MYANMAR"); + = new UnicodeBlock(0x1000, 0x109F, + "MYANMAR", + "Myanmar"); /** * Georgian. - * '\u10A0' - '\u10FF'. + * 0x10A0 - 0x10FF. */ public static final UnicodeBlock GEORGIAN - = new UnicodeBlock('\u10A0', '\u10FF', - "GEORGIAN"); + = new UnicodeBlock(0x10A0, 0x10FF, + "GEORGIAN", + "Georgian"); /** * Hangul Jamo. - * '\u1100' - '\u11FF'. + * 0x1100 - 0x11FF. */ public static final UnicodeBlock HANGUL_JAMO - = new UnicodeBlock('\u1100', '\u11FF', - "HANGUL_JAMO"); + = new UnicodeBlock(0x1100, 0x11FF, + "HANGUL_JAMO", + "Hangul Jamo"); /** * Ethiopic. - * '\u1200' - '\u137F'. + * 0x1200 - 0x137F. * @since 1.4 */ public static final UnicodeBlock ETHIOPIC - = new UnicodeBlock('\u1200', '\u137F', - "ETHIOPIC"); + = new UnicodeBlock(0x1200, 0x137F, + "ETHIOPIC", + "Ethiopic"); /** * Cherokee. - * '\u13A0' - '\u13FF'. + * 0x13A0 - 0x13FF. * @since 1.4 */ public static final UnicodeBlock CHEROKEE - = new UnicodeBlock('\u13A0', '\u13FF', - "CHEROKEE"); + = new UnicodeBlock(0x13A0, 0x13FF, + "CHEROKEE", + "Cherokee"); /** * Unified Canadian Aboriginal Syllabics. - * '\u1400' - '\u167F'. + * 0x1400 - 0x167F. * @since 1.4 */ public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS - = new UnicodeBlock('\u1400', '\u167F', - "UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS"); + = new UnicodeBlock(0x1400, 0x167F, + "UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", + "Unified Canadian Aboriginal Syllabics"); /** * Ogham. 
- * '\u1680' - '\u169F'. + * 0x1680 - 0x169F. * @since 1.4 */ public static final UnicodeBlock OGHAM - = new UnicodeBlock('\u1680', '\u169F', - "OGHAM"); + = new UnicodeBlock(0x1680, 0x169F, + "OGHAM", + "Ogham"); /** * Runic. - * '\u16A0' - '\u16FF'. + * 0x16A0 - 0x16FF. * @since 1.4 */ public static final UnicodeBlock RUNIC - = new UnicodeBlock('\u16A0', '\u16FF', - "RUNIC"); + = new UnicodeBlock(0x16A0, 0x16FF, + "RUNIC", + "Runic"); + + /** + * Tagalog. + * 0x1700 - 0x171F. + * @since 1.5 + */ + public static final UnicodeBlock TAGALOG + = new UnicodeBlock(0x1700, 0x171F, + "TAGALOG", + "Tagalog"); + + /** + * Hanunoo. + * 0x1720 - 0x173F. + * @since 1.5 + */ + public static final UnicodeBlock HANUNOO + = new UnicodeBlock(0x1720, 0x173F, + "HANUNOO", + "Hanunoo"); + + /** + * Buhid. + * 0x1740 - 0x175F. + * @since 1.5 + */ + public static final UnicodeBlock BUHID + = new UnicodeBlock(0x1740, 0x175F, + "BUHID", + "Buhid"); + + /** + * Tagbanwa. + * 0x1760 - 0x177F. + * @since 1.5 + */ + public static final UnicodeBlock TAGBANWA + = new UnicodeBlock(0x1760, 0x177F, + "TAGBANWA", + "Tagbanwa"); /** * Khmer. - * '\u1780' - '\u17FF'. + * 0x1780 - 0x17FF. * @since 1.4 */ public static final UnicodeBlock KHMER - = new UnicodeBlock('\u1780', '\u17FF', - "KHMER"); + = new UnicodeBlock(0x1780, 0x17FF, + "KHMER", + "Khmer"); /** * Mongolian. - * '\u1800' - '\u18AF'. + * 0x1800 - 0x18AF. * @since 1.4 */ public static final UnicodeBlock MONGOLIAN - = new UnicodeBlock('\u1800', '\u18AF', - "MONGOLIAN"); + = new UnicodeBlock(0x1800, 0x18AF, + "MONGOLIAN", + "Mongolian"); + + /** + * Limbu. + * 0x1900 - 0x194F. + * @since 1.5 + */ + public static final UnicodeBlock LIMBU + = new UnicodeBlock(0x1900, 0x194F, + "LIMBU", + "Limbu"); + + /** + * Tai Le. + * 0x1950 - 0x197F. + * @since 1.5 + */ + public static final UnicodeBlock TAI_LE + = new UnicodeBlock(0x1950, 0x197F, + "TAI_LE", + "Tai Le"); + + /** + * Khmer Symbols. + * 0x19E0 - 0x19FF. 
+ * @since 1.5 + */ + public static final UnicodeBlock KHMER_SYMBOLS + = new UnicodeBlock(0x19E0, 0x19FF, + "KHMER_SYMBOLS", + "Khmer Symbols"); + + /** + * Phonetic Extensions. + * 0x1D00 - 0x1D7F. + * @since 1.5 + */ + public static final UnicodeBlock PHONETIC_EXTENSIONS + = new UnicodeBlock(0x1D00, 0x1D7F, + "PHONETIC_EXTENSIONS", + "Phonetic Extensions"); /** * Latin Extended Additional. - * '\u1E00' - '\u1EFF'. + * 0x1E00 - 0x1EFF. */ public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL - = new UnicodeBlock('\u1E00', '\u1EFF', - "LATIN_EXTENDED_ADDITIONAL"); + = new UnicodeBlock(0x1E00, 0x1EFF, + "LATIN_EXTENDED_ADDITIONAL", + "Latin Extended Additional"); /** * Greek Extended. - * '\u1F00' - '\u1FFF'. + * 0x1F00 - 0x1FFF. */ public static final UnicodeBlock GREEK_EXTENDED - = new UnicodeBlock('\u1F00', '\u1FFF', - "GREEK_EXTENDED"); + = new UnicodeBlock(0x1F00, 0x1FFF, + "GREEK_EXTENDED", + "Greek Extended"); /** * General Punctuation. - * '\u2000' - '\u206F'. + * 0x2000 - 0x206F. */ public static final UnicodeBlock GENERAL_PUNCTUATION - = new UnicodeBlock('\u2000', '\u206F', - "GENERAL_PUNCTUATION"); + = new UnicodeBlock(0x2000, 0x206F, + "GENERAL_PUNCTUATION", + "General Punctuation"); /** * Superscripts and Subscripts. - * '\u2070' - '\u209F'. + * 0x2070 - 0x209F. */ public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS - = new UnicodeBlock('\u2070', '\u209F', - "SUPERSCRIPTS_AND_SUBSCRIPTS"); + = new UnicodeBlock(0x2070, 0x209F, + "SUPERSCRIPTS_AND_SUBSCRIPTS", + "Superscripts and Subscripts"); /** * Currency Symbols. - * '\u20A0' - '\u20CF'. + * 0x20A0 - 0x20CF. */ public static final UnicodeBlock CURRENCY_SYMBOLS - = new UnicodeBlock('\u20A0', '\u20CF', - "CURRENCY_SYMBOLS"); + = new UnicodeBlock(0x20A0, 0x20CF, + "CURRENCY_SYMBOLS", + "Currency Symbols"); /** * Combining Marks for Symbols. - * '\u20D0' - '\u20FF'. + * 0x20D0 - 0x20FF. 
*/ public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS - = new UnicodeBlock('\u20D0', '\u20FF', - "COMBINING_MARKS_FOR_SYMBOLS"); + = new UnicodeBlock(0x20D0, 0x20FF, + "COMBINING_MARKS_FOR_SYMBOLS", + "Combining Marks for Symbols"); /** * Letterlike Symbols. - * '\u2100' - '\u214F'. + * 0x2100 - 0x214F. */ public static final UnicodeBlock LETTERLIKE_SYMBOLS - = new UnicodeBlock('\u2100', '\u214F', - "LETTERLIKE_SYMBOLS"); + = new UnicodeBlock(0x2100, 0x214F, + "LETTERLIKE_SYMBOLS", + "Letterlike Symbols"); /** * Number Forms. - * '\u2150' - '\u218F'. + * 0x2150 - 0x218F. */ public static final UnicodeBlock NUMBER_FORMS - = new UnicodeBlock('\u2150', '\u218F', - "NUMBER_FORMS"); + = new UnicodeBlock(0x2150, 0x218F, + "NUMBER_FORMS", + "Number Forms"); /** * Arrows. - * '\u2190' - '\u21FF'. + * 0x2190 - 0x21FF. */ public static final UnicodeBlock ARROWS - = new UnicodeBlock('\u2190', '\u21FF', - "ARROWS"); + = new UnicodeBlock(0x2190, 0x21FF, + "ARROWS", + "Arrows"); /** * Mathematical Operators. - * '\u2200' - '\u22FF'. + * 0x2200 - 0x22FF. */ public static final UnicodeBlock MATHEMATICAL_OPERATORS - = new UnicodeBlock('\u2200', '\u22FF', - "MATHEMATICAL_OPERATORS"); + = new UnicodeBlock(0x2200, 0x22FF, + "MATHEMATICAL_OPERATORS", + "Mathematical Operators"); /** * Miscellaneous Technical. - * '\u2300' - '\u23FF'. + * 0x2300 - 0x23FF. */ public static final UnicodeBlock MISCELLANEOUS_TECHNICAL - = new UnicodeBlock('\u2300', '\u23FF', - "MISCELLANEOUS_TECHNICAL"); + = new UnicodeBlock(0x2300, 0x23FF, + "MISCELLANEOUS_TECHNICAL", + "Miscellaneous Technical"); /** * Control Pictures. - * '\u2400' - '\u243F'. + * 0x2400 - 0x243F. */ public static final UnicodeBlock CONTROL_PICTURES - = new UnicodeBlock('\u2400', '\u243F', - "CONTROL_PICTURES"); + = new UnicodeBlock(0x2400, 0x243F, + "CONTROL_PICTURES", + "Control Pictures"); /** * Optical Character Recognition. - * '\u2440' - '\u245F'. + * 0x2440 - 0x245F. 
*/ public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION - = new UnicodeBlock('\u2440', '\u245F', - "OPTICAL_CHARACTER_RECOGNITION"); + = new UnicodeBlock(0x2440, 0x245F, + "OPTICAL_CHARACTER_RECOGNITION", + "Optical Character Recognition"); /** * Enclosed Alphanumerics. - * '\u2460' - '\u24FF'. + * 0x2460 - 0x24FF. */ public static final UnicodeBlock ENCLOSED_ALPHANUMERICS - = new UnicodeBlock('\u2460', '\u24FF', - "ENCLOSED_ALPHANUMERICS"); + = new UnicodeBlock(0x2460, 0x24FF, + "ENCLOSED_ALPHANUMERICS", + "Enclosed Alphanumerics"); /** * Box Drawing. - * '\u2500' - '\u257F'. + * 0x2500 - 0x257F. */ public static final UnicodeBlock BOX_DRAWING - = new UnicodeBlock('\u2500', '\u257F', - "BOX_DRAWING"); + = new UnicodeBlock(0x2500, 0x257F, + "BOX_DRAWING", + "Box Drawing"); /** * Block Elements. - * '\u2580' - '\u259F'. + * 0x2580 - 0x259F. */ public static final UnicodeBlock BLOCK_ELEMENTS - = new UnicodeBlock('\u2580', '\u259F', - "BLOCK_ELEMENTS"); + = new UnicodeBlock(0x2580, 0x259F, + "BLOCK_ELEMENTS", + "Block Elements"); /** * Geometric Shapes. - * '\u25A0' - '\u25FF'. + * 0x25A0 - 0x25FF. */ public static final UnicodeBlock GEOMETRIC_SHAPES - = new UnicodeBlock('\u25A0', '\u25FF', - "GEOMETRIC_SHAPES"); + = new UnicodeBlock(0x25A0, 0x25FF, + "GEOMETRIC_SHAPES", + "Geometric Shapes"); /** * Miscellaneous Symbols. - * '\u2600' - '\u26FF'. + * 0x2600 - 0x26FF. */ public static final UnicodeBlock MISCELLANEOUS_SYMBOLS - = new UnicodeBlock('\u2600', '\u26FF', - "MISCELLANEOUS_SYMBOLS"); + = new UnicodeBlock(0x2600, 0x26FF, + "MISCELLANEOUS_SYMBOLS", + "Miscellaneous Symbols"); /** * Dingbats. - * '\u2700' - '\u27BF'. + * 0x2700 - 0x27BF. */ public static final UnicodeBlock DINGBATS - = new UnicodeBlock('\u2700', '\u27BF', - "DINGBATS"); + = new UnicodeBlock(0x2700, 0x27BF, + "DINGBATS", + "Dingbats"); + + /** + * Miscellaneous Mathematical Symbols-A. + * 0x27C0 - 0x27EF. 
+ * @since 1.5 + */ + public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A + = new UnicodeBlock(0x27C0, 0x27EF, + "MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", + "Miscellaneous Mathematical Symbols-A"); + + /** + * Supplemental Arrows-A. + * 0x27F0 - 0x27FF. + * @since 1.5 + */ + public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A + = new UnicodeBlock(0x27F0, 0x27FF, + "SUPPLEMENTAL_ARROWS_A", + "Supplemental Arrows-A"); /** * Braille Patterns. - * '\u2800' - '\u28FF'. + * 0x2800 - 0x28FF. * @since 1.4 */ public static final UnicodeBlock BRAILLE_PATTERNS - = new UnicodeBlock('\u2800', '\u28FF', - "BRAILLE_PATTERNS"); + = new UnicodeBlock(0x2800, 0x28FF, + "BRAILLE_PATTERNS", + "Braille Patterns"); + + /** + * Supplemental Arrows-B. + * 0x2900 - 0x297F. + * @since 1.5 + */ + public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B + = new UnicodeBlock(0x2900, 0x297F, + "SUPPLEMENTAL_ARROWS_B", + "Supplemental Arrows-B"); + + /** + * Miscellaneous Mathematical Symbols-B. + * 0x2980 - 0x29FF. + * @since 1.5 + */ + public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B + = new UnicodeBlock(0x2980, 0x29FF, + "MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", + "Miscellaneous Mathematical Symbols-B"); + + /** + * Supplemental Mathematical Operators. + * 0x2A00 - 0x2AFF. + * @since 1.5 + */ + public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS + = new UnicodeBlock(0x2A00, 0x2AFF, + "SUPPLEMENTAL_MATHEMATICAL_OPERATORS", + "Supplemental Mathematical Operators"); + + /** + * Miscellaneous Symbols and Arrows. + * 0x2B00 - 0x2BFF. + * @since 1.5 + */ + public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS + = new UnicodeBlock(0x2B00, 0x2BFF, + "MISCELLANEOUS_SYMBOLS_AND_ARROWS", + "Miscellaneous Symbols and Arrows"); /** * CJK Radicals Supplement. - * '\u2E80' - '\u2EFF'. + * 0x2E80 - 0x2EFF. 
* @since 1.4 */ public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT - = new UnicodeBlock('\u2E80', '\u2EFF', - "CJK_RADICALS_SUPPLEMENT"); + = new UnicodeBlock(0x2E80, 0x2EFF, + "CJK_RADICALS_SUPPLEMENT", + "CJK Radicals Supplement"); /** * Kangxi Radicals. - * '\u2F00' - '\u2FDF'. + * 0x2F00 - 0x2FDF. * @since 1.4 */ public static final UnicodeBlock KANGXI_RADICALS - = new UnicodeBlock('\u2F00', '\u2FDF', - "KANGXI_RADICALS"); + = new UnicodeBlock(0x2F00, 0x2FDF, + "KANGXI_RADICALS", + "Kangxi Radicals"); /** * Ideographic Description Characters. - * '\u2FF0' - '\u2FFF'. + * 0x2FF0 - 0x2FFF. * @since 1.4 */ public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS - = new UnicodeBlock('\u2FF0', '\u2FFF', - "IDEOGRAPHIC_DESCRIPTION_CHARACTERS"); + = new UnicodeBlock(0x2FF0, 0x2FFF, + "IDEOGRAPHIC_DESCRIPTION_CHARACTERS", + "Ideographic Description Characters"); /** * CJK Symbols and Punctuation. - * '\u3000' - '\u303F'. + * 0x3000 - 0x303F. */ public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION - = new UnicodeBlock('\u3000', '\u303F', - "CJK_SYMBOLS_AND_PUNCTUATION"); + = new UnicodeBlock(0x3000, 0x303F, + "CJK_SYMBOLS_AND_PUNCTUATION", + "CJK Symbols and Punctuation"); /** * Hiragana. - * '\u3040' - '\u309F'. + * 0x3040 - 0x309F. */ public static final UnicodeBlock HIRAGANA - = new UnicodeBlock('\u3040', '\u309F', - "HIRAGANA"); + = new UnicodeBlock(0x3040, 0x309F, + "HIRAGANA", + "Hiragana"); /** * Katakana. - * '\u30A0' - '\u30FF'. + * 0x30A0 - 0x30FF. */ public static final UnicodeBlock KATAKANA - = new UnicodeBlock('\u30A0', '\u30FF', - "KATAKANA"); + = new UnicodeBlock(0x30A0, 0x30FF, + "KATAKANA", + "Katakana"); /** * Bopomofo. - * '\u3100' - '\u312F'. + * 0x3100 - 0x312F. */ public static final UnicodeBlock BOPOMOFO - = new UnicodeBlock('\u3100', '\u312F', - "BOPOMOFO"); + = new UnicodeBlock(0x3100, 0x312F, + "BOPOMOFO", + "Bopomofo"); /** * Hangul Compatibility Jamo. - * '\u3130' - '\u318F'. + * 0x3130 - 0x318F. 
*/ public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO - = new UnicodeBlock('\u3130', '\u318F', - "HANGUL_COMPATIBILITY_JAMO"); + = new UnicodeBlock(0x3130, 0x318F, + "HANGUL_COMPATIBILITY_JAMO", + "Hangul Compatibility Jamo"); /** * Kanbun. - * '\u3190' - '\u319F'. + * 0x3190 - 0x319F. */ public static final UnicodeBlock KANBUN - = new UnicodeBlock('\u3190', '\u319F', - "KANBUN"); + = new UnicodeBlock(0x3190, 0x319F, + "KANBUN", + "Kanbun"); /** * Bopomofo Extended. - * '\u31A0' - '\u31BF'. + * 0x31A0 - 0x31BF. * @since 1.4 */ public static final UnicodeBlock BOPOMOFO_EXTENDED - = new UnicodeBlock('\u31A0', '\u31BF', - "BOPOMOFO_EXTENDED"); + = new UnicodeBlock(0x31A0, 0x31BF, + "BOPOMOFO_EXTENDED", + "Bopomofo Extended"); + + /** + * Katakana Phonetic Extensions. + * 0x31F0 - 0x31FF. + * @since 1.5 + */ + public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS + = new UnicodeBlock(0x31F0, 0x31FF, + "KATAKANA_PHONETIC_EXTENSIONS", + "Katakana Phonetic Extensions"); /** * Enclosed CJK Letters and Months. - * '\u3200' - '\u32FF'. + * 0x3200 - 0x32FF. */ public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS - = new UnicodeBlock('\u3200', '\u32FF', - "ENCLOSED_CJK_LETTERS_AND_MONTHS"); + = new UnicodeBlock(0x3200, 0x32FF, + "ENCLOSED_CJK_LETTERS_AND_MONTHS", + "Enclosed CJK Letters and Months"); /** * CJK Compatibility. - * '\u3300' - '\u33FF'. + * 0x3300 - 0x33FF. */ public static final UnicodeBlock CJK_COMPATIBILITY - = new UnicodeBlock('\u3300', '\u33FF', - "CJK_COMPATIBILITY"); + = new UnicodeBlock(0x3300, 0x33FF, + "CJK_COMPATIBILITY", + "CJK Compatibility"); /** * CJK Unified Ideographs Extension A. - * '\u3400' - '\u4DB5'. + * 0x3400 - 0x4DBF. 
* @since 1.4 */ public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A - = new UnicodeBlock('\u3400', '\u4DB5', - "CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A"); + = new UnicodeBlock(0x3400, 0x4DBF, + "CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", + "CJK Unified Ideographs Extension A"); + + /** + * Yijing Hexagram Symbols. + * 0x4DC0 - 0x4DFF. + * @since 1.5 + */ + public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS + = new UnicodeBlock(0x4DC0, 0x4DFF, + "YIJING_HEXAGRAM_SYMBOLS", + "Yijing Hexagram Symbols"); /** * CJK Unified Ideographs. - * '\u4E00' - '\u9FFF'. + * 0x4E00 - 0x9FFF. */ public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS - = new UnicodeBlock('\u4E00', '\u9FFF', - "CJK_UNIFIED_IDEOGRAPHS"); + = new UnicodeBlock(0x4E00, 0x9FFF, + "CJK_UNIFIED_IDEOGRAPHS", + "CJK Unified Ideographs"); /** * Yi Syllables. - * '\uA000' - '\uA48F'. + * 0xA000 - 0xA48F. * @since 1.4 */ public static final UnicodeBlock YI_SYLLABLES - = new UnicodeBlock('\uA000', '\uA48F', - "YI_SYLLABLES"); + = new UnicodeBlock(0xA000, 0xA48F, + "YI_SYLLABLES", + "Yi Syllables"); /** * Yi Radicals. - * '\uA490' - '\uA4CF'. + * 0xA490 - 0xA4CF. * @since 1.4 */ public static final UnicodeBlock YI_RADICALS - = new UnicodeBlock('\uA490', '\uA4CF', - "YI_RADICALS"); + = new UnicodeBlock(0xA490, 0xA4CF, + "YI_RADICALS", + "Yi Radicals"); /** * Hangul Syllables. - * '\uAC00' - '\uD7A3'. + * 0xAC00 - 0xD7AF. */ public static final UnicodeBlock HANGUL_SYLLABLES - = new UnicodeBlock('\uAC00', '\uD7A3', - "HANGUL_SYLLABLES"); + = new UnicodeBlock(0xAC00, 0xD7AF, + "HANGUL_SYLLABLES", + "Hangul Syllables"); /** - * Surrogates Area. - * '\uD800' - '\uDFFF'. + * High Surrogates. + * 0xD800 - 0xDB7F. + * @since 1.5 */ - public static final UnicodeBlock SURROGATES_AREA - = new UnicodeBlock('\uD800', '\uDFFF', - "SURROGATES_AREA"); + public static final UnicodeBlock HIGH_SURROGATES + = new UnicodeBlock(0xD800, 0xDB7F, + "HIGH_SURROGATES", + "High Surrogates"); + + /** + * High Private Use Surrogates. 
+ * 0xDB80 - 0xDBFF. + * @since 1.5 + */ + public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES + = new UnicodeBlock(0xDB80, 0xDBFF, + "HIGH_PRIVATE_USE_SURROGATES", + "High Private Use Surrogates"); + + /** + * Low Surrogates. + * 0xDC00 - 0xDFFF. + * @since 1.5 + */ + public static final UnicodeBlock LOW_SURROGATES + = new UnicodeBlock(0xDC00, 0xDFFF, + "LOW_SURROGATES", + "Low Surrogates"); /** * Private Use Area. - * '\uE000' - '\uF8FF'. + * 0xE000 - 0xF8FF. */ public static final UnicodeBlock PRIVATE_USE_AREA - = new UnicodeBlock('\uE000', '\uF8FF', - "PRIVATE_USE_AREA"); + = new UnicodeBlock(0xE000, 0xF8FF, + "PRIVATE_USE_AREA", + "Private Use Area"); /** * CJK Compatibility Ideographs. - * '\uF900' - '\uFAFF'. + * 0xF900 - 0xFAFF. */ public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS - = new UnicodeBlock('\uF900', '\uFAFF', - "CJK_COMPATIBILITY_IDEOGRAPHS"); + = new UnicodeBlock(0xF900, 0xFAFF, + "CJK_COMPATIBILITY_IDEOGRAPHS", + "CJK Compatibility Ideographs"); /** * Alphabetic Presentation Forms. - * '\uFB00' - '\uFB4F'. + * 0xFB00 - 0xFB4F. */ public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS - = new UnicodeBlock('\uFB00', '\uFB4F', - "ALPHABETIC_PRESENTATION_FORMS"); + = new UnicodeBlock(0xFB00, 0xFB4F, + "ALPHABETIC_PRESENTATION_FORMS", + "Alphabetic Presentation Forms"); /** * Arabic Presentation Forms-A. - * '\uFB50' - '\uFDFF'. + * 0xFB50 - 0xFDFF. */ public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A - = new UnicodeBlock('\uFB50', '\uFDFF', - "ARABIC_PRESENTATION_FORMS_A"); + = new UnicodeBlock(0xFB50, 0xFDFF, + "ARABIC_PRESENTATION_FORMS_A", + "Arabic Presentation Forms-A"); + + /** + * Variation Selectors. + * 0xFE00 - 0xFE0F. + * @since 1.5 + */ + public static final UnicodeBlock VARIATION_SELECTORS + = new UnicodeBlock(0xFE00, 0xFE0F, + "VARIATION_SELECTORS", + "Variation Selectors"); /** * Combining Half Marks. - * '\uFE20' - '\uFE2F'. + * 0xFE20 - 0xFE2F. 
*/ public static final UnicodeBlock COMBINING_HALF_MARKS - = new UnicodeBlock('\uFE20', '\uFE2F', - "COMBINING_HALF_MARKS"); + = new UnicodeBlock(0xFE20, 0xFE2F, + "COMBINING_HALF_MARKS", + "Combining Half Marks"); /** * CJK Compatibility Forms. - * '\uFE30' - '\uFE4F'. + * 0xFE30 - 0xFE4F. */ public static final UnicodeBlock CJK_COMPATIBILITY_FORMS - = new UnicodeBlock('\uFE30', '\uFE4F', - "CJK_COMPATIBILITY_FORMS"); + = new UnicodeBlock(0xFE30, 0xFE4F, + "CJK_COMPATIBILITY_FORMS", + "CJK Compatibility Forms"); /** * Small Form Variants. - * '\uFE50' - '\uFE6F'. + * 0xFE50 - 0xFE6F. */ public static final UnicodeBlock SMALL_FORM_VARIANTS - = new UnicodeBlock('\uFE50', '\uFE6F', - "SMALL_FORM_VARIANTS"); + = new UnicodeBlock(0xFE50, 0xFE6F, + "SMALL_FORM_VARIANTS", + "Small Form Variants"); /** * Arabic Presentation Forms-B. - * '\uFE70' - '\uFEFE'. + * 0xFE70 - 0xFEFF. */ public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B - = new UnicodeBlock('\uFE70', '\uFEFE', - "ARABIC_PRESENTATION_FORMS_B"); + = new UnicodeBlock(0xFE70, 0xFEFF, + "ARABIC_PRESENTATION_FORMS_B", + "Arabic Presentation Forms-B"); /** * Halfwidth and Fullwidth Forms. - * '\uFF00' - '\uFFEF'. + * 0xFF00 - 0xFFEF. */ public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS - = new UnicodeBlock('\uFF00', '\uFFEF', - "HALFWIDTH_AND_FULLWIDTH_FORMS"); + = new UnicodeBlock(0xFF00, 0xFFEF, + "HALFWIDTH_AND_FULLWIDTH_FORMS", + "Halfwidth and Fullwidth Forms"); /** * Specials. - * '\uFEFF', '\uFFF0' - '\uFFFD'. + * 0xFFF0 - 0xFFFF. */ public static final UnicodeBlock SPECIALS - = new UnicodeBlock('\uFFF0', '\uFFFD', - "SPECIALS"); + = new UnicodeBlock(0xFFF0, 0xFFFF, + "SPECIALS", + "Specials"); + + /** + * Linear B Syllabary. + * 0x10000 - 0x1007F. + * @since 1.5 + */ + public static final UnicodeBlock LINEAR_B_SYLLABARY + = new UnicodeBlock(0x10000, 0x1007F, + "LINEAR_B_SYLLABARY", + "Linear B Syllabary"); + + /** + * Linear B Ideograms. + * 0x10080 - 0x100FF. 
+ * @since 1.5 + */ + public static final UnicodeBlock LINEAR_B_IDEOGRAMS + = new UnicodeBlock(0x10080, 0x100FF, + "LINEAR_B_IDEOGRAMS", + "Linear B Ideograms"); + + /** + * Aegean Numbers. + * 0x10100 - 0x1013F. + * @since 1.5 + */ + public static final UnicodeBlock AEGEAN_NUMBERS + = new UnicodeBlock(0x10100, 0x1013F, + "AEGEAN_NUMBERS", + "Aegean Numbers"); + + /** + * Old Italic. + * 0x10300 - 0x1032F. + * @since 1.5 + */ + public static final UnicodeBlock OLD_ITALIC + = new UnicodeBlock(0x10300, 0x1032F, + "OLD_ITALIC", + "Old Italic"); + + /** + * Gothic. + * 0x10330 - 0x1034F. + * @since 1.5 + */ + public static final UnicodeBlock GOTHIC + = new UnicodeBlock(0x10330, 0x1034F, + "GOTHIC", + "Gothic"); + + /** + * Ugaritic. + * 0x10380 - 0x1039F. + * @since 1.5 + */ + public static final UnicodeBlock UGARITIC + = new UnicodeBlock(0x10380, 0x1039F, + "UGARITIC", + "Ugaritic"); + + /** + * Deseret. + * 0x10400 - 0x1044F. + * @since 1.5 + */ + public static final UnicodeBlock DESERET + = new UnicodeBlock(0x10400, 0x1044F, + "DESERET", + "Deseret"); + + /** + * Shavian. + * 0x10450 - 0x1047F. + * @since 1.5 + */ + public static final UnicodeBlock SHAVIAN + = new UnicodeBlock(0x10450, 0x1047F, + "SHAVIAN", + "Shavian"); + + /** + * Osmanya. + * 0x10480 - 0x104AF. + * @since 1.5 + */ + public static final UnicodeBlock OSMANYA + = new UnicodeBlock(0x10480, 0x104AF, + "OSMANYA", + "Osmanya"); + + /** + * Cypriot Syllabary. + * 0x10800 - 0x1083F. + * @since 1.5 + */ + public static final UnicodeBlock CYPRIOT_SYLLABARY + = new UnicodeBlock(0x10800, 0x1083F, + "CYPRIOT_SYLLABARY", + "Cypriot Syllabary"); + + /** + * Byzantine Musical Symbols. + * 0x1D000 - 0x1D0FF. + * @since 1.5 + */ + public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS + = new UnicodeBlock(0x1D000, 0x1D0FF, + "BYZANTINE_MUSICAL_SYMBOLS", + "Byzantine Musical Symbols"); + + /** + * Musical Symbols. + * 0x1D100 - 0x1D1FF. 
+ * @since 1.5 + */ + public static final UnicodeBlock MUSICAL_SYMBOLS + = new UnicodeBlock(0x1D100, 0x1D1FF, + "MUSICAL_SYMBOLS", + "Musical Symbols"); + + /** + * Tai Xuan Jing Symbols. + * 0x1D300 - 0x1D35F. + * @since 1.5 + */ + public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS + = new UnicodeBlock(0x1D300, 0x1D35F, + "TAI_XUAN_JING_SYMBOLS", + "Tai Xuan Jing Symbols"); + + /** + * Mathematical Alphanumeric Symbols. + * 0x1D400 - 0x1D7FF. + * @since 1.5 + */ + public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS + = new UnicodeBlock(0x1D400, 0x1D7FF, + "MATHEMATICAL_ALPHANUMERIC_SYMBOLS", + "Mathematical Alphanumeric Symbols"); + + /** + * CJK Unified Ideographs Extension B. + * 0x20000 - 0x2A6DF. + * @since 1.5 + */ + public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B + = new UnicodeBlock(0x20000, 0x2A6DF, + "CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", + "CJK Unified Ideographs Extension B"); + + /** + * CJK Compatibility Ideographs Supplement. + * 0x2F800 - 0x2FA1F. + * @since 1.5 + */ + public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT + = new UnicodeBlock(0x2F800, 0x2FA1F, + "CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", + "CJK Compatibility Ideographs Supplement"); + + /** + * Tags. + * 0xE0000 - 0xE007F. + * @since 1.5 + */ + public static final UnicodeBlock TAGS + = new UnicodeBlock(0xE0000, 0xE007F, + "TAGS", + "Tags"); + + /** + * Variation Selectors Supplement. + * 0xE0100 - 0xE01EF. + * @since 1.5 + */ + public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT + = new UnicodeBlock(0xE0100, 0xE01EF, + "VARIATION_SELECTORS_SUPPLEMENT", + "Variation Selectors Supplement"); + + /** + * Supplementary Private Use Area-A. + * 0xF0000 - 0xFFFFF. + * @since 1.5 + */ + public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A + = new UnicodeBlock(0xF0000, 0xFFFFF, + "SUPPLEMENTARY_PRIVATE_USE_AREA_A", + "Supplementary Private Use Area-A"); + + /** + * Supplementary Private Use Area-B. 
+ * 0x100000 - 0x10FFFF. + * @since 1.5 + */ + public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B + = new UnicodeBlock(0x100000, 0x10FFFF, + "SUPPLEMENTARY_PRIVATE_USE_AREA_B", + "Supplementary Private Use Area-B"); + + /** + * Surrogates Area. + * 'D800' - 'DFFF'. + * @deprecated As of 1.5, the three areas, + * <a href="#HIGH_SURROGATES">HIGH_SURROGATES</a>, + * <a href="#HIGH_PRIVATE_USE_SURROGATES">HIGH_PRIVATE_USE_SURROGATES</a> + * and <a href="#LOW_SURROGATES">LOW_SURROGATES</a>, as defined + * by the Unicode standard, should be used in preference to + * this. These are also returned from calls to <code>of(int)</code> + * and <code>of(char)</code>. + */ + @Deprecated + public static final UnicodeBlock SURROGATES_AREA + = new UnicodeBlock(0xD800, 0xDFFF, + "SURROGATES_AREA", + "Surrogates Area"); /** * The defined subsets. @@ -904,6 +1523,7 @@ public final class Character implements Serializable, Comparable<Character> COMBINING_DIACRITICAL_MARKS, GREEK, CYRILLIC, + CYRILLIC_SUPPLEMENTARY, ARMENIAN, HEBREW, ARABIC, @@ -930,8 +1550,16 @@ public final class Character implements Serializable, Comparable<Character> UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS, OGHAM, RUNIC, + TAGALOG, + HANUNOO, + BUHID, + TAGBANWA, KHMER, MONGOLIAN, + LIMBU, + TAI_LE, + KHMER_SYMBOLS, + PHONETIC_EXTENSIONS, LATIN_EXTENDED_ADDITIONAL, GREEK_EXTENDED, GENERAL_PUNCTUATION, @@ -951,7 +1579,13 @@ public final class Character implements Serializable, Comparable<Character> GEOMETRIC_SHAPES, MISCELLANEOUS_SYMBOLS, DINGBATS, + MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A, + SUPPLEMENTAL_ARROWS_A, BRAILLE_PATTERNS, + SUPPLEMENTAL_ARROWS_B, + MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B, + SUPPLEMENTAL_MATHEMATICAL_OPERATORS, + MISCELLANEOUS_SYMBOLS_AND_ARROWS, CJK_RADICALS_SUPPLEMENT, KANGXI_RADICALS, IDEOGRAPHIC_DESCRIPTION_CHARACTERS, @@ -962,24 +1596,49 @@ public final class Character implements Serializable, Comparable<Character> HANGUL_COMPATIBILITY_JAMO, KANBUN, BOPOMOFO_EXTENDED, + 
KATAKANA_PHONETIC_EXTENSIONS, ENCLOSED_CJK_LETTERS_AND_MONTHS, CJK_COMPATIBILITY, CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A, + YIJING_HEXAGRAM_SYMBOLS, CJK_UNIFIED_IDEOGRAPHS, YI_SYLLABLES, YI_RADICALS, HANGUL_SYLLABLES, - SURROGATES_AREA, + HIGH_SURROGATES, + HIGH_PRIVATE_USE_SURROGATES, + LOW_SURROGATES, PRIVATE_USE_AREA, CJK_COMPATIBILITY_IDEOGRAPHS, ALPHABETIC_PRESENTATION_FORMS, ARABIC_PRESENTATION_FORMS_A, + VARIATION_SELECTORS, COMBINING_HALF_MARKS, CJK_COMPATIBILITY_FORMS, SMALL_FORM_VARIANTS, ARABIC_PRESENTATION_FORMS_B, HALFWIDTH_AND_FULLWIDTH_FORMS, SPECIALS, + LINEAR_B_SYLLABARY, + LINEAR_B_IDEOGRAMS, + AEGEAN_NUMBERS, + OLD_ITALIC, + GOTHIC, + UGARITIC, + DESERET, + SHAVIAN, + OSMANYA, + CYPRIOT_SYLLABARY, + BYZANTINE_MUSICAL_SYMBOLS, + MUSICAL_SYMBOLS, + TAI_XUAN_JING_SYMBOLS, + MATHEMATICAL_ALPHANUMERIC_SYMBOLS, + CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B, + CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT, + TAGS, + VARIATION_SELECTORS_SUPPLEMENT, + SUPPLEMENTARY_PRIVATE_USE_AREA_A, + SUPPLEMENTARY_PRIVATE_USE_AREA_B, }; } // class UnicodeBlock @@ -1028,6 +1687,65 @@ public final class Character implements Serializable, Comparable<Character> public static final char MAX_VALUE = '\uFFFF'; /** + * The minimum Unicode 4.0 code point. This value is <code>0</code>. + */ + public static final int MIN_CODE_POINT = 0; + + /** + * The maximum Unicode 4.0 code point, which is greater than the range + * of the char data type. + * This value is <code>0x10FFFF</code>. + */ + public static final int MAX_CODE_POINT = 0x10FFFF; + + /** + * The minimum Unicode high surrogate code unit, or + * <emph>leading-surrogate</emph>, in the UTF-16 character encoding. + * This value is <code>'\uD800'</code>. + */ + public static final char MIN_HIGH_SURROGATE = '\uD800'; + + /** + * The maximum Unicode high surrogate code unit, or + * <emph>leading-surrogate</emph>, in the UTF-16 character encoding. + * This value is <code>'\uDBFF'</code>. 
+ */ + public static final char MAX_HIGH_SURROGATE = '\uDBFF'; + + /** + * The minimum Unicode low surrogate code unit, or + * <emph>trailing-surrogate</emph>, in the UTF-16 character encoding. + * This value is <code>'\uDC00'</code>. + */ + public static final char MIN_LOW_SURROGATE = '\uDC00'; + + /** + * The maximum Unicode low surrogate code unit, or + * <emph>trailing-surrogate</emph>, in the UTF-16 character encoding. + * This value is <code>'\uDFFF'</code>. + */ + public static final char MAX_LOW_SURROGATE = '\uDFFF'; + + /** + * The minimum Unicode surrogate code unit in the UTF-16 character encoding. + * This value is <code>'\uD800'</code>. + */ + public static final char MIN_SURROGATE = '\uD800'; + + /** + * The maximum Unicode surrogate code unit in the UTF-16 character encoding. + * This value is <code>'\uDFFF'</code>. + */ + public static final char MAX_SURROGATE = '\uDFFF'; + + /** + * The lowest possible supplementary Unicode code point (the first code + * point outside the basic multilingual plane (BMP)). + * This value is <code>0x10000</code>. + */ + public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x10000; + + /** * Class object representing the primitive char data type. 
* * @since 1.1 diff --git a/java/lang/annotation/ElementType.java b/java/lang/annotation/ElementType.java index 3af02237b..a7869ea50 100644 --- a/java/lang/annotation/ElementType.java +++ b/java/lang/annotation/ElementType.java @@ -54,6 +54,6 @@ public enum ElementType /** * For compatability with Sun's JDK */ - private static final long serialVersionUID = -7822481094880900790L; + private static final long serialVersionUID = 2798216111136361587L; } diff --git a/java/lang/annotation/RetentionPolicy.java b/java/lang/annotation/RetentionPolicy.java index 098385a41..b3e6391c2 100644 --- a/java/lang/annotation/RetentionPolicy.java +++ b/java/lang/annotation/RetentionPolicy.java @@ -61,6 +61,6 @@ public enum RetentionPolicy /** * For compatability with Sun's JDK */ - private static final long serialVersionUID = 2098916047332259179L; + private static final long serialVersionUID = -1700821648800605045L; } diff --git a/scripts/unicode-blocks.pl b/scripts/unicode-blocks.pl index 2cea35fb2..d9d0b70a0 100755 --- a/scripts/unicode-blocks.pl +++ b/scripts/unicode-blocks.pl @@ -76,6 +76,47 @@ my %additions = ("SYRIAC" => "1.4", "CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A" => "1.4", "YI_SYLLABLES" => "1.4", "YI_RADICALS" => "1.4", + "CYRILLIC_SUPPLEMENTARY" => "1.5", + "TAGALOG" => "1.5", + "HANUNOO" => "1.5", + "BUHID" => "1.5", + "TAGBANWA" => "1.5", + "LIMBU" => "1.5", + "TAI_LE" => "1.5", + "KHMER_SYMBOLS" => "1.5", + "PHONETIC_EXTENSIONS" => "1.5", + "MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A" => "1.5", + "SUPPLEMENTAL_ARROWS_A" => "1.5", + "SUPPLEMENTAL_ARROWS_B" => "1.5", + "MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B" => "1.5", + "SUPPLEMENTAL_MATHEMATICAL_OPERATORS" => "1.5", + "MISCELLANEOUS_SYMBOLS_AND_ARROWS" => "1.5", + "KATAKANA_PHONETIC_EXTENSIONS" => "1.5", + "YIJING_HEXAGRAM_SYMBOLS" => "1.5", + "VARIATION_SELECTORS" => "1.5", + "LINEAR_B_SYLLABARY" => "1.5", + "LINEAR_B_IDEOGRAMS" => "1.5", + "AEGEAN_NUMBERS" => "1.5", + "OLD_ITALIC" => "1.5", + "GOTHIC" => "1.5", + "UGARITIC" => 
"1.5", + "DESERET" => "1.5", + "SHAVIAN" => "1.5", + "OSMANYA" => "1.5", + "CYPRIOT_SYLLABARY" => "1.5", + "BYZANTINE_MUSICAL_SYMBOLS" => "1.5", + "MUSICAL_SYMBOLS" => "1.5", + "TAI_XUAN_JING_SYMBOLS" => "1.5", + "MATHEMATICAL_ALPHANUMERIC_SYMBOLS" => "1.5", + "CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B" => "1.5", + "CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT" => "1.5", + "TAGS" => "1.5", + "VARIATION_SELECTORS_SUPPLEMENT" => "1.5", + "SUPPLEMENTARY_PRIVATE_USE_AREA_A" => "1.5", + "SUPPLEMENTARY_PRIVATE_USE_AREA_B" => "1.5", + "HIGH_SURROGATES" => "1.5", + "HIGH_PRIVATE_USE_SURROGATES" => "1.5", + "LOW_SURROGATES" => "1.5" ); print <<'EOF'; @@ -87,7 +128,7 @@ print <<'EOF'; * <code>$ARGV[0]</code>, by some perl scripts. * This Unicode definition file can be found on the * <a href="http://www.unicode.org">http://www.unicode.org</a> website. - * JDK 1.4 uses Unicode version 3.0.0. + * JDK 1.5 uses Unicode version 4.0.0. * * @author scripts/unicode-blocks.pl (written by Eric Blake) * @since 1.2 @@ -95,10 +136,16 @@ print <<'EOF'; public static final class UnicodeBlock extends Subset { /** The start of the subset. */ - private final char start; + private final int start; /** The end of the subset. */ - private final char end; + private final int end; + + /** The canonical name of the block according to the Unicode standard. */ + private final String canonicalName; + + /** Enumeration for the <code>forName()</code> method */ + private enum NameType { CANONICAL, NO_SPACES, CONSTANT; }; /** * Constructor for strictly defined blocks. @@ -106,25 +153,46 @@ print <<'EOF'; * @param start the start character of the range * @param end the end character of the range * @param name the block name + * @param canonicalName the name of the block as defined in the Unicode + * standard. 
*/ - private UnicodeBlock(char start, char end, String name) + private UnicodeBlock(int start, int end, String name, + String canonicalName) { super(name); this.start = start; this.end = end; + this.canonicalName = canonicalName; } /** * Returns the Unicode character block which a character belongs to. + * <strong>Note</strong>: This method does not support the use of + * supplementary characters. For such support, <code>of(int)</code> + * should be used instead. * * @param ch the character to look up * @return the set it belongs to, or null if it is not in one */ public static UnicodeBlock of(char ch) { - // Special case, since SPECIALS contains two ranges. - if (ch == '\uFEFF') - return SPECIALS; + return of((int) ch); + } + + /** + * Returns the Unicode character block which a code point belongs to. + * + * @param codePoint the character to look up + * @return the set it belongs to, or null if it is not in one. + * @throws IllegalArgumentException if the specified code point is + * invalid. + * @since 1.5 + */ + public static UnicodeBlock of(int codePoint) + { + if (codePoint > MAX_CODE_POINT) + throw new IllegalArgumentException("The supplied integer value is " + + "too large to be a codepoint."); // Simple binary search for the correct block. int low = 0; int hi = sets.length - 1; @@ -132,69 +200,155 @@ print <<'EOF'; { int mid = (low + hi) >> 1; UnicodeBlock b = sets[mid]; - if (ch < b.start) + if (codePoint < b.start) hi = mid - 1; - else if (ch > b.end) + else if (codePoint > b.end) low = mid + 1; else return b; } return null; } + + /** + * <p> + * Returns the <code>UnicodeBlock</code> with the given name, as defined + * by the Unicode standard. The version of Unicode in use is defined by + * the <code>Character</code> class, and the names are given in the + * <code>Blocks-<version>.txt</code> file corresponding to that version. 
+ * The name may be specified in one of three ways: + * </p> + * <ol> + * <li>The canonical, human-readable name used by the Unicode standard. + * This is the name with all spaces and hyphens retained. For example, + * `Basic Latin' retrieves the block, UnicodeBlock.BASIC_LATIN.</li> + * <li>The canonical name with all spaces removed e.g. `BasicLatin'.</li> + * <li>The name used for the constants specified by this class, which + * is the canonical name with all spaces and hyphens replaced with + * underscores e.g. `BASIC_LATIN'</li> + * </ol> + * <p> + * The names are compared case-insensitively using the case comparison + * associated with the U.S. English locale. The method recognises the + * previous names used for blocks as well as the current ones. At + * present, this simply means that the deprecated `SURROGATES_AREA' + * will be recognised by this method (the <code>of()</code> methods + * only return one of the three new surrogate blocks). + * </p> + * + * @param blockName the name of the block to look up. + * @return the specified block. + * @throws NullPointerException if the <code>blockName</code> is + * <code>null</code>. + * @throws IllegalArgumentException if the name does not match any Unicode + * block. 
+ * @since 1.5 + */ + public static final UnicodeBlock forName(String blockName) + { + NameType type; + if (blockName.indexOf(' ') != -1) + type = NameType.CANONICAL; + else if (blockName.indexOf('_') != -1) + type = NameType.CONSTANT; + else + type = NameType.NO_SPACES; + Collator usCollator = Collator.getInstance(Locale.US); + usCollator.setStrength(Collator.PRIMARY); + /* Special case for deprecated blocks not in sets */ + switch (type) + { + case CANONICAL: + if (usCollator.compare(blockName, "Surrogates Area") == 0) + return SURROGATES_AREA; + break; + case NO_SPACES: + if (usCollator.compare(blockName, "SurrogatesArea") == 0) + return SURROGATES_AREA; + break; + case CONSTANT: + if (usCollator.compare(blockName, "SURROGATES_AREA") == 0) + return SURROGATES_AREA; + break; + } + /* Other cases */ + switch (type) + { + case CANONICAL: + for (UnicodeBlock block : sets) + if (usCollator.compare(blockName, block.canonicalName) == 0) + return block; + break; + case NO_SPACES: + for (UnicodeBlock block : sets) + { + String nsName = block.canonicalName.replaceAll(" ",""); + if (usCollator.compare(blockName, nsName) == 0) + return block; + } + break; + case CONSTANT: + for (UnicodeBlock block : sets) + if (usCollator.compare(blockName, block.toString()) == 0) + return block; + break; + } + throw new IllegalArgumentException("No Unicode block found for " + + blockName + "."); + } EOF -my $seenSpecials = 0; -my $seenSurrogates = 0; -my $surrogateStart = 0; my @names = (); while (<BLOCKS>) { next if /^\#/; - my ($start, $end, $block) = split(/; /); + my ($range, $block) = split(/; /); + my ($start, $end) = split /\.\./, $range; next unless defined $block; chomp $block; $block =~ s/ *$//; - if (! 
$seenSpecials and $block =~ /Specials/) { - # Special case SPECIALS, since it is two disjoint ranges - $seenSpecials = 1; - next; - } - if ($block =~ /Surrogates/) { - # Special case SURROGATES_AREA, since it one range, not three - # consecutive, in Java - $seenSurrogates++; - if ($seenSurrogates == 1) { - $surrogateStart = $start; - next; - } elsif ($seenSurrogates == 2) { - next; - } else { - $start = $surrogateStart; - $block = "Surrogates Area"; - } - } - # Special case the name of PRIVATE_USE_AREA. - $block =~ s/(Private Use)/$1 Area/; + # Translate new Unicode names which have the old name in Java + $block = "Greek" if $block =~ /Greek and Coptic/; + $block = "Combining Marks for Symbols" + if $block =~ /Combining Diacritical Marks for Symbols/; + (my $name = $block) =~ tr/a-z -/A-Z__/; push @names, $name; my $since = (defined $additions{$name} ? "\n * \@since $additions{$name}" : ""); - my $extra = ($block =~ /Specials/ ? "'\\uFEFF', " : ""); print <<EOF; /** * $block. - * $extra'\\u$start' - '\\u$end'.$since + * 0x$start - 0x$end.$since */ public static final UnicodeBlock $name - = new UnicodeBlock('\\u$start', '\\u$end', - "$name"); + = new UnicodeBlock(0x$start, 0x$end, + "$name", + "$block"); EOF } print <<EOF; /** + * Surrogates Area. + * '\uD800' - '\uDFFF'. + * \@deprecated As of 1.5, the three areas, + * <a href="#HIGH_SURROGATES">HIGH_SURROGATES</a>, + * <a href="#HIGH_PRIVATE_USE_SURROGATES">HIGH_PRIVATE_USE_SURROGATES</a> + * and <a href="#LOW_SURROGATES">LOW_SURROGATES</a>, as defined + * by the Unicode standard, should be used in preference to + * this. These are also returned from calls to <code>of(int)</code> + * and <code>of(char)</code>. + */ + \@Deprecated + public static final UnicodeBlock SURROGATES_AREA + = new UnicodeBlock(0xD800, 0xDFFF, + "SURROGATES_AREA", + "Surrogates Area"); + + /** * The defined subsets. */ private static final UnicodeBlock sets[] = { |