Support preferentially the Unicode 'scripts' definition

in the \p{In...} notation, since according to Unicode the scripts concept is more natural for matching than the somewhat artificial block names. The block names are still available, though: if there is a name conflict, the script name wins and the block name has to make do with 'Block' appended to its name. For more information see http://www.unicode.org/unicode/reports/tr24/ p4raw-id: //depot/perl@11132
author: Jarkko Hietaniemi <jhi@iki.fi> 2001-07-04 01:32:11 +0000
committer: Jarkko Hietaniemi <jhi@iki.fi> 2001-07-04 01:32:11 +0000
commit: 2796c109dc2c56e2241410992d78bd8e0cccd71f (patch)
tree: 6afcbd325dc2525c4681ef8e20e95afc8fcd49a4 /lib/Unicode
parent: ad9cab3708f3a6aff28b5c1ca3a390c013235283 (diff)
download: perl-2796c109dc2c56e2241410992d78bd8e0cccd71f.tar.gz
1 files changed, 36 insertions, 2 deletions
diff --git a/lib/Unicode/UCD.pm b/lib/Unicode/UCD.pm
index 4e310e7c1c..6c20d4066d 100644
--- a/lib/Unicode/UCD.pm
+++ b/lib/Unicode/UCD.pm
@@ -174,9 +174,9 @@ sub charblock {
     my $code = shift;
 
     unless (@BLOCKS) {
-	if (openunicode(\$BLOCKS, "Blocks.pl")) {
+	if (openunicode(\$BLOCKS, "Blocks.txt")) {
 	    while (<$BLOCKS>) {
-		if (/^([0-9A-F]+)\s+([0-9A-F]+)\s+(.+)/) {
+		if (/^([0-9A-F]+)\.\.([0-9A-F]+);\s+(.+)/) {
 		    push @BLOCKS, [ hex($1), hex($2), $3 ];
 		}
 	    }
@@ -241,6 +241,40 @@ Note also that the script names are all in uppercase, e.g. C<HEBREW>,
 while the block names are Capitalized and with intermixed spaces,
 e.g. C<Yi Syllables>.
 
+Greek
+Cyrillic
+Armenian
+Hebrew
+Arabic
+Syriac
+Thaana
+Devanagari
+Bengali
+Gurmukhi
+Gujarati
+Oriya
+Tamil
+Telugu
+Kannada
+Malayalam
+Sinhala
+Thai
+Lao
+Tibetan
+Myanmar
+Georgian
+Ethiopic
+Cherokee
+Ogham
+Runic
+Khmer
+Hiragana
+Katakana
+Bopomofo
+OldItalic
+Gothic
+Deseret
+
 =head1 IMPLEMENTATION NOTE
 
 The first use of charinfo() opens a read-only filehandle to the Unicode
author	Jarkko Hietaniemi <jhi@iki.fi>	2001-07-04 01:32:11 +0000
committer	Jarkko Hietaniemi <jhi@iki.fi>	2001-07-04 01:32:11 +0000
commit	2796c109dc2c56e2241410992d78bd8e0cccd71f (patch)
tree	6afcbd325dc2525c4681ef8e20e95afc8fcd49a4 /lib/Unicode
parent	ad9cab3708f3a6aff28b5c1ca3a390c013235283 (diff)
download	perl-2796c109dc2c56e2241410992d78bd8e0cccd71f.tar.gz