summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNicholas Clark <nick@ccl4.org>2004-10-28 09:10:57 +0000
committerNicholas Clark <nick@ccl4.org>2004-10-28 09:10:57 +0000
commitaef8ec01d60ce69b13358738019011b881f47e61 (patch)
treeaa17461173370e21c7cf6b6dbd4656c52ecf34d6
parentc851fbd014c50eb7ed9c77bf042fec3b6da8702f (diff)
downloadperl-aef8ec01d60ce69b13358738019011b881f47e61.tar.gz
Integrate:
[ 23417] Upgrade to Encode 2.07 [ 23421] Upgrade to Encode 2.08. p4raw-link: @23421 on //depot/perl: cc836e956f1f965d89e75825961e461d4c4efb8a p4raw-link: @23417 on //depot/perl: f9d05ba35dc7d01260b38a6dc93f199c3b1d2c39 p4raw-id: //depot/maint-5.8/perl@23427 p4raw-integrated: from //depot/perl@23417 'ignore' ext/Encode/META.yml (@23384..) p4raw-integrated: from //depot/perl@23415 'copy in' ext/Encode/t/fallback.t (@16372..) ext/Encode/t/Encode.t (@19712..) ext/Encode/Unicode/Unicode.xs ext/Encode/ucm/macArabic.ucm ext/Encode/ucm/macCentEuro.ucm ext/Encode/ucm/macChinsimp.ucm ext/Encode/ucm/macChintrad.ucm ext/Encode/ucm/macDingbats.ucm ext/Encode/ucm/macGreek.ucm ext/Encode/ucm/macKorean.ucm ext/Encode/ucm/macROMnn.ucm ext/Encode/ucm/macSymbol.ucm ext/Encode/ucm/macThai.ucm (@22835..) ext/Encode/AUTHORS (@23380..) ext/Encode/encoding.pm (@23384..) ext/Encode/Changes ext/Encode/Encode.pm ext/Encode/Encode.xs ext/Encode/Unicode/Unicode.pm ext/Encode/lib/Encode/Encoding.pm (@23417..)
-rw-r--r--ext/Encode/AUTHORS4
-rw-r--r--ext/Encode/Changes42
-rw-r--r--ext/Encode/Encode.pm62
-rw-r--r--ext/Encode/Encode.xs82
-rw-r--r--ext/Encode/META.yml2
-rw-r--r--ext/Encode/Unicode/Unicode.pm22
-rw-r--r--ext/Encode/Unicode/Unicode.xs10
-rw-r--r--ext/Encode/encoding.pm2
-rw-r--r--ext/Encode/lib/Encode/Encoding.pm48
-rw-r--r--ext/Encode/t/Encode.t6
-rw-r--r--ext/Encode/t/fallback.t169
-rw-r--r--ext/Encode/ucm/macArabic.ucm4
-rw-r--r--ext/Encode/ucm/macCentEuro.ucm3
-rw-r--r--ext/Encode/ucm/macChinsimp.ucm8
-rw-r--r--ext/Encode/ucm/macChintrad.ucm6
-rw-r--r--ext/Encode/ucm/macDingbats.ucm30
-rw-r--r--ext/Encode/ucm/macGreek.ucm6
-rw-r--r--ext/Encode/ucm/macKorean.ucm376
-rw-r--r--ext/Encode/ucm/macROMnn.ucm11
-rw-r--r--ext/Encode/ucm/macSymbol.ucm54
-rw-r--r--ext/Encode/ucm/macThai.ucm4
21 files changed, 624 insertions, 327 deletions
diff --git a/ext/Encode/AUTHORS b/ext/Encode/AUTHORS
index a40701f40c..8614dd8584 100644
--- a/ext/Encode/AUTHORS
+++ b/ext/Encode/AUTHORS
@@ -8,11 +8,12 @@
# source code kit or CPAN is, of course, allowed.)
#
# This list is in alphabetical order.
---
+--
Andreas J. Koenig <andreas.koenig@anima.de>
Anton Tagunov <tagunov@motor.ru>
Autrijus Tang <autrijus@autrijus.org>
Benjamin Goldberg <goldbb2@earthlink.net>
+Bjoern Hoehrmann <derhoermi@gmx.net>
Bjoern Jacke <debianbugs@j3e.de>
Chris Nandor <pudge@pobox.com>
Craig A. Berry <craigberry@mac.com>
@@ -47,5 +48,6 @@ Simon Cozens <simon@netthink.co.uk>
Spider Boardman <spider@web.zk3.dec.com>
Steve Hay <steve.hay@uk.radan.com>
Tatsuhiko Miyagawa <miyagawa@edge.co.jp>
+Tels <perl_dummy@bloodgate.com>
Vadim Konovalov <vkonovalov@peterstar.ru>
Yitzchak Scott-Thoennes <sthoenna@efn.org>
diff --git a/ext/Encode/Changes b/ext/Encode/Changes
index 1cffc39787..499179620a 100644
--- a/ext/Encode/Changes
+++ b/ext/Encode/Changes
@@ -1,9 +1,47 @@
# Revision history for Perl extension Encode.
#
-# $Id: Changes,v 2.5 2004/10/19 04:55:01 dankogai Exp dankogai $
+# $Id: Changes,v 2.8 2004/10/24 13:00:29 dankogai Exp dankogai $
#
+$Revision: 2.8 $ $Date: 2004/10/24 13:00:29 $
+! Encode.xs lib/Encode/Encoding.pm Unicode/Unicode.{pm,xs}
+ Resolved the conflict between the Encode::utf8 fallbacks and
+ PerlIO::encoding that was introduced in 2.07. This is done by
+ making use of ->renew() method that used to be used only by
+ Encode::Unicode. ->renewed() method was also introduced to fetch
+ the value thereof.
+ Message-Id: <94B2EB12-25B7-11D9-9E6A-000A95DBB50A@dan.co.jp>
+
+2.07 2004/10/22 19:35:52
+! lib/Encode/Encoding.pm
+ "Remove Carp from warnings.pm" that influences Encode, by Tels.
+ Message-Id: <200410161618.29779@bloodgate.com>
+! Encode.xs AUTHORS t/fallback.t
+ Now Encode::utf8's fallbacks are compliant to Encode standard.
+ Thank Bjoern Hoehrmann for persistently convincing me.
+ Message-Id: <41a61aea.638409494@smtp.bjoern.hoehrmann.de>
+! Encode.pm
+ POD further revised.
+
+2.06 2004/10/22 06:23:11
+! ucm/mac*
+ RT #8083 reports that MacThai mapping was obsolete
+ Updated all mac* encodings according to the URI below.
+ One remaining mystery is MacRomanian vs. MacRumanian.
+ MacRumanian is not found in unicode.org...
+ http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/
+! Encode.pm t/Encode.t
+ Fixed RT #8081: "decode(..., bless{},'x') segfault"
+ Two more tests added to test that.
+ http://rt.cpan.org/NoAuth/Bug.html?id=8081
+! Encode.pm
+ POD revised according to RT #7966
+ http://rt.cpan.org/NoAuth/Bug.html?id=7966
+! Unicode/Unicode.pm
+ POD updated explaining why Encode::Unicode always croaks on error
+ rather than giving users choices.
+ http://rt.cpan.org/NoAuth/Bug.html?id=7892
-$Revision: 2.5 $ $Date: 2004/10/19 04:55:01 $
+2.05 2004/10/19 04:55:01
! encoding.pm
"unnuke" jhi's patch in bleedperl, with minor correction by dankogai.
Message-ID: <41210A84.6060506@iki.fi>
diff --git a/ext/Encode/Encode.pm b/ext/Encode/Encode.pm
index 266efc6d73..29dde915fa 100644
--- a/ext/Encode/Encode.pm
+++ b/ext/Encode/Encode.pm
@@ -1,9 +1,9 @@
#
-# $Id: Encode.pm,v 2.5 2004/10/19 04:54:43 dankogai Exp $
+# $Id: Encode.pm,v 2.8 2004/10/24 12:32:06 dankogai Exp $
#
package Encode;
use strict;
-our $VERSION = do { my @r = (q$Revision: 2.5 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 2.8 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
sub DEBUG () { 0 }
use XSLoader ();
XSLoader::load(__PACKAGE__, $VERSION);
@@ -140,6 +140,7 @@ sub encode($$;$)
{
my ($name, $string, $check) = @_;
return undef unless defined $string;
+ return undef if ref $string;
$check ||=0;
my $enc = find_encoding($name);
unless(defined $enc){
@@ -155,6 +156,7 @@ sub decode($$;$)
{
my ($name,$octets,$check) = @_;
return undef unless defined $octets;
+ return undef if ref $octets;
$check ||=0;
my $enc = find_encoding($name);
unless(defined $enc){
@@ -429,7 +431,8 @@ decode($valid_encoding, '') is harmless and warnless.
Converts B<in-place> data between two encodings. The data in $octets
must be encoded as octets and not as characters in Perl's internal
-format. For example, to convert ISO-8859-1 data to Microsoft's CP1250 encoding:
+format. For example, to convert ISO-8859-1 data to Microsoft's CP1250
+encoding:
from_to($octets, "iso-8859-1", "cp1250");
@@ -440,8 +443,8 @@ and to convert it back:
Note that because the conversion happens in place, the data to be
converted cannot be a string constant; it must be a scalar variable.
-from_to() returns the length of the converted string in octets on success, undef
-otherwise.
+from_to() returns the length of the converted string in octets on
+success, I<undef> on error.
B<CAVEAT>: The following operations look the same but are not quite so;
@@ -551,40 +554,51 @@ method.
perlio_ok("euc-jp")
Fortunately, all encodings that come with Encode core are PerlIO-savvy
-except for hz and ISO-2022-kr. For gory details, see L<Encode::Encoding> and L<Encode::PerlIO>.
+except for hz and ISO-2022-kr. For gory details, see
+L<Encode::Encoding> and L<Encode::PerlIO>.
=head1 Handling Malformed Data
-The I<CHECK> argument is used as follows. When you omit it,
-the behaviour is the same as if you had passed a value of 0 for
-I<CHECK>.
+The optional I<CHECK> argument is used as follows. When you omit it,
+Encode::FB_DEFAULT ( == 0 ) is assumed.
+
+=over 2
+
+=item B<NOTE:> Not all encodings support this feature
+
+Some encodings ignore the I<CHECK> argument. For example,
+L<Encode::Unicode> ignores I<CHECK> and always croaks on error.
+
+=back
+
+Now here is the list of I<CHECK> values available:
=over 2
=item I<CHECK> = Encode::FB_DEFAULT ( == 0)
-If I<CHECK> is 0, (en|de)code will put a I<substitution character>
-in place of a malformed character. For UCM-based encodings,
-E<lt>subcharE<gt> will be used. For Unicode, the code point C<0xFFFD> is used.
-If the data is supposed to be UTF-8, an optional lexical warning
-(category utf8) is given.
+If I<CHECK> is 0, (en|de)code will put a I<substitution character> in
+place of a malformed character. When you encode to UCM-based encodings,
+E<lt>subcharE<gt> will be used. When you decode from UCM-based
+encodings, the code point C<0xFFFD> is used. If the data is supposed
+to be UTF-8, an optional lexical warning (category utf8) is given.
=item I<CHECK> = Encode::FB_CROAK ( == 1)
If I<CHECK> is 1, methods will die on error immediately with an error
message. Therefore, when I<CHECK> is set to 1, you should trap the
-fatal error with eval{} unless you really want to let it die on error.
+error with eval{} unless you really want to let it die.
=item I<CHECK> = Encode::FB_QUIET
If I<CHECK> is set to Encode::FB_QUIET, (en|de)code will immediately
-return the portion of the data that has been processed so far when
-an error occurs. The data argument will be overwritten with
-everything after that point (that is, the unprocessed part of data).
-This is handy when you have to call decode repeatedly in the case
-where your source data may contain partial multi-byte character
-sequences, for example because you are reading with a fixed-width
-buffer. Here is some sample code that does exactly this:
+return the portion of the data that has been processed so far when an
+error occurs. The data argument will be overwritten with everything
+after that point (that is, the unprocessed part of data). This is
+handy when you have to call decode repeatedly in the case where your
+source data may contain partial multi-byte character sequences
+(e.g. because you are reading with a fixed-width buffer). Here is
+some sample code that does exactly this:
my $data = ''; my $utf8 = '';
while(defined(read $fh, $buffer, 256)){
@@ -615,8 +629,8 @@ where I<HHHH> is the Unicode ID of the character that cannot be found
in the character repertoire of the encoding.
HTML/XML character reference modes are about the same, in place of
-C<\x{I<HHHH>}>, HTML uses C<&#I<NNNN>>; where I<NNNN> is a decimal digit and
-XML uses C<&#xI<HHHH>>; where I<HHHH> is the hexadecimal digit.
+C<\x{I<HHHH>}>, HTML uses C<&#I<NNNN>;> where I<NNNN> is a decimal number and
+XML uses C<&#xI<HHHH>;> where I<HHHH> is a hexadecimal number.
=item The bitmask
diff --git a/ext/Encode/Encode.xs b/ext/Encode/Encode.xs
index 77d53af2d1..d7a25ff599 100644
--- a/ext/Encode/Encode.xs
+++ b/ext/Encode/Encode.xs
@@ -1,5 +1,5 @@
/*
- $Id: Encode.xs,v 2.0 2004/05/16 20:55:15 dankogai Exp $
+ $Id: Encode.xs,v 2.2 2004/10/24 13:00:29 dankogai Exp dankogai $
*/
#define PERL_NO_GET_CONTEXT
@@ -157,24 +157,15 @@ encode_method(pTHX_ encode_t * enc, encpage_t * dir, SV * src,
if (check & ENCODE_RETURN_ON_ERR){
goto ENCODE_SET_SRC;
}
- if (check & ENCODE_PERLQQ){
- SV* perlqq =
- sv_2mortal(newSVpvf("\\x{%04"UVxf"}", (UV)ch));
+ if (check & (ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){
+ SV* subchar =
+ newSVpvf(check & ENCODE_PERLQQ ? "\\x{%04"UVxf"}" :
+ check & ENCODE_HTMLCREF ? "&#%" UVuf ";" :
+ "&#x%" UVxf ";", (UV)ch);
sdone += slen + clen;
- ddone += dlen + SvCUR(perlqq);
- sv_catsv(dst, perlqq);
- }else if (check & ENCODE_HTMLCREF){
- SV* htmlcref =
- sv_2mortal(newSVpvf("&#%" UVuf ";", (UV)ch));
- sdone += slen + clen;
- ddone += dlen + SvCUR(htmlcref);
- sv_catsv(dst, htmlcref);
- }else if (check & ENCODE_XMLCREF){
- SV* xmlcref =
- sv_2mortal(newSVpvf("&#x%" UVxf ";", (UV)ch));
- sdone += slen + clen;
- ddone += dlen + SvCUR(xmlcref);
- sv_catsv(dst, xmlcref);
+ ddone += dlen + SvCUR(subchar);
+ sv_catsv(dst, subchar);
+ SvREFCNT_dec(subchar);
} else {
/* fallback char */
sdone += slen + clen;
@@ -200,11 +191,11 @@ encode_method(pTHX_ encode_t * enc, encpage_t * dir, SV * src,
}
if (check &
(ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){
- SV* perlqq =
- sv_2mortal(newSVpvf("\\x%02" UVXf, (UV)s[slen]));
+ SV* subchar = newSVpvf("\\x%02" UVXf, (UV)s[slen]);
sdone += slen + 1;
- ddone += dlen + SvCUR(perlqq);
- sv_catsv(dst, perlqq);
+ ddone += dlen + SvCUR(subchar);
+ sv_catsv(dst, subchar);
+ SvREFCNT_dec(subchar);
} else {
sdone += slen + 1;
ddone += dlen + strlen(FBCHAR_UTF8);
@@ -261,14 +252,6 @@ MODULE = Encode PACKAGE = Encode::utf8 PREFIX = Method_
PROTOTYPES: DISABLE
void
-Method_renew(obj)
-SV * obj
-CODE:
-{
- XSRETURN(1);
-}
-
-void
Method_decode_xs(obj,src,check = 0)
SV * obj
SV * src
@@ -279,6 +262,28 @@ CODE:
U8 *s = (U8 *) SvPV(src, slen);
U8 *e = (U8 *) SvEND(src);
SV *dst = newSV(slen>0?slen:1); /* newSV() abhors 0 -- inaba */
+
+ /*
+ * PerlIO check -- we assume the object is of PerlIO if renewed
+ * and if so, we set RETURN_ON_ERR for partial character
+ */
+ int renewed = 0;
+ dSP; ENTER; SAVETMPS;
+ PUSHMARK(sp);
+ XPUSHs(obj);
+ PUTBACK;
+ if (call_method("renewed",G_SCALAR) == 1) {
+ SPAGAIN;
+ renewed = POPi;
+ PUTBACK;
+#if 0
+ fprintf(stderr, "renewed == %d\n", renewed);
+#endif
+ if (renewed){ check |= ENCODE_RETURN_ON_ERR; }
+ }
+ FREETMPS; LEAVE;
+ /* end PerlIO check */
+
SvPOK_only(dst);
SvCUR_set(dst,0);
if (SvUTF8(src)) {
@@ -297,7 +302,7 @@ CODE:
U8 skip = UTF8SKIP(s);
if ((s + skip) > e) {
/* Partial character - done */
- break;
+ goto decode_utf8_fallback;
}
else if (is_utf8_char(s)) {
/* Whole char is good */
@@ -313,6 +318,7 @@ CODE:
/* Invalid start byte */
}
/* If we get here there is something wrong with alleged UTF-8 */
+ decode_utf8_fallback:
if (check & ENCODE_DIE_ON_ERR){
Perl_croak(aTHX_ ERR_DECODE_NOMAP, "utf8", (UV)*s);
XSRETURN(0);
@@ -325,9 +331,9 @@ CODE:
break;
}
if (check & (ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){
- SV* perlqq = newSVpvf("\\x%02" UVXf, (UV)*s);
- sv_catsv(dst, perlqq);
- SvREFCNT_dec(perlqq);
+ SV* subchar = newSVpvf("\\x%02" UVXf, (UV)*s);
+ sv_catsv(dst, subchar);
+ SvREFCNT_dec(subchar);
} else {
sv_catpv(dst, FBCHAR_UTF8);
}
@@ -406,6 +412,14 @@ CODE:
XSRETURN(1);
}
+int
+Method_renewed(obj)
+SV * obj
+CODE:
+ RETVAL = 0;
+OUTPUT:
+ RETVAL
+
void
Method_name(obj)
SV * obj
diff --git a/ext/Encode/META.yml b/ext/Encode/META.yml
index 5a25863062..cea68e510a 100644
--- a/ext/Encode/META.yml
+++ b/ext/Encode/META.yml
@@ -1,7 +1,7 @@
# http://module-build.sourceforge.net/META-spec.html
#XXXXXXX This is a prototype!!! It will change in the future!!! XXXXX#
name: Encode
-version: 2.05
+version: 2.08
version_from: Encode.pm
installdirs: perl
requires:
diff --git a/ext/Encode/Unicode/Unicode.pm b/ext/Encode/Unicode/Unicode.pm
index 8c661a457e..4d0c31d82d 100644
--- a/ext/Encode/Unicode/Unicode.pm
+++ b/ext/Encode/Unicode/Unicode.pm
@@ -4,7 +4,7 @@ use strict;
use warnings;
no warnings 'redefine';
-our $VERSION = do { my @r = (q$Revision: 2.0 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 2.2 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
use XSLoader;
XSLoader::load(__PACKAGE__,$VERSION);
@@ -46,7 +46,7 @@ sub renew {
my $self = shift;
$BOM_Unknown{$self->name} or return $self;
my $clone = bless { %$self } => ref($self);
- $clone->{clone} = 1; # so the caller knows it is renewed.
+ $clone->{renewed}++; # so the caller knows it is renewed.
return $clone;
}
@@ -234,6 +234,24 @@ every one of \x{0000_0000} up to \x{ffff_ffff} (*) is I<a character>.
(*) or \x{ffff_ffff_ffff_ffff} if your perl is compiled with 64-bit
integer support!
+=head1 Error Checking
+
+Unlike most encodings which accept various ways to handle errors,
+Unicode encodings simply croak.
+
+ % perl -MEncode -e '$_ = "\xfe\xff\xd8\xd9\xda\xdb\0\n"' \
+ -e 'Encode::from_to($_, "utf16","shift_jis", 0); print'
+ UTF-16:Malformed LO surrogate d8d9 at /path/to/Encode.pm line 184.
+ % perl -MEncode -e '$a = "BOM missing"' \
+ -e ' Encode::from_to($a, "utf16", "shift_jis", 0); print'
+ UTF-16:Unrecognised BOM 424f at /path/to/Encode.pm line 184.
+
+Unlike other encodings where mappings are not one-to-one against
+Unicode, UTFs are supposed to map 100% against one another. So Encode
+is more strict on UTFs.
+
+Consider that the "division by zero" of Encode :)
+
=head1 SEE ALSO
L<Encode>, L<Encode::Unicode::UTF7>, L<http://www.unicode.org/glossary/>,
diff --git a/ext/Encode/Unicode/Unicode.xs b/ext/Encode/Unicode/Unicode.xs
index 6dadbdcdef..acecd9c967 100644
--- a/ext/Encode/Unicode/Unicode.xs
+++ b/ext/Encode/Unicode/Unicode.xs
@@ -1,5 +1,5 @@
/*
- $Id: Unicode.xs,v 2.0 2004/05/16 20:55:16 dankogai Exp $
+ $Id: Unicode.xs,v 2.1 2004/10/24 13:00:29 dankogai Exp dankogai $
*/
#define PERL_NO_GET_CONTEXT
@@ -97,7 +97,7 @@ CODE:
U8 endian = *((U8 *)SvPV_nolen(attr("endian", 6)));
int size = SvIV(attr("size", 4));
int ucs2 = SvTRUE(attr("ucs2", 4));
- int clone = SvTRUE(attr("clone", 5));
+ int renewed = SvTRUE(attr("renewed", 7));
SV *result = newSVpvn("",0);
STRLEN ulen;
U8 *s = (U8 *)SvPVbyte(str,ulen);
@@ -124,7 +124,7 @@ CODE:
}
#if 1
/* Update endian for next sequence */
- if (clone) {
+ if (renewed) {
hv_store((HV *)SvRV(obj),"endian",6,newSVpv((char *)&endian,1),0);
}
#endif
@@ -200,7 +200,7 @@ CODE:
U8 endian = *((U8 *)SvPV_nolen(attr("endian", 6)));
int size = SvIV(attr("size", 4));
int ucs2 = SvTRUE(attr("ucs2", 4));
- int clone = SvTRUE(attr("clone", 5));
+ int renewed = SvTRUE(attr("renewed", 7));
SV *result = newSVpvn("",0);
STRLEN ulen;
U8 *s = (U8 *)SvPVutf8(utf8,ulen);
@@ -211,7 +211,7 @@ CODE:
enc_pack(aTHX_ result,size,endian,BOM_BE);
#if 1
/* Update endian for next sequence */
- if (clone){
+ if (renewed){
hv_store((HV *)SvRV(obj),"endian",6,newSVpv((char *)&endian,1),0);
}
#endif
diff --git a/ext/Encode/encoding.pm b/ext/Encode/encoding.pm
index 6eccc7eb52..b52280ffe8 100644
--- a/ext/Encode/encoding.pm
+++ b/ext/Encode/encoding.pm
@@ -1,4 +1,4 @@
-# $Id: encoding.pm,v 2.1 2004/10/19 04:55:01 dankogai Exp dankogai $
+# $Id: encoding.pm,v 2.1 2004/10/19 04:55:01 dankogai Exp $
package encoding;
our $VERSION = do { my @r = (q$Revision: 2.1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
diff --git a/ext/Encode/lib/Encode/Encoding.pm b/ext/Encode/lib/Encode/Encoding.pm
index 92f8c9644a..06af9fb699 100644
--- a/ext/Encode/lib/Encode/Encoding.pm
+++ b/ext/Encode/lib/Encode/Encoding.pm
@@ -1,10 +1,11 @@
package Encode::Encoding;
# Base class for classes which implement encodings
use strict;
-our $VERSION = do { my @r = (q$Revision: 2.0 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
+our $VERSION = do { my @r = (q$Revision: 2.2 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
require Encode;
+sub DEBUG { 0 }
sub Define
{
my $obj = shift;
@@ -16,7 +17,18 @@ sub Define
sub name { return shift->{'Name'} }
-sub renew { return $_[0] }
+# sub renew { return $_[0] }
+
+sub renew {
+ my $self = shift;
+ my $clone = bless { %$self } => ref($self);
+ $clone->{renewed}++; # so the caller can see it
+ DEBUG and warn $clone->{renewed};
+ return $clone;
+}
+
+sub renewed{ return $_[0]->{renewed} || 0 }
+
*new_sequence = \&renew;
sub needs_lines { 0 };
@@ -39,14 +51,14 @@ sub encode {
require Carp;
my $obj = shift;
my $class = ref($obj) ? ref($obj) : $obj;
- Carp::croak $class, "->encode() not defined!";
+ Carp::croak($class . "->encode() not defined!");
}
sub decode{
require Carp;
my $obj = shift;
my $class = ref($obj) ? ref($obj) : $obj;
- Carp::croak $class, "->encode() not defined!";
+ Carp::croak($class . "->encode() not defined!");
}
sub DESTROY {}
@@ -167,25 +179,29 @@ MUST return the string representing the canonical name of the encoding.
Predefined As:
- sub renew { return $_[0] }
+ sub renew {
+ my $self = shift;
+ my $clone = bless { %$self } => ref($self);
+ $clone->{renewed}++;
+ return $clone;
+ }
This method reconstructs the encoding object if necessary. If you need
to store the state during encoding, this is where you clone your object.
-Here is an example:
-
- sub renew {
- my $self = shift;
- my $clone = bless { %$self } => ref($self);
- $clone->{clone} = 1; # so the caller can see it
- return $clone;
- }
-
-Since most encodings are stateless the default behavior is just return
-itself as shown above.
PerlIO ALWAYS calls this method to make sure it has its own private
encoding object.
+=item -E<gt>renewed
+
+Predefined As:
+
+ sub renewed { $_[0]->{renewed} || 0 }
+
+Tells whether the object is renewed (and how many times). Some
+modules emit a C<Use of uninitialized value in null operation> warning
+unless the value is numeric, so this returns 0 for false.
+
=item -E<gt>perlio_ok()
Predefined As:
diff --git a/ext/Encode/t/Encode.t b/ext/Encode/t/Encode.t
index 784ea74117..63e913a587 100644
--- a/ext/Encode/t/Encode.t
+++ b/ext/Encode/t/Encode.t
@@ -25,7 +25,7 @@ my @character_set = ('0'..'9', 'A'..'Z', 'a'..'z');
my @source = qw(ascii iso8859-1 cp1250);
my @destiny = qw(cp1047 cp37 posix-bc);
my @ebcdic_sets = qw(cp1047 cp37 posix-bc);
-plan test => 38+$n*@encodings + 2*@source*@destiny*@character_set + 2*@ebcdic_sets*256 + 6;
+plan test => 38+$n*@encodings + 2*@source*@destiny*@character_set + 2*@ebcdic_sets*256 + 6 + 2;
my $str = join('',map(chr($_),0x20..0x7E));
my $cpy = $str;
ok(length($str),from_to($cpy,'iso8859-1','Unicode'),"Length Wrong");
@@ -142,3 +142,7 @@ $a = "\x{100}";
chop $a;
ok( is_utf8($a)); # weird but true: an empty UTF-8 string
+# non-string arguments
+ok(decode(latin1 => bless {}, "x"), undef);
+ok(encode(utf8 => bless {}, "x"), undef);
+
diff --git a/ext/Encode/t/fallback.t b/ext/Encode/t/fallback.t
index 11b484af67..e319357566 100644
--- a/ext/Encode/t/fallback.t
+++ b/ext/Encode/t/fallback.t
@@ -17,86 +17,137 @@ BEGIN {
use strict;
#use Test::More qw(no_plan);
-use Test::More tests => 22;
+use Test::More tests => 36;
use Encode q(:all);
-my $original = '';
-my $nofallback = '';
-my ($fallenback, $quiet, $perlqq, $htmlcref, $xmlcref);
+my $uo = '';
+my $nf = '';
+my ($af, $aq, $ap, $ah, $ax, $uf, $uq, $up, $uh, $ux);
for my $i (0x20..0x7e){
- $original .= chr($i);
+ $uo .= chr($i);
}
-$fallenback = $quiet =
-$perlqq = $htmlcref = $xmlcref = $nofallback = $original;
+$af = $aq = $ap = $ah = $ax =
+$uf = $uq = $up = $uh = $ux =
+$nf = $uo;
my $residue = '';
for my $i (0x80..0xff){
- $original .= chr($i);
+ $uo .= chr($i);
$residue .= chr($i);
- $fallenback .= '?';
- $perlqq .= sprintf("\\x{%04x}", $i);
- $htmlcref .= sprintf("&#%d;", $i);
- $xmlcref .= sprintf("&#x%x;", $i);
+ $af .= '?';
+ $uf .= "\x{FFFD}";
+ $ap .= sprintf("\\x{%04x}", $i);
+ $up .= sprintf("\\x%02X", $i);
+ $ah .= sprintf("&#%d;", $i);
+ $uh .= sprintf("&#%d;", $i);
+ $ax .= sprintf("&#x%x;", $i);
+ $ux .= sprintf("&#x%x;", $i);
}
-utf8::upgrade($original);
-my $meth = find_encoding('ascii');
-my $src = $original;
-my $dst = $meth->encode($src, FB_DEFAULT);
-is($dst, $fallenback, "FB_DEFAULT");
-is($src, $original, "FB_DEFAULT residue");
+my $ao = $uo;
+utf8::upgrade($uo);
-$src = $original;
-eval{ $dst = $meth->encode($src, FB_CROAK) };
-like($@, qr/does not map to ascii/o, "FB_CROAK");
-is($src, $original, "FB_CROAK residue");
+my $ascii = find_encoding('ascii');
+my $utf8 = find_encoding('utf8');
-$src = $original;
-eval{ $dst = $meth->encode($src, FB_CROAK) };
-like($@, qr/does not map to ascii/o, "FB_CROAK");
-is($src, $original, "FB_CROAK residue");
+my $src = $uo;
+my $dst = $ascii->encode($src, FB_DEFAULT);
+is($dst, $af, "FB_DEFAULT ascii");
+is($src, $uo, "FB_DEFAULT residue ascii");
+$src = $ao;
+$dst = $utf8->decode($src, FB_DEFAULT);
+is($dst, $uf, "FB_DEFAULT utf8");
+is($src, $ao, "FB_DEFAULT residue utf8");
-$src = $nofallback;
-eval{ $dst = $meth->encode($src, FB_CROAK) };
-is($@, '', "FB_CROAK on success");
-is($src, '', "FB_CROAK on success residue");
+$src = $uo;
+eval{ $dst = $ascii->encode($src, FB_CROAK) };
+like($@, qr/does not map to ascii/o, "FB_CROAK ascii");
+is($src, $uo, "FB_CROAK residue ascii");
-$src = $original;
-$dst = $meth->encode($src, FB_QUIET);
-is($dst, $quiet, "FB_QUIET");
-is($src, $residue, "FB_QUIET residue");
+$src = $ao;
+eval{ $dst = $utf8->decode($src, FB_CROAK) };
+like($@, qr/does not map to Unicode/o, "FB_CROAK utf8");
+is($src, $ao, "FB_CROAK residue utf8");
+
+$src = $nf;
+eval{ $dst = $ascii->encode($src, FB_CROAK) };
+is($@, '', "FB_CROAK on success ascii");
+is($src, '', "FB_CROAK on success residue ascii");
+
+$src = $nf;
+eval{ $dst = $utf8->decode($src, FB_CROAK) };
+is($@, '', "FB_CROAK on success utf8");
+is($src, '', "FB_CROAK on success residue utf8");
+
+$src = $uo;
+$dst = $ascii->encode($src, FB_QUIET);
+is($dst, $aq, "FB_QUIET ascii");
+is($src, $residue, "FB_QUIET residue ascii");
+
+$src = $ao;
+$dst = $utf8->decode($src, FB_QUIET);
+is($dst, $uq, "FB_QUIET utf8");
+is($src, $residue, "FB_QUIET residue utf8");
{
- my $message;
+ my $message = '';
local $SIG{__WARN__} = sub { $message = $_[0] };
- $src = $original;
- $dst = $meth->encode($src, FB_WARN);
- is($dst, $quiet, "FB_WARN");
- is($src, $residue, "FB_WARN residue");
- like($message, qr/does not map to ascii/o, "FB_WARN message");
+
+ $src = $uo;
+ $dst = $ascii->encode($src, FB_WARN);
+ is($dst, $aq, "FB_WARN ascii");
+ is($src, $residue, "FB_WARN residue ascii");
+ like($message, qr/does not map to ascii/o, "FB_WARN message ascii");
$message = '';
+ $src = $ao;
+ $dst = $utf8->decode($src, FB_WARN);
+ is($dst, $uq, "FB_WARN utf8");
+ is($src, $residue, "FB_WARN residue utf8");
+ like($message, qr/does not map to Unicode/o, "FB_WARN message utf8");
- $src = $original;
- $dst = $meth->encode($src, WARN_ON_ERR);
+ $message = '';
+ $src = $uo;
+ $dst = $ascii->encode($src, WARN_ON_ERR);
+ is($dst, $af, "WARN_ON_ERR ascii");
+ is($src, '', "WARN_ON_ERR residue ascii");
+ like($message, qr/does not map to ascii/o, "WARN_ON_ERR message ascii");
- is($dst, $fallenback, "WARN_ON_ERR");
- is($src, '', "WARN_ON_ERR residue");
- like($message, qr/does not map to ascii/o, "WARN_ON_ERR message");
+ $message = '';
+ $src = $ao;
+ $dst = $utf8->decode($src, WARN_ON_ERR);
+ is($dst, $uf, "WARN_ON_ERR utf8");
+ is($src, '', "WARN_ON_ERR residue utf8");
+ like($message, qr/does not map to Unicode/o, "WARN_ON_ERR message ascii");
}
-$src = $original;
-$dst = $meth->encode($src, FB_PERLQQ);
-is($dst, $perlqq, "FB_PERLQQ");
-is($src, '', "FB_PERLQQ residue");
-
-$src = $original;
-$dst = $meth->encode($src, FB_HTMLCREF);
-is($dst, $htmlcref, "FB_HTMLCREF");
-is($src, '', "FB_HTMLCREF residue");
-
-$src = $original;
-$dst = $meth->encode($src, FB_XMLCREF);
-is($dst, $xmlcref, "FB_XMLCREF");
-is($src, '', "FB_XMLCREF residue");
+$src = $uo;
+$dst = $ascii->encode($src, FB_PERLQQ);
+is($dst, $ap, "FB_PERLQQ ascii");
+is($src, '', "FB_PERLQQ residue ascii");
+
+$src = $ao;
+$dst = $utf8->decode($src, FB_PERLQQ);
+is($dst, $up, "FB_PERLQQ utf8");
+is($src, '', "FB_PERLQQ residue utf8");
+
+$src = $uo;
+$dst = $ascii->encode($src, FB_HTMLCREF);
+is($dst, $ah, "FB_HTMLCREF ascii");
+is($src, '', "FB_HTMLCREF residue ascii");
+
+#$src = $ao;
+#$dst = $utf8->decode($src, FB_HTMLCREF);
+#is($dst, $uh, "FB_HTMLCREF utf8");
+#is($src, '', "FB_HTMLCREF residue utf8");
+
+$src = $uo;
+$dst = $ascii->encode($src, FB_XMLCREF);
+is($dst, $ax, "FB_XMLCREF ascii");
+is($src, '', "FB_XMLCREF residue ascii");
+
+#$src = $ao;
+#$dst = $utf8->decode($src, FB_XMLCREF);
+#is($dst, $ax, "FB_XMLCREF utf8");
+#is($src, '', "FB_XMLCREF residue utf8");
diff --git a/ext/Encode/ucm/macArabic.ucm b/ext/Encode/ucm/macArabic.ucm
index 2fa32ea4c0..584fd6e750 100644
--- a/ext/Encode/ucm/macArabic.ucm
+++ b/ext/Encode/ucm/macArabic.ucm
@@ -1,5 +1,5 @@
#
-# $Id: macArabic.ucm,v 2.0 2004/05/16 20:55:26 dankogai Exp $
+# $Id: macArabic.ucm,v 2.1 2004/10/22 06:23:11 dankogai Exp $
#
# Original table can be obtained at
# http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/ARABIC.TXT
@@ -213,6 +213,7 @@ CHARMAP
<U064F> \xEF |0 # ARABIC DAMMA
<U0650> \xF0 |0 # ARABIC KASRA
<U0651> \xF1 |0 # ARABIC SHADDA
+<U0652> \xF2 |0 # ARABIC SUKUN
<U0660> \xB0 |0 # ARABIC-INDIC DIGIT ZERO, right-left (need override)
<U0661> \xB1 |0 # ARABIC-INDIC DIGIT ONE, right-left (need override)
<U0662> \xB2 |0 # ARABIC-INDIC DIGIT TWO, right-left (need override)
@@ -223,7 +224,6 @@ CHARMAP
<U0667> \xB7 |0 # ARABIC-INDIC DIGIT SEVEN, right-left (need override)
<U0668> \xB8 |0 # ARABIC-INDIC DIGIT EIGHT, right-left (need override)
<U0669> \xB9 |0 # ARABIC-INDIC DIGIT NINE, right-left (need override)
-<U0652> \xF2 |0 # ARABIC SUKUN
<U066A> \xA5 |0 # ARABIC PERCENT SIGN
<U0679> \xF4 |0 # ARABIC LETTER TTEH
<U067E> \xF3 |0 # ARABIC LETTER PEH
diff --git a/ext/Encode/ucm/macCentEuro.ucm b/ext/Encode/ucm/macCentEuro.ucm
index a885997074..875a8ab6dd 100644
--- a/ext/Encode/ucm/macCentEuro.ucm
+++ b/ext/Encode/ucm/macCentEuro.ucm
@@ -1,5 +1,5 @@
#
-# $Id: macCentEuro.ucm,v 2.0 2004/05/16 20:55:26 dankogai Exp $
+# $Id: macCentEuro.ucm,v 2.1 2004/10/22 06:23:11 dankogai Exp $
#
# Original table can be obtained at
# http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/CENTEURO.TXT
@@ -9,7 +9,6 @@
<mb_cur_min> 1
<mb_cur_max> 1
<subchar> \x3F
-#
CHARMAP
<U0000> \x00 |0 # <control>
<U0001> \x01 |0 # <control>
diff --git a/ext/Encode/ucm/macChinsimp.ucm b/ext/Encode/ucm/macChinsimp.ucm
index 881fee7c72..5def5fb1e8 100644
--- a/ext/Encode/ucm/macChinsimp.ucm
+++ b/ext/Encode/ucm/macChinsimp.ucm
@@ -1,5 +1,5 @@
#
-# $Id: macChinsimp.ucm,v 2.0 2004/05/16 20:55:26 dankogai Exp $
+# $Id: macChinsimp.ucm,v 2.1 2004/10/22 06:23:11 dankogai Exp $
#
# Original table can be obtained at
# http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/CHINSIMP.TXT
@@ -121,7 +121,6 @@ CHARMAP
<U006C> \x6C |0 # LATIN SMALL LETTER L
<U006D> \x6D |0 # LATIN SMALL LETTER M
<U006E> \x6E |0 # LATIN SMALL LETTER N
-<U006E><U0300> \xA8\xBF |3 # LATIN SMALL LETTER N + COMBINING GRAVE ACCENT
<U006F> \x6F |0 # LATIN SMALL LETTER O
<U0070> \x70 |0 # LATIN SMALL LETTER P
<U0071> \x71 |0 # LATIN SMALL LETTER Q
@@ -180,8 +179,9 @@ CHARMAP
<U01D8> \xA8\xB6 |0 # LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE
<U01DA> \xA8\xB7 |0 # LATIN SMALL LETTER U WITH DIAERESIS AND CARON
<U01DC> \xA8\xB8 |0 # LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE
-<U0251> \xA8\xBB |0 # LATIN SMALL LETTER TURNED ALPHA (wrong glyph in Apple fonts)
-<U0261> \xA8\xC0 |0 # LATIN SMALL LETTER SCRIPT G (wrong glyph in Apple fonts other than Hei)
+<U01F9> \xA8\xBF |0 # LATIN SMALL LETTER N WITH GRAVE # for Unicode 3.0 and later
+<U0251> \xA8\xBB |0 # LATIN SMALL LETTER TURNED ALPHA
+<U0261> \xA8\xC0 |0 # LATIN SMALL LETTER SCRIPT G
<U02C7> \xA1\xA6 |0 # CARON (Mandarin Chinese third tone)
<U02C9> \xA1\xA5 |0 # MODIFIER LETTER MACRON (Mandarin Chinese first tone)
<U0391> \xA6\xA1 |0 # GREEK CAPITAL LETTER ALPHA
diff --git a/ext/Encode/ucm/macChintrad.ucm b/ext/Encode/ucm/macChintrad.ucm
index 9cbcf5215d..434287bbd5 100644
--- a/ext/Encode/ucm/macChintrad.ucm
+++ b/ext/Encode/ucm/macChintrad.ucm
@@ -1,5 +1,5 @@
#
-# $Id: macChintrad.ucm,v 2.0 2004/05/16 20:55:26 dankogai Exp $
+# $Id: macChintrad.ucm,v 2.1 2004/10/22 06:23:11 dankogai Exp $
#
# Original table can be obtained at
# http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/CHINTRAD.TXT
@@ -253,8 +253,8 @@ CHARMAP
<U2252> \xA1\xDC |0 # APPROXIMATELY EQUAL TO OR THE IMAGE OF
<U2260> \xA1\xDA |0 # NOT EQUAL TO
<U2261> \xA1\xDD |0 # IDENTICAL TO
-<U2266> \xA1\xD8 |0 # LESS THAN OVER EQUAL TO
-<U2267> \xA1\xD9 |0 # GREATER THAN OVER EQUAL TO
+<U2266> \xA1\xD8 |0 # LESS-THAN OVER EQUAL TO
+<U2267> \xA1\xD9 |0 # GREATER-THAN OVER EQUAL TO
<U2295> \xA1\xF2 |0 # CIRCLED PLUS # change from UTC mapping
<U22A5> \xA1\xE6 |0 # UP TACK
<U22BF> \xA1\xE9 |0 # RIGHT TRIANGLE
diff --git a/ext/Encode/ucm/macDingbats.ucm b/ext/Encode/ucm/macDingbats.ucm
index 2c77e7244d..3047a67e6f 100644
--- a/ext/Encode/ucm/macDingbats.ucm
+++ b/ext/Encode/ucm/macDingbats.ucm
@@ -1,5 +1,5 @@
#
-# $Id: macDingbats.ucm,v 2.0 2004/05/16 20:55:27 dankogai Exp $
+# $Id: macDingbats.ucm,v 2.1 2004/10/22 06:23:11 dankogai Exp $
#
# Original table can be obtained at
# http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/DINGBATS.TXT
@@ -42,14 +42,6 @@ CHARMAP
<U001E> \x1E |0 # <control>
<U001F> \x1F |0 # <control>
<U0020> \x20 |0 # SPACE
-<U0028> \x80 |0 # LEFT PARENTHESIS
-<U0028><UF87F> \x82 |3 # LEFT PARENTHESIS, alternate (flattened)
-<U0029> \x81 |0 # RIGHT PARENTHESIS
-<U0029><UF87F> \x83 |3 # RIGHT PARENTHESIS, alternate (flattened)
-<U007B> \x8C |0 # LEFT CURLY BRACKET
-<U007D> \x8D |0 # RIGHT CURLY BRACKET
-<U2039> \x86 |0 # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
-<U203A> \x87 |0 # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
<U2192> \xD5 |0 # RIGHTWARDS ARROW
<U2194> \xD6 |0 # LEFT RIGHT ARROW
<U2195> \xD7 |0 # UP DOWN ARROW
@@ -168,6 +160,20 @@ CHARMAP
<U2765> \xA5 |0 # ROTATED HEAVY BLACK HEART BULLET
<U2766> \xA6 |0 # FLORAL HEART
<U2767> \xA7 |0 # ROTATED FLORAL HEART BULLET
+<U2768> \x80 |0 # MEDIUM LEFT PARENTHESIS ORNAMENT # for Unicode 3.2 and later
+<U2769> \x81 |0 # MEDIUM RIGHT PARENTHESIS ORNAMENT # for Unicode 3.2 and later
+<U276A> \x82 |0 # MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT # for Unicode 3.2 and later
+<U276B> \x83 |0 # MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT # for Unicode 3.2 and later
+<U276C> \x84 |0 # MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT # for Unicode 3.2 and later
+<U276D> \x85 |0 # MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT # for Unicode 3.2 and later
+<U276E> \x86 |0 # HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT # for Unicode 3.2 and later
+<U276F> \x87 |0 # HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT # for Unicode 3.2 and later
+<U2770> \x88 |0 # HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT # for Unicode 3.2 and later
+<U2771> \x89 |0 # HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT # for Unicode 3.2 and later
+<U2772> \x8A |0 # LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT # for Unicode 3.2 and later
+<U2773> \x8B |0 # LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT # for Unicode 3.2 and later
+<U2774> \x8C |0 # MEDIUM LEFT CURLY BRACKET ORNAMENT # for Unicode 3.2 and later
+<U2775> \x8D |0 # MEDIUM RIGHT CURLY BRACKET ORNAMENT # for Unicode 3.2 and later
<U2776> \xB6 |0 # DINGBAT NEGATIVE CIRCLED DIGIT ONE
<U2777> \xB7 |0 # DINGBAT NEGATIVE CIRCLED DIGIT TWO
<U2778> \xB8 |0 # DINGBAT NEGATIVE CIRCLED DIGIT THREE
@@ -237,10 +243,4 @@ CHARMAP
<U27BC> \xFC |0 # WEDGE-TAILED RIGHTWARDS ARROW
<U27BD> \xFD |0 # HEAVY WEDGE-TAILED RIGHTWARDS ARROW
<U27BE> \xFE |0 # OPEN-OUTLINED RIGHTWARDS ARROW
-<U3008> \x84 |0 # LEFT ANGLE BRACKET
-<U3008><UF87C> \x88 |3 # LEFT ANGLE BRACKET, heavy
-<U3009> \x85 |0 # RIGHT ANGLE BRACKET
-<U3009><UF87C> \x89 |3 # RIGHT ANGLE BRACKET, heavy
-<U3014> \x8A |0 # LEFT TORTOISE SHELL BRACKET
-<U3015> \x8B |0 # RIGHT TORTOISE SHELL BRACKET
END CHARMAP
diff --git a/ext/Encode/ucm/macGreek.ucm b/ext/Encode/ucm/macGreek.ucm
index baeb06140d..bf88b98be2 100644
--- a/ext/Encode/ucm/macGreek.ucm
+++ b/ext/Encode/ucm/macGreek.ucm
@@ -1,5 +1,5 @@
#
-# $Id: macGreek.ucm,v 2.0 2004/05/16 20:55:27 dankogai Exp $
+# $Id: macGreek.ucm,v 2.1 2004/10/22 06:23:11 dankogai Exp $
#
# Original table can be obtained at
# http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/GREEK.TXT
@@ -145,7 +145,7 @@ CHARMAP
<U00A9> \xA9 |0 # COPYRIGHT SIGN
<U00AB> \xC7 |0 # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
<U00AC> \xC2 |0 # NOT SIGN
-<U00AD> \x9C |0 # SOFT HYPHEN
+<U00AD> \xFF |0 # SOFT HYPHEN # before Mac OS 9.2.2, was undefined
<U00AE> \xA8 |0 # REGISTERED SIGN
<U00B0> \xAE |0 # DEGREE SIGN
<U00B1> \xB1 |0 # PLUS-MINUS SIGN
@@ -258,10 +258,10 @@ CHARMAP
<U2022> \x96 |0 # BULLET
<U2026> \xC9 |0 # HORIZONTAL ELLIPSIS
<U2030> \x98 |0 # PER MILLE SIGN
+<U20AC> \x9C |0 # EURO SIGN # before Mac OS 9.2.2, was SOFT HYPHEN
<U2122> \x93 |0 # TRADE MARK SIGN
<U2248> \xC5 |0 # ALMOST EQUAL TO
<U2260> \xAD |0 # NOT EQUAL TO
<U2264> \xB2 |0 # LESS-THAN OR EQUAL TO
<U2265> \xB3 |0 # GREATER-THAN OR EQUAL TO
-<UF8A0> \xFF |0 # undefined1
END CHARMAP
diff --git a/ext/Encode/ucm/macKorean.ucm b/ext/Encode/ucm/macKorean.ucm
index 54f05b6137..6cdf7ca1a7 100644
--- a/ext/Encode/ucm/macKorean.ucm
+++ b/ext/Encode/ucm/macKorean.ucm
@@ -1,5 +1,5 @@
#
-# $Id: macKorean.ucm,v 2.0 2004/05/16 20:55:27 dankogai Exp $
+# $Id: macKorean.ucm,v 2.1 2004/10/22 06:23:11 dankogai Exp $
#
# Original table can be obtained at
# http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/KOREAN.TXT
@@ -52,21 +52,9 @@ CHARMAP
<U0026> \x26 |0 # AMPERSAND
<U0027> \x27 |0 # APOSTROPHE
<U0028> \x28 |0 # LEFT PARENTHESIS
-<U0028><UF873> \xA2\x45 |3 # LEFT PARENTHESIS, small, bold
-<U0028><UF874> \xA1\x4F |3 # LEFT PARENTHESIS, small, more rounded
-<U0028><UF875> \xA1\x55 |3 # LEFT PARENTHESIS, white, alternate
-<U0028><UF878> \xA1\x4D |3 # LEFT PARENTHESIS, small
-<U0028><UF87A> \xA1\x59 |3 # LEFT PARENTHESIS, white
-<U0028><UF87B> \xA1\x65 |3 # LEFT PARENTHESIS, white, bold
<U0028><UF87C> \xA1\x57 |3 # LEFT PARENTHESIS, bold
<U0028><UF87F> \xA2\x4B |3 # LEFT PARENTHESIS, more rounded
<U0029> \x29 |0 # RIGHT PARENTHESIS
-<U0029><UF873> \xA2\x46 |3 # RIGHT PARENTHESIS, small, bold
-<U0029><UF874> \xA1\x50 |3 # RIGHT PARENTHESIS, small, more rounded
-<U0029><UF875> \xA1\x56 |3 # RIGHT PARENTHESIS, white, alternate
-<U0029><UF878> \xA1\x4E |3 # RIGHT PARENTHESIS, small
-<U0029><UF87A> \xA1\x5A |3 # RIGHT PARENTHESIS, white
-<U0029><UF87B> \xA1\x66 |3 # RIGHT PARENTHESIS, white, bold
<U0029><UF87C> \xA1\x58 |3 # RIGHT PARENTHESIS, bold
<U0029><UF87F> \xA2\x4C |3 # RIGHT PARENTHESIS, more rounded
<U002A> \x2A |0 # ASTERISK
@@ -74,6 +62,7 @@ CHARMAP
<U002B> \x2B |0 # PLUS SIGN
<U002C> \x2C |0 # COMMA
<U002D> \x2D |0 # HYPHEN-MINUS
+<U002D><U0308> \xA7\x67 |3 # HYPHEN-MINUS+COMBINING DIAERESIS
<U002E> \x2E |0 # FULL STOP
<U002F> \x2F |0 # SOLIDUS
<U0030> \x30 |0 # DIGIT ZERO
@@ -146,6 +135,8 @@ CHARMAP
<U003C> \x3C |0 # LESS-THAN SIGN
<U003C><UF877> \xA1\x79 |3 # LESS-THAN SIGN, superscript
<U003D> \x3D |0 # EQUALS SIGN
+<U003D><U20D2> \xA7\x65 |3 # EQUALS SIGN+COMBINING LONG VERTICAL LINE OVERLAY
+<U003D><U20E5> \xA7\x62 |3 # EQUALS SIGN+COMBINING REVERSE SOLIDUS OVERLAY # for Unicode 3.2 and later
<U003E> \x3E |0 # GREATER-THAN SIGN
<U003E><UF877> \xA1\x78 |3 # GREATER-THAN SIGN, superscript
<U003F> \x3F |0 # QUESTION MARK
@@ -260,7 +251,6 @@ CHARMAP
<U00E6> \xA9\xA1 |0 # LATIN SMALL LIGATURE AE
<U00F0> \xA9\xA3 |0 # LATIN SMALL LETTER ETH (Icelandic)
<U00F7> \xA1\xC0 |0 # DIVISION SIGN
-<U00F7><U20DD> \xA7\x60 |3 # DIVISION SIGN + COMBINING ENCLOSING CIRCLE
<U00F8> \xA9\xAA |0 # LATIN SMALL LETTER O WITH STROKE
<U00FE> \xA9\xAD |0 # LATIN SMALL LETTER THORN (Icelandic)
<U0111> \xA9\xA2 |0 # LATIN SMALL LETTER D WITH STROKE
@@ -288,7 +278,7 @@ CHARMAP
<U02D9> \xA2\xAB |0 # DOT ABOVE (Mandarin Chinese light tone)
<U02DA> \xA2\xAA |0 # RING ABOVE
<U02DB> \xA2\xAD |0 # OGONEK
-<U02DC> \xA2\xA6 |0 # SMALL TILDE # KSC: "tilde accent"
+<U02DC> \xA2\xA6 |0 # SMALL TILDE # KSC spec: "tilde accent"
<U02DD> \xA2\xA9 |0 # DOUBLE ACUTE ACCENT
<U0391> \xA5\xC1 |0 # GREEK CAPITAL LETTER ALPHA
<U0392> \xA5\xC2 |0 # GREEK CAPITAL LETTER BETA
@@ -338,6 +328,7 @@ CHARMAP
<U03C7> \xA5\xF6 |0 # GREEK SMALL LETTER CHI
<U03C8> \xA5\xF7 |0 # GREEK SMALL LETTER PSI
<U03C9> \xA5\xF8 |0 # GREEK SMALL LETTER OMEGA
+<U03D5> \xA7\x6A |0 # GREEK PHI SYMBOL
<U0401> \xAC\xA7 |0 # CYRILLIC CAPITAL LETTER IO
<U0410> \xAC\xA1 |0 # CYRILLIC CAPITAL LETTER A
<U0411> \xAC\xA2 |0 # CYRILLIC CAPITAL LETTER BE
@@ -446,6 +437,16 @@ CHARMAP
<U203E> \xA3\xFE |0 # OVERLINE # change from UTC mapping; KSC spec: "overline, macron"
<U2042> \xA6\x4D |0 # ASTERISM
<U2042><UF879> \xA6\x51 |3 # ASTERISM, large
+<U2047> \xA7\x87 |0 # DOUBLE QUESTION MARK # for Unicode 3.2 and later
+<U2049> \xA7\x85 |0 # EXCLAMATION QUESTION MARK # for Unicode 3.0 and later
+<U204C> \xA1\x96 |0 # BLACK LEFTWARDS BULLET (used to bracket titles) # for Unicode 3.0 or later
+<U204D> \xA1\x97 |0 # BLACK RIGHTWARDS BULLET (used to bracket titles) # for Unicode 3.0 or later
+<U204E> \xA6\x4E |0 # LOW ASTERISK # for Unicode 3.2 and later
+<U2051> \xA1\x6D |0 # TWO ASTERISKS ALIGNED VERTICALLY (dictionary definition importance mark) # for Unicode 3.2 or later
+<U2051><UF871> \xA6\x4F |3 # TWO ASTERISKS ALIGNED VERTICALLY, large, right # for Unicode 3.2 and later
+<U2051><UF874> \xA6\x4B |3 # TWO ASTERISKS ALIGNED VERTICALLY, bold, right # for Unicode 3.2 and later
+<U2051><UF879> \xA1\xA0 |3 # TWO ASTERISKS ALIGNED VERTICALLY, large # for Unicode 3.2 or later
+<U2051><UF87C> \xA1\x9D |3 # TWO ASTERISKS ALIGNED VERTICALLY, medium large # for Unicode 3.2 or later
<U2074> \xA9\xF9 |0 # SUPERSCRIPT FOUR
<U207A> \xA1\x71 |0 # SUPERSCRIPT PLUS SIGN
<U207B> \xA1\x72 |0 # SUPERSCRIPT MINUS
@@ -495,6 +496,8 @@ CHARMAP
<U2179> \xA5\xAA |0 # SMALL ROMAN NUMERAL TEN
<U2190> \xA1\xE7 |0 # LEFTWARDS ARROW
<U2190><UF870> \xAC\x89 |3 # LEFTWARDS ARROW, angle head, white, large
+<U2190><UF871> \xA8\x69 |3 # LEFTWARDS ARROW, umbrella
+<U2190><UF872> \xA8\x6B |3 # LEFTWARDS ARROW, teardrop
<U2190><UF873> \xAC\x5D |3 # LEFTWARDS ARROW, small bold
<U2190><UF874> \xAC\x66 |3 # LEFTWARDS ARROW, curved head, white
<U2190><UF875> \xA8\x63 |3 # LEFTWARDS ARROW, alternate, white
@@ -503,21 +506,22 @@ CHARMAP
<U2190><UF87A> \xAC\x55 |3 # LEFTWARDS ARROW, angle head, white
<U2190><UF87B> \xA8\x42 |3 # LEFTWARDS ARROW, light
<U2190><UF87C> \xA8\x4E |3 # LEFTWARDS ARROW, bold
-<U2190><UF87F> \xA8\x5F |3 # LEFTWARDS ARROW, alternate
+<U2190><UF87F> \xA8\x5F |3 # LEFTWARDS ARROW, alternate (heavy round-tipped)
<U2191> \xA1\xE8 |0 # UPWARDS ARROW
<U2191><UF870> \xAC\x8B |3 # UPWARDS ARROW, angle head, white, large
+<U2191><UF872> \xA8\x6D |3 # UPWARDS ARROW, teardrop
<U2191><UF873> \xAC\x60 |3 # UPWARDS ARROW, small bold
<U2191><UF874> \xAC\x68 |3 # UPWARDS ARROW, curved head, white
<U2191><UF875> \xA8\x65 |3 # UPWARDS ARROW, alternate, white
<U2191><UF878> \xAC\x64 |3 # UPWARDS ARROW, curved head
<U2191><UF879> \xA8\x55 |3 # UPWARDS ARROW, large
-<U2191><UF87A> \xAC\x57 |3 # UPWARDS ARROW, angle head, whitee
+<U2191><UF87A> \xAC\x57 |3 # UPWARDS ARROW, angle head, white
<U2191><UF87B> \xA8\x43 |3 # UPWARDS ARROW, light
<U2191><UF87C> \xA8\x4F |3 # UPWARDS ARROW, bold
<U2191><UF87F> \xA8\x61 |3 # UPWARDS ARROW, alternate
<U2192> \xA1\xE6 |0 # RIGHTWARDS ARROW
<U2192><UF870> \xAC\x8A |3 # RIGHTWARDS ARROW, angle head, white, large
-<U2192><UF873> \xAC\x5E |3 # RIGHTWARDS ARROW, small bold
+<U2192><UF872> \xA8\x6C |3 # RIGHTWARDS ARROW, teardrop
<U2192><UF874> \xAC\x67 |3 # RIGHTWARDS ARROW, curved head, white
<U2192><UF875> \xA8\x64 |3 # RIGHTWARDS ARROW, alternate, white
<U2192><UF878> \xAC\x63 |3 # RIGHTWARDS ARROW, curved head
@@ -525,9 +529,9 @@ CHARMAP
<U2192><UF87A> \xAC\x56 |3 # RIGHTWARDS ARROW, angle head, white
<U2192><UF87B> \xA8\x41 |3 # RIGHTWARDS ARROW, light
<U2192><UF87C> \xA8\x4D |3 # RIGHTWARDS ARROW, bold
-<U2192><UF87F> \xA8\x60 |3 # RIGHTWARDS ARROW, alternate
<U2193> \xA1\xE9 |0 # DOWNWARDS ARROW
<U2193><UF870> \xAC\x8C |3 # DOWNWARDS ARROW, angle head, white, large
+<U2193><UF872> \xA8\x6E |3 # DOWNWARDS ARROW, teardrop
<U2193><UF873> \xAC\x61 |3 # DOWNWARDS ARROW, small bold
<U2193><UF874> \xAC\x69 |3 # DOWNWARDS ARROW, curved head, white
<U2193><UF875> \xA8\x66 |3 # DOWNWARDS ARROW, alternate, white
@@ -549,6 +553,8 @@ CHARMAP
<U2198><UF87B> \xA8\x47 |3 # SOUTH EAST ARROW, light
<U2199> \xA2\xD7 |0 # SOUTH WEST ARROW
<U2199><UF87B> \xA8\x48 |3 # SOUTH WEST ARROW, light
+<U219C> \xAC\x53 |0 # LEFTWARDS WAVE ARROW
+<U219D> \xAC\x52 |0 # RIGHTWARDS WAVE ARROW
<U21B0> \xA8\x82 |0 # UPWARDS ARROW WITH TIP LEFTWARDS
<U21B0><UF87A> \xAC\x7A |3 # UPWARDS ARROW WITH TIP LEFTWARDS, curved, white
<U21B0><UF87C> \xAC\x85 |3 # UPWARDS ARROW WITH TIP LEFTWARDS, curved
@@ -558,70 +564,76 @@ CHARMAP
<U21B1><UF87C> \xAC\x82 |3 # UPWARDS ARROW WITH TIP RIGHTWARDS, curved
<U21B1><UF87F> \xA8\x87 |3 # UPWARDS ARROW WITH TIP RIGHTWARDS, curved
<U21B2> \xA8\x7B |0 # DOWNWARDS ARROW WITH TIP LEFTWARDS
-<U21B2><UF87A> \xAC\x76 |3 # DOWNWARDS ARROW WITH TIP LEFTWARDS, curved, white
-<U21B2><UF87C> \xAC\x81 |3 # DOWNWARDS ARROW WITH TIP LEFTWARDS, curved
-<U21B2><UF87F> \xA8\x86 |3 # DOWNWARDS ARROW WITH TIP LEFTWARDS, curved
<U21B3> \xA8\x83 |0 # DOWNWARDS ARROW WITH TIP RIGHTWARDS
-<U21B3><UF87A> \xAC\x7B |3 # DOWNWARDS ARROW WITH TIP RIGHTWARDS, curved, white
-<U21B3><UF87C> \xAC\x86 |3 # DOWNWARDS ARROW WITH TIP RIGHTWARDS, curved
-<U21B3><UF87F> \xA8\x8B |3 # DOWNWARDS ARROW WITH TIP RIGHTWARDS, curved
<U21B4> \xA8\x81 |0 # RIGHTWARDS ARROW WITH CORNER DOWNWARDS
-<U21B6><UF87A> \xAC\x7C |3 # ANTICLOCKWISE TOP SEMICIRCLE ARROW, curved, white
-<U21B6><UF87C> \xAC\x87 |3 # ANTICLOCKWISE TOP SEMICIRCLE ARROW, curved
-<U21B6><UF87F> \xA8\x8C |3 # ANTICLOCKWISE TOP SEMICIRCLE ARROW, curved
-<U21B7><UF87A> \xAC\x79 |3 # CLOCKWISE TOP SEMICIRCLE ARROW, curved, white
-<U21B7><UF87C> \xAC\x84 |3 # CLOCKWISE TOP SEMICIRCLE ARROW, curved
-<U21B7><UF87F> \xA8\x89 |3 # CLOCKWISE TOP SEMICIRCLE ARROW, curved
-<U21BA><UF87A> \xAC\x7D |3 # ANTICLOCKWISE OPEN CIRCLE ARROW, curved, white
-<U21BA><UF87C> \xAC\x88 |3 # ANTICLOCKWISE OPEN CIRCLE ARROW, curved
-<U21BA><UF87F> \xA8\x8D |3 # ANTICLOCKWISE OPEN CIRCLE ARROW, curved
+<U21B6> \xAC\x50 |0 # ANTICLOCKWISE TOP SEMICIRCLE ARROW: up arrow with tip curving left and down
+<U21B7> \xAC\x51 |0 # CLOCKWISE TOP SEMICIRCLE ARROW: up arrow with tip curving right and down
<U21BB><UF87A> \xAC\x78 |3 # CLOCKWISE OPEN CIRCLE ARROW, curved, white
+<U21BB><UF87B> \xA8\x7D |3 # CLOCKWISE OPEN CIRCLE ARROW, alternate: leftwards arrow with tip upwards
<U21BB><UF87C> \xAC\x83 |3 # CLOCKWISE OPEN CIRCLE ARROW, curved
<U21BB><UF87F> \xA8\x88 |3 # CLOCKWISE OPEN CIRCLE ARROW, curved
<U21BC> \xA8\x92 |0 # LEFTWARDS HARPOON WITH BARB UPWARDS
-<U21BC><UF879> \xA8\x98 |3 # LEFTWARDS HARPOON WITH BARB UPWARDS + tag: large
-<U21BC><UF87F> \xAC\x4C |3 # LEFTWARDS HARPOON WITH BARB UPWARDS + tag: alt form
+<U21BC><UF879> \xA8\x99 |3 # LEFTWARDS HARPOON WITH BARB UPWARDS, large
+<U21BC><UF87F> \xAC\x4D |3 # LEFTWARDS HARPOON WITH BARB UPWARDS, alternate
<U21C0> \xA8\x93 |0 # RIGHTWARDS HARPOON WITH BARB UPWARDS
-<U21C0><UF879> \xA8\x99 |3 # RIGHTWARDS HARPOON WITH BARB UPWARDS + tag: large
-<U21C0><UF87F> \xAC\x4D |3 # RIGHTWARDS HARPOON WITH BARB UPWARDS + tag: alt form
+<U21C0><UF879> \xA8\x98 |3 # RIGHTWARDS HARPOON WITH BARB UPWARDS, large
+<U21C0><UF87F> \xAC\x4C |3 # RIGHTWARDS HARPOON WITH BARB UPWARDS, alternate
<U21C4> \xA8\x9E |0 # RIGHTWARDS ARROW OVER LEFTWARDS ARROW
<U21C5> \xA8\x9F |0 # UPWARDS ARROW LEFTWARDS OF DOWNWARDS ARROW
<U21CD> \xA8\x4B |0 # LEFTWARDS DOUBLE ARROW WITH STROKE
<U21CF> \xA8\x4A |0 # RIGHTWARDS DOUBLE ARROW WITH STROKE
<U21D0> \xA8\x49 |0 # LEFTWARDS DOUBLE ARROW
-<U21D0><UF878> \xAC\x45 |3 # LEFTWARDS DOUBLE ARROW, small
-<U21D0><UF87F> \xA8\x6F |3 # LEFTWARDS DOUBLE ARROW, alternate
-<U21D1> \xA8\x71 |0 # UPWARDS DOUBLE ARROW
+<U21D0><UF87C> \xA8\x9B |3 # LEFTWARDS DOUBLE ARROW, small white tapered
<U21D2> \xA2\xA1 |0 # RIGHTWARDS DOUBLE ARROW
-<U21D2><UF878> \xAC\x44 |3 # RIGHTWARDS DOUBLE ARROW, small
-<U21D2><UF87F> \xA8\x70 |3 # RIGHTWARDS DOUBLE ARROW, alternate
-<U21D3> \xA8\x72 |0 # DOWNWARDS DOUBLE ARROW
+<U21D2><UF87C> \xA8\x9A |3 # RIGHTWARDS DOUBLE ARROW, small white tapered
<U21D4> \xA2\xA2 |0 # LEFT RIGHT DOUBLE ARROW
+<U21D4><UF879> \xA8\x95 |3 # LEFT RIGHT DOUBLE ARROW, heavy
<U21D4><UF87F> \xA8\x4C |3 # LEFT RIGHT DOUBLE ARROW, duplicate of 0xA2A2
<U21E0> \xAC\x6A |0 # LEFTWARDS DASHED ARROW
<U21E1> \xAC\x6C |0 # UPWARDS DASHED ARROW
<U21E2> \xAC\x6B |0 # RIGHTWARDS DASHED ARROW
<U21E3> \xAC\x6D |0 # DOWNWARDS DASHED ARROW
<U21E6> \xAC\x72 |0 # LEFTWARDS WHITE ARROW
+<U21E6><U20DD> \xA8\x5B |3 # LEFTWARDS WHITE ARROW + COMBINING ENCLOSING CIRCLE
+<U21E6><U20DE> \xA8\x57 |3 # LEFTWARDS WHITE ARROW + COMBINING ENCLOSING SQUARE
+<U21E6><UF870> \xAC\x47 |3 # LEFTWARDS WHITE ARROW, negative: heavy black
+<U21E6><UF874> \xA8\x97 |3 # LEFTWARDS WHITE ARROW, heavy tapered
<U21E6><UF875> \xAC\x6E |3 # LEFTWARDS WHITE ARROW, heavy, negative
<U21E6><UF878> \xAD\xA6 |3 # LEFTWARDS WHITE ARROW, small
<U21E6><UF879> \xA8\x8E |3 # LEFTWARDS WHITE ARROW, large
+<U21E6><UF87A> \xA8\x73 |3 # LEFTWARDS WHITE ARROW, negative: heavy black
+<U21E6><UF87B> \xA8\x77 |3 # LEFTWARDS WHITE ARROW, negative: medium black
<U21E6><UF87C> \xAC\x59 |3 # LEFTWARDS WHITE ARROW, triangle head, white
+<U21E6><UF87F> \xAC\x4F |3 # LEFTWARDS WHITE ARROW, negative: black, demarcated head
<U21E7> \xAC\x74 |0 # UPWARDS WHITE ARROW
+<U21E7><U20DD> \xA8\x5D |3 # UPWARDS WHITE ARROW + COMBINING ENCLOSING CIRCLE
+<U21E7><U20DE> \xA8\x59 |3 # UPWARDS WHITE ARROW + COMBINING ENCLOSING SQUARE
<U21E7><UF875> \xAC\x70 |3 # UPWARDS WHITE ARROW, heavy, negative
<U21E7><UF878> \xAD\xA7 |3 # UPWARDS WHITE ARROW, small
<U21E7><UF879> \xA8\x90 |3 # UPWARDS WHITE ARROW, large
+<U21E7><UF87A> \xA8\x75 |3 # UPWARDS WHITE ARROW, negative: heavy black
+<U21E7><UF87B> \xA8\x79 |3 # UPWARDS WHITE ARROW, negative: medium black
<U21E7><UF87C> \xAC\x5B |3 # UPWARDS WHITE ARROW, triangle head, white
+<U21E7><UF87F> \xAD\xAF |3 # UPWARDS WHITE ARROW, alternate
<U21E8> \xAC\x73 |0 # RIGHTWARDS WHITE ARROW
+<U21E8><U20DE> \xA8\x58 |3 # RIGHTWARDS WHITE ARROW + COMBINING ENCLOSING SQUARE
+<U21E8><UF870> \xAC\x46 |3 # RIGHTWARDS WHITE ARROW, negative: heavy black
+<U21E8><UF874> \xA8\x96 |3 # RIGHTWARDS WHITE ARROW, heavy tapered
<U21E8><UF875> \xAC\x6F |3 # RIGHTWARDS WHITE ARROW, heavy, negative
<U21E8><UF878> \xAD\xA5 |3 # RIGHTWARDS WHITE ARROW, small
<U21E8><UF879> \xA8\x8F |3 # RIGHTWARDS WHITE ARROW, large
<U21E8><UF87C> \xAC\x5A |3 # RIGHTWARDS WHITE ARROW, triangle head, white
+<U21E8><UF87F> \xAC\x4E |3 # RIGHTWARDS WHITE ARROW, negative: black, demarcated head
<U21E9> \xAC\x75 |0 # DOWNWARDS WHITE ARROW
+<U21E9><U20DD> \xA8\x5E |3 # DOWNWARDS WHITE ARROW + COMBINING ENCLOSING CIRCLE
+<U21E9><U20DE> \xA8\x5A |3 # DOWNWARDS WHITE ARROW + COMBINING ENCLOSING SQUARE
<U21E9><UF875> \xAC\x71 |3 # DOWNWARDS WHITE ARROW, heavy, negative
<U21E9><UF878> \xAD\xA8 |3 # DOWNWARDS WHITE ARROW, small
<U21E9><UF879> \xA8\x91 |3 # DOWNWARDS WHITE ARROW, large
+<U21E9><UF87A> \xA8\x76 |3 # DOWNWARDS WHITE ARROW, negative: heavy black
+<U21E9><UF87B> \xA8\x7A |3 # DOWNWARDS WHITE ARROW, negative: medium black
<U21E9><UF87C> \xAC\x5C |3 # DOWNWARDS WHITE ARROW, triangle head, white
+<U21F0> \xAC\x41 |0 # RIGHTWARDS WHITE ARROW FROM WALL # for Unicode 3.0 and later
<U2200> \xA2\xA3 |0 # FOR ALL
<U2202> \xA1\xD3 |0 # PARTIAL DIFFERENTIAL
<U2203> \xA2\xA4 |0 # THERE EXISTS
@@ -647,6 +659,7 @@ CHARMAP
<U2222> \xA7\x68 |0 # SPHERICAL ANGLE
<U2222><UF87F> \xA4\x98 |3 # SPHERICAL ANGLE, alternate
<U2225> \xA7\x55 |0 # PARALLEL TO
+<U2225><U0347> \xA4\x9E |3 # PARALLEL TO+COMBINING EQUALS SIGN BELOW # for Unicode 3.0 and later
<U2226> \xA7\x56 |0 # NOT PARALLEL TO
<U2227> \xA1\xFC |0 # LOGICAL AND
<U2228> \xA1\xFD |0 # LOGICAL OR
@@ -663,6 +676,7 @@ CHARMAP
<U2235> \xA1\xF1 |0 # BECAUSE
<U2237> \xA2\xFE |0 # PROPORTION
<U223D> \xA1\xEF |0 # REVERSED TILDE
+<U223D><U0336> \xA7\x79 |3 # REVERSED TILDE+COMBINING LONG STROKE OVERLAY
<U223D><UF877> \xA1\x75 |3 # REVERSED TILDE, superscript
<U2243> \xA4\x9A |0 # ASYMPTOTICALLY EQUAL TO
<U2245> \xA4\x99 |0 # APPROXIMATELY EQUAL TO
@@ -675,6 +689,8 @@ CHARMAP
<U2260> \xA1\xC1 |0 # NOT EQUAL TO
<U2260><UF877> \xA1\x7B |3 # NOT EQUAL TO, superscript
<U2261> \xA1\xD5 |0 # IDENTICAL TO
+<U2261><U20D2> \xA7\x6E |3 # IDENTICAL TO+COMBINING LONG VERTICAL LINE OVERLAY
+<U2261><U20E5> \xA7\x63 |3 # IDENTICAL TO+COMBINING REVERSE SOLIDUS OVERLAY # for Unicode 3.2 and later
<U2262> \xA7\x64 |0 # NOT IDENTICAL TO
<U2264> \xA1\xC2 |0 # LESS-THAN OR EQUAL TO
<U2265> \xA1\xC3 |0 # GREATER-THAN OR EQUAL TO
@@ -701,11 +717,7 @@ CHARMAP
<U2284> \xA7\x72 |0 # NOT A SUBSET OF
<U2285> \xA7\x71 |0 # NOT A SUPERSET OF
<U2286> \xA1\xF6 |0 # SUBSET OF OR EQUAL TO
-<U2286><UF87F> \xA4\x8B |3 # SUBSET OF OR EQUAL TO, alternate
<U2287> \xA1\xF7 |0 # SUPERSET OF OR EQUAL TO
-<U2287><UF87F> \xA4\x8D |3 # SUPERSET OF OR EQUAL TO, alternate
-<U228A> \xA4\x8C |0 # SUBSET OF WITH NOT EQUAL TO
-<U228B> \xA4\x8E |0 # SUPERSET OF WITH NOT EQUAL TO
<U2295> \xA7\x5D |0 # CIRCLED PLUS
<U2296> \xA7\x5E |0 # CIRCLED MINUS
<U2297> \xA7\x5F |0 # CIRCLED TIMES
@@ -724,6 +736,7 @@ CHARMAP
<U2312> \xA1\xD2 |0 # ARC
<U2314> \xA7\x61 |0 # SECTOR
<U2314><UF87F> \xA7\x7A |3 # SECTOR, alternate
+<U2394><UF876> \xA7\x48 |3 # SOFTWARE-FUNCTION SYMBOL, rotated (small hexagon) # for Unicode 3.0 and later
<U2460> \xA8\xE7 |0 # CIRCLED DIGIT ONE
<U2460><UF87F> \xA5\x4C |3 # CIRCLED DIGIT ONE, serif, bold
<U2461> \xA8\xE8 |0 # CIRCLED DIGIT TWO
@@ -744,45 +757,15 @@ CHARMAP
<U2468><UF87F> \xA5\x54 |3 # CIRCLED DIGIT NINE, serif, bold
<U2469> \xA8\xF0 |0 # CIRCLED NUMBER TEN
<U246A> \xA8\xF1 |0 # CIRCLED NUMBER ELEVEN
-<U246A><UF875> \xA3\x5F |3 # dingbat negative circled sans number eleven
-<U246A><UF87A> \xA6\xEF |3 # CIRCLED NUMBER ELEVEN, negative
-<U246A><UF87F> \xA4\x73 |3 # CIRCLED NUMBER ELEVEN, negative, sans, light
<U246B> \xA8\xF2 |0 # CIRCLED NUMBER TWELVE
-<U246B><UF875> \xA3\x60 |3 # dingbat negative circled sans number twelve
-<U246B><UF87A> \xA6\xF0 |3 # CIRCLED NUMBER TWELVE, negative
-<U246B><UF87F> \xA4\x74 |3 # CIRCLED NUMBER TWELVE negative, sans, light
<U246C> \xA8\xF3 |0 # CIRCLED NUMBER THIRTEEN
-<U246C><UF875> \xA3\x61 |3 # dingbat negative circled sans number thirteen
-<U246C><UF87A> \xA6\xF1 |3 # CIRCLED NUMBER THIRTEEN, negative
-<U246C><UF87F> \xA4\x75 |3 # CIRCLED NUMBER THIRTEEN negative, sans, light
<U246D> \xA8\xF4 |0 # CIRCLED NUMBER FOURTEEN
-<U246D><UF875> \xA3\x62 |3 # dingbat negative circled sans number fourteen
-<U246D><UF87A> \xA6\xF2 |3 # CIRCLED NUMBER FOURTEEN, negative
-<U246D><UF87F> \xA4\x76 |3 # CIRCLED NUMBER FOURTEEN negative, sans, light
<U246E> \xA8\xF5 |0 # CIRCLED NUMBER FIFTEEN
-<U246E><UF875> \xA3\x63 |3 # dingbat negative circled sans number fifteen
-<U246E><UF87A> \xA6\xF3 |3 # CIRCLED NUMBER FIFTEEN, negative
-<U246E><UF87F> \xA4\x77 |3 # CIRCLED NUMBER FIFTEEN negative, sans, light
<U246F> \xA7\xF0 |0 # CIRCLED NUMBER SIXTEEN
-<U246F><UF875> \xA3\x64 |3 # dingbat negative circled sans number sixteen
-<U246F><UF87A> \xA6\xF4 |3 # CIRCLED NUMBER SIXTEEN, negative
-<U246F><UF87F> \xA4\x78 |3 # CIRCLED NUMBER SIXTEEN negative, sans, light
<U2470> \xA7\xF1 |0 # CIRCLED NUMBER SEVENTEEN
-<U2470><UF875> \xA3\x65 |3 # dingbat negative circled sans number seventeen
-<U2470><UF87A> \xA6\xF5 |3 # CIRCLED NUMBER SEVENTEEN, negative
-<U2470><UF87F> \xA4\x79 |3 # CIRCLED NUMBER SEVENTEEN negative, sans, light
<U2471> \xA7\xF2 |0 # CIRCLED NUMBER EIGHTEEN
-<U2471><UF875> \xA3\x66 |3 # dingbat negative circled sans number eighteen
-<U2471><UF87A> \xA6\xF6 |3 # CIRCLED NUMBER EIGHTEEN, negative
-<U2471><UF87F> \xA4\x7A |3 # CIRCLED NUMBER EIGHTEEN negative, sans, light
<U2472> \xA7\xF3 |0 # CIRCLED NUMBER NINETEEN
-<U2472><UF875> \xA3\x67 |3 # dingbat negative circled sans number nineteen
-<U2472><UF87A> \xA6\xF7 |3 # CIRCLED NUMBER NINETEEN, negative
-<U2472><UF87F> \xA4\x7B |3 # CIRCLED NUMBER NINETEEN negative, sans, light
<U2473> \xA7\xF4 |0 # CIRCLED NUMBER TWENTY
-<U2473><UF875> \xA3\x68 |3 # dingbat negative circled sans number twenty
-<U2473><UF87A> \xA6\xF8 |3 # CIRCLED NUMBER TWENTY, negative
-<U2473><UF87F> \xA4\x7C |3 # CIRCLED NUMBER TWENTY negative, sans, light
<U2474> \xA9\xE7 |0 # PARENTHESIZED DIGIT ONE
<U2475> \xA9\xE8 |0 # PARENTHESIZED DIGIT TWO
<U2476> \xA9\xE9 |0 # PARENTHESIZED DIGIT THREE
@@ -882,6 +865,36 @@ CHARMAP
<U24E8> \xA8\xE5 |0 # CIRCLED LATIN SMALL LETTER Y
<U24E9> \xA8\xE6 |0 # CIRCLED LATIN SMALL LETTER Z
<U24EA><UF87F> \xA5\x4B |3 # CIRCLED DIGIT ZERO, serif, bold
+<U24EB> \xA6\xEF |0 # NEGATIVE CIRCLED NUMBER ELEVEN # for Unicode 3.2 and later
+<U24EB><UF878> \xA4\x73 |3 # NEGATIVE CIRCLED NUMBER ELEVEN, sans, light # for Unicode 3.2 and later
+<U24EB><UF87F> \xA3\x5F |3 # NEGATIVE CIRCLED NUMBER ELEVEN, sans serif # for Unicode 3.2 and later
+<U24EC> \xA6\xF0 |0 # NEGATIVE CIRCLED NUMBER TWELVE # for Unicode 3.2 and later
+<U24EC><UF878> \xA4\x74 |3 # NEGATIVE CIRCLED NUMBER TWELVE, sans, light # for Unicode 3.2 and later
+<U24EC><UF87F> \xA3\x60 |3 # NEGATIVE CIRCLED NUMBER TWELVE, sans serif # for Unicode 3.2 and later
+<U24ED> \xA6\xF1 |0 # NEGATIVE CIRCLED NUMBER THIRTEEN # for Unicode 3.2 and later
+<U24ED><UF878> \xA4\x75 |3 # NEGATIVE CIRCLED NUMBER THIRTEEN, sans, light # for Unicode 3.2 and later
+<U24ED><UF87F> \xA3\x61 |3 # NEGATIVE CIRCLED NUMBER THIRTEEN, sans serif # for Unicode 3.2 and later
+<U24EE> \xA6\xF2 |0 # NEGATIVE CIRCLED NUMBER FOURTEEN # for Unicode 3.2 and later
+<U24EE><UF878> \xA4\x76 |3 # NEGATIVE CIRCLED NUMBER FOURTEEN, sans, light # for Unicode 3.2 and later
+<U24EE><UF87F> \xA3\x62 |3 # NEGATIVE CIRCLED NUMBER FOURTEEN, sans serif # for Unicode 3.2 and later
+<U24EF> \xA6\xF3 |0 # NEGATIVE CIRCLED NUMBER FIFTEEN # for Unicode 3.2 and later
+<U24EF><UF878> \xA4\x77 |3 # NEGATIVE CIRCLED NUMBER FIFTEEN, sans, light # for Unicode 3.2 and later
+<U24EF><UF87F> \xA3\x63 |3 # NEGATIVE CIRCLED NUMBER FIFTEEN, sans serif # for Unicode 3.2 and later
+<U24F0> \xA6\xF4 |0 # NEGATIVE CIRCLED NUMBER SIXTEEN # for Unicode 3.2 and later
+<U24F0><UF878> \xA4\x78 |3 # NEGATIVE CIRCLED NUMBER SIXTEEN, sans, light # for Unicode 3.2 and later
+<U24F0><UF87F> \xA3\x64 |3 # NEGATIVE CIRCLED NUMBER SIXTEEN, sans serif # for Unicode 3.2 and later
+<U24F1> \xA6\xF5 |0 # NEGATIVE CIRCLED NUMBER SEVENTEEN # for Unicode 3.2 and later
+<U24F1><UF878> \xA4\x79 |3 # NEGATIVE CIRCLED NUMBER SEVENTEEN, sans, light # for Unicode 3.2 and later
+<U24F1><UF87F> \xA3\x65 |3 # NEGATIVE CIRCLED NUMBER SEVENTEEN, sans serif # for Unicode 3.2 and later
+<U24F2> \xA6\xF6 |0 # NEGATIVE CIRCLED NUMBER EIGHTEEN # for Unicode 3.2 and later
+<U24F2><UF878> \xA4\x7A |3 # NEGATIVE CIRCLED NUMBER EIGHTEEN, sans, light # for Unicode 3.2 and later
+<U24F2><UF87F> \xA3\x66 |3 # NEGATIVE CIRCLED NUMBER EIGHTEEN, sans serif # for Unicode 3.2 and later
+<U24F3> \xA6\xF7 |0 # NEGATIVE CIRCLED NUMBER NINETEEN # for Unicode 3.2 and later
+<U24F3><UF878> \xA4\x7B |3 # NEGATIVE CIRCLED NUMBER NINETEEN, sans, light # for Unicode 3.2 and later
+<U24F3><UF87F> \xA3\x67 |3 # NEGATIVE CIRCLED NUMBER NINETEEN, sans serif # for Unicode 3.2 and later
+<U24F4> \xA6\xF8 |0 # NEGATIVE CIRCLED NUMBER TWENTY # for Unicode 3.2 and later
+<U24F4><UF878> \xA4\x7C |3 # NEGATIVE CIRCLED NUMBER TWENTY, sans, light # for Unicode 3.2 and later
+<U24F4><UF87F> \xA3\x68 |3 # NEGATIVE CIRCLED NUMBER TWENTY, sans serif # for Unicode 3.2 and later
<U2500> \xA6\xA1 |0 # BOX DRAWINGS LIGHT HORIZONTAL
<U2501> \xA6\xAC |0 # BOX DRAWINGS HEAVY HORIZONTAL
<U2502> \xA6\xA2 |0 # BOX DRAWINGS LIGHT VERTICAL
@@ -950,28 +963,33 @@ CHARMAP
<U2549> \xA6\xE3 |0 # BOX DRAWINGS RIGHT LIGHT AND LEFT VERTICAL HEAVY
<U254A> \xA6\xE4 |0 # BOX DRAWINGS LEFT LIGHT AND RIGHT VERTICAL HEAVY
<U254B> \xA6\xB6 |0 # BOX DRAWINGS HEAVY VERTICAL AND HORIZONTAL
+<U2588> \xA7\x8F |0 # FULL BLOCK
<U2592> \xA2\xC6 |0 # MEDIUM SHADE
<U25A0> \xA1\xE1 |0 # BLACK SQUARE
<U25A0><U20DF> \xA6\x56 |3 # BLACK SQUARE + COMBINING ENCLOSING DIAMOND
-<U25A0><UF879> \xA7\x8F |3 # BLACK SQUARE, large
<U25A1> \xA1\xE0 |0 # WHITE SQUARE
-<U25A1><U20DE> \xA6\x64 |3 # WHITE SQUARE + COMBINING ENCLOSING SQUARE
<U25A1><U20DF> \xA6\x59 |3 # WHITE SQUARE + COMBINING ENCLOSING DIAMOND
<U25A1><UF879> \xA7\x8D |3 # WHITE SQUARE, large
<U25A1><UF87B> \xA7\x8E |3 # WHITE SQUARE, large, bold
<U25A1><UF87C> \xA7\x8C |3 # WHITE SQUARE, bold
+<U25A2> \xA6\x78 |0 # WHITE SQUARE WITH ROUNDED CORNERS
<U25A3> \xA2\xC3 |0 # WHITE SQUARE CONTAINING BLACK SMALL SQUARE
<U25A4> \xA2\xC7 |0 # SQUARE WITH HORIZONTAL FILL
<U25A5> \xA2\xC8 |0 # SQUARE WITH VERTICAL FILL
<U25A6> \xA2\xCB |0 # SQUARE WITH ORTHOGONAL CROSSHATCH FILL
<U25A7> \xA2\xCA |0 # SQUARE WITH UPPER LEFT TO LOWER RIGHT FILL
<U25A8> \xA2\xC9 |0 # SQUARE WITH UPPER RIGHT TO LOWER LEFT FILL
+<U25A8><UF87F> \xA6\x8A |3 # SQUARE WITH UPPER RIGHT TO LOWER LEFT FILL, alternate
<U25A9> \xA2\xCC |0 # SQUARE WITH DIAGONAL CROSSHATCH FILL
+<U25AD> \xA7\x4A |0 # WHITE RECTANGLE
+<U25AD><UF878> \xA7\x49 |3 # WHITE RECTANGLE, small
<U25B1> \xA7\x66 |0 # WHITE PARALLELOGRAM
<U25B2> \xA1\xE3 |0 # BLACK UP-POINTING TRIANGLE
<U25B2><U20DD> \xA6\x6B |3 # BLACK UP-POINTING TRIANGLE + COMBINING ENCLOSING CIRCLE
<U25B3> \xA1\xE2 |0 # WHITE UP-POINTING TRIANGLE
<U25B3><U20DD> \xA6\x6A |3 # WHITE UP-POINTING TRIANGLE + COMBINING ENCLOSING CIRCLE
+<U25B3><UF87F> \xA7\x45 |3 # WHITE UP-POINTING TRIANGLE, small
+<U25B4><U20E4> \xA7\x9B |3 # BLACK UP-POINTING SMALL TRIANGLE + COMBINING ENCLOSING UPWARD POINTING TRIANGLE # for Unicode 3.2 and later
<U25B5> \xA7\x95 |0 # WHITE UP-POINTING SMALL TRIANGLE
<U25B6> \xA2\xBA |0 # BLACK RIGHT-POINTING TRIANGLE
<U25B7> \xA2\xB9 |0 # WHITE RIGHT-POINTING TRIANGLE
@@ -988,28 +1006,40 @@ CHARMAP
<U25C7> \xA1\xDE |0 # WHITE DIAMOND
<U25C7><U20DE> \xA6\x62 |3 # WHITE DIAMOND + COMBINING ENCLOSING SQUARE
<U25C7><U20DF> \xA6\x57 |3 # WHITE DIAMOND + COMBINING ENCLOSING DIAMOND
+<U25C7><U20DF><U20DF> \xA6\x61 |3 # WHITE DIAMOND + COMBINING ENCLOSING DIAMOND + COMBINING ENCLOSING DIAMOND
<U25C7><UF879> \xA7\x89 |3 # WHITE DIAMOND, large
<U25C7><UF87B> \xA7\x8A |3 # WHITE DIAMOND, large, bold
<U25C7><UF87C> \xA7\x88 |3 # WHITE DIAMOND, bold
+<U25C7><UF87F> \xA7\x4E |3 # WHITE DIAMOND, flattened
<U25C8> \xA2\xC2 |0 # WHITE DIAMOND CONTAINING BLACK SMALL DIAMOND
+<U25C8><UF87F> \xA6\x89 |3 # WHITE DIAMOND CONTAINING BLACK SMALL DIAMOND, alternate
<U25C9> \xA2\xC1 |0 # FISHEYE
+<U25C9><U20DD> \xA6\x82 |3 # FISHEYE + COMBINING ENCLOSING CIRCLE
<U25CA> \xA7\x9C |0 # LOZENGE
<U25CB> \xA1\xDB |0 # WHITE CIRCLE
<U25CB><UF879> \xA7\x91 |3 # WHITE CIRCLE, large
<U25CB><UF87B> \xA7\x92 |3 # WHITE CIRCLE, large, bold
+<U25CB><UF87F> \xA7\x44 |3 # WHITE CIRCLE, small
<U25CC> \xA6\x75 |0 # DOTTED CIRCLE
+<U25CD> \xA6\x84 |0 # CIRCLE WITH VERTICAL FILL
<U25CE> \xA1\xDD |0 # BULLSEYE
-<U25CE><UF87F> \xA6\x68 |3 # BULLSEYE, alternate
+<U25CE><U20DD> \xA6\x69 |3 # BULLSEYE + COMBINING ENCLOSING CIRCLE
<U25CF> \xA1\xDC |0 # BLACK CIRCLE
<U25CF><UF879> \xA7\x93 |3 # BLACK CIRCLE, large
<U25D0> \xA2\xC4 |0 # CIRCLE WITH LEFT HALF BLACK
<U25D1> \xA2\xC5 |0 # CIRCLE WITH RIGHT HALF BLACK
<U25E6> \xA7\x90 |0 # WHITE BULLET
<U25EF> \xA6\x6F |0 # LARGE CIRCLE
+<U25EF><UF87C> \xA6\x70 |3 # LARGE CIRCLE, bold
+<U25FB> \xA7\x46 |0 # WHITE MEDIUM SQUARE # for Unicode 3.2 and later
+<U25FC> \xA7\x9A |0 # BLACK MEDIUM SQUARE # for Unicode 3.2 and later
<U2605> \xA1\xDA |0 # BLACK STAR
<U2606> \xA1\xD9 |0 # WHITE STAR
<U260E> \xA2\xCF |0 # BLACK TELEPHONE
<U260F> \xA2\xCE |0 # WHITE TELEPHONE
+<U2610> \xA6\x77 |0 # BALLOT BOX (large white square)
+<U2610><UF87C> \xA6\x71 |3 # BALLOT BOX, bold (large bold white square)
+<U2610><UF87F> \xA6\x76 |3 # BALLOT BOX, dotted
<U261C> \xA2\xD0 |0 # WHITE LEFT POINTING INDEX
<U261C><UF87F> \xA6\x5E |3 # WHITE LEFT POINTING INDEX, alternate
<U261D> \xAC\x8D |0 # WHITE UP POINTING INDEX
@@ -1029,6 +1059,7 @@ CHARMAP
<U2663> \xA2\xC0 |0 # BLACK CLUB SUIT
<U2664> \xA2\xBB |0 # WHITE SPADE SUIT
<U2665> \xA2\xBE |0 # BLACK HEART SUIT
+<U2666> \xA7\x98 |0 # BLACK DIAMOND SUIT
<U2667> \xA2\xBF |0 # WHITE CLUB SUIT
<U2668> \xA2\xCD |0 # HOT SPRINGS
<U2669> \xA2\xDB |0 # QUARTER NOTE
@@ -1038,9 +1069,22 @@ CHARMAP
<U266F> \xA6\x48 |0 # MUSIC SHARP SIGN
<U2716> \xA6\x6D |0 # HEAVY MULTIPLICATION X
<U271A> \xA6\x6C |0 # HEAVY GREEK CROSS
+<U2720> \xA6\x88 |0 # MALTESE CROSS
+<U2720><UF87A> \xA6\x87 |3 # MALTESE CROSS, white
+<U2723> \xA6\x72 |0 # FOUR BALLOON-SPOKED ASTERISK with balloon at center
+<U2723><UF87A> \xA6\x79 |3 # FOUR BALLOON-SPOKED ASTERISK with balloon at center, white
+<U2731> \xA6\x53 |0 # HEAVY ASTERISK (large 6-spokes line asterisk dingbat)
<U273D> \xA6\x52 |0 # HEAVY TEARDROP-SPOKED ASTERISK
+<U273F> \xA6\x7C |0 # BLACK FLORETTE
+<U273F><UF87A> \xA6\x7B |3 # BLACK FLORETTE, negative (white)
+<U2740> \xA6\x99 |0 # WHITE FLORETTE
+<U2741> \xA6\x8D |0 # EIGHT PETALLED OUTLINED BLACK FLORETTE
+<U2747> \xA6\x54 |0 # SPARKLE (small square 8-spoke line asterisk dingbat)
+<U2748><U20D8> \xA6\x9B |3 # HEAVY SPARKLE + COMBINING RING OVERLAY
+<U274D> \xA6\x83 |0 # SHADOWED WHITE CIRCLE
<U2756> \xA6\x73 |0 # BLACK DIAMOND MINUS WHITE X
<U2756><UF87A> \xA6\x7A |3 # BLACK DIAMOND MINUS WHITE X, negative
+<U2756><UF87F> \xA6\x8E |3 # BLACK DIAMOND MINUS WHITE X, alternate
<U2776> \xA6\xE5 |0 # DINGBAT NEGATIVE CIRCLED DIGIT ONE
<U2777> \xA6\xE6 |0 # DINGBAT NEGATIVE CIRCLED DIGIT TWO
<U2778> \xA6\xE7 |0 # DINGBAT NEGATIVE CIRCLED DIGIT THREE
@@ -1071,6 +1115,73 @@ CHARMAP
<U2792><UF87F> \xA4\x71 |3 # DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT NINE, light
<U2793> \xA3\x5E |0 # DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN
<U2793><UF87F> \xA4\x72 |3 # DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN, light
+<U2794> \xAC\x5E |0 # HEAVY WIDE-HEADED RIGHTWARDS ARROW: small bold
+<U279B> \xA8\x6A |0 # DRAFTING POINT RIGHTWARDS ARROW (umbrella)
+<U279C> \xA8\x60 |0 # HEAVY ROUND-TIPPED RIGHTWARDS ARROW
+<U279E> \xA8\x78 |0 # HEAVY TRIANGLE-HEADED RIGHTWARDS ARROW: medium black
+<U27A1> \xA8\x74 |0 # BLACK RIGHTWARDS ARROW: heavy black
+<U27A4> \xAC\x48 |0 # BLACK RIGHTWARDS ARROWHEAD
+<U27B2> \xA8\x5C |0 # CIRCLED HEAVY WHITE RIGHTWARDS ARROW
+<U27B5> \xAC\x43 |0 # BLACK-FEATHERED RIGHTWARDS ARROW
+<U27E1><U20DD> \xA6\x85 |3 # WHITE CONCAVE-SIDED DIAMOND (like star) + COMBINING ENCLOSING CIRCLE # for Unicode 3.2 and later
+<U2934> \xA8\x8D |0 # ARROW POINTING RIGHTWARDS THEN CURVING UPWARDS # for Unicode 3.2 and later
+<U2934><UF87A> \xAC\x7D |3 # ARROW POINTING RIGHTWARDS THEN CURVING UPWARDS, negative # for Unicode 3.2 and later
+<U2934><UF87C> \xAC\x88 |3 # ARROW POINTING RIGHTWARDS THEN CURVING UPWARDS, triangle head # for Unicode 3.2 and later
+<U2934><UF87F> \xA8\x85 |3 # ARROW POINTING RIGHTWARDS THEN CURVING UPWARDS, alternate: rightwards arrow with tip upwards # for Unicode 3.2 and later
+<U2935> \xA8\x89 |0 # ARROW POINTING RIGHTWARDS THEN CURVING DOWNWARDS # for Unicode 3.2 and later
+<U2935><UF87A> \xAC\x79 |3 # ARROW POINTING RIGHTWARDS THEN CURVING DOWNWARDS, negative # for Unicode 3.2 and later
+<U2935><UF87C> \xAC\x84 |3 # ARROW POINTING RIGHTWARDS THEN CURVING DOWNWARDS, triangle head # for Unicode 3.2 and later
+<U2936> \xA8\x86 |0 # ARROW POINTING DOWNWARDS THEN CURVING LEFTWARDS # for Unicode 3.2 and later
+<U2936><UF87A> \xAC\x76 |3 # ARROW POINTING DOWNWARDS THEN CURVING LEFTWARDS, negative # for Unicode 3.2 and later
+<U2936><UF87C> \xAC\x81 |3 # ARROW POINTING DOWNWARDS THEN CURVING LEFTWARDS, triangle head # for Unicode 3.2 and later
+<U2937> \xA8\x8B |0 # ARROW POINTING DOWNWARDS THEN CURVING RIGHTWARDS # for Unicode 3.2 and later
+<U2937><UF87A> \xAC\x7B |3 # ARROW POINTING DOWNWARDS THEN CURVING RIGHTWARDS, negative # for Unicode 3.2 and later
+<U2937><UF87C> \xAC\x86 |3 # ARROW POINTING DOWNWARDS THEN CURVING RIGHTWARDS, triangle head # for Unicode 3.2 and later
+<U2939> \xA8\x8C |0 # LEFT-SIDE ARC ANTICLOCKWISE ARROW # for Unicode 3.2 and later
+<U2939><UF87A> \xAC\x7C |3 # LEFT-SIDE ARC ANTICLOCKWISE ARROW, negative # for Unicode 3.2 and later
+<U2939><UF87C> \xAC\x87 |3 # LEFT-SIDE ARC ANTICLOCKWISE ARROW, triangle head # for Unicode 3.2 and later
+<U2939><UF87F> \xA8\x84 |3 # LEFT-SIDE ARC ANTICLOCKWISE ARROW, alternate: leftwards arrow with tip downwards # for Unicode 3.2 and later
+<U2962> \xA8\x6F |0 # LEFTWARDS HARPOON WITH BARB UP ABOVE LEFTWARDS HARPOON WITH BARB DOWN # for Unicode 3.2 and later
+<U2962><UF87F> \xAC\x45 |3 # LEFTWARDS HARPOON WITH BARB UP ABOVE LEFTWARDS HARPOON WITH BARB DOWN, alternate # for Unicode 3.2 and later
+<U2963> \xA8\x71 |0 # UPWARDS HARPOON WITH BARB LEFT BESIDE UPWARDS HARPOON WITH BARB RIGHT # for Unicode 3.2 and later
+<U2964> \xA8\x70 |0 # RIGHTWARDS HARPOON WITH BARB UP ABOVE RIGHTWARDS HARPOON WITH BARB DOWN # for Unicode 3.2 and later
+<U2964><UF87F> \xAC\x44 |3 # RIGHTWARDS HARPOON WITH BARB UP ABOVE RIGHTWARDS HARPOON WITH BARB DOWN, alternate # for Unicode 3.2 and later
+<U2965> \xA8\x72 |0 # DOWNWARDS HARPOON WITH BARB LEFT BESIDE DOWNWARDS HARPOON WITH BARB RIGHT # for Unicode 3.2 and later
+<U2981> \xA7\x99 |0 # Z NOTATION SPOT (small black circle) # for Unicode 3.2 and later
+<U2985> \xA1\x59 |0 # LEFT WHITE PARENTHESIS # for Unicode 3.2 or later
+<U2985><UF873> \xA2\x43 |3 # LEFT WHITE PARENTHESIS (double), small, bold # for Unicode 3.2 and later
+<U2985><UF878> \xA2\x41 |3 # LEFT WHITE PARENTHESIS (double), small # for Unicode 3.2 and later
+<U2985><UF879> \xA1\x53 |3 # LEFT WHITE PARENTHESIS (double), large # for Unicode 3.2 or later
+<U2985><UF87B> \xA1\x65 |3 # LEFT WHITE PARENTHESIS, bold, wide # for Unicode 3.2 or later
+<U2985><UF87C> \xA1\x55 |3 # LEFT WHITE PARENTHESIS, bold # for Unicode 3.2 or later
+<U2985><UF87F> \xA1\x51 |3 # LEFT WHITE PARENTHESIS (double), alternate # for Unicode 3.2 or later
+<U2986> \xA1\x5A |0 # RIGHT WHITE PARENTHESIS # for Unicode 3.2 or later
+<U2986><UF873> \xA2\x44 |3 # RIGHT WHITE PARENTHESIS (double), small, bold # for Unicode 3.2 and later
+<U2986><UF878> \xA2\x42 |3 # RIGHT WHITE PARENTHESIS (double), small # for Unicode 3.2 and later
+<U2986><UF879> \xA1\x54 |3 # RIGHT WHITE PARENTHESIS (double), large # for Unicode 3.2 or later
+<U2986><UF87B> \xA1\x66 |3 # RIGHT WHITE PARENTHESIS, bold, wide # for Unicode 3.2 or later
+<U2986><UF87C> \xA1\x56 |3 # RIGHT WHITE PARENTHESIS, bold # for Unicode 3.2 or later
+<U2986><UF87F> \xA1\x52 |3 # RIGHT WHITE PARENTHESIS (double), alternate # for Unicode 3.2 or later
+<U2997> \xA1\x99 |0 # LEFT BLACK TORTOISE SHELL BRACKET # for Unicode 3.2 or later
+<U2998> \xA1\x9A |0 # RIGHT BLACK TORTOISE SHELL BRACKET # for Unicode 3.2 or later
+<U29A3> \xA4\x9C |0 # REVERSED ANGLE # for Unicode 3.2 and later
+<U29BE> \xA6\x68 |0 # CIRCLED WHITE BULLET # for Unicode 3.2 and later
+<U29BF> \xA6\x6E |0 # CIRCLED BULLET # for Unicode 3.2 and later
+<U29C8> \xA6\x64 |0 # SQUARED SQUARE # for Unicode 3.2 and later
+<U29C8><U20DE> \xA6\x67 |3 # SQUARED SQUARE + COMBINING ENCLOSING SQUARE # for Unicode 3.2 and later
+<U2A26> \xA4\x7D |0 # PLUS SIGN WITH TILDE BELOW # for Unicode 3.2 and later
+<U2A38> \xA7\x60 |0 # CIRCLED DIVISION SIGN # for Unicode 3.2 and later
+<U2A72> \xA7\x7B |0 # PLUS SIGN ABOVE EQUALS SIGN # for Unicode 3.2 and later
+<U2A8B> \xA4\x94 |0 # LESS-THAN ABOVE DOUBLE-LINE EQUAL ABOVE GREATER-THAN # for Unicode 3.2 and later
+<U2A8C> \xA4\x95 |0 # GREATER-THAN ABOVE DOUBLE-LINE EQUAL ABOVE LESS-THAN # for Unicode 3.2 and later
+<U2A91> \xA4\x96 |0 # LESS-THAN ABOVE GREATER-THAN ABOVE DOUBLE-LINE EQUAL # for Unicode 3.2 and later
+<U2A92> \xA4\x97 |0 # GREATER-THAN ABOVE LESS-THAN ABOVE DOUBLE-LINE EQUAL # for Unicode 3.2 and later
+<U2AC5> \xA4\x8B |0 # SUBSET OF ABOVE EQUALS SIGN # for Unicode 3.2 and later
+<U2AC6> \xA4\x8D |0 # SUPERSET OF ABOVE EQUALS SIGN # for Unicode 3.2 and later
+<U2ACB> \xA4\x8C |0 # SUBSET OF ABOVE NOT EQUAL TO # for Unicode 3.2 and later
+<U2ACC> \xA4\x8E |0 # SUPERSET OF ABOVE NOT EQUAL TO # for Unicode 3.2 and later
+<U2AE8> \xA7\x6B |0 # SHORT UP TACK WITH UNDERBAR # for Unicode 3.2 and later
+<U2AFD><U0347> \xA4\x9F |3 # DOUBLE SOLIDUS OPERATOR+COMBINING EQUALS SIGN BELOW # for Unicode 3.2 and later
<U3000> \xA1\xA1 |0 # IDEOGRAPHIC SPACE
<U3001> \xA1\xA2 |0 # IDEOGRAPHIC COMMA # KSC spec: "comma for vertical use"
<U3002> \xA1\xA3 |0 # IDEOGRAPHIC FULL STOP # KSC spec: "period for vertical use"
@@ -1082,7 +1193,7 @@ CHARMAP
<U3009> \xA1\xB5 |0 # RIGHT ANGLE BRACKET
<U3009><UF878> \xA1\x4C |3 # RIGHT ANGLE BRACKET, small
<U300A> \xA1\xB6 |0 # LEFT DOUBLE ANGLE BRACKET
-<U300A><UF878> \xA1\x49 |3 # LEFT DOUBLE ANGLE BRACKET , small
+<U300A><UF878> \xA1\x49 |3 # LEFT DOUBLE ANGLE BRACKET, small
<U300B> \xA1\xB7 |0 # RIGHT DOUBLE ANGLE BRACKET
<U300B><UF878> \xA1\x4A |3 # RIGHT DOUBLE ANGLE BRACKET, small
<U300C> \xA1\xB8 |0 # LEFT CORNER BRACKET
@@ -1109,10 +1220,9 @@ CHARMAP
<U3011><UF87F> \xA1\x5C |3 # RIGHT BLACK LENTICULAR BRACKET, duplicate of 0xA1BD
<U3012> \xA7\x42 |0 # POSTAL MARK
<U3013> \xA1\xEB |0 # GETA MARK # KSC spec: "bad character mark"
+<U3013><UF87C> \xA6\x7D |3 # GETA MARK, bold (compare 0xA1EB->)
<U3014> \xA1\xB2 |0 # LEFT TORTOISE SHELL BRACKET
-<U3014><UF87C> \xA1\x99 |3 # LEFT TORTOISE SHELL BRACKET, bold
<U3015> \xA1\xB3 |0 # RIGHT TORTOISE SHELL BRACKET
-<U3015><UF87C> \xA1\x9A |3 # RIGHT TORTOISE SHELL BRACKET, bold
<U3016> \xA1\x5D |0 # LEFT WHITE LENTICULAR BRACKET
<U3016><UF878> \xA2\x47 |3 # LEFT WHITE LENTICULAR BRACKET, small
<U3017> \xA1\x5E |0 # RIGHT WHITE LENTICULAR BRACKET
@@ -1418,6 +1528,26 @@ CHARMAP
<U321C> \xA2\xDF |0 # PARENTHESIZED HANGUL CIEUC U
<U3231> \xA7\x9D |0 # PARENTHESIZED IDEOGRAPH STOCK
<U3239> \xA7\x9E |0 # PARENTHESIZED IDEOGRAPH REPRESENT
+<U3251> \xA7\xF5 |0 # CIRCLED NUMBER TWENTY ONE # for Unicode 3.2 and later
+<U3251><UF87A> \xA6\xF9 |3 # CIRCLED NUMBER TWENTY ONE, negative # for Unicode 3.2 and later
+<U3252> \xA7\xF6 |0 # CIRCLED NUMBER TWENTY TWO # for Unicode 3.2 and later
+<U3252><UF87A> \xA6\xFA |3 # CIRCLED NUMBER TWENTY TWO, negative # for Unicode 3.2 and later
+<U3253> \xA7\xF7 |0 # CIRCLED NUMBER TWENTY THREE # for Unicode 3.2 and later
+<U3253><UF87A> \xA6\xFB |3 # CIRCLED NUMBER TWENTY THREE, negative # for Unicode 3.2 and later
+<U3254> \xA7\xF8 |0 # CIRCLED NUMBER TWENTY FOUR # for Unicode 3.2 and later
+<U3254><UF87A> \xA6\xFC |3 # CIRCLED NUMBER TWENTY FOUR, negative # for Unicode 3.2 and later
+<U3255> \xA7\xF9 |0 # CIRCLED NUMBER TWENTY FIVE # for Unicode 3.2 and later
+<U3255><UF87A> \xA6\xFD |3 # CIRCLED NUMBER TWENTY FIVE, negative # for Unicode 3.2 and later
+<U3256> \xA7\xFA |0 # CIRCLED NUMBER TWENTY SIX # for Unicode 3.2 and later
+<U3256><UF87A> \xA6\xFE |3 # CIRCLED NUMBER TWENTY SIX, negative # for Unicode 3.2 and later
+<U3257> \xA7\xFB |0 # CIRCLED NUMBER TWENTY SEVEN # for Unicode 3.2 and later
+<U3257><UF87A> \xA5\xF9 |3 # CIRCLED NUMBER TWENTY SEVEN, negative # for Unicode 3.2 and later
+<U3258> \xA7\xFC |0 # CIRCLED NUMBER TWENTY EIGHT # for Unicode 3.2 and later
+<U3258><UF87A> \xA5\xFA |3 # CIRCLED NUMBER TWENTY EIGHT, negative # for Unicode 3.2 and later
+<U3259> \xA7\xFD |0 # CIRCLED NUMBER TWENTY NINE # for Unicode 3.2 and later
+<U3259><UF87A> \xA5\xFB |3 # CIRCLED NUMBER TWENTY NINE, negative # for Unicode 3.2 and later
+<U325A> \xA7\xFE |0 # CIRCLED NUMBER THIRTY # for Unicode 3.2 and later
+<U325A><UF87A> \xA5\xFC |3 # CIRCLED NUMBER THIRTY, negative # for Unicode 3.2 and later
<U3260> \xA8\xB1 |0 # CIRCLED HANGUL KIYEOK
<U3261> \xA8\xB2 |0 # CIRCLED HANGUL NIEUN
<U3262> \xA8\xB3 |0 # CIRCLED HANGUL TIKEUT
@@ -1875,7 +2005,9 @@ CHARMAP
<U51F1> \xCB\xC2 |0 # <CJK>
<U51F6> \xFD\xD5 |0 # <CJK>
<U51F8> \xF4\xC8 |0 # <CJK>
+<U51F8><UF87F> \xA7\x50 |3 # <CJK> protrusion/convex, alternate
<U51F9> \xE8\xEA |0 # <CJK>
+<U51F9><UF87F> \xA7\x4F |3 # <CJK> depression/concave, alternate
<U51FA> \xF5\xF3 |0 # <CJK>
<U51FD> \xF9\xDE |0 # <CJK>
<U5200> \xD3\xEF |0 # <CJK>
@@ -5282,6 +5414,7 @@ CHARMAP
<U88D9> \xCF\xD9 |0 # <CJK>
<U88DC> \xDC\xCD |0 # <CJK>
<U88DC><U20DE> \xAA\x60 |3 # ideograph repair/restore + COMBINING ENCLOSING SQUARE
+<U88DC><U20E4> \xA7\x7C |3 # <CJK> "repair/restore" in enclosing triangle # for Unicode 3.2 and later
<U88DD> \xED\xFB |0 # <CJK>
<U88DF> \xDE\xF0 |0 # <CJK>
<U88E1> \xD7\xEB |0 # <CJK>
@@ -8743,10 +8876,37 @@ CHARMAP
<UD799> \xC8\xFC |0 # HANGUL SYLLABLE HIEUH I PIEUP
<UD79B> \xC8\xFD |0 # HANGUL SYLLABLE HIEUH I SIOS
<UD79D> \xC8\xFE |0 # HANGUL SYLLABLE HIEUH I IEUNG
-<UF860><U0021><U003F> \xA7\x85 |3 # EXCLAMATION MARK and QUESTION MARK
-<UF860><U0028><U0028> \xA1\x51 |3 # double left parenthesis
-<UF860><U0029><U0029> \xA1\x52 |3 # double right parenthesisS
-<UF860><U003F><U003F> \xA7\x87 |3 # double QUESTION MARK
+<UF805> \xA6\x58 |0 # black diamond minus white square # corporate char
+<UF805><U20DE> \xA6\x66 |3 # black diamond minus white square + COMBINING ENCLOSING SQUARE # corporate char
+<UF806> \xA6\x63 |0 # black square minus white diamond # corporate char
+<UF806><U20DF> \xA6\x60 |3 # black square minus white diamond + COMBINING ENCLOSING DIAMOND # corporate char
+<UF807> \xA6\x9F |0 # telephone dial # corporate char
+<UF808> \xA6\x8F |0 # five vertical lines # corporate char
+<UF809> \xA6\x81 |0 # one downward-pointing black triangle over two others # corporate char
+<UF809><UF87A> \xA6\x91 |3 # one downward-pointing black triangle over two others, negative # corporate char
+<UF80A> \xA6\x74 |0 # two interwoven eye shapes # corporate char
+<UF80B> \xA6\x96 |0 # narrow-leaf four-petal florette # corporate char
+<UF80B><UF87F> \xA6\x86 |3 # narrow-leaf four-petal florette, in front of black diamond # corporate char
+<UF80C> \xA6\x9A |0 # four interleaved fisheyes # corporate char
+<UF83D> \xA6\x42 |0 # fleur-de-lis # corporate char
+<UF83D><UF87F> \xA6\x41 |3 # fleur-de-lis, alternate # corporate char
+<UF840> \xA1\x6E |0 # three asterisks aligned vertically (dictionary definition importance mark) # corporate char
+<UF841> \xA8\x94 |0 # left right up down arrow # corporate char
+<UF842> \xAC\x54 |0 # downwards wave arrow # corporate char
+<UF843> \xAC\x42 |0 # leftwards white arrow from wall # corporate char
+<UF844> \xAC\x49 |0 # black leftwards arrowhead # corporate char
+<UF845> \xAC\x5F |0 # black-feathered leftwards arrow # corporate char
+<UF846> \xA8\x67 |0 # leftwards arrowhead with tail of spreading ripples # corporate char
+<UF847> \xA8\x68 |0 # rightwards arrowhead with tail of spreading ripples # corporate char
+<UF848> \xA8\x9D |0 # large white leftwards arrow with white fins # corporate char
+<UF849> \xA8\x9C |0 # large white rightwards arrow with white fins # corporate char
+<UF84A> \xAC\x4B |0 # leftwards arrow with bow # corporate char
+<UF84B> \xAC\x4A |0 # rightwards arrow with bow # corporate char
+<UF84C> \xA7\x47 |0 # small pentagon # corporate char
+<UF84D> \xA7\x4B |0 # trapezoid # corporate char
+<UF84E> \xA7\x4C |0 # quadrilateral with shorter right side # corporate char
+<UF84F> \xA7\x4D |0 # quadrilateral with shorter left side # corporate char
+<UF860><U002A><U002A> \xA6\x4C |3 # two asterisks aligned horizontally (annotation/comment mark)
<UF860><U0041><U0029> \xA5\x55 |3 # LATIN CAPITAL LETTER A with RIGHT PARENTHESIS
<UF860><U0041><U002E> \xA9\x41 |3 # LATIN CAPITAL LETTER A with FULL STOP
<UF860><U0042><U0029> \xA5\x56 |3 # LATIN CAPITAL LETTER B with RIGHT PARENTHESIS
@@ -8885,7 +9045,7 @@ CHARMAP
<UF862><U0028><U0032><U0033><U0029> \xAA\xFB |3 # parenthesized number twenty-three
<UF862><U0028><U0032><U0034><U0029> \xAA\xFC |3 # parenthesized number twenty-four
<UF862><U0028><U0032><U0035><U0029> \xAA\xFD |3 # parenthesized number twenty-five
-<UF862><U0028><U0032><U0036><U0029> \xAA\xFE |3 # parenthesized number twenty-siz
+<UF862><U0028><U0032><U0036><U0029> \xAA\xFE |3 # parenthesized number twenty-six
<UF862><U0028><U0032><U0037><U0029> \xAB\xF7 |3 # parenthesized number twenty-seven
<UF862><U0028><U0032><U0038><U0029> \xAB\xF8 |3 # parenthesized number twenty-eight
<UF862><U0028><U0032><U0039><U0029> \xAB\xF9 |3 # parenthesized number twenty-nine
@@ -8911,16 +9071,7 @@ CHARMAP
<UF862><U005B><U5341><U516B><U005D> \xAD\x66 |3 # ideographs for eighteen in enclosing square
<UF862><U005B><U5341><U516D><U005D> \xAD\x64 |3 # ideographs for sixteen in enclosing square
<UF862><U005B><U5341><U56DB><U005D> \xAD\x62 |3 # ideographs for fourteen in enclosing square
-<UF863><U0028><U0032><U0031><U0029> \xA7\xF5 |3 # circled number twenty-one
-<UF863><U0028><U0032><U0032><U0029> \xA7\xF6 |3 # circled number twenty-two
-<UF863><U0028><U0032><U0033><U0029> \xA7\xF7 |3 # circled number twenty-three
-<UF863><U0028><U0032><U0034><U0029> \xA7\xF8 |3 # circled number twenty-four
-<UF863><U0028><U0032><U0035><U0029> \xA7\xF9 |3 # circled number twenty-five
-<UF863><U0028><U0032><U0036><U0029> \xA7\xFA |3 # circled number twenty-six
-<UF863><U0028><U0032><U0037><U0029> \xA7\xFB |3 # circled number twenty-seven
-<UF863><U0028><U0032><U0038><U0029> \xA7\xFC |3 # circled number twenty-eight
-<UF863><U0028><U0032><U0039><U0029> \xA7\xFD |3 # circled number twenty-nine
-<UF863><U0028><U0033><U0030><U0029> \xA7\xFE |3 # circled number twenty-ten
+<UF862><UC8FC><UC2DD><UD68C><UC0AC> \xA7\x7D |3 # square hangul, horizontal LR form
<UF863><U005B><U0031><U0030><U005D> \xA2\xEF |3 # number ten in enclosing square, serif, bold
<UF863><U005B><U0031><U0031><U005D> \xA2\xF0 |3 # number eleven in enclosing square, serif, bold
<UF863><U005B><U0031><U0032><U005D> \xA2\xF1 |3 # number twelve in enclosing square, serif, bold
@@ -8942,16 +9093,7 @@ CHARMAP
<UF863><U005B><U5341><U516B><U005D> \xAD\x52 |3 # ideographs for eighteen in enclosing square, negative
<UF863><U005B><U5341><U516D><U005D> \xAD\x50 |3 # ideographs for sixteen in enclosing square, negative
<UF863><U005B><U5341><U56DB><U005D> \xAD\x4E |3 # ideographs for fourteen in enclosing square, negative
-<UF864><U0028><U0032><U0031><U0029> \xA6\xF9 |3 # circled number twenty-one, negative
-<UF864><U0028><U0032><U0032><U0029> \xA6\xFA |3 # circled number twenty-two, negative
-<UF864><U0028><U0032><U0033><U0029> \xA6\xFB |3 # circled number twenty-three, negative
-<UF864><U0028><U0032><U0034><U0029> \xA6\xFC |3 # circled number twenty-four, negative
-<UF864><U0028><U0032><U0035><U0029> \xA6\xFD |3 # circled number twenty-five, negative
-<UF864><U0028><U0032><U0036><U0029> \xA6\xFE |3 # circled number twenty-six, negative
-<UF864><U0028><U0032><U0037><U0029> \xA5\xF9 |3 # circled number twenty-seven, negative
-<UF864><U0028><U0032><U0038><U0029> \xA5\xFA |3 # circled number twenty-eight, negative
-<UF864><U0028><U0032><U0039><U0029> \xA5\xFB |3 # circled number twenty-nine, negative
-<UF864><U0028><U0033><U0030><U0029> \xA5\xFC |3 # circled number thirty, negative
+<UF863><UC8FC><UC2DD><UD68C><UC0AC> \xA7\x81 |3 # square hangul, vertical form
<UF864><U005B><U0031><U0030><U005D> \xA3\x4A |3 # number ten in enclosing square, sans, shadowed
<UF864><U005B><U0031><U0031><U005D> \xA3\x4B |3 # number eleven in enclosing square, sans, shadowed
<UF864><U005B><U0031><U0032><U005D> \xA3\x4C |3 # number twelve in enclosing square, sans, shadowed
@@ -8985,12 +9127,7 @@ CHARMAP
<UF866><U005B><U0031><U0038><U005D> \xAC\xF4 |3 # number eighteen in enclosing square, negative, light
<UF866><U005B><U0031><U0039><U005D> \xAC\xF5 |3 # number nineteen in enclosing square, negative, light
<UF866><U005B><U0032><U0030><U005D> \xAC\xF6 |3 # number twenty in enclosing square, negative, light
-<UF867><U0028><U0028> \xA1\x53 |3 # double left parenthesis, alternate
-<UF867><U0029><U0029> \xA1\x54 |3 # double right parenthesis, alternate
-<UF868><U0028><U0028> \xA2\x41 |3 # double left parenthesis, alternate 2
-<UF868><U0029><U0029> \xA2\x42 |3 # double right parenthesis, alternate 2
-<UF869><U0028><U0028> \xA2\x43 |3 # double left parenthesis, alternate 3
-<UF869><U0029><U0029> \xA2\x44 |3 # double right parenthesis, alternate 3
+<UF867><U002A><U002A> \xA6\x50 |3 # two asterisks aligned horizontally (annotation/comment mark), large
<UF900> \xCB\xD0 |0 # CJK COMPATIBILITY IDEOGRAPH-F900
<UF901> \xCB\xD6 |0 # CJK COMPATIBILITY IDEOGRAPH-F901
<UF902> \xCB\xE7 |0 # CJK COMPATIBILITY IDEOGRAPH-F902
@@ -9259,6 +9396,12 @@ CHARMAP
<UFA09> \xFA\xA2 |0 # CJK COMPATIBILITY IDEOGRAPH-FA09
<UFA0A> \xFA\xE6 |0 # CJK COMPATIBILITY IDEOGRAPH-FA0A
<UFA0B> \xFC\xA9 |0 # CJK COMPATIBILITY IDEOGRAPH-FA0B
+<UFE59> \xA1\x4D |0 # SMALL LEFT PARENTHESIS
+<UFE59><UF87C> \xA2\x45 |3 # SMALL LEFT PARENTHESIS, bold
+<UFE59><UF87F> \xA1\x4F |3 # SMALL LEFT PARENTHESIS, more rounded
+<UFE5A> \xA1\x4E |0 # SMALL RIGHT PARENTHESIS
+<UFE5A><UF87C> \xA2\x46 |3 # SMALL RIGHT PARENTHESIS, bold
+<UFE5A><UF87F> \xA1\x50 |3 # SMALL RIGHT PARENTHESIS, more rounded
<UFF01> \xA3\xA1 |0 # FULLWIDTH EXCLAMATION MARK
<UFF01><UF874> \xA5\xDA |3 # FULLWIDTH EXCLAMATION MARK, position left
<UFF02> \xA3\xA2 |0 # FULLWIDTH QUOTATION MARK
@@ -9270,12 +9413,11 @@ CHARMAP
<UFF08> \xA3\xA8 |0 # FULLWIDTH LEFT PARENTHESIS
<UFF09> \xA3\xA9 |0 # FULLWIDTH RIGHT PARENTHESIS
<UFF0A> \xA3\xAA |0 # FULLWIDTH ASTERISK
-<UFF0A><UF870> \xA6\x4E |3 # FULLWIDTH ASTERISK, position low, large
<UFF0A><UF871> \xA1\x9F |3 # FULLWIDTH ASTERISK, position left, large
<UFF0A><UF873> \xA6\x4A |3 # FULLWIDTH ASTERISK, position center
<UFF0A><UF874> \xA1\x9C |3 # FULLWIDTH ASTERISK, position left
<UFF0A><UF875> \xA1\x9E |3 # FULLWIDTH ASTERISK, position low left
-<UFF0A><UF87F> \xA6\x49 |3 # FULLWIDTH ASTERISK, duplicate of xA3AA
+<UFF0A><UF87F> \xA6\x49 |3 # FULLWIDTH ASTERISK, duplicate of 0xA3AA
<UFF0B> \xA3\xAB |0 # FULLWIDTH PLUS SIGN
<UFF0C> \xA3\xAC |0 # FULLWIDTH COMMA
<UFF0D> \xA3\xAD |0 # FULLWIDTH HYPHEN-MINUS
diff --git a/ext/Encode/ucm/macROMnn.ucm b/ext/Encode/ucm/macROMnn.ucm
index 6be54cd696..5f81911d88 100644
--- a/ext/Encode/ucm/macROMnn.ucm
+++ b/ext/Encode/ucm/macROMnn.ucm
@@ -1,5 +1,5 @@
#
-# $Id: macROMnn.ucm,v 2.0 2004/05/16 20:55:27 dankogai Exp $
+# $Id: macROMnn.ucm,v 2.1 2004/10/22 06:23:11 dankogai Exp $
#
# Original table can be obtained at
# http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/ROMANIAN.TXT
@@ -8,7 +8,6 @@
<mb_cur_min> 1
<mb_cur_max> 1
<subchar> \x3F
-#
CHARMAP
<U0000> \x00 |0 # <control>
<U0001> \x01 |0 # <control>
@@ -93,9 +92,7 @@ CHARMAP
<U0050> \x50 |0 # LATIN CAPITAL LETTER P
<U0051> \x51 |0 # LATIN CAPITAL LETTER Q
<U0052> \x52 |0 # LATIN CAPITAL LETTER R
-<U0053><U0326> \xAF |3 # LATIN CAPITAL LETTER S + COMBINING COMMA BELOW
<U0053> \x53 |0 # LATIN CAPITAL LETTER S
-<U0054><U0326> \xDE |3 # LATIN CAPITAL LETTER T + COMBINING COMMA BELOW
<U0054> \x54 |0 # LATIN CAPITAL LETTER T
<U0055> \x55 |0 # LATIN CAPITAL LETTER U
<U0056> \x56 |0 # LATIN CAPITAL LETTER V
@@ -127,9 +124,7 @@ CHARMAP
<U0070> \x70 |0 # LATIN SMALL LETTER P
<U0071> \x71 |0 # LATIN SMALL LETTER Q
<U0072> \x72 |0 # LATIN SMALL LETTER R
-<U0073><U0326> \xBF |3 # LATIN SMALL LETTER S + COMBINING COMMA BELOW
<U0073> \x73 |0 # LATIN SMALL LETTER S
-<U0074><U0326> \xDF |3 # LATIN SMALL LETTER T + COMBINING COMMA BELOW
<U0074> \x74 |0 # LATIN SMALL LETTER T
<U0075> \x75 |0 # LATIN SMALL LETTER U
<U0076> \x76 |0 # LATIN SMALL LETTER V
@@ -224,6 +219,10 @@ CHARMAP
<U0153> \xCF |0 # LATIN SMALL LIGATURE OE
<U0178> \xD9 |0 # LATIN CAPITAL LETTER Y WITH DIAERESIS
<U0192> \xC4 |0 # LATIN SMALL LETTER F WITH HOOK
+<U0218> \xAF |0 # LATIN CAPITAL LETTER S WITH COMMA BELOW # for Unicode 3.0 and later
+<U0219> \xBF |0 # LATIN SMALL LETTER S WITH COMMA BELOW # for Unicode 3.0 and later
+<U021A> \xDE |0 # LATIN CAPITAL LETTER T WITH COMMA BELOW # for Unicode 3.0 and later
+<U021B> \xDF |0 # LATIN SMALL LETTER T WITH COMMA BELOW; # for Unicode 3.0 and later
<U02C6> \xF6 |0 # MODIFIER LETTER CIRCUMFLEX ACCENT
<U02C7> \xFF |0 # CARON
<U02D8> \xF9 |0 # BREVE
diff --git a/ext/Encode/ucm/macSymbol.ucm b/ext/Encode/ucm/macSymbol.ucm
index 6d64b4aeda..dfae00ebb5 100644
--- a/ext/Encode/ucm/macSymbol.ucm
+++ b/ext/Encode/ucm/macSymbol.ucm
@@ -1,5 +1,5 @@
#
-# $Id: macSymbol.ucm,v 2.0 2004/05/16 20:55:27 dankogai Exp $
+# $Id: macSymbol.ucm,v 2.1 2004/10/22 06:23:11 dankogai Exp $
#
# Original table can be obtained at
# http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/SYMBOL.TXT
@@ -47,13 +47,7 @@ CHARMAP
<U0025> \x25 |0 # PERCENT SIGN
<U0026> \x26 |0 # AMPERSAND
<U0028> \x28 |0 # LEFT PARENTHESIS
-<U0028><UF870> \xE6 |3 # LEFT PARENTHESIS, fragment (top)
-<U0028><UF871> \xE7 |3 # LEFT PARENTHESIS, fragment (extender)
-<U0028><UF872> \xE8 |3 # LEFT PARENTHESIS, fragment (bottom)
<U0029> \x29 |0 # RIGHT PARENTHESIS
-<U0029><UF870> \xF6 |3 # RIGHT PARENTHESIS, fragment (top)
-<U0029><UF871> \xF7 |3 # RIGHT PARENTHESIS, fragment (extender)
-<U0029><UF872> \xF8 |3 # RIGHT PARENTHESIS, fragment (bottom)
<U002B> \x2B |0 # PLUS SIGN
<U002C> \x2C |0 # COMMA
<U002E> \x2E |0 # FULL STOP
@@ -75,28 +69,16 @@ CHARMAP
<U003E> \x3E |0 # GREATER-THAN SIGN
<U003F> \x3F |0 # QUESTION MARK
<U005B> \x5B |0 # LEFT SQUARE BRACKET
-<U005B><UF870> \xE9 |3 # LEFT SQUARE BRACKET, fragment (top)
-<U005B><UF871> \xEA |3 # LEFT SQUARE BRACKET, fragment (extender)
-<U005B><UF872> \xEB |3 # LEFT SQUARE BRACKET, fragment (bottom)
<U005D> \x5D |0 # RIGHT SQUARE BRACKET
-<U005D><UF870> \xF9 |3 # RIGHT SQUARE BRACKET, fragment (top)
-<U005D><UF871> \xFA |3 # RIGHT SQUARE BRACKET, fragment (extender)
-<U005D><UF872> \xFB |3 # RIGHT SQUARE BRACKET, fragment (bottom)
<U005F> \x5F |0 # LOW LINE
<U007B> \x7B |0 # LEFT CURLY BRACKET
-<U007B><UF870> \xEC |3 # LEFT CURLY BRACKET, fragment (top)
-<U007B><UF871> \xED |3 # LEFT CURLY BRACKET, fragment (center)
-<U007B><UF872> \xEE |3 # LEFT CURLY BRACKET, fragment (bottom)
<U007C> \x7C |0 # VERTICAL LINE
<U007D> \x7D |0 # RIGHT CURLY BRACKET
-<U007D><UF870> \xFC |3 # RIGHT CURLY BRACKET, fragment (top)
-<U007D><UF871> \xFD |3 # RIGHT CURLY BRACKET, fragment (center)
-<U007D><UF872> \xFE |3 # RIGHT CURLY BRACKET, fragment (bottom)
<U00A9> \xD3 |0 # COPYRIGHT SIGN # serif
-<U00A9><UF87F> \xE3 |3 # COPYRIGHT SIGN, alternate (sans serif)
+<U00A9><UF87F> \xE3 |3 # COPYRIGHT SIGN, alternate: sans serif
<U00AC> \xD8 |0 # NOT SIGN
<U00AE> \xD2 |0 # REGISTERED SIGN # serif
-<U00AE><UF87F> \xE2 |3 # REGISTERED SIGN, alternate (sans serif)
+<U00AE><UF87F> \xE2 |3 # REGISTERED SIGN, alternate: sans serif
<U00B0> \xB0 |0 # DEGREE SIGN
<U00B1> \xB1 |0 # PLUS-MINUS SIGN
<U00D7> \xB4 |0 # MULTIPLICATION SIGN
@@ -165,7 +147,7 @@ CHARMAP
<U2118> \xC3 |0 # SCRIPT CAPITAL P
<U211C> \xC2 |0 # BLACK-LETTER CAPITAL R
<U2122> \xD4 |0 # TRADE MARK SIGN # serif
-<U2122><UF87F> \xE4 |3 # TRADE MARK SIGN, alternate (sans serif)
+<U2122><UF87F> \xE4 |3 # TRADE MARK SIGN, alternate: sans serif
<U2135> \xC0 |0 # ALEF SYMBOL
<U2190> \xAC |0 # LEFTWARDS ARROW
<U2191> \xAD |0 # UPWARDS ARROW
@@ -199,7 +181,6 @@ CHARMAP
<U2229> \xC7 |0 # INTERSECTION
<U222A> \xC8 |0 # UNION
<U222B> \xF2 |0 # INTEGRAL
-<U222B><UF871> \xF4 |3 # INTEGRAL, fragment (extender)
<U2234> \x5C |0 # THEREFORE
<U223C> \x7E |0 # TILDE OPERATOR
<U2245> \x40 |0 # APPROXIMATELY EQUAL TO
@@ -220,15 +201,34 @@ CHARMAP
<U22C5> \xD7 |0 # DOT OPERATOR
<U2320> \xF3 |0 # TOP HALF INTEGRAL
<U2321> \xF5 |0 # BOTTOM HALF INTEGRAL
+<U239B> \xE6 |0 # LEFT PARENTHESIS UPPER HOOK # for Unicode 3.2 and later
+<U239C> \xE7 |0 # LEFT PARENTHESIS EXTENSION # for Unicode 3.2 and later
+<U239D> \xE8 |0 # LEFT PARENTHESIS LOWER HOOK # for Unicode 3.2 and later
+<U239E> \xF6 |0 # RIGHT PARENTHESIS UPPER HOOK # for Unicode 3.2 and later
+<U239F> \xF7 |0 # RIGHT PARENTHESIS EXTENSION # for Unicode 3.2 and later
+<U23A0> \xF8 |0 # RIGHT PARENTHESIS LOWER HOOK # for Unicode 3.2 and later
+<U23A1> \xE9 |0 # LEFT SQUARE BRACKET UPPER CORNER # for Unicode 3.2 and later
+<U23A2> \xEA |0 # LEFT SQUARE BRACKET EXTENSION # for Unicode 3.2 and later
+<U23A3> \xEB |0 # LEFT SQUARE BRACKET LOWER CORNER # for Unicode 3.2 and later
+<U23A4> \xF9 |0 # RIGHT SQUARE BRACKET UPPER CORNER # for Unicode 3.2 and later
+<U23A5> \xFA |0 # RIGHT SQUARE BRACKET EXTENSION # for Unicode 3.2 and later
+<U23A6> \xFB |0 # RIGHT SQUARE BRACKET LOWER CORNER # for Unicode 3.2 and later
+<U23A7> \xEC |0 # LEFT CURLY BRACKET UPPER HOOK # for Unicode 3.2 and later
+<U23A8> \xED |0 # LEFT CURLY BRACKET MIDDLE PIECE # for Unicode 3.2 and later
+<U23A9> \xEE |0 # LEFT CURLY BRACKET LOWER HOOK # for Unicode 3.2 and later
+<U23AA> \xEF |0 # CURLY BRACKET EXTENSION # for Unicode 3.2 and later
+<U23AB> \xFC |0 # RIGHT CURLY BRACKET UPPER HOOK # for Unicode 3.2 and later
+<U23AC> \xFD |0 # RIGHT CURLY BRACKET MIDDLE PIECE # for Unicode 3.2 and later
+<U23AD> \xFE |0 # RIGHT CURLY BRACKET LOWER HOOK # for Unicode 3.2 and later
+<U23AE> \xF4 |0 # INTEGRAL EXTENSION # for Unicode 3.2 and later
+<U23AF> \xBE |0 # HORIZONTAL LINE EXTENSION (for arrows) # for Unicode 3.2 and later
<U2660> \xAA |0 # BLACK SPADE SUIT
<U2663> \xA7 |0 # BLACK CLUB SUIT
<U2665> \xA9 |0 # BLACK HEART SUIT
<U2666> \xA8 |0 # BLACK DIAMOND SUIT
<U3008> \xE1 |0 # LEFT ANGLE BRACKET
<U3009> \xF1 |0 # RIGHT ANGLE BRACKET
-<UF8E5> \x60 |0 # radical extender
-<UF8E6> \xBD |0 # vertical arrow extender
-<UF8E7> \xBE |0 # horizontal arrow extender
-<UF8F4> \xEF |0 # curly bracket extender
+<UF8E5> \x60 |0 # radical extender # corporate char
+<UF8E6> \xBD |0 # vertical line extension (for arrows) # corporate char
<UF8FF> \xF0 |0 # Apple logo
END CHARMAP
diff --git a/ext/Encode/ucm/macThai.ucm b/ext/Encode/ucm/macThai.ucm
index 3940034dcf..159204c152 100644
--- a/ext/Encode/ucm/macThai.ucm
+++ b/ext/Encode/ucm/macThai.ucm
@@ -1,5 +1,5 @@
#
-# $Id: macThai.ucm,v 2.0 2004/05/16 20:55:27 dankogai Exp $
+# $Id: macThai.ucm,v 2.1 2004/10/22 06:23:11 dankogai Exp $
#
# Original table can be obtained at
# http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/THAI.TXT
@@ -256,6 +256,6 @@ CHARMAP
<U201D> \x8E |0 # RIGHT DOUBLE QUOTATION MARK
<U2022> \x91 |0 # BULLET
<U2026> \x82 |0 # HORIZONTAL ELLIPSIS
+<U2060> \xDB |0 # WORD JOINER # for Unicode 3.2 and later
<U2122> \xEE |0 # TRADE MARK SIGN
-<UFEFF> \xDB |0 # ZERO WIDTH NO-BREAK SPACE
END CHARMAP