diff options
author | Jarkko Hietaniemi <jhi@iki.fi> | 2002-03-05 01:39:29 +0000 |
---|---|---|
committer | Jarkko Hietaniemi <jhi@iki.fi> | 2002-03-05 01:39:29 +0000 |
commit | eb1cbb171ab3ec4bb1ec039125b5ab5123d5305d (patch) | |
tree | b063520a9c9015a4ca0b7fa713b85b3c3527891d /ext/Encode/lib | |
parent | e4f51a44c1859f4a042995273704823dba35ea18 (diff) | |
download | perl-eb1cbb171ab3ec4bb1ec039125b5ab5123d5305d.tar.gz |
"The last pieces of Chinese puzzle" from Autrijus.
p4raw-id: //depot/perl@15029
Diffstat (limited to 'ext/Encode/lib')
-rw-r--r-- | ext/Encode/lib/Encode/CN/HZ.pm | 50 |
1 files changed, 50 insertions, 0 deletions
diff --git a/ext/Encode/lib/Encode/CN/HZ.pm b/ext/Encode/lib/Encode/CN/HZ.pm new file mode 100644 index 0000000000..a57ae8a971 --- /dev/null +++ b/ext/Encode/lib/Encode/CN/HZ.pm @@ -0,0 +1,50 @@ +package Encode::CN::HZ; + +use Encode::CN; +use Encode qw|encode decode|; +use base 'Encode::Encoding'; + +use strict; + +# HZ is but escaped GB, so we implement it with the +# GB2312(raw) encoding here. Cf. RFC 1842 & 1843. + +my $canon = 'hz'; +my $obj = bless {name => $canon}, __PACKAGE__; +$obj->Define($canon); + +sub decode +{ + my ($obj,$str,$chk) = @_; + my $gb = Encode::find_encoding('gb2312'); + + $str =~ s{~(?:(~)|\n|{([^~]*)~}|)} + {$1 ? '~' : defined $2 ? $gb->decode($2, $chk) : ''}eg; + + return $str; +} + +sub encode +{ + my ($obj,$str,$chk) = @_; + my $gb = Encode::find_encoding('gb2312'); + + $str =~ s/~/~~/g; + $str =~ s/((?: + \p{InCJKCompatibility}| + \p{InCJKCompatibilityForms}| + \p{InCJKCompatibilityIdeographs}| + \p{InCJKCompatibilityIdeographsSupplement}| + \p{InCJKRadicalsSupplement}| + \p{InCJKSymbolsAndPunctuation}| + \p{InCJKUnifiedIdeographsExtensionA}| + \p{InCJKUnifiedIdeographs}| + \p{InCJKUnifiedIdeographsExtensionB}| + \p{InEnclosedCJKLettersAndMonths} + )+)/'~{'.$gb->encode($1, $chk).'~}'/egx; + + return $str; +} + +1; +__END__ |