summaryrefslogtreecommitdiff
path: root/ext/Encode/lib
diff options
context:
space:
mode:
authorJarkko Hietaniemi <jhi@iki.fi>2002-03-05 01:39:29 +0000
committerJarkko Hietaniemi <jhi@iki.fi>2002-03-05 01:39:29 +0000
commiteb1cbb171ab3ec4bb1ec039125b5ab5123d5305d (patch)
treeb063520a9c9015a4ca0b7fa713b85b3c3527891d /ext/Encode/lib
parente4f51a44c1859f4a042995273704823dba35ea18 (diff)
downloadperl-eb1cbb171ab3ec4bb1ec039125b5ab5123d5305d.tar.gz
"The last pieces of Chinese puzzle" from Autrijus.
p4raw-id: //depot/perl@15029
Diffstat (limited to 'ext/Encode/lib')
-rw-r--r--ext/Encode/lib/Encode/CN/HZ.pm50
1 files changed, 50 insertions, 0 deletions
diff --git a/ext/Encode/lib/Encode/CN/HZ.pm b/ext/Encode/lib/Encode/CN/HZ.pm
new file mode 100644
index 0000000000..a57ae8a971
--- /dev/null
+++ b/ext/Encode/lib/Encode/CN/HZ.pm
@@ -0,0 +1,50 @@
+package Encode::CN::HZ;
+
+use Encode::CN;
+use Encode qw|encode decode|;
+use base 'Encode::Encoding';
+
+use strict;
+
+# HZ is but escaped GB, so we implement it with the
+# GB2312(raw) encoding here. Cf. RFC 1842 & 1843.
+
+my $canon = 'hz';
+my $obj = bless {name => $canon}, __PACKAGE__;
+$obj->Define($canon);
+
+sub decode
+{
+ my ($obj,$str,$chk) = @_;
+ my $gb = Encode::find_encoding('gb2312');
+
+ $str =~ s{~(?:(~)|\n|{([^~]*)~}|)}
+ {$1 ? '~' : defined $2 ? $gb->decode($2, $chk) : ''}eg;
+
+ return $str;
+}
+
+sub encode
+{
+ my ($obj,$str,$chk) = @_;
+ my $gb = Encode::find_encoding('gb2312');
+
+ $str =~ s/~/~~/g;
+ $str =~ s/((?:
+ \p{InCJKCompatibility}|
+ \p{InCJKCompatibilityForms}|
+ \p{InCJKCompatibilityIdeographs}|
+ \p{InCJKCompatibilityIdeographsSupplement}|
+ \p{InCJKRadicalsSupplement}|
+ \p{InCJKSymbolsAndPunctuation}|
+ \p{InCJKUnifiedIdeographsExtensionA}|
+ \p{InCJKUnifiedIdeographs}|
+ \p{InCJKUnifiedIdeographsExtensionB}|
+ \p{InEnclosedCJKLettersAndMonths}
+ )+)/'~{'.$gb->encode($1, $chk).'~}'/egx;
+
+ return $str;
+}
+
+1;
+__END__