ext/Encode/t/TW.t


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94

# Compile-time bootstrap.  It runs as soon as it is parsed, i.e. before the
# "use" statements further down are processed.  It moves into t/ when that
# directory exists, makes ../lib the only @INC entry, and prints a
# "1..0 # Skip" plan and exits successfully when a prerequisite is missing.
BEGIN {
    if (-d 't') {
        chdir 't';
    }
    @INC = '../lib';

    require Config;
    Config->import;

    # Nothing to test when Encode is not among the built extensions.
    unless ($Config{'extensions'} =~ /\bEncode\b/) {
        print "1..0 # Skip: Encode was not built\n";
        exit 0;
    }

    # The 'perlio' layer has to be available.
    if (!PerlIO::Layer->find('perlio')) {
        print "1..0 # Skip: PerlIO was not built\n";
        exit 0;
    }

    # "A" has ordinal 193 on EBCDIC platforms; the test is skipped there.
    if (ord("A") == 193) {
        print "1..0 # Skip: EBCDIC\n";
        exit 0;
    }

    # Unbuffer STDOUT.
    $| = 1;
}

use strict;
use Test::More tests => 17;
use Encode;

use_ok('Encode::TW');

# Since JP.t already tests basic file IO, we will just focus on
# internal encode / decode tests here. Unfortunately, testing
# against all the UniHan characters would take a huge amount of disk
# space, not to mention the time it would take, and the fact that Perl
# does not bundle UniHan.txt anyway.

# So, here we just test a typical snippet spanning multiple Unicode
# blocks, and hope it can point out obvious errors.

# First data set, titled 'Basic Big5 range'.
#
# The 'utf' entry is the expected Unicode string, written as bare v-strings:
# dotted runs of decimal code points.  The '.' that ends each line of the run
# is the concatenation operator joining it to the next line.
#
# Every other key is an encoding name whose value is the same text as encoded
# bytes.  As written here, 'big5', 'big5-hkscs' and 'cp950' all carry the same
# byte string.
#
# NOTE(review): in this view the byte literals contain U+FFFD replacement
# characters, which suggests the raw bytes were damaged by a re-encoding
# step -- confirm against a pristine copy of the file before relying on them.
run_tests('Basic Big5 range', {
    'utf'	=> (
24093.39640.38525.20043.33495.35028.20846.65292.
26389.30343.32771.26352.20271.24248.65108.
25885.25552.35998.20110.23391.38508.20846.65292.
24799.24218.23493.21566.20197.38477.65108
    ),

    'big5'	=> (join('',
'�Ұ������]�Ǥ��A�ӬӦҤ�B�e�Q',
'�ᴣ�s�_�s�����A�����G�^�H���Q',
    )),

    'big5-hkscs'=> (join('',
'�Ұ������]�Ǥ��A�ӬӦҤ�B�e�Q',
'�ᴣ�s�_�s�����A�����G�^�H���Q',
    )),

    'cp950'	=> (join('',
'�Ұ������]�Ǥ��A�ӬӦҤ�B�e�Q',
'�ᴣ�s�_�s�����A�����G�^�H���Q',
    )),
});

# Second data set, titled 'Hong Kong Extensions'; only the 'big5-hkscs'
# encoding is exercised here.
#
# The 'utf' entry uses the same bare v-string notation (dotted decimal code
# points, with a trailing '.' concatenating each line to the next).  The run
# 32.80.101.114.108.32 on its first line is the ASCII text " Perl ", which
# also appears literally in the byte string.
#
# NOTE(review): in this view the byte literals contain U+FFFD replacement
# characters, which suggests the raw bytes were damaged by a re-encoding
# step -- confirm against a pristine copy of the file before relying on them.
run_tests('Hong Kong Extensions', {
    'utf'	=> (
24863.35613.25152.26377.20351.29992.32.80.101.114.108.32.
22021.26379.21451.65292.32102.25105.21707.22021.
25903.25345.12289.24847.35211.21644.40723.21237.
22914.26524.32232.30908.26377.20219.20309.37679.28431.
65292.35531.21578.35380.25105.21707.12290
    ),

    'big5-hkscs'	=> join('',
'�P�©Ҧ��ϥ� Perl ��B�͡A���ڒ]�����B�N���M���y',
'�p�G�s�X��������|�A�Чi�D�ڒ]�C'
    ),
});

# run_tests($title, \%tests)
#
# Runs the conversion checks for one data set.
#
#   $title  - label appended to every test name.
#   \%tests - hash reference with a 'utf' entry (the reference Unicode
#             string) plus one entry per encoding name, whose value is the
#             same text as encoded bytes.  The 'utf' entry is deleted from
#             the caller's hash.
#
# Encodings are visited in sorted order and each one produces four is()
# checks: decode, encode, from_to() into utf-8 and from_to() back out.
sub run_tests {
    my ($title, $tests) = @_;

    # After 'utf' is removed, every remaining key is an encoding name.
    my $utf       = delete $tests->{'utf'};
    my @encodings = sort keys %$tests;

    for my $enc (@encodings) {
        my $bytes = $tests->{$enc};

        # One-shot conversions in each direction.
        my $decoded = Encode::decode($enc, $bytes);
        is($decoded, $utf, "[$enc] decode - $title");

        my $encoded = Encode::encode($enc, $utf);
        is($encoded, $bytes, "[$enc] encode - $title");

        # from_to() converts its first argument in place, so the encoded
        # bytes are copied first to leave $bytes intact for the last check.
        my $converted = $bytes;
        my $utf8_text = Encode::encode('utf-8', $utf);

        Encode::from_to($converted, $enc, 'utf-8');
        is($converted, $utf8_text, "[$enc] from_to => utf8 - $title");

        # Now the other way round: turn the utf-8 octets into $enc.
        Encode::from_to($utf8_text, 'utf-8', $enc);
        is($utf8_text, $bytes, "[$enc] utf8 => from_to - $title");
    }
}