summaryrefslogtreecommitdiff
path: root/ext/Storable/t/utf8hash.t
blob: 7d842d57604eba3eca68bcf2037e94e4b46af88b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
#!./perl

sub BEGIN {
    if ($] < 5.007) {
	print "1..0 # Skip: no utf8 hash key support\n";
	exit 0;
    }
    if ($ENV{PERL_CORE}){
	chdir('t') if -d 't';
	@INC = ('.', '../lib');
	push @INC, "::lib:$MacPerl::Architecture:" if $^O eq 'MacOS';
    } else {
	unshift @INC, 't';
    }
    require Config; import Config;
    if ($ENV{PERL_CORE}){
	if($Config{'extensions'} !~ /\bStorable\b/) {
	    print "1..0 # Skip: Storable was not built\n";
	    exit 0;
	}
    }
}

use strict;
our $DEBUGME = shift || 0;
use Storable qw(store nstore retrieve thaw freeze);
{
    no warnings;
    $Storable::DEBUGME = ($DEBUGME > 1);
}
# Better than no plan, because I was getting out of memory errors, at which
# point Test::More tidily prints up 1..79 as if I meant to finish there.
use Test::More tests=>148;
use bytes ();
use Encode qw(is_utf8);
my %utf8hash;

$Storable::canonical = $Storable::canonical; # Shut up a used only once warning.

for $Storable::canonical (0, 1) {

# first we generate a nasty hash which keys include both utf8
# on and off with identical PVs

no utf8; # we have a naked 8-bit byte below (in Latin 1, anyway)

# In Latin 1 -ese the below ord() should end up 0xc0 (192),
# in EBCDIC 0x64 (100).  Both should end up being UTF-8/UTF-EBCDIC.
my @ords = (
	    ord("Á"), # LATIN CAPITAL LETTER A WITH GRAVE
	    0x3000, #IDEOGRAPHIC SPACE
	   );

foreach my $i (@ords){
    my $u = chr($i); utf8::upgrade($u);
    # warn sprintf "%d,%d", bytes::length($u), is_utf8($u);
    my $b = pack("C*", unpack("C*", $u));
    # warn sprintf "%d,%d" ,bytes::length($b), is_utf8($b);

    isnt($u,	                        $b, 
	 "equivalence - with utf8flag");
    is   (pack("C*", unpack("C*", $u)), pack("C*", unpack("C*", $b)),
	  "equivalence - without utf8flag");

    $utf8hash{$u} = $utf8hash{$b} = $i;
}

sub nkeys($){
    my $href = shift;
    return scalar keys %$href; 
}

my $nk;
is($nk = nkeys(\%utf8hash), scalar(@ords)*2, 
   "nasty hash generated (nkeys=$nk)");

# now let the show begin!

my $thawed = thaw(freeze(\%utf8hash));

is($nk = nkeys($thawed),
   nkeys(\%utf8hash),
   "scalar keys \%{\$thawed} (nkeys=$nk)");
for my $k (sort keys %$thawed){
    is($utf8hash{$k}, $thawed->{$k}, "frozen item chr($utf8hash{$k})");
}

my $storage = "utfhash.po"; # po = perl object!
my $retrieved;

ok((nstore \%utf8hash, $storage), "nstore to $storage");
ok(($retrieved = retrieve($storage)), "retrieve from $storage");

is($nk = nkeys($retrieved),
   nkeys(\%utf8hash),
   "scalar keys \%{\$retrieved} (nkeys=$nk)");
for my $k (sort keys %$retrieved){
    is($utf8hash{$k}, $retrieved->{$k}, "nstored item chr($utf8hash{$k})");
}
unlink $storage;


ok((store \%utf8hash, $storage), "store to $storage");
ok(($retrieved = retrieve($storage)), "retrieve from $storage");
is($nk = nkeys($retrieved),
   nkeys(\%utf8hash),
   "scalar keys \%{\$retrieved} (nkeys=$nk)");
for my $k (sort keys %$retrieved){
    is($utf8hash{$k}, $retrieved->{$k}, "stored item chr($utf8hash{$k})");
}
$DEBUGME or unlink $storage;

# On the premis that more tests are good, here are NWC's tests:

package Hash_Test;

sub me_second {
  return (undef, $_[0]);
}

package main;

my $utf8 = "Schlo\xdf" . chr 256;
chop $utf8;

# Set this to 1 to test the test by bypassing Storable.
my $bypass = 0;

sub class_test {
  my ($object, $package) = @_;
  unless ($package) {
    is ref $object, 'HASH', "$object is unblessed";
    return;
  }
  isa_ok ($object, $package);
  my ($garbage, $copy) = eval {$object->me_second};
  is $@, "", "check it has correct method";
  cmp_ok $copy, '==', $object, "and that it returns the same object";
}

# Thanks to Dan Kogai for the Kanji for "castle" (which he informs me also
# means 'a city' in Mandarin).
my %hash = (map {$_, $_} 'castle', "ch\xe5teau", $utf8, "\x{57CE}");

for my $package ('', 'Hash_Test') {
  # Run through and sanity check these.
  if ($package) {
    bless \%hash, $package;
  }
  for (keys %hash) {
    my $l = 0 + /^\w+$/;
    my $r = 0 + $hash{$_} =~ /^\w+$/;
    cmp_ok ($l, '==', $r);
  }

  # Grr. This cperl mode thinks that ${ is a punctuation variable.
  # I presume it's punishment for using xemacs rather than emacs. Or OS/2 :-)
  my $copy = $bypass ? \%hash : ${thaw freeze \\%hash};
  class_test ($copy, $package);

  for (keys %$copy) {
    my $l = 0 + /^\w+$/;
    my $r = 0 + $copy->{$_} =~ /^\w+$/;
    cmp_ok ($l, '==', $r, sprintf "key length %d", length $_);
  }


  my $bytes = my $char = chr 27182;
  utf8::encode ($bytes);

  my $orig = {$char => 1};
  if ($package) {
    bless $orig, $package;
  }
  my $just_utf8 = $bypass ? $orig : ${thaw freeze \$orig};
  class_test ($just_utf8, $package);
  cmp_ok (scalar keys %$just_utf8, '==', 1, "1 key in utf8?");
  cmp_ok ($just_utf8->{$char}, '==', 1, "utf8 key present?");
  ok (!exists $just_utf8->{$bytes}, "bytes key absent?");

  $orig = {$bytes => 1};
  if ($package) {
    bless $orig, $package;
  }
  my $just_bytes = $bypass ? $orig : ${thaw freeze \$orig};
  class_test ($just_bytes, $package);

  cmp_ok (scalar keys %$just_bytes, '==', 1, "1 key in bytes?");
  cmp_ok ($just_bytes->{$bytes}, '==', 1, "bytes key present?");
  ok (!exists $just_bytes->{$char}, "utf8 key absent?");

  die sprintf "Both have length %d, which is crazy", length $char
    if length $char == length $bytes;

  $orig = {$bytes => length $bytes, $char => length $char};
  if ($package) {
    bless $orig, $package;
  }
  my $both = $bypass ? $orig : ${thaw freeze \$orig};
  class_test ($both, $package);

  cmp_ok (scalar keys %$both, '==', 2, "2 keys?");
  cmp_ok ($both->{$bytes}, '==', length $bytes, "bytes key present?");
  cmp_ok ($both->{$char}, '==', length $char, "utf8 key present?");
}

}