summaryrefslogtreecommitdiff
path: root/t/op/utf8cache.t
blob: 65254b1b478059ac29bbfa7a8c6a0cb58e04639b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
#!./perl -w
# Test for malfunctions of utf8 cache

BEGIN {
    chdir 't' if -d 't';
    @INC = '../lib';
    require './test.pl';
}

use strict;

plan(tests => 15);

SKIP: {
skip_without_dynamic_extension("Devel::Peek");

my $pid = open CHILD, '-|';
die "kablam: $!\n" unless defined $pid;
unless ($pid) {
    open STDERR, ">&STDOUT";
    $a = "hello \x{1234}";
    for (1..2) {
        bar(substr($a, $_, 1));
    }
    sub bar {
        $_[0] = "\x{4321}";
        Devel::Peek::Dump($_[0]);
    }
    exit;
}

{ local $/; $_ = <CHILD> }

my $utf8magic = qr{ ^ \s+ MAGIC \s = .* \n
                      \s+ MG_VIRTUAL \s = .* \n
                      \s+ MG_TYPE \s = \s PERL_MAGIC_utf8 .* \n
                      \s+ MG_LEN \s = .* \n }xm;

unlike($_, qr{ $utf8magic $utf8magic }x);

} # SKIP

# With bad caching, this code used to go quadratic and take 10s of minutes.
# The 'test' in this case is simply that it doesn't hang.

{
    local ${^UTF8CACHE} = 1; # enable cache, disable debugging
    my $x = "\x{100}" x 1000000;
    while ($x =~ /./g) {
	my $p = pos($x);
    }
    pass("quadratic pos");
}

# Get-magic can reallocate the PV.  Check that the cache is reset in
# such cases.

# Regexp vars
"\x{100}" =~ /(.+)/;
() = substr $1, 0, 1;
"a\x{100}" =~ /(.+)/;
is ord substr($1, 1, 1), 0x100, 'get-magic resets utf8cache on match vars';

# Substr lvalues
my $x = "a\x{100}";
my $l = \substr $x, 0;
() = substr $$l, 1, 1;
substr $x, 0, 1, = "\x{100}";
is ord substr($$l, 1, 1), 0x100, 'get-magic resets utf8cache on LVALUEs';

# defelem magic
my %h;
sub {
  $_[0] = "a\x{100}";
  () = ord substr $_[0], 1, 1;
  $h{k} = "\x{100}"x2;
  is ord substr($_[0], 1, 1), 0x100,
    'get-magic resets uf8cache on defelems';
}->($h{k});


# Overloading can also reallocate the PV.

package UTF8Toggle {
    use overload '""' => 'stringify', fallback => 1;

    sub new {
	my $class = shift;
	my $value = shift;
	my $state = shift||0;
	return bless [$value, $state], $class;
    }

    sub stringify {
	my $self = shift;
	$self->[1] = ! $self->[1];
	if ($self->[1]) {
	    utf8::downgrade($self->[0]);
	} else {
	    utf8::upgrade($self->[0]);
	}
	$self->[0];
    }
}
my $u = UTF8Toggle->new(" \x{c2}7 ");

pos $u = 2;
is pos $u, 2, 'pos on overloaded utf8 toggler';
() = "$u"; # flip flag
pos $u = 2;
is pos $u, 2, 'pos on overloaded utf8 toggler (again)';

() = ord ${\substr $u, 1};
is ord ${\substr($u, 1)}, 0xc2,
    'utf8 cache + overloading does not confuse substr lvalues';
() = "$u"; # flip flag
() = ord substr $u, 1;
is ord substr($u, 1), 0xc2,
    'utf8 cache + overloading does not confuse substr lvalues (again)';

$u = UTF8Toggle->new(" \x{c2}7 ");
() = ord ${\substr $u, 2};
{ no warnings; ${\substr($u, 2, 1)} = 0; }
is $u, " \x{c2}0 ",
    'utf8 cache + overloading does not confuse substr lvalue assignment';
$u = UTF8Toggle->new(" \x{c2}7 ");
() = "$u"; # flip flag
() = ord ${\substr $u, 2};
{ no warnings; ${\substr($u, 2, 1)} = 0; }
is $u, " \x{c2}0 ",
    'utf8 cache + overload does not confuse substr lv assignment (again)';


# Typeglobs and references should not get a cache
use utf8;

#substr
my $globref = \*αabcdefg_::_;
() = substr($$globref, 2, 3);
*_abcdefgα:: = \%αabcdefg_::;
undef %αabcdefg_::;
{ no strict; () = *{"_abcdefgα::_"} }
is substr($$globref, 2, 3), "abc", 'no utf8 pos cache on globs';

my $ref = bless [], "αabcd_";
() = substr($ref, 1, 3);
bless $ref, "_abcdα";
is substr($ref, 1, 3), "abc", 'no utf8 pos cache on references';

#length
$globref = \*αabcdefg_::_;
() = "$$globref";  # turn utf8 flag on
() = length($$globref);
*_abcdefgα:: = \%αabcdefg_::;
undef %αabcdefg_::;
{ no strict; () = *{"_abcdefgα::_"} }
is length($$globref), length("$$globref"), 'no utf8 length cache on globs';

$ref = bless [], "αabcd_";
() = "$ref"; # turn utf8 flag on
() = length $ref;
bless $ref, "α";
is length $ref, length "$ref", 'no utf8 length cache on references';