t/uni/parser.t


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264

#!./perl

# Checks if the parser behaves correctly in edge cases
# (including weird syntax errors)

BEGIN {
    chdir 't' if -d 't';
    require './test.pl';
    skip_all_without_unicode_tables();
}

plan (tests => 55);

use utf8;
use open qw( :utf8 :std );

is *tèst, "*main::tèst", "sanity check.";
ok $::{"tèst"}, "gets the right glob in the stash.";

my $glob_by_sub = sub { *ｍａｉｎ::method }->();

is *ｍａｉｎ::method, "*ｍａｉｎ::method", "glob stringy works";
is "" . *ｍａｉｎ::method, "*ｍａｉｎ::method", "glob stringify-through-concat works";
is $glob_by_sub, "*ｍａｉｎ::method", "glob stringy works";
is "" . $glob_by_sub, "*ｍａｉｎ::method", "";

sub gimme_glob {
    no strict 'refs';
    is *{$_[0]}, "*main::$_[0]";
    *{$_[0]};
}

is "" . gimme_glob("下郎"), "*main::下郎";
$a = *下郎;
is "" . $a, "*main::下郎";

*{gimme_glob("下郎")} = sub {};

{
    ok defined *{"下郎"}{CODE};
    ok !defined *{"\344\270\213\351\203\216"}{CODE};
}

$Lèon = 1;
is ${*Lèon{SCALAR}}, 1, "scalar define in the right glob,";
ok !${*{"L\303\250on"}{SCALAR}}, "..and nothing in the wrong one.";

my $a = "foo" . chr(190);
my $b = $a    . chr(256);
chop $b; # $b is $a with utf8 on

is $a, $b, '$a equals $b';

*$b = sub { 5 };

is eval { main->$a }, 5, q!$a can call $b's sub!;
ok !$@, "..and there's no error.";

my $c = $b;
utf8::encode($c);
ok $b ne $c, '$b unequal $c';
eval { main->$c };
ok $@, q!$c can't call $b's sub.!;

# Now define another sub under the downgraded name:
*$a = sub { 6 };
# Call it:
is eval { main->$a }, 6, "Adding a new sub to *a and calling it works,";
ok !$@, "..without errors.";
eval { main->$c };
ok $@, "but it's still unreachable through *c";

*$b = \10;
is ${*$a{SCALAR}}, 10;
is ${*$b{SCALAR}}, 10;
is ${*$c{SCALAR}}, undef;

opendir FÒÒ, ".";
closedir FÒÒ;
::ok($::{"FÒÒ"}, "Bareword generates the right glob.");
::ok(!$::{"F\303\222\303\222"});

sub участники { 1 }

ok $::{"участники"}, "non-const sub declarations generate the right glob";
is $::{"участники"}->(), 1;

sub 原 () { 1 }

is grep({ $_ eq "\x{539f}"     } keys %::), 1, "Constant subs generate the right glob.";
is grep({ $_ eq "\345\216\237" } keys %::), 0;

#These should probably go elsewhere.
eval q{ sub wròng1 (_$); wròng1(1,2) };
like( $@, qr/Malformed prototype for main::wròng1/, 'Malformed prototype croak is clean.' );

eval q{ sub ча::ики ($__); ча::ики(1,2) };
like( $@, qr/Malformed prototype for ча::ики/ );

our $問 = 10;
is $問, 10, "our works";
is $main::問, 10, "...as does getting the same variable through the fully qualified name";
is ${"main::\345\225\217"}, undef, "..and using the encoded form doesn't";

{
    use charnames qw( :full );

    eval qq! my \$\x{30cb} \N{DROMEDARY CAMEL} !;
    $@ =~ s/eval \d+/eval 11/;
    is $@, 'Unrecognized character \x{1f42a}; marked by <-- HERE after  my $ニ <-- HERE near column 8 at (eval 11) line 1.
', "'Unrecognized character' croak is UTF-8 clean";

    eval "q\0foobar\0 \x{FFFF}+1";
    $@ =~ s/eval \d+/eval 11/;
    is(
        $@,
       "Unrecognized character \\x{ffff}; marked by <-- HERE after q\0foobar\0 <-- HERE near column 11 at (eval 11) line 1.\n",
       "...and nul-clean"
    );

    {
        use re 'eval';
        my $f = qq{(?{\$ネ+ 1; \x{1F42A} })};
        eval { "a" =~ /^a$f/ };
        my $e = $@;
        $e =~ s/eval \d+/eval 11/;
        is(
            $e,
            "Unrecognized character \\x{1f42a}; marked by <-- HERE after (?{\$ネ+ 1; <-- HERE near column 13 at (eval 11) line 1.\n",
            "Messages from a re-eval are UTF-8 clean"
        );

        $f = qq{(?{q\0foobar\0 \x{FFFF}+1 })};
        eval { "a" =~ /^a$f/ };
        my $e = $@;
        $e =~ s/eval \d+/eval 11/;
        is(
            $e,
            "Unrecognized character \\x{ffff}; marked by <-- HERE after q\x{0}foobar\x{0} <-- HERE near column 16 at (eval 11) line 1.\n",
           "...and nul-clean"
        );
    }
    
    {
        eval qq{\$ネ+ 1; \x{1F42A}};
        $@ =~ s/eval \d+/eval 11/;
        is(
            $@,
            "Unrecognized character \\x{1f42a}; marked by <-- HERE after \$ネ+ 1; <-- HERE near column 8 at (eval 11) line 1.\n",
            "Unrecognized character error doesn't cut off in the middle of characters"
        )
    }

}

{
    use feature 'state';
    for ( qw( my state our ) ) {
        local $@;
        eval "$_ Ｆｏｏ $x = 1;";
        like $@, qr/No such class Ｆｏｏ/u, "'No such class' warning for $_ is UTF-8 clean";
    }
}

{
    local $@;
    eval "our \$main::\x{30cb};";
    like $@, qr!No package name allowed for variable \$main::\x{30cb} in "our"!, "'No such package name allowed for variable' is UTF-8 clean";
}

{
    use feature 'state';
    local $@;
    for ( qw( my state ) ) {
        eval "$_ \$::\x{30cb};";
        like $@, qr!"$_" variable \$::\x{30cb} can't be in a package!, qq!'"$_" variable %s can't be in a package' is UTF-8 clean!;
    }
}

{
    local $@;
    eval qq!print \x{30cb}, "comma""!;
    like $@, qr/No comma allowed after filehandle/, "No comma allowed after filehandle triggers correctly for UTF-8 filehandles.";
}

# tests for "Bad name"
eval q{ Ｆｏｏ::$bar };
like( $@, qr/Bad name after Ｆｏｏ::/, 'Bad name after Ｆｏｏ::' );
eval q{ Ｆｏｏ''bar };
like( $@, qr/Bad name after Ｆｏｏ'/, 'Bad name after Ｆｏｏ\'' );

{
    no warnings 'utf8';
    local $SIG{__WARN__} = sub { }; # The eval will also output a warning,
                                    # which we ignore
    my $malformed_to_be = ($::IS_EBCDIC)   # Overlong sequence
                           ? "\x{74}\x{41}"
                           : "\x{c0}\x{a0}";
    CORE::evalbytes "use charnames ':full'; use utf8; my \$x = \"\\N{abc$malformed_to_be}\"";
    like( $@, qr/Malformed UTF-8 character \(fatal\) at /, 'Malformed UTF-8 input to \N{}');
}

# RT# 124216: Perl_sv_clear: Assertion
# If a parsing error occurred during a forced token within an interpolated
# context, the stack unwinding failed to restore PL_lex_defer and so after
# error recovery the state restored after the forced token was processed
# was the wrong one, resulting in the lexer thinking we're still inside a
# quoted string and things getting freed multiple times.
#
# The \x{3030} char isn't a legal var name, and this triggers the error.
#
# NB: this only failed if the closing quote of the interpolated string is
# the last char of the file (i.e. no trailing \n).

{
    my $bad = "\x{3030}";
    # Write out the individual utf8 bytes making up \x{3030}. This
    # avoids 'Wide char in print' warnings from test.pl. (We may still
    # get that warning when compiling the prog itself, since the
    # error it prints to stderr contains a wide char.)
    utf8::encode($bad);

    fresh_perl_like(qq{use utf8; "\$$bad"},
        qr/
            \A
            ( \QWide character in print at - line 1.\E\n )?
            \Qsyntax error at - line 1, near \E"\$.*"\n
            \QExecution of - aborted due to compilation errors.\E\z
        /xm,

        {stderr => 1}, "RT# 124216");
}

SKIP: {   # [perl #128738]
    use Config;
    if ($Config{uvsize} < 8) {
        skip("test is only valid on 64-bit ints", 2);
    }
    else {
        no warnings 'deprecated';
        my $a;
        eval "\$a = q \x{ffffffff}Hello, \\\\whirled!\x{ffffffff}";
        is $@, "",
               "No errors in eval'ing a string with large code point delimiter";
        is $a, 'Hello, \whirled!',
               "Got expected result in eval'ing a string with a large code point"
            . " delimiter";
    }
}


# New tests go here ^^^^^

# Keep this test last, as it will mess up line number reporting for any
# subsequent tests.

<<END;
${
#line 57
qq ϟϟ }
END
is __LINE__, 59, '#line directive and qq with uni delims inside heredoc';

# Put new tests above the line number tests.