summaryrefslogtreecommitdiff
path: root/t/uni/lex_utf8.t
diff options
context:
space:
mode:
authorkarl williamson <public@khwilliamson.com>2008-12-08 21:59:05 -0700
committerRafael Garcia-Suarez <rgarciasuarez@gmail.com>2009-01-15 16:59:41 +0100
commit77a135fea310715f77af2560dd03830df874b5fa (patch)
tree4a036303d45e2123a8c5b3552a472fb00d3c5f0a /t/uni/lex_utf8.t
parente23621c75d209a8ee0c88579b09beccd0c4a5610 (diff)
downloadperl-77a135fea310715f77af2560dd03830df874b5fa.tar.gz
[perl #59908] \x, \0, and \N{} not ok in double-quotish when followed by > \x100
Attached is a patch for this problem. The root cause was that S_scan_const() was not recoding to utf8 under some circumstances when it should be. I also changed it so that in all places, the flag that indicates the output is in utf8 is changed from false to true if and only if the destination is recoded to utf8. One place was skipping this, and then setting it unconditionally later on. In one place in the routine, the routine had code to do the recoding itself. In the other places, it called sv_utf8_upgrade(). I changed it to call the subroutine in all cases. I fixed a bug that would appear only on EBCDIC machines where constants of the form \N{U+....} would have been interpreted as EBCDIC. And in inspecting the code, I realized there were problems with growing the scalar value to fit the input. I cleaned those up. I also added a number of comments to document things I found out, and changed some existing ones to be more accurate. Since no one responded to my request for where to put the test cases, and I couldn't figure out a good place to put them, I added a new test file, t/uni/lex_utf8.t.
Diffstat (limited to 't/uni/lex_utf8.t')
-rw-r--r--t/uni/lex_utf8.t44
1 files changed, 44 insertions, 0 deletions
diff --git a/t/uni/lex_utf8.t b/t/uni/lex_utf8.t
new file mode 100644
index 0000000000..c7d1778bef
--- /dev/null
+++ b/t/uni/lex_utf8.t
@@ -0,0 +1,44 @@
+#
+# This script is written intentionally in UTF-8
+
+BEGIN {
+ if (ord("A") == 193) {
+ print "1..0 # Skip: EBCDIC\n";
+ exit 0;
+ }
+ $| = 1;
+}
+
+use strict;
+
+use Test::More tests => 10;
+use charnames ':full';
+
+use utf8;
+
+my $A_with_ogonek = "Ą";
+my $micro_sign = "µ";
+my $hex_first = "a\x{A2}Ą";
+my $hex_last = "aĄ\x{A2}";
+my $name_first = "b\N{MICRO SIGN}Ɓ";
+my $name_last = "bƁ\N{MICRO SIGN}";
+my $uname_first = "b\N{U+00B5}Ɓ";
+my $uname_last = "bƁ\N{U+00B5}";
+my $octal_first = "c\377Ć";
+my $octal_last = "cĆ\377";
+
+do {
+ use bytes;
+ is((join "", unpack("C*", $A_with_ogonek)), "196" . "132", 'single char above 0x100');
+ is((join "", unpack("C*", $micro_sign)), "194" . "181", 'single char in 0x80 .. 0xFF');
+ is((join "", unpack("C*", $hex_first)), "97" . "194" . "162" . "196" . "132", 'a . \x{A2} . char above 0x100');
+ is((join "", unpack("C*", $hex_last)), "97" . "196" . "132" . "194" . "162", 'a . char above 0x100 . \x{A2}');
+ is((join "", unpack("C*", $name_first)), "98" . "194" . "181" . "198" . "129", 'b . \N{MICRO SIGN} . char above 0x100');
+ is((join "", unpack("C*", $name_last)), "98" . "198" . "129" . "194" . "181", 'b . char above 0x100 . \N{MICRO SIGN}');
+ is((join "", unpack("C*", $uname_first)), "98" . "194" . "181" . "198" . "129", 'b . \N{U+00B5} . char above 0x100');
+ is((join "", unpack("C*", $uname_last)), "98" . "198" . "129" . "194" . "181", 'b . char above 0x100 . \N{U+00B5}');
+ is((join "", unpack("C*", $octal_first)), "99" . "195" . "191" . "196" . "134", 'c . \377 . char above 0x100');
+ is((join "", unpack("C*", $octal_last)), "99" . "196" . "134" . "195" . "191", 'c . char above 0x100 . \377');
+}
+__END__
+