From 8451602e68fc7c1db0ce6513366724e57dd945df Mon Sep 17 00:00:00 2001
From: Karl Williamson
Date: Mon, 25 Mar 2013 19:56:50 -0600
Subject: t/op/lc.t: Generalize for non-ASCII platforms

This had code that attempted this, but it was wrong. The conversion to
EBCDIC must be done before the \U, or similar.
---
 t/op/lc.t | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

(limited to 't/op')

diff --git a/t/op/lc.t b/t/op/lc.t
index 716cb2a35b..ffea0ae784 100644
--- a/t/op/lc.t
+++ b/t/op/lc.t
@@ -104,17 +104,17 @@ is(uc($b) , "\x{100}\x{100}AA", 'uc');
 is(lc($b) , "\x{101}\x{101}aa", 'lc');
 is(fc($b) , "\x{101}\x{101}aa", 'fc');
 
+my $sharp_s = uni_to_native("\x{DF}");
 # \x{DF} is LATIN SMALL LETTER SHARP S, its uppercase is SS or \x{53}\x{53};
 # \x{149} is LATIN SMALL LETTER N PRECEDED BY APOSTROPHE, its uppercase is
 # \x{2BC}\x{E4} or MODIFIER LETTER APOSTROPHE and N.
 
-is(uni_to_native("\U\x{DF}aB\x{149}cD"), uni_to_native("SSAB\x{2BC}NCD"),
-   "multicharacter uppercase");
+is("\U${sharp_s}aB\x{149}cD", "SSAB\x{2BC}NCD", "multicharacter uppercase");
 
 # The \x{DF} is its own lowercase, ditto for \x{149}.
 # There are no single character -> multiple characters lowercase mappings.
 
-is(uni_to_native("\L\x{DF}aB\x{149}cD"), uni_to_native("\x{DF}ab\x{149}cd"),
+is("\L${sharp_s}aB\x{149}cD", "${sharp_s}ab\x{149}cd",
    "multicharacter lowercase");
 
 # \x{DF} is LATIN SMALL LETTER SHARP S, its foldcase is ss or \x{73}\x{73};
@@ -122,8 +122,7 @@ is(uni_to_native("\L\x{DF}aB\x{149}cD"), uni_to_native("\x{DF}ab\x{149}cd"),
 # \x{2BC}\x{6E} or MODIFIER LETTER APOSTROPHE and n.
 # Note that is this further tested in t/uni/fold.t
 
-is(uni_to_native("\F\x{DF}aB\x{149}cD"), uni_to_native("ssab\x{2BC}ncd"),
-   "multicharacter foldcase");
+is("\F${sharp_s}aB\x{149}cD", "ssab\x{2BC}ncd", "multicharacter foldcase");
 
 # titlecase is used for \u / ucfirst.
 
@@ -283,15 +282,15 @@ for ("$temp") {
 }
 
 # new in Unicode 5.1.0
-is(lc("\x{1E9E}"), "\x{df}", "lc(LATIN CAPITAL LETTER SHARP S)");
+is(lc("\x{1E9E}"), uni_to_native("\x{df}"), "lc(LATIN CAPITAL LETTER SHARP S)");
 
 {
     use feature 'unicode_strings';
     use bytes;
-    is(lc("\xc0"), "\xc0", "lc of above-ASCII Latin1 is itself under use bytes");
-    is(lcfirst("\xc0"), "\xc0", "lcfirst of above-ASCII Latin1 is itself under use bytes");
-    is(uc("\xe0"), "\xe0", "uc of above-ASCII Latin1 is itself under use bytes");
-    is(ucfirst("\xe0"), "\xe0", "ucfirst of above-ASCII Latin1 is itself under use bytes");
+    is(lc(uni_to_native("\xc0")), uni_to_native("\xc0"), "lc of above-ASCII Latin1 is itself under use bytes");
+    is(lcfirst(uni_to_native("\xc0")), uni_to_native("\xc0"), "lcfirst of above-ASCII Latin1 is itself under use bytes");
+    is(uc(uni_to_native("\xe0")), uni_to_native("\xe0"), "uc of above-ASCII Latin1 is itself under use bytes");
+    is(ucfirst(uni_to_native("\xe0")), uni_to_native("\xe0"), "ucfirst of above-ASCII Latin1 is itself under use bytes");
 }
 
 # Brought up in ticket #117855: Constant folding applied to uc() should use
-- 
cgit v1.2.1