From d2aaa77e10d841339f3651b4c6cfc980b9f58b7a Mon Sep 17 00:00:00 2001 From: Jarkko Hietaniemi Date: Sun, 2 Mar 2003 16:22:35 +0000 Subject: Integrate: [ 18784] An already fixed bug from perl-unicode. [ 18785] Subject: Re: [PATCH] Re: format, PerlIO and utf8 From: Inaba Hiroto Date: Mon, 10 Feb 2003 00:04:51 +0900 Message-ID: <3E466E12.E5349D84@st.rim.or.jp> [ 18794] Further Unicode formats patching from Inaba Hiroto. [ 18797] Patch for [perl #9402], known also as "glibc _moddi3 bug with negative quads", or also as RedHat bug #65612. [ 18798] Go with "right = -right" for greater portability, some platforms might require llabs() to get abs() of long longs. [ 18799] The -CI cannot be easily tested under UTF-8 locale with the t/test.pl (the 'stdin' data gets doubly UTF-8-ied). Also other small tweaks in the -C neighbourhood. [ 18801] Update all copyrights to 2003, from Jarkko [ 18802] Narrow down the -C test skippage. [ 18803] Subject: RE: [perl #20613] Perl_magic_setsig/clearsig problems (patch included) From: "Anders Johnson" Date: Mon, 10 Feb 2003 14:09:46 -0800 Message-ID: <000e01c2d151$2228ca90$9800a8c0@wis.com> p4raw-link: @18803 on //depot/perl: 2d4fcd5e8be8d83efa948a259c49b56fc6c27ee5 p4raw-link: @18802 on //depot/perl: 81ff9b36babbaa3576696f80427c25a4b7bfa9dd p4raw-link: @18801 on //depot/perl: 4c79ee7a1e7564ef83d0ac25d6677fdebb3ec7aa p4raw-link: @18799 on //depot/perl: 06e66572fd5541df0d1349cac2b404c3b9e446ee p4raw-link: @18798 on //depot/perl: 97edce3fcdc8a02187aed95f7b6f4a46eb120a6e p4raw-link: @18797 on //depot/perl: 224ec32361cf57b93c61b661abceec9635d9d527 p4raw-link: @18794 on //depot/perl: 78da4d13e9515c4d820a5423a160a2c81889d633 p4raw-link: @18785 on //depot/perl: 1bd51a4ce2ce8ac9d8d3503b73fce0e0fc8f8426 p4raw-link: @18784 on //depot/perl: 351208f1b69ff774788ddf0fb62571002594640e p4raw-id: //depot/maint-5.8/perl@18804 p4raw-branched: from //depot/perl@18791 'branch in' t/uni/write.t p4raw-integrated: from //depot/perl@18791 'copy in' EXTERN.h INTERN.h 
form.h run.c x2p/EXTERN.h x2p/INTERN.h x2p/a2p.c x2p/a2p.y x2p/hash.h x2p/proto.h x2p/str.h x2p/util.c x2p/util.h (@14391..) cc_runtime.h fakethr.h unixish.h (@14400..) globals.c (@14802..) README (@14841..) regcomp.h (@15356..) utf8.h (@15762..) x2p/hash.c x2p/walk.c (@16076..) gv.h (@16325..) perlsdio.h (@16368..) hv.h (@16656..) utfebcdic.h (@16888..) x2p/a2p.h (@17033..) perlsfio.h (@17242..) av.h (@17727..) x2p/str.c (@17759..) numeric.c (@17874..) handy.h (@17920..) mg.h (@17947..) perlapi.c (@18113..) taint.c (@18261..) pad.h (@18311..) scope.h (@18312..) util.h (@18321..) t/op/magic.t (@18371..) reentr.c reentr.h reentr.pl (@18498..) fakesdio.h nostdio.h (@18524..) perlio.h (@18556..) XSUB.h (@18636..) locale.c (@18731..) t/op/pat.t (@18782..) t/run/switchC.t (@18799..) 'edit in' pp_ctl.c (@18794..) pp.c (@18798..) perl.c perl.h util.c (@18799..) mg.c (@18801..) 'merge in' keywords.h (@17682..) doop.c (@17984..) deb.c dosish.h miniperlmain.c pp.h (@18030..) keywords.pl (@18160..) thread.h (@18229..) perlvars.h (@18239..) perly.y (@18336..) scope.c (@18410..) opnames.h (@18413..) pp_sort.c (@18456..) hv.c (@18463..) av.c (@18554..) op.h (@18589..) pad.c (@18601..) cop.h (@18643..) doio.c (@18676..) universal.c (@18697..) gv.c (@18715..) regcomp.c regexp.h thrdvar.h (@18726..) cv.h dump.c xsutils.c (@18727..) embed.pl (@18734..) patchlevel.h (@18749..) opcode.h opcode.pl pp_pack.c (@18751..) utf8.c (@18760..) sv.h (@18764..) embedvar.h pp_sys.c (@18766..) pp_hot.c (@18774..) op.c (@18777..) MANIFEST (@18779..) perlapi.h (@18781..) proto.h regexec.c sv.c (@18782..) embed.h (@18795..) global.sym (@18796..) p4raw-integrated: from //depot/perl@18785 'edit in' toke.c (@18753..) 
--- EXTERN.h | 2 +- INTERN.h | 2 +- MANIFEST | 1 + README | 2 +- XSUB.h | 2 +- av.c | 2 +- av.h | 2 +- cc_runtime.h | 2 +- cop.h | 2 +- cv.h | 2 +- deb.c | 2 +- doio.c | 2 +- doop.c | 2 +- dosish.h | 2 +- dump.c | 2 +- embed.h | 2 +- embed.pl | 2 +- embedvar.h | 2 +- fakesdio.h | 2 +- fakethr.h | 2 +- form.h | 2 +- global.sym | 2 +- globals.c | 2 +- gv.c | 2 +- gv.h | 2 +- handy.h | 2 +- hv.c | 2 +- hv.h | 2 +- keywords.h | 2 +- keywords.pl | 2 +- locale.c | 2 +- mg.c | 127 ++++++++++++++++++++++++++++++++++++++++++++++---------- mg.h | 2 +- miniperlmain.c | 2 +- nostdio.h | 2 +- numeric.c | 2 +- op.c | 2 +- op.h | 2 +- opcode.h | 2 +- opcode.pl | 4 +- opnames.h | 2 +- pad.c | 2 +- pad.h | 2 +- patchlevel.h | 2 +- perl.c | 17 ++++---- perl.h | 17 +++++--- perlapi.c | 2 +- perlapi.h | 2 +- perlio.h | 2 +- perlsdio.h | 2 +- perlsfio.h | 2 +- perlvars.h | 2 +- perly.y | 2 +- pp.c | 81 ++++++++++++++++++++++++++++++++---- pp.h | 2 +- pp_ctl.c | 79 +++++++++++++++++++++++++++++------ pp_hot.c | 2 +- pp_pack.c | 2 +- pp_sort.c | 2 +- pp_sys.c | 2 +- proto.h | 2 +- reentr.c | 2 +- reentr.h | 2 +- reentr.pl | 4 +- regcomp.c | 2 +- regcomp.h | 2 +- regexec.c | 2 +- regexp.h | 2 +- run.c | 2 +- scope.c | 2 +- scope.h | 2 +- sv.c | 2 +- sv.h | 2 +- t/op/magic.t | 34 +++++++++++++-- t/op/pat.t | 20 ++++++++- t/run/switchC.t | 16 ++++--- t/uni/write.t | 96 ++++++++++++++++++++++++++++++++++++++++++ taint.c | 2 +- thrdvar.h | 2 +- thread.h | 2 +- toke.c | 8 +++- universal.c | 2 +- unixish.h | 2 +- utf8.c | 2 +- utf8.h | 2 +- utfebcdic.h | 2 +- util.c | 4 +- util.h | 2 +- x2p/EXTERN.h | 2 +- x2p/INTERN.h | 2 +- x2p/a2p.c | 2 +- x2p/a2p.h | 2 +- x2p/a2p.y | 2 +- x2p/hash.c | 2 +- x2p/hash.h | 2 +- x2p/proto.h | 2 +- x2p/str.c | 2 +- x2p/str.h | 2 +- x2p/util.c | 2 +- x2p/util.h | 2 +- x2p/walk.c | 2 +- xsutils.c | 2 +- 102 files changed, 524 insertions(+), 160 deletions(-) create mode 100644 t/uni/write.t diff --git a/EXTERN.h b/EXTERN.h index ed541bb033..ced967310c 100644 --- 
a/EXTERN.h +++ b/EXTERN.h @@ -1,6 +1,6 @@ /* EXTERN.h * - * Copyright (c) 1991-2002, Larry Wall + * Copyright (c) 1991-2003, Larry Wall * * You may distribute under the terms of either the GNU General Public * License or the Artistic License, as specified in the README file. diff --git a/INTERN.h b/INTERN.h index c9bab9a384..e62003ab5e 100644 --- a/INTERN.h +++ b/INTERN.h @@ -1,6 +1,6 @@ /* INTERN.h * - * Copyright (c) 1991-2002, Larry Wall + * Copyright (c) 1991-2003, Larry Wall * * You may distribute under the terms of either the GNU General Public * License or the Artistic License, as specified in the README file. diff --git a/MANIFEST b/MANIFEST index 3a3a05bc8b..daffa67d8b 100644 --- a/MANIFEST +++ b/MANIFEST @@ -2716,6 +2716,7 @@ t/uni/tr_eucjp.t See if Unicode tr/// works t/uni/tr_sjis.t See if Unicode tr/// works t/uni/tr_utf8.t See if Unicode tr/// works t/uni/upper.t See if Unicode casing works +t/uni/write.t See if Unicode formats work t/win32/longpath.t Test if Win32::GetLongPathName() works t/win32/system.t See if system works in Win* t/win32/system_tests Test runner for system.t diff --git a/README b/README index 27baec474d..a5dd97463d 100644 --- a/README +++ b/README @@ -1,7 +1,7 @@ Perl Kit, Version 5 - Copyright 1989-2002, Larry Wall + Copyright 1989-2003, Larry Wall All rights reserved. This program is free software; you can redistribute it and/or modify diff --git a/XSUB.h b/XSUB.h index af19c81c76..9117f9e728 100644 --- a/XSUB.h +++ b/XSUB.h @@ -1,6 +1,6 @@ /* XSUB.h * - * Copyright (c) 1997-2002, Larry Wall + * Copyright (c) 1997-2003, Larry Wall * * You may distribute under the terms of either the GNU General Public * License or the Artistic License, as specified in the README file. 
diff --git a/av.c b/av.c index 647c12351b..78376dc4e8 100644 --- a/av.c +++ b/av.c @@ -1,6 +1,6 @@ /* av.c * - * Copyright (c) 1991-2002, Larry Wall + * Copyright (c) 1991-2003, Larry Wall * * You may distribute under the terms of either the GNU General Public * License or the Artistic License, as specified in the README file. diff --git a/av.h b/av.h index beed09d26d..94871bfbce 100644 --- a/av.h +++ b/av.h @@ -1,6 +1,6 @@ /* av.h * - * Copyright (c) 1991-2002, Larry Wall + * Copyright (c) 1991-2003, Larry Wall * * You may distribute under the terms of either the GNU General Public * License or the Artistic License, as specified in the README file. diff --git a/cc_runtime.h b/cc_runtime.h index b1fc5b52b7..3efed14dde 100644 --- a/cc_runtime.h +++ b/cc_runtime.h @@ -1,6 +1,6 @@ /* cc_runtime.h * - * Copyright (c) 1998-2002, Larry Wall + * Copyright (c) 1998-2003, Larry Wall * * You may distribute under the terms of either the GNU General Public * License or the Artistic License, as specified in the README file. diff --git a/cop.h b/cop.h index d28cd44d89..70a26f1634 100644 --- a/cop.h +++ b/cop.h @@ -1,6 +1,6 @@ /* cop.h * - * Copyright (c) 1991-2002, Larry Wall + * Copyright (c) 1991-2003, Larry Wall * * You may distribute under the terms of either the GNU General Public * License or the Artistic License, as specified in the README file. diff --git a/cv.h b/cv.h index 9877f6d350..1459003a4a 100644 --- a/cv.h +++ b/cv.h @@ -1,6 +1,6 @@ /* cv.h * - * Copyright (c) 1991-2002, Larry Wall + * Copyright (c) 1991-2003, Larry Wall * * You may distribute under the terms of either the GNU General Public * License or the Artistic License, as specified in the README file. 
diff --git a/deb.c b/deb.c index 800427a5e6..ddb3a37950 100644 --- a/deb.c +++ b/deb.c @@ -1,6 +1,6 @@ /* deb.c * - * Copyright (c) 1991-2002, Larry Wall + * Copyright (c) 1991-2003, Larry Wall * * You may distribute under the terms of either the GNU General Public * License or the Artistic License, as specified in the README file. diff --git a/doio.c b/doio.c index 74b4cf0cbf..f8681e4103 100644 --- a/doio.c +++ b/doio.c @@ -1,6 +1,6 @@ /* doio.c * - * Copyright (c) 1991-2002, Larry Wall + * Copyright (c) 1991-2003, Larry Wall * * You may distribute under the terms of either the GNU General Public * License or the Artistic License, as specified in the README file. diff --git a/doop.c b/doop.c index 81936c9435..5bd3f95279 100644 --- a/doop.c +++ b/doop.c @@ -1,6 +1,6 @@ /* doop.c * - * Copyright (c) 1991-2002, Larry Wall + * Copyright (c) 1991-2003, Larry Wall * * You may distribute under the terms of either the GNU General Public * License or the Artistic License, as specified in the README file. diff --git a/dosish.h b/dosish.h index 045fef5b50..34948630c6 100644 --- a/dosish.h +++ b/dosish.h @@ -1,6 +1,6 @@ /* dosish.h * - * Copyright (c) 1997-2002, Larry Wall + * Copyright (c) 1997-2003, Larry Wall * * You may distribute under the terms of either the GNU General Public * License or the Artistic License, as specified in the README file. diff --git a/dump.c b/dump.c index 3a3c0c6866..c47ad1d202 100644 --- a/dump.c +++ b/dump.c @@ -1,6 +1,6 @@ /* dump.c * - * Copyright (c) 1991-2002, Larry Wall + * Copyright (c) 1991-2003, Larry Wall * * You may distribute under the terms of either the GNU General Public * License or the Artistic License, as specified in the README file. 
diff --git a/embed.h b/embed.h index 3addcf77d9..07957532ad 100644 --- a/embed.h +++ b/embed.h @@ -1,7 +1,7 @@ /* * embed.h * - * Copyright (c) 1997-2002, Larry Wall + * Copyright (c) 1997-2003, Larry Wall * * You may distribute under the terms of either the GNU General Public * License or the Artistic License, as specified in the README file. diff --git a/embed.pl b/embed.pl index 78b896cda6..762b387b0a 100755 --- a/embed.pl +++ b/embed.pl @@ -22,7 +22,7 @@ sub do_not_edit ($) $file - Copyright (c) 1997-2002, Larry Wall + Copyright (c) 1997-2003, Larry Wall You may distribute under the terms of either the GNU General Public License or the Artistic License, as specified in the README file. diff --git a/embedvar.h b/embedvar.h index a3d5353bc3..02ccff14b2 100644 --- a/embedvar.h +++ b/embedvar.h @@ -1,7 +1,7 @@ /* * embedvar.h * - * Copyright (c) 1997-2002, Larry Wall + * Copyright (c) 1997-2003, Larry Wall * * You may distribute under the terms of either the GNU General Public * License or the Artistic License, as specified in the README file. diff --git a/fakesdio.h b/fakesdio.h index 8be514f180..edec0292fe 100644 --- a/fakesdio.h +++ b/fakesdio.h @@ -1,6 +1,6 @@ /* fakestdio.h * - * Copyright (c) 2000-2002, Larry Wall + * Copyright (c) 2000-2003, Larry Wall * * You may distribute under the terms of either the GNU General Public * License or the Artistic License, as specified in the README file. diff --git a/fakethr.h b/fakethr.h index 5bbe8ac906..02232d3315 100644 --- a/fakethr.h +++ b/fakethr.h @@ -1,6 +1,6 @@ /* fakethr.h * - * Copyright (c) 1997-2002, Larry Wall + * Copyright (c) 1997-2003, Larry Wall * * You may distribute under the terms of either the GNU General Public * License or the Artistic License, as specified in the README file. 
diff --git a/form.h b/form.h index 2da17bb76a..50b5c8497f 100644 --- a/form.h +++ b/form.h @@ -1,6 +1,6 @@ /* form.h * - * Copyright (c) 1991-2002, Larry Wall + * Copyright (c) 1991-2003, Larry Wall * * You may distribute under the terms of either the GNU General Public * License or the Artistic License, as specified in the README file. diff --git a/global.sym b/global.sym index 37be8965a6..62048d50bd 100644 --- a/global.sym +++ b/global.sym @@ -1,7 +1,7 @@ # # global.sym # -# Copyright (c) 1997-2002, Larry Wall +# Copyright (c) 1997-2003, Larry Wall # # You may distribute under the terms of either the GNU General Public # License or the Artistic License, as specified in the README file. diff --git a/globals.c b/globals.c index b9d70cc004..b0c444d17d 100644 --- a/globals.c +++ b/globals.c @@ -1,6 +1,6 @@ /* globals.c * - * Copyright (c) 1997-2002, Larry Wall + * Copyright (c) 1997-2003, Larry Wall * * You may distribute under the terms of either the GNU General Public * License or the Artistic License, as specified in the README file. diff --git a/gv.c b/gv.c index 12825b43b0..7b31073ceb 100644 --- a/gv.c +++ b/gv.c @@ -1,6 +1,6 @@ /* gv.c * - * Copyright (c) 1991-2002, Larry Wall + * Copyright (c) 1991-2003, Larry Wall * * You may distribute under the terms of either the GNU General Public * License or the Artistic License, as specified in the README file. diff --git a/gv.h b/gv.h index 39b61feb81..cc1738fa9d 100644 --- a/gv.h +++ b/gv.h @@ -1,6 +1,6 @@ /* gv.h * - * Copyright (c) 1991-2002, Larry Wall + * Copyright (c) 1991-2003, Larry Wall * * You may distribute under the terms of either the GNU General Public * License or the Artistic License, as specified in the README file. 
diff --git a/handy.h b/handy.h index c16ba471ff..416dad8824 100644 --- a/handy.h +++ b/handy.h @@ -1,6 +1,6 @@ /* handy.h * - * Copyright (c) 1991-2002, Larry Wall + * Copyright (c) 1991-2003, Larry Wall * * You may distribute under the terms of either the GNU General Public * License or the Artistic License, as specified in the README file. diff --git a/hv.c b/hv.c index 2726be059d..864f329e93 100644 --- a/hv.c +++ b/hv.c @@ -1,6 +1,6 @@ /* hv.c * - * Copyright (c) 1991-2002, Larry Wall + * Copyright (c) 1991-2003, Larry Wall * * You may distribute under the terms of either the GNU General Public * License or the Artistic License, as specified in the README file. diff --git a/hv.h b/hv.h index 16b14828e2..50faedceac 100644 --- a/hv.h +++ b/hv.h @@ -1,6 +1,6 @@ /* hv.h * - * Copyright (c) 1991-2002, Larry Wall + * Copyright (c) 1991-2003, Larry Wall * * You may distribute under the terms of either the GNU General Public * License or the Artistic License, as specified in the README file. diff --git a/keywords.h b/keywords.h index 767f356da5..08c2ca03fd 100644 --- a/keywords.h +++ b/keywords.h @@ -1,7 +1,7 @@ /* * keywords.h * - * Copyright (c) 1997-2002, Larry Wall + * Copyright (c) 1997-2003, Larry Wall * * You may distribute under the terms of either the GNU General Public * License or the Artistic License, as specified in the README file. diff --git a/keywords.pl b/keywords.pl index 477174db23..5fd954fd8b 100755 --- a/keywords.pl +++ b/keywords.pl @@ -9,7 +9,7 @@ print <op_ppaddr = + PL_ppaddr[OP_I_MODULO] = + &Perl_pp_i_modulo_0; + /* .. but if we have glibc, we might have a buggy _moddi3 + * (at least glicb 2.2.5 is known to have this bug), in other + * words our integer modulus with negative quad as the second + * argument might be broken. Test for this and re-patch the + * opcode dispatch table if that is the case, remembering to + * also apply the workaround so that this first round works + * right, too. See [perl #9402] for more information. 
*/ +#if defined(__GLIBC__) && IVSIZE == 8 + { + IV l = 3; + IV r = -10; + /* Cannot do this check with inlined IV constants since + * that seems to work correctly even with the buggy glibc. */ + if (l % r == -3) { + /* Yikes, we have the bug. + * Patch in the workaround version. */ + PL_op->op_ppaddr = + PL_ppaddr[OP_I_MODULO] = + &Perl_pp_i_modulo_1; + /* Make certain we work right this time, too. */ + if (right < 0) + right = -right; + } + } +#endif + SETi( left % right ); + RETURN; + } } PP(pp_i_add) diff --git a/pp.h b/pp.h index 9a909c8157..1e393c2baf 100644 --- a/pp.h +++ b/pp.h @@ -1,6 +1,6 @@ /* pp.h * - * Copyright (c) 1991-2002, Larry Wall + * Copyright (c) 1991-2003, Larry Wall * * You may distribute under the terms of either the GNU General Public * License or the Artistic License, as specified in the README file. diff --git a/pp_ctl.c b/pp_ctl.c index 8ee14b5da6..714a97c172 100644 --- a/pp_ctl.c +++ b/pp_ctl.c @@ -1,6 +1,6 @@ /* pp_ctl.c * - * Copyright (c) 1991-2002, Larry Wall + * Copyright (c) 1991-2003, Larry Wall * * You may distribute under the terms of either the GNU General Public * License or the Artistic License, as specified in the README file. @@ -328,7 +328,9 @@ PP(pp_formline) bool gotsome = FALSE; STRLEN len; STRLEN fudge = SvCUR(tmpForm) * (IN_BYTES ? 
1 : 3) + 1; - bool item_is_utf = FALSE; + bool item_is_utf8 = FALSE; + bool targ_is_utf8 = FALSE; + SV * nsv = Nullsv; if (!SvMAGICAL(tmpForm) || !SvCOMPILED(tmpForm)) { if (SvREADONLY(tmpForm)) { @@ -339,8 +341,9 @@ PP(pp_formline) else doparseform(tmpForm); } - SvPV_force(PL_formtarget, len); + if (DO_UTF8(PL_formtarget)) + targ_is_utf8 = TRUE; t = SvGROW(PL_formtarget, len + fudge + 1); /* XXX SvCUR bad */ t += len; f = SvPV(tmpForm, len); @@ -387,6 +390,21 @@ PP(pp_formline) case FF_LITERAL: arg = *fpc++; + if (targ_is_utf8 && !SvUTF8(tmpForm)) { + SvCUR_set(PL_formtarget, t - SvPVX(PL_formtarget)); + *t = '\0'; + sv_catpvn_utf8_upgrade(PL_formtarget, f, arg, nsv); + t = SvEND(PL_formtarget); + break; + } + if (!targ_is_utf8 && DO_UTF8(tmpForm)) { + SvCUR_set(PL_formtarget, t - SvPVX(PL_formtarget)); + *t = '\0'; + sv_utf8_upgrade(PL_formtarget); + SvGROW(PL_formtarget, SvCUR(PL_formtarget) + fudge + 1); + t = SvEND(PL_formtarget); + targ_is_utf8 = TRUE; + } while (arg--) *t++ = *f++; break; @@ -431,13 +449,13 @@ PP(pp_formline) break; s++; } - item_is_utf = TRUE; + item_is_utf8 = TRUE; itemsize = s - item; sv_pos_b2u(sv, &itemsize); break; } } - item_is_utf = FALSE; + item_is_utf8 = FALSE; if (itemsize > fieldsize) itemsize = fieldsize; send = chophere = s + itemsize; @@ -492,11 +510,11 @@ PP(pp_formline) itemsize = chophere - item; sv_pos_b2u(sv, &itemsize); } - item_is_utf = TRUE; + item_is_utf8 = TRUE; break; } } - item_is_utf = FALSE; + item_is_utf8 = FALSE; if (itemsize <= fieldsize) { send = chophere = s + itemsize; while (s < send) { @@ -552,7 +570,15 @@ PP(pp_formline) case FF_ITEM: arg = itemsize; s = item; - if (item_is_utf) { + if (item_is_utf8) { + if (!targ_is_utf8) { + SvCUR_set(PL_formtarget, t - SvPVX(PL_formtarget)); + *t = '\0'; + sv_utf8_upgrade(PL_formtarget); + SvGROW(PL_formtarget, SvCUR(PL_formtarget) + fudge + 1); + t = SvEND(PL_formtarget); + targ_is_utf8 = TRUE; + } while (arg--) { if (UTF8_IS_CONTINUED(*s)) { STRLEN skip = 
UTF8SKIP(s); @@ -578,6 +604,21 @@ PP(pp_formline) } break; } + if (targ_is_utf8 && !item_is_utf8) { + SvCUR_set(PL_formtarget, t - SvPVX(PL_formtarget)); + *t = '\0'; + sv_catpvn_utf8_upgrade(PL_formtarget, s, arg, nsv); + for (; t < SvEND(PL_formtarget); t++) { +#ifdef EBCDIC + int ch = *t++ = *s++; + if (iscntrl(ch)) +#else + if (!(*t & ~31)) +#endif + *t = ' '; + } + break; + } while (arg--) { #ifdef EBCDIC int ch = *t++ = *s++; @@ -601,22 +642,32 @@ PP(pp_formline) case FF_LINEGLOB: item = s = SvPV(sv, len); itemsize = len; - item_is_utf = FALSE; /* XXX is this correct? */ + if ((item_is_utf8 = DO_UTF8(sv))) + itemsize = sv_len_utf8(sv); if (itemsize) { + bool chopped = FALSE; gotsome = TRUE; - send = s + itemsize; + send = s + len; while (s < send) { if (*s++ == '\n') { - if (s == send) + if (s == send) { itemsize--; + chopped = TRUE; + } else lines++; } } SvCUR_set(PL_formtarget, t - SvPVX(PL_formtarget)); - sv_catpvn(PL_formtarget, item, itemsize); + if (targ_is_utf8) + SvUTF8_on(PL_formtarget); + sv_catsv(PL_formtarget, sv); + if (chopped) + SvCUR_set(PL_formtarget, SvCUR(PL_formtarget) - 1); SvGROW(PL_formtarget, SvCUR(PL_formtarget) + fudge + 1); t = SvPVX(PL_formtarget) + SvCUR(PL_formtarget); + if (item_is_utf8) + targ_is_utf8 = TRUE; } break; @@ -712,6 +763,8 @@ PP(pp_formline) if (strnEQ(linemark, linemark - arg, arg)) DIE(aTHX_ "Runaway format"); } + if (targ_is_utf8) + SvUTF8_on(PL_formtarget); FmLINES(PL_formtarget) = lines; SP = ORIGMARK; RETURNOP(cLISTOP->op_first); @@ -751,6 +804,8 @@ PP(pp_formline) case FF_END: *t = '\0'; SvCUR_set(PL_formtarget, t - SvPVX(PL_formtarget)); + if (targ_is_utf8) + SvUTF8_on(PL_formtarget); FmLINES(PL_formtarget) += lines; SP = ORIGMARK; RETPUSHYES; diff --git a/pp_hot.c b/pp_hot.c index 63c7d3b658..c9e1228c27 100644 --- a/pp_hot.c +++ b/pp_hot.c @@ -1,6 +1,6 @@ /* pp_hot.c * - * Copyright (c) 1991-2002, Larry Wall + * Copyright (c) 1991-2003, Larry Wall * * You may distribute under the terms of either the GNU 
General Public * License or the Artistic License, as specified in the README file. diff --git a/pp_pack.c b/pp_pack.c index 44764541df..dc4733dae4 100644 --- a/pp_pack.c +++ b/pp_pack.c @@ -1,6 +1,6 @@ /* pp_pack.c * - * Copyright (c) 1991-2002, Larry Wall + * Copyright (c) 1991-2003, Larry Wall * * You may distribute under the terms of either the GNU General Public * License or the Artistic License, as specified in the README file. diff --git a/pp_sort.c b/pp_sort.c index db886c20f6..3628471405 100644 --- a/pp_sort.c +++ b/pp_sort.c @@ -1,6 +1,6 @@ /* pp_sort.c * - * Copyright (c) 1991-2002, Larry Wall + * Copyright (c) 1991-2003, Larry Wall * * You may distribute under the terms of either the GNU General Public * License or the Artistic License, as specified in the README file. diff --git a/pp_sys.c b/pp_sys.c index 97a1ac28ff..3aa6907676 100644 --- a/pp_sys.c +++ b/pp_sys.c @@ -1,6 +1,6 @@ /* pp_sys.c * - * Copyright (c) 1991-2002, Larry Wall + * Copyright (c) 1991-2003, Larry Wall * * You may distribute under the terms of either the GNU General Public * License or the Artistic License, as specified in the README file. diff --git a/proto.h b/proto.h index b58528723f..b413e99d37 100644 --- a/proto.h +++ b/proto.h @@ -1,7 +1,7 @@ /* * proto.h * - * Copyright (c) 1997-2002, Larry Wall + * Copyright (c) 1997-2003, Larry Wall * * You may distribute under the terms of either the GNU General Public * License or the Artistic License, as specified in the README file. diff --git a/reentr.c b/reentr.c index 052af90336..8b655da86c 100644 --- a/reentr.c +++ b/reentr.c @@ -1,7 +1,7 @@ /* * reentr.c * - * Copyright (c) 1997-2002, Larry Wall + * Copyright (c) 1997-2003, Larry Wall * * You may distribute under the terms of either the GNU General Public * License or the Artistic License, as specified in the README file. 
diff --git a/reentr.h b/reentr.h index 4c805883da..df5a5d660c 100644 --- a/reentr.h +++ b/reentr.h @@ -1,7 +1,7 @@ /* * reentr.h * - * Copyright (c) 1997-2002, Larry Wall + * Copyright (c) 1997-2003, Larry Wall * * You may distribute under the terms of either the GNU General Public * License or the Artistic License, as specified in the README file. diff --git a/reentr.pl b/reentr.pl index 6b23aa865d..0e1cefdfb0 100644 --- a/reentr.pl +++ b/reentr.pl @@ -41,7 +41,7 @@ print < + # Date: Wed, 26 Feb 2003 16:53:12 +0000 + # Message-Id: + # To: perl-unicode@perl.org + + $x = "\x{2019}\nk"; $x =~ s/(\S)\n(\S)/$1 $2/sg; + ok($x eq "\x{2019} k", "Markus Kuhn 2003-02-26"); + + $x = "b\nk"; $x =~ s/(\S)\n(\S)/$1 $2/sg; + ok($x eq "b k", "Markus Kuhn 2003-02-26"); + + ok("\x{2019}" =~ /\S/, "Markus Kuhn 2003-02-26"); +} + +# last test 993 diff --git a/t/run/switchC.t b/t/run/switchC.t index 9283fa879b..c3cc4033a7 100644 --- a/t/run/switchC.t +++ b/t/run/switchC.t @@ -25,11 +25,17 @@ $r = runperl( switches => [ '-CO', '-w' ], stderr => 1 ); is( $r, "\xC4\x80", '-CO: no warning on UTF-8 output' ); -$r = runperl( switches => [ '-CI', '-w' ], - prog => 'print ord()', - stderr => 1, - stdin => chr(256) ); -is( $r, 256, '-CI: read in UTF-8 output' ); +SKIP: { + if (exists $ENV{PERL_UNICODE} && + ($ENV{PERL_UNICODE} eq "" || $ENV{PERL_UNICODE} =~ /[SO]/)) { + skip(qq[cannot test with PERL_UNICODE locale "" or /[SO]/], 1); + } + $r = runperl( switches => [ '-CI', '-w' ], + prog => 'print ord()', + stderr => 1, + stdin => "\xC4\x80" ); + is( $r, 256, '-CI: read in UTF-8 input' ); +} $r = runperl( switches => [ '-CE', '-w' ], prog => 'warn chr(256), qq(\n)', diff --git a/t/uni/write.t b/t/uni/write.t new file mode 100644 index 0000000000..95c3bbb36a --- /dev/null +++ b/t/uni/write.t @@ -0,0 +1,96 @@ +#!./perl -w +use strict; + +BEGIN { + chdir 't' if -d 't'; + @INC = qw(../lib .); + require "test.pl"; +} + +plan tests => 6; + +# Some tests for UTF8 and format/write + +our ($bitem1, 
$uitem1) = ("\x{ff}", "\x{100}"); +our ($bitem2, $uitem2) = ("\x{fe}", "\x{101}"); +our ($blite1, $ulite1) = ("\x{fd}", "\x{102}"); +our ($blite2, $ulite2) = ("\x{fc}", "\x{103}"); +our ($bmulti, $umulti) = ("\x{fb}\n\x{fa}\n\x{f9}\n", + "\x{104}\n\x{105}\n\x{106}\n"); + +sub fmwrtest { + no strict 'refs'; + my ($out, $format, $expect, $name) = @_; + eval "format $out =\n$format.\n"; die $@ if $@; + open $out, '>:utf8', 'Uni_write.tmp' or die "Can't create Uni_write.tmp"; + write $out; + close $out or die "Could not close $out: $!"; + + open UIN, '<:utf8', 'Uni_write.tmp' or die "Can't open Uni_write.tmp";; + my $result = do { local $/; <UIN>; }; + close UIN; + + is($result, $expect, $name); +} + +fmwrtest OUT1 => < < < < < <