diff options
-rw-r--r-- | Changes | 186 | ||||
-rw-r--r-- | MANIFEST | 3 | ||||
-rw-r--r-- | doop.c | 14 | ||||
-rw-r--r-- | ext/Encode/Encode/iso8859-16.enc | 20 | ||||
-rw-r--r-- | ext/POSIX/Makefile.PL | 7 | ||||
-rw-r--r-- | ext/POSIX/hints/svr4.pl | 12 | ||||
-rw-r--r-- | hints/svr4.sh | 16 | ||||
-rw-r--r-- | lib/ExtUtils/MM_OS2.pm | 4 | ||||
-rw-r--r-- | lib/ExtUtils/MM_VMS.pm | 21 | ||||
-rw-r--r-- | lib/ExtUtils/Manifest.pm | 2 | ||||
-rw-r--r-- | patchlevel.h | 2 | ||||
-rw-r--r-- | pod/perlapi.pod | 33 | ||||
-rw-r--r-- | pod/perlfunc.pod | 4 | ||||
-rw-r--r-- | pp.h | 10 | ||||
-rw-r--r-- | pp_hot.c | 36 | ||||
-rw-r--r-- | t/README | 7 | ||||
-rwxr-xr-x | t/base/term.t | 7 | ||||
-rwxr-xr-x | t/lib/bigfltpm.t | 8 | ||||
-rw-r--r-- | t/op/reverse.t | 33 | ||||
-rw-r--r-- | t/op/utf8decode.t | 14 | ||||
-rw-r--r-- | t/pragma/warn/utf8 | 4 | ||||
-rw-r--r-- | utf8.c | 75 | ||||
-rw-r--r-- | utf8.h | 22 |
23 files changed, 420 insertions, 120 deletions
@@ -32,6 +32,190 @@ Version v5.7.1 Development release working toward v5.8 -------------- ____________________________________________________________________________ +[ 8042] By: jhi on 2000/12/08 16:33:39 + Log: Do not return the Unicode replacement character if UTF-8 + decoding goes awry, it should be up to the caller to decide. + Branch: perl + ! pod/perlapi.pod utf8.c +____________________________________________________________________________ +[ 8041] By: jhi on 2000/12/08 16:22:28 + Log: metaconfig maintenance. + Branch: metaconfig + ! U/modified/stdchar.U + Branch: metaconfig/U/perl + + testsyml.U +____________________________________________________________________________ +[ 8040] By: jhi on 2000/12/08 16:03:08 + Log: Subject: [ID 20001207.009] Not OK: perl v5.7.0 +DEVEL8030 on os2-64int-ld 2.30 + From: sthoenna@efn.org + Date: Thu, 7 Dec 2000 21:32:43 -0800 (PST) + Message-Id: <200012080532.eB85Wh729109@garcia.efn.org> + Branch: perl + ! Changes lib/ExtUtils/MM_OS2.pm +____________________________________________________________________________ +[ 8039] By: jhi on 2000/12/08 15:57:11 + Log: Subject: [PATCH] Re: ebcdic <-> ascii tables interjected in uv <-> utf8 considered harmful + From: Simon Cozens <simon@cozens.net> + Date: Fri, 8 Dec 2000 13:33:31 +0000 + Message-ID: <20001208133331.A11535@deep-dark-truthful-mirror.perlhacker.org> + + (The pp_hot part needed a rewrite.) + Branch: perl + ! doop.c pp_hot.c utf8.c +____________________________________________________________________________ +[ 8038] By: jhi on 2000/12/08 15:25:08 + Log: Subject: djSP + From: Michael Stevens <michael@etla.org> + Date: Wed, 6 Dec 2000 23:24:01 +0000 + Message-ID: <20001206232400.A21381@firedrake.org> + + Plus a note from Nick Ing-Simmons. + Branch: perl + ! pp.h +____________________________________________________________________________ +[ 8037] By: jhi on 2000/12/08 15:18:35 + Log: Filetype is text. + Branch: perl + ! t/op/utf8decode.t +____________________________________________________________________________ +[ 8036] By: jhi on 2000/12/08 03:31:27 + Log: Subject: [ID 20001207.004] [PATCH 5.6.0 and 5.7.x] add NCR MP-RAS support + From: Andy Dougherty <doughera@lafayette.edu> + Date: Thu, 7 Dec 2000 12:36:45 -0500 (EST) + Message-Id: <Pine.SOL.4.10.10012071235400.13162-100000@maxwell.phys.lafayette.edu> + + Subject: Re: [ID 20001207.004] [PATCH 5.6.0 and 5.7.x] add NCR MP-RAS support + From: Andy Dougherty <doughera@lafayette.edu> + Date: Thu, 7 Dec 2000 13:56:10 -0500 (EST) + Message-ID: <Pine.SOL.4.10.10012071354420.6665-100000@maxwell.phys.lafayette.edu> + Branch: perl + + ext/POSIX/hints/svr4.pl + ! MANIFEST ext/POSIX/Makefile.PL hints/svr4.sh t/lib/bigfltpm.t +____________________________________________________________________________ +[ 8035] By: jhi on 2000/12/08 03:26:01 + Log: Subject: [ID 20001207.003] [PATCH] t/base/term.t causes abort if Config.pm not built + From: Andy Dougherty <doughera@lafayette.edu> + Date: Thu, 7 Dec 2000 10:50:47 -0500 (EST) + Message-Id: <Pine.SOL.4.10.10012071049400.7566-100000@maxwell.phys.lafayette.edu> + Branch: perl + ! t/README t/base/term.t +____________________________________________________________________________ +[ 8034] By: jhi on 2000/12/08 03:21:59 + Log: Subject: DOC PATCH 5.6.0: -s return value incompletely documented + From: mjd@plover.com + Date: 7 Dec 2000 21:04:20 -0000 + Message-ID: <20001207210420.22282.qmail@plover.com> + + Plus -z doc. + Branch: perl + ! pod/perlfunc.pod +____________________________________________________________________________ +[ 8033] By: jhi on 2000/12/08 03:19:03 + Log: Use the UTF8 macros a bit. They can't be used with abandon + everywhere because we do generate illegal UTF-8 in some situations. + This is of course naughty. + Branch: perl + ! pod/perlapi.pod utf8.c utf8.h +____________________________________________________________________________ +[ 8032] By: jhi on 2000/12/08 03:00:09 + Log: Out of sync? + Branch: perl + ! t/pragma/warn/utf8 +____________________________________________________________________________ +[ 8031] By: jhi on 2000/12/08 02:22:39 + Log: Cnt spl. + Branch: perl + ! MANIFEST +____________________________________________________________________________ +[ 8030] By: jhi on 2000/12/08 01:23:54 + Log: Add test for reverse(). + Branch: perl + + t/op/reverse.t + ! MANIFEST +____________________________________________________________________________ +[ 8029] By: jhi on 2000/12/08 01:21:47 + Log: Integrate perlio. + Branch: perl + !> ext/IO/IO.xs fakesdio.h global.sym lib/warnings.pm perlapi.c + !> perlio.c perlio.h perlsdio.h warnings.pl +____________________________________________________________________________ +[ 8028] By: jhi on 2000/12/08 01:19:08 + Log: Introduce macros for UTF8 decoding. + Branch: perl + ! t/op/utf8decode.t t/pragma/warn/utf8 utf8.c utf8.h +____________________________________________________________________________ +[ 8027] By: nick on 2000/12/07 22:18:19 + Log: Integrate mainline + Branch: perlio + !> embed.pl pod/perlapi.pod utf8.c +____________________________________________________________________________ +[ 8026] By: nick on 2000/12/07 21:45:08 + Log: Various oddities p4 diff -se showed up + Remove 'our' from warnings.pl + Branch: perlio + ! global.sym lib/warnings.pm pod/perlapi.pod warnings.pl +____________________________________________________________________________ +[ 8025] By: nick on 2000/12/07 21:43:32 + Log: Change PerlIO_(get|set)pos to take SV * + Should fix, OS/2, VMS, (sfio??) + Branch: perlio + ! ext/IO/IO.xs fakesdio.h perlapi.c perlio.c perlio.h perlsdio.h +____________________________________________________________________________ +[ 8024] By: jhi on 2000/12/07 19:05:32 + Log: Document utf8_to_uv() better. + Branch: perl + ! pod/perlapi.pod utf8.c +____________________________________________________________________________ +[ 8023] By: jhi on 2000/12/07 18:23:47 + Log: Document utf8_length(), utf8_distance(), and utf8_hop(). + Branch: perl + ! embed.pl pod/perlapi.pod utf8.c +____________________________________________________________________________ +[ 8022] By: jhi on 2000/12/07 04:13:51 + Log: Integrate perlio. + Branch: perl + !> perlio.c t/io/dup.t win32/config.vc win32/config_H.bc + !> win32/config_H.gc win32/config_H.vc win32/config_h.PL + !> win32/makefile.mk +____________________________________________________________________________ +[ 8021] By: nick on 2000/12/07 00:28:14 + Log: Various attempts at MSVC debug - not sure what has + changed but works now. + Seems atexit() _may_ work for DLLs built with MSVC so don't + call cleanup that way. + Branch: perlio + ! perlio.c win32/makefile.mk +____________________________________________________________________________ +[ 8020] By: nick on 2000/12/06 19:57:20 + Log: Integrate mainline + Branch: perlio + +> lib/ExtUtils/MANIFEST.SKIP t/op/concat.t + !> (integrate 75 files) +____________________________________________________________________________ +[ 8019] By: nick on 2000/12/06 19:28:21 + Log: Add useperlio to config.vc + Turn off binmode in config_H.PL + Regen all the config_H.xx + Attempt to get debugging build with MSVC. + Branch: perlio + ! win32/config.vc win32/config_H.bc win32/config_H.gc + ! win32/config_H.vc win32/config_h.PL win32/makefile.mk +____________________________________________________________________________ +[ 8018] By: nick on 2000/12/06 19:21:57 + Log: Test various dup/external program options on all platforms. + Branch: perlio + ! t/io/dup.t +____________________________________________________________________________ +[ 8017] By: nick on 2000/12/06 19:20:47 + Log: Fix harness to be less picky + Branch: perlio + ! lib/Test/Harness.pm +____________________________________________________________________________ +[ 8016] By: jhi on 2000/12/06 16:45:12 + Log: Update Changes. + Branch: perl + ! Changes patchlevel.h +____________________________________________________________________________ [ 8015] By: jhi on 2000/12/06 16:41:03 Log: Test \x{...} with ord(). Branch: perl @@ -269,7 +453,7 @@ ____________________________________________________________________________ Date: Mon, 04 Dec 2000 22:21:59 -0800 Message-ID: <HmIL6gzkgepS092yn@efn.org> - Harness shouldn't ignore responses like "ok 3\r\n\n". + Harness shouldn't ignore responses like "ok 3\r\r\n". Branch: perl ! lib/Test/Harness.pm ____________________________________________________________________________ @@ -247,6 +247,7 @@ ext/Encode/Encode/iso8859-10.enc Encoding tables ext/Encode/Encode/iso8859-13.enc Encoding tables ext/Encode/Encode/iso8859-14.enc Encoding tables ext/Encode/Encode/iso8859-15.enc Encoding tables +ext/Encode/Encode/iso8859-16.enc Encoding tables ext/Encode/Encode/iso8859-2.enc Encoding tables ext/Encode/Encode/iso8859-3.enc Encoding tables ext/Encode/Encode/iso8859-4.enc Encoding tables @@ -364,6 +365,7 @@ ext/POSIX/hints/netbsd.pl Hint for POSIX for named architecture ext/POSIX/hints/next_3.pl Hint for POSIX for named architecture ext/POSIX/hints/openbsd.pl Hint for POSIX for named architecture ext/POSIX/hints/sunos_4.pl Hint for POSIX for named architecture +ext/POSIX/hints/svr4.pl Hint for POSIX for named architecture ext/POSIX/typemap POSIX extension interface types ext/SDBM_File/Makefile.PL SDBM extension makefile writer ext/SDBM_File/SDBM_File.pm SDBM extension Perl module @@ -1557,6 +1559,7 @@ t/op/regexp.t See if regular expressions work t/op/regexp_noamp.t See if regular expressions work with optimizations t/op/regmesg.t See if one can get regular expression errors t/op/repeat.t See if x operator works +t/op/reverse.t See if reverse operator works t/op/runlevel.t See if die() works from perl_call_*() t/op/sleep.t See if sleep works t/op/sort.t See if sort works @@ -79,10 +79,7 @@ S_do_trans_simple(pTHX_ SV *sv) c = utf8_to_uv(s, send - s, &ulen, 0); if (c < 0x100 && (ch = tbl[(short)c]) >= 0) { matches++; - if (ch < 0x80) - *d++ = ch; - else - d = uv_to_utf8(d,ch); + d = uv_to_utf8(d,ch); s += ulen; } else { /* No match -> copy */ @@ -192,12 +189,9 @@ S_do_trans_complex(pTHX_ SV *sv)/* SPC - NOT OK */ matches--; } - if (ch >= 0) { - if (hasutf) - d = uv_to_utf8(d, ch); - else - *d++ = ch; - } + if (ch >= 0) + d = uv_to_utf8(d, ch); + matches++; s += hasutf && *s & 0x80 ? UNISKIP(*s) : 1; diff --git a/ext/Encode/Encode/iso8859-16.enc b/ext/Encode/Encode/iso8859-16.enc new file mode 100644 index 0000000000..1936b97b6f --- /dev/null +++ b/ext/Encode/Encode/iso8859-16.enc @@ -0,0 +1,20 @@ +# Encoding file: iso8859-16, single-byte +S +003F 0 1 +00 +0000000100020003000400050006000700080009000A000B000C000D000E000F +0010001100120013001400150016001700180019001A001B001C001D001E001F +0020002100220023002400250026002700280029002A002B002C002D002E002F +0030003100320033003400350036003700380039003A003B003C003D003E003F +0040004100420043004400450046004700480049004A004B004C004D004E004F +0050005100520053005400550056005700580059005A005B005C005D005E005F +0060006100620063006400650066006700680069006A006B006C006D006E006F +0070007100720073007400750076007700780079007A007B007C007D007E007F +0080008100820083008400850086008700880089008A008B008C008D008E008F +0090009100920093009400950096009700980099009A009B009C009D009E009F +00A001040105014120AC00AB016000A7016100A90218201E017900AD017A017B +00B000B1010C0142017D201D00B600B7017E010D021900BB015201530178017C +00C000C100C2010200C4010600C600C700C800C900CA00CB00CC00CD00CE00CF +0110014300D200D300D4015000D6015A017000D900DA00DB00DC0118021A00DF +00E000E100E2010300E4010700E600E700E800E900EA00EB00EC00ED00EE00EF +0111014400F200F300F4015100F6015B017100F900FA00FB00FC0119021B00FF diff --git a/ext/POSIX/Makefile.PL b/ext/POSIX/Makefile.PL index 55c5c1fbf3..73bb02dddb 100644 --- a/ext/POSIX/Makefile.PL +++ b/ext/POSIX/Makefile.PL @@ -2,12 +2,7 @@ use ExtUtils::MakeMaker; use Config; my @libs; if ($^O ne 'MSWin32') { - if ($Config{archname} =~ /RM\d\d\d-svr4/) { - @libs = ('LIBS' => ["-lm -lc -lposix -lcposix"]); - } - else { - @libs = ('LIBS' => ["-lm -lposix -lcposix"]); - } + @libs = ('LIBS' => ["-lm -lposix -lcposix"]); } WriteMakefile( NAME => 'POSIX', diff --git a/ext/POSIX/hints/svr4.pl b/ext/POSIX/hints/svr4.pl new file mode 100644 index 0000000000..07f2cb0412 --- /dev/null +++ b/ext/POSIX/hints/svr4.pl @@ -0,0 +1,12 @@ +# NCR MP-RAS. Thanks to Doug Hendricks for this info. +# Configure sets osname=svr4.0, osvers=3.0, archname='3441-svr4.0' +# This system needs to explicitly link against -lmw to pull in some +# symbols such as _mwoflocheckl and possibly others. +# A. Dougherty Thu Dec 7 11:55:28 EST 2000 +if ($Config{'archname'} =~ /3441-svr4/) { + $self->{LIBS} = ['-lm -posix -lcposix -lmw']; +} +# Not sure what OS this one is. +elsif ($Config{archname} =~ /RM\d\d\d-svr4/) { + $self->{LIBS} = ['-lm -lc -lposix -lcposix']; +} diff --git a/hints/svr4.sh b/hints/svr4.sh index 8109b39752..69af6fda2f 100644 --- a/hints/svr4.sh +++ b/hints/svr4.sh @@ -135,6 +135,22 @@ case "`uname -sm`" in ;; esac +# NCR MP-RAS. Thanks to Doug Hendricks for this info. +# The output of uname -a looks like this +# foo foo 4.0 3.0 3441 Pentium III(TM)-ISA/PCI +# Configure sets osname=svr4.0, osvers=3.0, archname='3441-svr4.0' +case "$myuname" in +*3441*) + # With the NCR High Performance C Compiler R3.0c, miniperl fails + # t/op/regexp.t test 461 unless we compile with optimizie=-g. + # The whole O/S is being phased out, so more detailed probing + # is probably not warranted. + case "$optimize" in + '') optimize='-g' ;; + esac + ;; +esac + # Configure may fail to find lstat() since it's a static/inline function # in <sys/stat.h> on Unisys U6000 SVR4, UnixWare 2.x, and possibly other # SVR4 derivatives. (Though UnixWare has it in /usr/ccs/lib/libc.so.) diff --git a/lib/ExtUtils/MM_OS2.pm b/lib/ExtUtils/MM_OS2.pm index 501832bfec..c0c52402f6 100644 --- a/lib/ExtUtils/MM_OS2.pm +++ b/lib/ExtUtils/MM_OS2.pm @@ -38,7 +38,7 @@ $self->{BASEEXT}.def: Makefile.PL ', "DL_VARS" => ', neatvalue($vars), ');\' '); } - if (%{$self->{IMPORTS}}) { + if ($self->{IMPORTS} && %{$self->{IMPORTS}}) { # Make import files (needed for static build) -d 'tmp_imp' or mkdir 'tmp_imp', 0777 or die "Can't mkdir tmp_imp"; open IMP, '>tmpimp.imp' or die "Can't open tmpimp.imp"; @@ -61,7 +61,7 @@ $self->{BASEEXT}.def: Makefile.PL sub static_lib { my($self) = @_; my $old = $self->ExtUtils::MM_Unix::static_lib(); - return $old unless %{$self->{IMPORTS}}; + return $old unless $self->{IMPORTS} && %{$self->{IMPORTS}}; my @chunks = split /\n{2,}/, $old; shift @chunks unless length $chunks[0]; # Empty lines at the start diff --git a/lib/ExtUtils/MM_VMS.pm b/lib/ExtUtils/MM_VMS.pm index 3485786bf8..cc63d2f67a 100644 --- a/lib/ExtUtils/MM_VMS.pm +++ b/lib/ExtUtils/MM_VMS.pm @@ -126,7 +126,7 @@ sub ExtUtils::MM_VMS::makeaperl; sub ExtUtils::MM_VMS::ext; sub ExtUtils::MM_VMS::nicetext; -#use SelfLoader; +our $AUTOLOAD; sub AUTOLOAD { my $code; if (defined fileno(DATA)) { @@ -562,21 +562,21 @@ MM_VMS_REVISION = $ExtUtils::MM_VMS::Revision # DLBASE = Basename part of dynamic library. May be just equal BASEEXT. ]; - for $tmp (qw/ + for my $tmp (qw/ FULLEXT VERSION_FROM OBJECT LDFROM / ) { next unless defined $self->{$tmp}; push @m, "$tmp = ",$self->fixpath($self->{$tmp},0),"\n"; } - for $tmp (qw/ + for my $tmp (qw/ BASEEXT PARENT_NAME DLBASE INC DEFINE LINKTYPE / ) { next unless defined $self->{$tmp}; push @m, "$tmp = $self->{$tmp}\n"; } - for $tmp (qw/ XS MAN1PODS MAN3PODS PM /) { + for my $tmp (qw/ XS MAN1PODS MAN3PODS PM /) { next unless defined $self->{$tmp}; my(%tmp,$key); for $key (keys %{$self->{$tmp}}) { @@ -585,7 +585,7 @@ MM_VMS_REVISION = $ExtUtils::MM_VMS::Revision $self->{$tmp} = \%tmp; } - for $tmp (qw/ C O_FILES H /) { + for my $tmp (qw/ C O_FILES H /) { next unless defined $self->{$tmp}; my(@tmp,$val); for $val (@{$self->{$tmp}}) { @@ -606,7 +606,7 @@ MAN3PODS = ',$self->wraplist(sort keys %{$self->{MAN3PODS}}),' '; - for $tmp (qw/ + for my $tmp (qw/ INST_MAN1DIR INSTALLMAN1DIR MAN1EXT INST_MAN3DIR INSTALLMAN3DIR MAN3EXT /) { next unless defined $self->{$tmp}; @@ -705,7 +705,7 @@ sub cflags { # conflate the ones from $Config{'ccflags'} and $self->{DEFINE} # ($self->{DEFINE} has already been VMSified in constants() above) if ($self->{DEFINE}) { $quals .= $self->{DEFINE}; } - for $type (qw(Def Undef)) { + for my $type (qw(Def Undef)) { my(@terms); while ($quals =~ m:/${type}i?n?e?=([^/]+):ig) { my $term = $1; @@ -1419,7 +1419,7 @@ sub processPL { my $list = ref($self->{PL_FILES}->{$plfile}) ? $self->{PL_FILES}->{$plfile} : [$self->{PL_FILES}->{$plfile}]; - foreach $target (@$list) { + foreach my $target (@$list) { my $vmsplfile = vmsify($plfile); my $vmsfile = vmsify($target); push @m, " @@ -2051,6 +2051,8 @@ Consequently, it hasn't really been tested, and may well be incomplete. =cut +our %olbs; + sub makeaperl { my($self, %attribs) = @_; my($makefilename, $searchdirs, $static, $extra, $perlinc, $target, $tmp, $libperl) = @@ -2093,7 +2095,7 @@ $(MAP_TARGET) :: $(MAKE_APERL_FILE) $linkcmd =~ s/\s+/ /g; # Which *.olb files could we make use of... - local(%olbs); + local(%olbs); # XXX can this be lexical? $olbs{$self->{INST_ARCHAUTODIR}} = "$self->{BASEEXT}\$(LIB_EXT)"; require File::Find; File::Find::find(sub { @@ -2190,6 +2192,7 @@ $(MAP_TARGET) :: $(MAKE_APERL_FILE) push @optlibs, @$extra; $target = "Perl$Config{'exe_ext'}" unless $target; + my $shrtarget; ($shrtarget,$targdir) = fileparse($target); $shrtarget =~ s/^([^.]*)/$1Shr/; $shrtarget = $targdir . $shrtarget; diff --git a/lib/ExtUtils/Manifest.pm b/lib/ExtUtils/Manifest.pm index 4b98185e93..46ace64e61 100644 --- a/lib/ExtUtils/Manifest.pm +++ b/lib/ExtUtils/Manifest.pm @@ -430,6 +430,6 @@ L<ExtUtils::MakeMaker> which has handy targets for most of the functionality. =head1 AUTHOR -Andreas Koenig <F<koenig@franz.ww.TU-Berlin.DE>> +Andreas Koenig <F<andreas.koenig@anima.de>> =cut diff --git a/patchlevel.h b/patchlevel.h index acee45547b..ca39c4e892 100644 --- a/patchlevel.h +++ b/patchlevel.h @@ -70,7 +70,7 @@ #if !defined(PERL_PATCHLEVEL_H_IMPLICIT) && !defined(LOCAL_PATCH_COUNT) static char *local_patches[] = { NULL - ,"DEVEL8015" + ,"DEVEL8042" ,NULL }; diff --git a/pod/perlapi.pod b/pod/perlapi.pod index 50ac40c650..fa18e661b6 100644 --- a/pod/perlapi.pod +++ b/pod/perlapi.pod @@ -2368,19 +2368,19 @@ false, defined or undefined. Does not handle 'get' magic. =for hackers Found in file sv.h -=item svtype +=item SvTYPE -An enum of flags for Perl types. These are found in the file B<sv.h> -in the C<svtype> enum. Test these flags with the C<SvTYPE> macro. +Returns the type of the SV. See C<svtype>. + + svtype SvTYPE(SV* sv) =for hackers Found in file sv.h -=item SvTYPE - -Returns the type of the SV. See C<svtype>. +=item svtype - svtype SvTYPE(SV* sv) +An enum of flags for Perl types. These are found in the file B<sv.h> +in the C<svtype> enum. Test these flags with the C<SvTYPE> macro. =for hackers Found in file sv.h @@ -3234,11 +3234,12 @@ Found in file utf8.c =item utf8_hop -Move the C<s> pointing to UTF-8 data by C<off> characters, either forward -or backward. +Return the UTF-8 pointer C<s> displaced by C<off> characters, either +forward or backward. WARNING: do not use the following unless you *know* C<off> is within -the UTF-8 buffer pointed to by C<s>. +the UTF-8 data pointed to by C<s> *and* that on entry C<s> is aligned +on the first byte of character or just after the last byte of a character. U8* utf8_hop(U8 *s, I32 off) @@ -3278,11 +3279,13 @@ and the pointer C<s> will be advanced to the end of the character. If C<s> does not point to a well-formed UTF8 character, the behaviour is dependent on the value of C<flags>: if it contains UTF8_CHECK_ONLY, it is assumed that the caller will raise a warning, and this function -will set C<retlen> to C<-1> and return zero. If the C<flags> does not -contain UTF8_CHECK_ONLY, the UNICODE_REPLACEMENT_CHARACTER (0xFFFD) -will be returned, and C<retlen> will be set to the expected length of -the UTF-8 character in bytes. The C<flags> can also contain various -flags to allow deviations from the strict UTF-8 encoding (see F<utf8.h>). +will silently just set C<retlen> to C<-1> and return zero. If the +C<flags> does not contain UTF8_CHECK_ONLY, warnings about +malformations will be given, C<retlen> will be set to the expected +length of the UTF-8 character in bytes, and zero will be returned. + +The C<flags> can also contain various flags to allow deviations from +the strict UTF-8 encoding (see F<utf8.h>). U8* s utf8_to_uv(STRLEN curlen, STRLEN *retlen, U32 flags) diff --git a/pod/perlfunc.pod b/pod/perlfunc.pod index f2bf51e18a..06d3b1da2d 100644 --- a/pod/perlfunc.pod +++ b/pod/perlfunc.pod @@ -274,8 +274,8 @@ X<-S>X<-b>X<-c>X<-t>X<-u>X<-g>X<-k>X<-T>X<-B>X<-M>X<-A>X<-C> -O File is owned by real uid. -e File exists. - -z File has zero size. - -s File has nonzero size (returns size). + -z File has zero size (is empty). + -s File has nonzero size (returns size in bytes). -f File is a plain file. -d File is a directory. @@ -33,6 +33,13 @@ L<perlcall>. Declares a local copy of perl's stack pointer for the XSUB, available via the C<SP> macro. See C<SP>. +=for apidoc ms||djSP + +Declare Just C<SP>. This is actually identical to C<dSP>, and declares +a local copy of perl's stack pointer, available via the C<SP> macro. +See C<SP>. (Available for backward source code compatibility with the +old (Perl 5.005) thread model.) + =for apidoc Ams||dMARK Declare a stack marker variable, C<mark>, for the XSUB. See C<MARK> and C<dORIGMARK>. @@ -46,8 +53,7 @@ The original stack mark for the XSUB. See C<dORIGMARK>. =for apidoc Ams||SPAGAIN Refetch the stack pointer. Used after a callback. See L<perlcall>. -=cut -*/ +=cut */ #define SP sp #define MARK mark @@ -142,19 +142,19 @@ PP(pp_concat) dPOPTOPssrl; STRLEN len; U8 *s; - bool left_utf; - bool right_utf; + bool left_utf8; + bool right_utf8; if (TARG == right && SvGMAGICAL(right)) mg_get(right); if (SvGMAGICAL(left)) mg_get(left); - left_utf = DO_UTF8(left); - right_utf = DO_UTF8(right); + left_utf8 = DO_UTF8(left); + right_utf8 = DO_UTF8(right); - if (left_utf != right_utf) { - if (TARG == right && !right_utf) { + if (left_utf8 != right_utf8) { + if (TARG == right && !right_utf8) { sv_utf8_upgrade(TARG); /* Now straight binary copy */ SvUTF8_on(TARG); } @@ -163,7 +163,7 @@ PP(pp_concat) U8 *l, *c, *olds = NULL; STRLEN targlen; s = (U8*)SvPV(right,len); - right_utf |= DO_UTF8(right); + right_utf8 |= DO_UTF8(right); if (TARG == right) { /* Take a copy since we're about to overwrite TARG */ olds = s = (U8*)savepvn((char*)s, len); @@ -175,28 +175,28 @@ PP(pp_concat) sv_setpv(left, ""); /* Suppress warning. */ } l = (U8*)SvPV(left, targlen); - left_utf |= DO_UTF8(left); + left_utf8 |= DO_UTF8(left); if (TARG != left) sv_setpvn(TARG, (char*)l, targlen); - if (!left_utf) + if (!left_utf8) sv_utf8_upgrade(TARG); /* Extend TARG to length of right (s) */ targlen = SvCUR(TARG) + len; - if (!right_utf) { + if (!right_utf8) { /* plus one for each hi-byte char if we have to upgrade */ for (c = s; c < s + len; c++) { - if (*c & 0x80) + if (UTF8_IS_CONTINUED(*c)) targlen++; } } SvGROW(TARG, targlen+1); /* And now copy, maybe upgrading right to UTF8 on the fly */ - for (c = (U8*)SvEND(TARG); len--; s++) { - if (*s & 0x80 && !right_utf) - c = uv_to_utf8(c, *s); - else - *c++ = *s; - } + if (right_utf8) + Copy(s, SvEND(TARG), len, U8); + else { + for (c = (U8*)SvEND(TARG); len--; s++) + c = uv_to_utf8(c, *s); + } SvCUR_set(TARG, targlen); *SvEND(TARG) = '\0'; SvUTF8_on(TARG); @@ -235,7 +235,7 @@ PP(pp_concat) } else sv_setpvn(TARG, (char *)s, len); /* suppress warning */ - if (left_utf) + if (left_utf8) SvUTF8_on(TARG); SETTARG; RETURN; @@ -1,4 +1,4 @@ -This is the perl test library. To run all the tests, just type 'TEST'. +This is the perl test library. To run all the tests, just type './TEST'. To add new tests, just look at the current tests and do likewise. @@ -14,3 +14,8 @@ will fail, you may want to use Test::Harness thusly: This method pinpoints failed tests automatically. If you come up with new tests, please send them to perlbug@perl.org. + +Tests in the base/ directory ought to be runnable with plain miniperl. +That is, they should not require Config.pm nor should they require any +extensions to have been built. TEST will abort if any tests in the +base/ directory fail. diff --git a/t/base/term.t b/t/base/term.t index e96313dec5..49df11fa31 100755 --- a/t/base/term.t +++ b/t/base/term.t @@ -4,19 +4,16 @@ BEGIN { chdir 't' if -d 't'; - @INC = '../lib'; } -use Config; - print "1..7\n"; # check "" interpretation $x = "\n"; # 10 is ASCII/Iso Latin, 21 is EBCDIC. -if ($x eq chr(10) || - ($Config{ebcdic} eq 'define' && $x eq chr(21))) {print "ok 1\n";} +if ($x eq chr(10)) { print "ok 1\n";} +elsif ($x eq chr(21)) { print "ok 1 # EBCDIC\n"; } else {print "not ok 1\n";} # check `` processing diff --git a/t/lib/bigfltpm.t b/t/lib/bigfltpm.t index a8fb19209a..b335d13016 100755 --- a/t/lib/bigfltpm.t +++ b/t/lib/bigfltpm.t @@ -449,10 +449,10 @@ $Math::BigFloat::div_scale = 20 $Math::BigFloat::div_scale = 40 &fsqrt +0:0 --1:/^(?i:0|\?|NaNQ?|-n\.an)$ --2:/^(?i:0|\?|NaNQ?|-n\.an)$ --16:/^(?i:0|\?|NaNQ?|-n\.an)$ --123.456:/^(?i:0|\?|NaNQ?|-n\.an)$ +-1:/^(?i:0|\?|-?N\.?aNQ?)$ +-2:/^(?i:0|\?|-?N\.?aNQ?)$ +-16:/^(?i:0|\?|-?N\.?aNQ?)$ +-123.456:/^(?i:0|\?|-?N\.?aNQ?)$ +1:1. +1.44:1.2 +2:1.41421356237309504880168872420969807857 diff --git a/t/op/reverse.t b/t/op/reverse.t new file mode 100644 index 0000000000..bb7b9b77fe --- /dev/null +++ b/t/op/reverse.t @@ -0,0 +1,33 @@ +#!./perl + +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib'; +} + +print "1..4\n"; + +print "not " unless reverse("abc") eq "cba"; +print "ok 1\n"; + +$_ = "foobar"; +print "not " unless reverse() eq "raboof"; +print "ok 2\n"; + +{ + my @a = ("foo", "bar"); + my @b = reverse @a; + + print "not " unless $b[0] eq $a[1] && $b[1] eq $a[0]; + print "ok 3\n"; +} + +{ + # Unicode. + + my $a = "\x{263A}\x{263A}x\x{263A}y\x{263A}"; + my $b = scalar reverse($a); + my $c = scalar reverse($b); + print "not " unless $a eq $c; + print "ok 4\n"; +} diff --git a/t/op/utf8decode.t b/t/op/utf8decode.t index c631c0a7a9..ac42b85577 100644 --- a/t/op/utf8decode.t +++ b/t/op/utf8decode.t @@ -53,11 +53,11 @@ my @MK = split(/\n/, <<__EOMK__); 3.1.8 n "" - 7 80:bf:80:bf:80:bf:80 - unexpected continuation byte 0x80 3.1.9 n "" - 64 80:81:82:83:84:85:86:87:88:89:8a:8b:8c:8d:8e:8f:90:91:92:93:94:95:96:97:98:99:9a:9b:9c:9d:9e:9f:a0:a1:a2:a3:a4:a5:a6:a7:a8:a9:aa:ab:ac:ad:ae:af:b0:b1:b2:b3:b4:b5:b6:b7:b8:b9:ba:bb:bc:bd:be:bf - unexpected continuation byte 0x80 3.2 Lonely start characters -3.2.1 n " " - 64 c0:20:c1:20:c2:20:c3:20:c4:20:c5:20:c6:20:c7:20:c8:20:c9:20:ca:20:cb:20:cc:20:cd:20:ce:20:cf:20:d0:20:d1:20:d2:20:d3:20:d4:20:d5:20:d6:20:d7:20:d8:20:d9:20:da:20:db:20:dc:20:dd:20:de:20:df:20 - unexpected non-continuation byte 0x20 after byte 0xc0 -3.2.2 n " " - 32 e0:20:e1:20:e2:20:e3:20:e4:20:e5:20:e6:20:e7:20:e8:20:e9:20:ea:20:eb:20:ec:20:ed:20:ee:20:ef:20 - unexpected non-continuation byte 0x20 after byte 0xe0 -3.2.3 n " " - 16 f0:20:f1:20:f2:20:f3:20:f4:20:f5:20:f6:20:f7:20 - unexpected non-continuation byte 0x20 after byte 0xf0 -3.2.4 n " " - 8 f8:20:f9:20:fa:20:fb:20 - unexpected non-continuation byte 0x20 after byte 0xf8 -3.2.5 n " " - 4 fc:20:fd:20 - unexpected non-continuation byte 0x20 after byte 0xfc +3.2.1 n " " - 64 c0:20:c1:20:c2:20:c3:20:c4:20:c5:20:c6:20:c7:20:c8:20:c9:20:ca:20:cb:20:cc:20:cd:20:ce:20:cf:20:d0:20:d1:20:d2:20:d3:20:d4:20:d5:20:d6:20:d7:20:d8:20:d9:20:da:20:db:20:dc:20:dd:20:de:20:df:20 - unexpected non-continuation byte 0x20 after start byte 0xc0 +3.2.2 n " " - 32 e0:20:e1:20:e2:20:e3:20:e4:20:e5:20:e6:20:e7:20:e8:20:e9:20:ea:20:eb:20:ec:20:ed:20:ee:20:ef:20 - unexpected non-continuation byte 0x20 after start byte 0xe0 +3.2.3 n " " - 16 f0:20:f1:20:f2:20:f3:20:f4:20:f5:20:f6:20:f7:20 - unexpected non-continuation byte 0x20 after start byte 0xf0 +3.2.4 n " " - 8 f8:20:f9:20:fa:20:fb:20 - unexpected non-continuation byte 0x20 after start byte 0xf8 +3.2.5 n " " - 4 fc:20:fd:20 - unexpected non-continuation byte 0x20 after start byte 0xfc 3.3 Sequences with last continuation byte missing 3.3.1 n "" - 1 c0 - 1 byte, need 2 3.3.2 n "" - 2 e0:80 - 2 bytes, need 3 @@ -70,7 +70,7 @@ my @MK = split(/\n/, <<__EOMK__); 3.3.9 n "" - 4 fb:bf:bf:bf - 4 bytes, need 5 3.3.10 n "" - 5 fd:bf:bf:bf:bf - 5 bytes, need 6 3.4 Concatenation of incomplete sequences -3.4.1 n "" - 30 c0:e0:80:f0:80:80:f8:80:80:80:fc:80:80:80:80:df:ef:bf:f7:bf:bf:fb:bf:bf:bf:fd:bf:bf:bf:bf - unexpected continuation byte 0xe0 +3.4.1 n "" - 30 c0:e0:80:f0:80:80:f8:80:80:80:fc:80:80:80:80:df:ef:bf:f7:bf:bf:fb:bf:bf:bf:fd:bf:bf:bf:bf - unexpected non-continuation byte 0xe0 after start byte 0xc0 3.5 Impossible bytes 3.5.1 n "" - 1 fe - byte 0xfe 3.5.2 n "" - 1 ff - byte 0xff @@ -125,7 +125,7 @@ __EOMK__ local $SIG{__WARN__} = sub { - # print "# $id: @_"; + print "# $id: @_"; $WARNCNT++; $WARNMSG = "@_"; }; diff --git a/t/pragma/warn/utf8 b/t/pragma/warn/utf8 index adc10c645e..9a7dbafdee 100644 --- a/t/pragma/warn/utf8 +++ b/t/pragma/warn/utf8 @@ -30,6 +30,6 @@ my $a = "snstorm" ; my $a = "snstorm"; } EXPECT -Malformed UTF-8 character (unexpected non-continuation byte 0x73 after byte 0xf8) at - line 9. -Malformed UTF-8 character (unexpected non-continuation byte 0x73 after byte 0xf8) at - line 14. +Malformed UTF-8 character (unexpected non-continuation byte 0x73 after start byte 0xf8) at - line 9. +Malformed UTF-8 character (unexpected non-continuation byte 0x73 after start byte 0xf8) at - line 14. ######## @@ -137,7 +137,7 @@ Perl_is_utf8_char(pTHX_ U8 *s) while (slen--) { if ((*s & 0xc0) != 0x80) return 0; - uv = (uv << 6) | (*s & 0x3f); + uv = UTF8_ACCUMULATE(uv, *s); if (uv < ouv) return 0; ouv = uv; @@ -189,11 +189,13 @@ and the pointer C<s> will be advanced to the end of the character. If C<s> does not point to a well-formed UTF8 character, the behaviour is dependent on the value of C<flags>: if it contains UTF8_CHECK_ONLY, it is assumed that the caller will raise a warning, and this function -will set C<retlen> to C<-1> and return zero. If the C<flags> does not -contain UTF8_CHECK_ONLY, the UNICODE_REPLACEMENT_CHARACTER (0xFFFD) -will be returned, and C<retlen> will be set to the expected length of -the UTF-8 character in bytes. The C<flags> can also contain various -flags to allow deviations from the strict UTF-8 encoding (see F<utf8.h>). +will silently just set C<retlen> to C<-1> and return zero. If the +C<flags> does not contain UTF8_CHECK_ONLY, warnings about +malformations will be given, C<retlen> will be set to the expected +length of the UTF-8 character in bytes, and zero will be returned. + +The C<flags> can also contain various flags to allow deviations from +the strict UTF-8 encoding (see F<utf8.h>). =cut */ @@ -216,13 +218,13 @@ Perl_utf8_to_uv(pTHX_ U8* s, STRLEN curlen, STRLEN* retlen, U32 flags) goto malformed; } - if (uv <= 0x7f) { /* Pure ASCII. */ + if (UTF8_IS_ASCII(uv)) { if (retlen) *retlen = 1; return *s; } - if ((uv >= 0x80 && uv <= 0xbf) && + if (UTF8_IS_CONTINUATION(uv) && !(flags & UTF8_ALLOW_CONTINUATION)) { if (dowarn) Perl_warner(aTHX_ WARN_UTF8, @@ -231,11 +233,11 @@ Perl_utf8_to_uv(pTHX_ U8* s, STRLEN curlen, STRLEN* retlen, U32 flags) goto malformed; } - if ((uv >= 0xc0 && uv <= 0xfd && curlen > 1 && s[1] < 0x80) && + if (UTF8_IS_START(uv) && curlen > 1 && !UTF8_IS_CONTINUATION(s[1]) && !(flags & UTF8_ALLOW_NON_CONTINUATION)) { if (dowarn) Perl_warner(aTHX_ WARN_UTF8, - "Malformed UTF-8 character (unexpected non-continuation byte 0x%02"UVxf" after byte 0x%02"UVxf")", + "Malformed UTF-8 character (unexpected non-continuation byte 0x%02"UVxf" after start byte 0x%02"UVxf")", (UV)s[1], uv); goto malformed; } @@ -276,15 +278,16 @@ Perl_utf8_to_uv(pTHX_ U8* s, STRLEN curlen, STRLEN* retlen, U32 flags) ouv = uv; while (len--) { - if ((*s & 0xc0) != 0x80) { + if (!UTF8_IS_CONTINUATION(*s) && + !(flags & UTF8_ALLOW_NON_CONTINUATION)) { if (dowarn) Perl_warner(aTHX_ WARN_UTF8, - "Malformed UTF-8 character (unexpected continuation byte 0x%02x)", + "Malformed UTF-8 character (unexpected non-continuation byte 0x%02x)", *s); goto malformed; } else - uv = (uv << 6) | (*s & 0x3f); + uv = UTF8_ACCUMULATE(uv, *s); if (uv < ouv) { /* This cannot be allowed. */ if (dowarn) @@ -297,14 +300,14 @@ Perl_utf8_to_uv(pTHX_ U8* s, STRLEN curlen, STRLEN* retlen, U32 flags) ouv = uv; } - if ((uv >= 0xd800 && uv <= 0xdfff) && + if (UNICODE_IS_SURROGATE(uv) && !(flags & UTF8_ALLOW_SURROGATE)) { if (dowarn) Perl_warner(aTHX_ WARN_UTF8, "Malformed UTF-8 character (UTF-16 surrogate 0x%04"UVxf")", uv); goto malformed; - } else if ((uv == 0xfffe) && + } else if (UNICODE_IS_BYTE_ORDER_MARK(uv) && !(flags & UTF8_ALLOW_BOM)) { if (dowarn) Perl_warner(aTHX_ WARN_UTF8, @@ -318,7 +321,7 @@ Perl_utf8_to_uv(pTHX_ U8* s, STRLEN curlen, STRLEN* retlen, U32 flags) "Malformed UTF-8 character (%d byte%s, need %d)", expectlen, expectlen == 1 ? "": "s", UNISKIP(uv)); goto malformed; - } else if ((uv == 0xffff) && + } else if (UNICODE_IS_ILLEGAL(uv) && !(flags & UTF8_ALLOW_FFFF)) { if (dowarn) Perl_warner(aTHX_ WARN_UTF8, @@ -338,9 +341,9 @@ malformed: } if (retlen) - *retlen = expectlen; + *retlen = expectlen ? expectlen : len; - return UNICODE_REPLACEMENT_CHARACTER; + return 0; } /* @@ -378,6 +381,10 @@ Perl_utf8_length(pTHX_ U8* s, U8* e) { STRLEN len = 0; + /* Note: cannot use UTF8_IS_...() too eagerly here since e.g. + * the bitops (especially ~) can create illegal UTF-8. + * In other words: in Perl UTF-8 is not just for Unicode. */ + if (e < s) Perl_croak(aTHX_ "panic: utf8_length: unexpected end"); while (s < e) { @@ -408,6 +415,10 @@ Perl_utf8_distance(pTHX_ U8 *a, U8 *b) { IV off = 0; + /* Note: cannot use UTF8_IS_...() too eagerly here since e.g. + * the bitops (especially ~) can create illegal UTF-8. + * In other words: in Perl UTF-8 is not just for Unicode. */ + if (a < b) { while (a < b) { U8 c = UTF8SKIP(a); @@ -435,17 +446,22 @@ Perl_utf8_distance(pTHX_ U8 *a, U8 *b) /* =for apidoc Am|U8*|utf8_hop|U8 *s|I32 off -Move the C<s> pointing to UTF-8 data by C<off> characters, either forward -or backward. +Return the UTF-8 pointer C<s> displaced by C<off> characters, either +forward or backward. WARNING: do not use the following unless you *know* C<off> is within -the UTF-8 buffer pointed to by C<s>. +the UTF-8 data pointed to by C<s> *and* that on entry C<s> is aligned +on the first byte of character or just after the last byte of a character. =cut */ U8 * Perl_utf8_hop(pTHX_ U8 *s, I32 off) { + /* Note: cannot use UTF8_IS_...() too eagerly here since e.g + * the bitops (especially ~) can create illegal UTF-8. + * In other words: in Perl UTF-8 is not just for Unicode. */ + if (off >= 0) { while (off--) s += UTF8SKIP(s); @@ -453,10 +469,8 @@ Perl_utf8_hop(pTHX_ U8 *s, I32 off) else { while (off++) { s--; - if (*s & 0x80) { - while ((*s & 0xc0) == 0x80) - s--; - } + while (UTF8_IS_CONTINUATION(*s)) + s--; } } return s; @@ -494,14 +508,9 @@ Perl_utf8_to_bytes(pTHX_ U8* s, STRLEN *len) d = s = save; while (s < send) { - if (*s < 0x80) { - *d++ = *s++; - } - else { - STRLEN ulen; - *d++ = (U8)utf8_to_uv_simple(s, &ulen); - s += ulen; - } + STRLEN ulen; + *d++ = (U8)utf8_to_uv_simple(s, &ulen); + s += ulen; } *d = '\0'; *len = d - save; @@ -46,10 +46,31 @@ END_EXTERN_C #define UTF8_ALLOW_ANY 0x00ff #define UTF8_CHECK_ONLY 0x0100 +#define UNICODE_SURROGATE_FIRST 0xd800 +#define UNICODE_SURROGATE_LAST 0xdfff +#define UNICODE_REPLACEMENT 0xfffd +#define UNICODE_BYTER_ORDER_MARK 0xfffe +#define UNICODE_ILLEGAL 0xffff + +#define UNICODE_IS_SURROGATE(c) ((c) >= UNICODE_SURROGATE_FIRST && \ + (c) <= UNICODE_SURROGATE_LAST) +#define UNICODE_IS_REPLACEMENT(c) ((c) == UNICODE_REPLACMENT) +#define UNICODE_IS_BYTE_ORDER_MARK(c) ((c) == UNICODE_BYTER_ORDER_MARK) +#define UNICODE_IS_ILLEGAL(c) ((c) == UNICODE_ILLEGAL) + #define UTF8SKIP(s) PL_utf8skip[*(U8*)s] #define UTF8_QUAD_MAX UINT64_C(0x1000000000) +#define UTF8_IS_ASCII(c) ((c) < 0x80) +#define UTF8_IS_START(c) ((c) >= 0xc0 && ((c) <= 0xfd)) +#define UTF8_IS_CONTINUATION(c) ((c) >= 0x80 && ((c) <= 0xbf)) +#define UTF8_IS_CONTINUED(c) ((c) & 0x80) + +#define UTF8_CONTINUATION_MASK 0x3f +#define UTF8_ACCUMULATION_SHIFT 6 +#define UTF8_ACCUMULATE(old, new) ((old) << UTF8_ACCUMULATION_SHIFT | ((new) & UTF8_CONTINUATION_MASK)) + #ifdef HAS_QUAD #define UNISKIP(uv) ( (uv) < 0x80 ? 1 : \ (uv) < 0x800 ? 2 : \ @@ -68,7 +89,6 @@ END_EXTERN_C (uv) < 0x80000000 ? 6 : 7 ) #endif -#define UNICODE_REPLACEMENT_CHARACTER 0xfffd /* * Note: we try to be careful never to call the isXXX_utf8() functions |