summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile.SH21
-rwxr-xr-xt/TEST41
-rw-r--r--toke.c64
-rw-r--r--utf8.c8
4 files changed, 92 insertions, 42 deletions
diff --git a/Makefile.SH b/Makefile.SH
index e8fbf3e432..3e66a5dee2 100644
--- a/Makefile.SH
+++ b/Makefile.SH
@@ -1053,6 +1053,7 @@ makedepend: makedepend.SH config.sh
.PHONY: test check test_prep test_prep_nodll test_prep_pre _test_prep \
test_tty test-tty _test_tty test_notty test-notty _test_notty \
utest ucheck test.utf8 check.utf8 test.torture torturetest \
+ test.utf16 check.utf16 utest.utf16 ucheck.utf16 \
test.third check.third utest.third ucheck.third test_notty.third \
test.deparse test_notty.deparse test_harness test_harness_notty \
test.bytecompile minitest coretest test.taintwarn
@@ -1114,6 +1115,18 @@ test-notty: test_notty
test.torture torturetest: test_prep
PERL=./perl TEST_ARGS=-torture $(MAKE) _test
+# Targets for UTF16 testing:
+
+minitest.utf16: minitest.prep
+ - cd t && (rm -f perl$(EXE_EXT); $(LNS) ../miniperl$(EXE_EXT) perl$(EXE_EXT)) \
+ && $(LDLIBPTH) ./perl TEST -minitest -utf16 base/*.t comp/*.t cmd/*.t run/*.t io/*.t op/*.t uni/*.t </dev/tty
+
+test.utf16 check.utf16: test_prep
+ PERL=./perl $(MAKE) TEST_ARGS=-utf16 _test
+
+utest.utf16 ucheck.utf16: test_prep
+ PERL=./perl $(MAKE) TEST_ARGS="-utf8 -utf16" _test
+
# Targets for valgrind testing:
test_prep.valgrind: test_prep perl.valgrind
@@ -1160,15 +1173,17 @@ test_notty.deparse: test_prep
test.taintwarn: test_prep
PERL=./perl TEST_ARGS=-taintwarn $(MAKE) _test
-# Can't depend on lib/Config.pm because that might be where miniperl
-# is crashing.
-minitest: miniperl$(EXE_EXT) lib/re.pm
+minitest.prep:
-@test -f lib/lib.pm && test -f lib/Config.pm || \
$(MAKE) lib/Config.pm lib/lib.pm $(unidatafiles)
@echo " "
@echo "You may see some irrelevant test failures if you have been unable"
@echo "to build lib/Config.pm, lib/lib.pm or the Unicode data files."
@echo " "
+
+# Can't depend on lib/Config.pm because that might be where miniperl
+# is crashing.
+minitest: miniperl$(EXE_EXT) lib/re.pm minitest.prep
- cd t && (rm -f perl$(EXE_EXT); $(LNS) ../miniperl$(EXE_EXT) perl$(EXE_EXT)) \
&& $(LDLIBPTH) ./perl TEST -minitest base/*.t comp/*.t cmd/*.t run/*.t io/*.t op/*.t uni/*.t </dev/tty
diff --git a/t/TEST b/t/TEST
index 357f230b00..afce704b53 100755
--- a/t/TEST
+++ b/t/TEST
@@ -1,7 +1,9 @@
#!./perl
# This is written in a peculiar style, since we're trying to avoid
-# most of the constructs we'll be testing for.
+# most of the constructs we'll be testing for. (This comment is
+# probably obsolete on the avoidance side, though still current
+# on the peculiarity side.)
$| = 1;
@@ -20,7 +22,8 @@ if ($#ARGV >= 0) {
$core = 1 if $1 eq 'core';
$verbose = 1 if $1 eq 'v';
$torture = 1 if $1 eq 'torture';
- $with_utf= 1 if $1 eq 'utf8';
+ $with_utf8 = 1 if $1 eq 'utf8';
+ $with_utf16 = 1 if $1 eq 'utf16';
$bytecompile = 1 if $1 eq 'bytecompile';
$compile = 1 if $1 eq 'compile';
$taintwarn = 1 if $1 eq 'taintwarn';
@@ -134,6 +137,32 @@ elsif( $compile ) {
elsif( $bytecompile ) {
_testprogs('bytecompile', '', @ARGV);
}
+elsif ($with_utf16) { # run every test as a UTF-16 copy of itself
+ for my $e (0, 1) { # $e: 0 = big-endian ("n"), 1 = little-endian ("v")
+ for my $b (0, 1) { # $b: 1 = prefix the copy with a BOM, 0 = no BOM
+ print STDERR "# ENDIAN $e BOM $b\n";
+ my @UARGV;
+ for my $a (@ARGV) {
+ my $u = $a . "." . ($e ? "l" : "b") . "e" . ($b ? "b" : ""); # e.g. foo.t.le / foo.t.beb
+ my $f = $e ? "v" : "n"; # pack template for one 16-bit unit
+ push @UARGV, $u;
+ unlink($u);
+ if (open(A, $a)) {
+ if (open(U, ">$u")) {
+ print U pack("$f", 0xFEFF) if $b; # BOM only on the "BOM 1" pass
+ while (<A>) {
+ print U pack("$f*", unpack("C*", $_)); # widen each byte to 16 bits
+ }
+ close(U); # flush the copy before _testprogs runs it
+ }
+ close(A); # always release the source handle, even if U failed to open
+ }
+ }
+ _testprogs('perl', '', @UARGV);
+ unlink(@UARGV);
+ }
+ }
+}
else {
_testprogs('compile', '', @ARGV) if -e "../testcompile";
_testprogs('perl', '', @ARGV);
@@ -243,7 +272,7 @@ EOT
close(SCRIPT);
}
- my $utf = $with_utf ? '-I../lib -Mutf8' : '';
+ my $utf8 = $with_utf8 ? '-I../lib -Mutf8' : '';
my $testswitch = '-I. -MTestInit'; # -T will strict . from @INC
if ($type eq 'deparse') {
my $deparse =
@@ -275,7 +304,7 @@ EOT
my $bytecompile =
"$perl $testswitch $switch -I../lib $bswitch".
"-o$test.plc $test 2>$null &&".
- "$perl $testswitch $switch -I../lib $utf $test.plc |";
+ "$perl $testswitch $switch -I../lib $utf8 $test.plc |";
open(RESULTS,$bytecompile)
or print "can't byte-compile '$bytecompile': $!.\n";
}
@@ -288,7 +317,7 @@ EOT
. "--num-callers=50 --logfile-fd=3 $perl";
$redir = "3>$valgrind_log";
}
- my $run = "$perl" . _quote_args("$testswitch $switch $utf") . " $test $redir|";
+ my $run = "$perl" . _quote_args("$testswitch $switch $utf8") . " $test $redir|";
open(RESULTS,$run) or print "can't run '$run': $!.\n";
}
else {
@@ -296,7 +325,7 @@ EOT
my $pl2c = "$testswitch -I../lib ../utils/perlcc --testsuite " .
# -O9 for good measure, -fcog is broken ATM
"$switch -Wb=-O9,-fno-cog -L .. " .
- "-I \".. ../lib/CORE\" $args $utf $test -o ";
+ "-I \".. ../lib/CORE\" $args $utf8 $test -o ";
if( $^O eq 'MSWin32' ) {
$test_executable = "$test.exe";
diff --git a/toke.c b/toke.c
index aee151cf03..80a9ba7682 100644
--- a/toke.c
+++ b/toke.c
@@ -2157,19 +2157,17 @@ Perl_filter_del(pTHX_ filter_t funcp)
}
-/* Invoke the n'th filter function for the current rsfp. */
+/* Invoke the idxth filter function for the current rsfp. */
+/* maxlen 0 = read one text line */
I32
Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
-
-
- /* 0 = read one text line */
{
filter_t funcp;
SV *datasv = NULL;
if (!PL_rsfp_filters)
return -1;
- if (idx > AvFILLp(PL_rsfp_filters)){ /* Any more filters? */
+ if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
/* Provide a default input filter to make life easy. */
/* Note that we append to the line. This is handy. */
DEBUG_P(PerlIO_printf(Perl_debug_log,
@@ -2200,7 +2198,7 @@ Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
return SvCUR(buf_sv);
}
/* Skip this filter slot if filter has been deleted */
- if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef){
+ if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
DEBUG_P(PerlIO_printf(Perl_debug_log,
"filter_read %d: skipped (filter deleted)\n",
idx));
@@ -2226,7 +2224,6 @@ S_filter_gets(pTHX_ register SV *sv, register PerlIO *fp, STRLEN append)
}
#endif
if (PL_rsfp_filters) {
-
if (!append)
SvCUR_set(sv, 0); /* start with empty line */
if (FILTER_READ(0, sv, 0) > 0)
@@ -6834,10 +6831,11 @@ S_scan_heredoc(pTHX_ register char *s)
av_store(CopFILEAV(PL_curcop), (I32)CopLINE(PL_curcop),sv);
}
if (*s == term && memEQ(s,PL_tokenbuf,len)) {
- s = PL_bufend - 1;
- *s = ' ';
+ STRLEN off = PL_bufend - 1 - SvPVX(PL_linestr);
+ *(SvPVX(PL_linestr) + off ) = ' ';
sv_catsv(PL_linestr,herewas);
PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
+ s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
}
else {
s = PL_bufend;
@@ -7999,10 +7997,9 @@ S_swallow_bom(pTHX_ U8 *s)
filter_add(utf16rev_textfilter, NULL);
New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
- PL_bufend =
- (char*)utf16_to_utf8_reversed(s, news,
- PL_bufend - (char*)s - 1,
- &newlen);
+ utf16_to_utf8_reversed(s, news,
+ PL_bufend - (char*)s - 1,
+ &newlen);
sv_setpvn(PL_linestr, (const char*)news, newlen);
Safefree(news);
SvUTF8_on(PL_linestr);
@@ -8026,10 +8023,9 @@ S_swallow_bom(pTHX_ U8 *s)
filter_add(utf16_textfilter, NULL);
New(898, news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
- PL_bufend =
- (char*)utf16_to_utf8(s, news,
- PL_bufend - (char*)s,
- &newlen);
+ utf16_to_utf8(s, news,
+ PL_bufend - (char*)s,
+ &newlen);
sv_setpvn(PL_linestr, (const char*)news, newlen);
Safefree(news);
SvUTF8_on(PL_linestr);
@@ -8096,38 +8092,42 @@ restore_rsfp(pTHX_ void *f)
static I32
utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
{
+ STRLEN old = SvCUR(sv); /* bytes already in sv before this read; copied through unconverted */
I32 count = FILTER_READ(idx+1, sv, maxlen);
+ DEBUG_P(PerlIO_printf(Perl_debug_log,
+ "utf16_textfilter(%p): %d %d (%d)\n",
+ utf16_textfilter, idx, maxlen, count));
if (count) {
U8* tmps;
- U8* tend;
I32 newlen;
New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
- if (!*SvPV_nolen(sv))
- /* Game over, but don't feed an odd-length string to utf16_to_utf8 */
- return count;
-
- tend = utf16_to_utf8((U8*)SvPVX(sv), tmps, SvCUR(sv), &newlen);
- sv_usepvn(sv, (char*)tmps, tend - tmps);
+ Copy(SvPVX(sv), tmps, old, char); /* keep the first `old` bytes as they are */
+ utf16_to_utf8((U8*)SvPVX(sv) + old, tmps + old, /* convert only what this read appended */
+ SvCUR(sv) - old, &newlen);
+ sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old); /* sv now uses tmps as its buffer */
}
- return count;
+ DEBUG_P({sv_dump(sv);});
+ return SvCUR(sv); /* NOTE(review): returns buffer length, while utf16rev_textfilter returns count -- confirm which is intended */
}
static I32
utf16rev_textfilter(pTHX_ int idx, SV *sv, int maxlen)
{
+ STRLEN old = SvCUR(sv); /* bytes already in sv before this read; copied through unconverted */
I32 count = FILTER_READ(idx+1, sv, maxlen);
+ DEBUG_P(PerlIO_printf(Perl_debug_log,
+ "utf16rev_textfilter(%p): %d %d (%d)\n",
+ utf16rev_textfilter, idx, maxlen, count));
if (count) {
U8* tmps;
- U8* tend;
I32 newlen;
- if (!*SvPV_nolen(sv))
- /* Game over, but don't feed an odd-length string to utf16_to_utf8 */
- return count;
-
New(898, tmps, SvCUR(sv) * 3 / 2 + 1, U8);
- tend = utf16_to_utf8_reversed((U8*)SvPVX(sv), tmps, SvCUR(sv), &newlen);
- sv_usepvn(sv, (char*)tmps, tend - tmps);
+ Copy(SvPVX(sv), tmps, old, char); /* keep the first `old` bytes as they are */
+ utf16_to_utf8_reversed((U8*)SvPVX(sv) + old, tmps + old, /* byte-swapped input needs the reversed converter */
+ SvCUR(sv) - old, &newlen);
+ sv_usepvn(sv, (char*)tmps, (STRLEN)newlen + old); /* sv now uses tmps as its buffer */
}
+ DEBUG_P({ sv_dump(sv); });
return count;
}
#endif
diff --git a/utf8.c b/utf8.c
index 24bf93d9d7..f12696ecf1 100644
--- a/utf8.c
+++ b/utf8.c
@@ -868,8 +868,14 @@ Perl_utf16_to_utf8(pTHX_ U8* p, U8* d, I32 bytelen, I32 *newlen)
U8* pend;
U8* dstart = d;
+ if (bytelen == 1 && p[0] == 0) { /* Be understanding. */
+ d[0] = 0;
+ *newlen = 1;
+ return d;
+ }
+
if (bytelen & 1)
- Perl_croak(aTHX_ "panic: utf16_to_utf8: odd bytelen");
+ Perl_croak(aTHX_ "panic: utf16_to_utf8: odd bytelen %d", bytelen);
pend = p + bytelen;