diff options
-rw-r--r-- | perl.c | 7 | ||||
-rw-r--r-- | perlio.c | 6 | ||||
-rw-r--r-- | pp_sys.c | 3 | ||||
-rw-r--r-- | t/lib/tie-refhash.t | 42 | ||||
-rw-r--r-- | t/op/utf8decode.t | 14 | ||||
-rw-r--r-- | t/pragma/warn/utf8 | 4 | ||||
-rw-r--r-- | utf8.c | 29 | ||||
-rw-r--r-- | utf8.h | 17 |
8 files changed, 65 insertions, 57 deletions
@@ -788,12 +788,13 @@ perl_free(pTHXx) # if defined(WIN32) # if defined(PERL_IMPLICIT_SYS) void *host = w32_internal_host; - if (PerlProc_lasthost()) - PerlIO_cleanup(); + if (PerlProc_lasthost()) { + PerlIO_cleanup(); + } PerlMem_free(aTHXx); win32_delete_internal_host(host); #else - PerlIO_cleanup(); + PerlIO_cleanup(); PerlMem_free(aTHXx); #endif # else @@ -28,12 +28,6 @@ #define PERL_IN_PERLIO_C #include "perl.h" -#undef PerlMemShared_calloc -#define PerlMemShared_calloc(x,y) calloc(x,y) -#undef PerlMemShared_free -#define PerlMemShared_free(x) free(x) - - #ifndef PERLIO_LAYERS int PerlIO_apply_layers(pTHX_ PerlIO *f, const char *mode, const char *names) @@ -3724,9 +3724,6 @@ PP(pp_fork) if (childpid < 0) RETSETUNDEF; if (!childpid) { -#ifdef SOCKS_64BIT_BUG - Perl_do_s64_init_buffer(); -#endif /*SUPPRESS 560*/ if ((tmpgv = gv_fetchpv("$", TRUE, SVt_PV))) sv_setiv(GvSV(tmpgv), (IV)PerlProc_getpid()); diff --git a/t/lib/tie-refhash.t b/t/lib/tie-refhash.t index a82c19c743..d80b2e10fc 100644 --- a/t/lib/tie-refhash.t +++ b/t/lib/tie-refhash.t @@ -1,19 +1,19 @@ #!/usr/bin/perl -w -# +# # Basic test suite for Tie::RefHash and Tie::RefHash::Nestable. -# +# # The testing is in two parts: first, run lots of tests on both a tied # hash and an ordinary un-tied hash, and check they give the same # answer. Then there are tests for those cases where the tied hashes # should behave differently to normal hashes, that is, when using # references as keys. -# +# BEGIN { chdir 't' if -d 't'; - @INC = '.'; + @INC = '.'; push @INC, '../lib'; -} +} use strict; use Tie::RefHash; @@ -28,7 +28,7 @@ my $ref = []; my $ref1 = []; # on a tied hash and on a normal hash, and checking that the results # are the same. This does of course assume that Perl hashes are not # buggy :-) -# +# my @tests = standard_hash_tests(); my @ordinary_results = runtests(\@tests, undef); @@ -40,13 +40,13 @@ foreach my $class ('Tie::RefHash', 'Tie::RefHash::Nestable') { foreach my $i (0 .. $#ordinary_results) { my ($or, $ow, $oe) = @{$ordinary_results[$i]}; my ($tr, $tw, $te) = @{$tied_results[$i]}; - + my $ok = 1; local $^W = 0; $ok = 0 if (defined($or) != defined($tr)) or ($or ne $tr); $ok = 0 if (defined($ow) != defined($tw)) or ($ow ne $tw); $ok = 0 if (defined($oe) != defined($te)) or ($oe ne $te); - + if (not $ok) { print STDERR "failed for $class: $tests[$i]\n", @@ -127,7 +127,7 @@ exit(); # Print 'ok X' if true, 'not ok X' if false # Uses global $currtest. -# +# sub test { my $t = shift; print 'not ' if not $t; @@ -135,7 +135,7 @@ sub test { } -# Wrapper for Data::Dumper to 'dump' a scalar as an EXPR string. +# Wrapper for Data::Dumper to 'dump' a scalar as an EXPR string. sub dumped { my $s = shift; my $d = Dumper($s); @@ -148,7 +148,7 @@ sub dumped { # Crudely dump a hash into a canonical string representation (because # hash keys can appear in any order, Data::Dumper may give different # strings for the same hash). -# +# sub dumph { my $h = shift; my $r = ''; @@ -159,17 +159,17 @@ sub dumph { } # Run the tests and give results. -# +# # Parameters: reference to list of tests to run # name of class to use for tied hash, or undef if not tied -# +# # Returns: list of [R, W, E] tuples, one for each test. # R is the return value from running the test, W any warnings it gave, # and E any exception raised with 'die'. E and W will be tidied up a # little to remove irrelevant details like line numbers :-) -# +# # Will also run a few of its own 'ok N' tests. -# +# sub runtests { my ($tests, $class) = @_; my @r; @@ -215,14 +215,14 @@ sub runtests { # Things that should work just the same for an ordinary hash and a # Tie::RefHash. -# +# # Each test is a code string to be eval'd, it should do something with # %h and give a scalar return value. The global $ref and $ref1 may # also be used. -# +# # One thing we don't test is that the ordering from 'keys', 'values' # and 'each' is the same. You can't reasonably expect that. -# +# sub standard_hash_tests { my @r; @@ -234,12 +234,12 @@ sub standard_hash_tests { { my ($k, $v, %tmp); $tmp{"$k$;$v"}++ while (($k, $v) = each %h); dumph(\%tmp) } END ; - + # Tests on the existence of the element 'foo' my $FOO_TESTS = <<'END' defined $h{foo}; exists $h{foo}; - $h{foo}; + $h{foo}; END ; @@ -278,7 +278,7 @@ END ; } } - + # Test hash slices my @slicetests; @slicetests = split /\n/, <<'END' diff --git a/t/op/utf8decode.t b/t/op/utf8decode.t index c631c0a7a9..ac42b85577 100644 --- a/t/op/utf8decode.t +++ b/t/op/utf8decode.t @@ -53,11 +53,11 @@ my @MK = split(/\n/, <<__EOMK__); 3.1.8 n "�������" - 7 80:bf:80:bf:80:bf:80 - unexpected continuation byte 0x80 3.1.9 n "����������������������������������������������������������������" - 64 80:81:82:83:84:85:86:87:88:89:8a:8b:8c:8d:8e:8f:90:91:92:93:94:95:96:97:98:99:9a:9b:9c:9d:9e:9f:a0:a1:a2:a3:a4:a5:a6:a7:a8:a9:aa:ab:ac:ad:ae:af:b0:b1:b2:b3:b4:b5:b6:b7:b8:b9:ba:bb:bc:bd:be:bf - unexpected continuation byte 0x80 3.2 Lonely start characters -3.2.1 n "� � � � � � � � � � � � � � � � � � � � � � � � � � � � � � � � " - 64 c0:20:c1:20:c2:20:c3:20:c4:20:c5:20:c6:20:c7:20:c8:20:c9:20:ca:20:cb:20:cc:20:cd:20:ce:20:cf:20:d0:20:d1:20:d2:20:d3:20:d4:20:d5:20:d6:20:d7:20:d8:20:d9:20:da:20:db:20:dc:20:dd:20:de:20:df:20 - unexpected non-continuation byte 0x20 after byte 0xc0 -3.2.2 n "� � � � � � � � � � � � � � � � " - 32 e0:20:e1:20:e2:20:e3:20:e4:20:e5:20:e6:20:e7:20:e8:20:e9:20:ea:20:eb:20:ec:20:ed:20:ee:20:ef:20 - unexpected non-continuation byte 0x20 after byte 0xe0 -3.2.3 n "� � � � � � � � " - 16 f0:20:f1:20:f2:20:f3:20:f4:20:f5:20:f6:20:f7:20 - unexpected non-continuation byte 0x20 after byte 0xf0 -3.2.4 n "� � � � " - 8 f8:20:f9:20:fa:20:fb:20 - unexpected non-continuation byte 0x20 after byte 0xf8 -3.2.5 n "� � " - 4 fc:20:fd:20 - unexpected non-continuation byte 0x20 after byte 0xfc +3.2.1 n "� � � � � � � � � � � � � � � � � � � � � � � � � � � � � � � � " - 64 c0:20:c1:20:c2:20:c3:20:c4:20:c5:20:c6:20:c7:20:c8:20:c9:20:ca:20:cb:20:cc:20:cd:20:ce:20:cf:20:d0:20:d1:20:d2:20:d3:20:d4:20:d5:20:d6:20:d7:20:d8:20:d9:20:da:20:db:20:dc:20:dd:20:de:20:df:20 - unexpected non-continuation byte 0x20 after start byte 0xc0 +3.2.2 n "� � � � � � � � � � � � � � � � " - 32 e0:20:e1:20:e2:20:e3:20:e4:20:e5:20:e6:20:e7:20:e8:20:e9:20:ea:20:eb:20:ec:20:ed:20:ee:20:ef:20 - unexpected non-continuation byte 0x20 after start byte 0xe0 +3.2.3 n "� � � � � � � � " - 16 f0:20:f1:20:f2:20:f3:20:f4:20:f5:20:f6:20:f7:20 - unexpected non-continuation byte 0x20 after start byte 0xf0 +3.2.4 n "� � � � " - 8 f8:20:f9:20:fa:20:fb:20 - unexpected non-continuation byte 0x20 after start byte 0xf8 +3.2.5 n "� � " - 4 fc:20:fd:20 - unexpected non-continuation byte 0x20 after start byte 0xfc 3.3 Sequences with last continuation byte missing 3.3.1 n "�" - 1 c0 - 1 byte, need 2 3.3.2 n "��" - 2 e0:80 - 2 bytes, need 3 @@ -70,7 +70,7 @@ my @MK = split(/\n/, <<__EOMK__); 3.3.9 n "����" - 4 fb:bf:bf:bf - 4 bytes, need 5 3.3.10 n "�����" - 5 fd:bf:bf:bf:bf - 5 bytes, need 6 3.4 Concatenation of incomplete sequences -3.4.1 n "�����������������������������" - 30 c0:e0:80:f0:80:80:f8:80:80:80:fc:80:80:80:80:df:ef:bf:f7:bf:bf:fb:bf:bf:bf:fd:bf:bf:bf:bf - unexpected continuation byte 0xe0 +3.4.1 n "�����������������������������" - 30 c0:e0:80:f0:80:80:f8:80:80:80:fc:80:80:80:80:df:ef:bf:f7:bf:bf:fb:bf:bf:bf:fd:bf:bf:bf:bf - unexpected non-continuation byte 0xe0 after start byte 0xc0 3.5 Impossible bytes 3.5.1 n "�" - 1 fe - byte 0xfe 3.5.2 n "�" - 1 ff - byte 0xff @@ -125,7 +125,7 @@ __EOMK__ local $SIG{__WARN__} = sub { - # print "# $id: @_"; + print "# $id: @_"; $WARNCNT++; $WARNMSG = "@_"; }; diff --git a/t/pragma/warn/utf8 b/t/pragma/warn/utf8 index adc10c645e..9a7dbafdee 100644 --- a/t/pragma/warn/utf8 +++ b/t/pragma/warn/utf8 @@ -30,6 +30,6 @@ my $a = "sn�storm" ; my $a = "sn�storm"; } EXPECT -Malformed UTF-8 character (unexpected non-continuation byte 0x73 after byte 0xf8) at - line 9. -Malformed UTF-8 character (unexpected non-continuation byte 0x73 after byte 0xf8) at - line 14. +Malformed UTF-8 character (unexpected non-continuation byte 0x73 after start byte 0xf8) at - line 9. +Malformed UTF-8 character (unexpected non-continuation byte 0x73 after start byte 0xf8) at - line 14. ######## @@ -190,10 +190,10 @@ If C<s> does not point to a well-formed UTF8 character, the behaviour is dependent on the value of C<flags>: if it contains UTF8_CHECK_ONLY, it is assumed that the caller will raise a warning, and this function will set C<retlen> to C<-1> and return zero. If the C<flags> does not -contain UTF8_CHECK_ONLY, the UNICODE_REPLACEMENT_CHARACTER (0xFFFD) -will be returned, and C<retlen> will be set to the expected length of -the UTF-8 character in bytes. The C<flags> can also contain various -flags to allow deviations from the strict UTF-8 encoding (see F<utf8.h>). +contain UTF8_CHECK_ONLY, the UNICODE_REPLACEMENT (0xFFFD) will be +returned, and C<retlen> will be set to the expected length of the +UTF-8 character in bytes. The C<flags> can also contain various flags +to allow deviations from the strict UTF-8 encoding (see F<utf8.h>). =cut */ @@ -216,13 +216,13 @@ Perl_utf8_to_uv(pTHX_ U8* s, STRLEN curlen, STRLEN* retlen, U32 flags) goto malformed; } - if (uv <= 0x7f) { /* Pure ASCII. */ + if (UTF8_IS_ASCII(uv)) { if (retlen) *retlen = 1; return *s; } - if ((uv >= 0x80 && uv <= 0xbf) && + if (UTF8_IS_CONTINUATION(uv) && !(flags & UTF8_ALLOW_CONTINUATION)) { if (dowarn) Perl_warner(aTHX_ WARN_UTF8, @@ -231,11 +231,11 @@ Perl_utf8_to_uv(pTHX_ U8* s, STRLEN curlen, STRLEN* retlen, U32 flags) goto malformed; } - if ((uv >= 0xc0 && uv <= 0xfd && curlen > 1 && s[1] < 0x80) && + if (UTF8_IS_START(uv) && curlen > 1 && !UTF8_IS_CONTINUATION(s[1]) && !(flags & UTF8_ALLOW_NON_CONTINUATION)) { if (dowarn) Perl_warner(aTHX_ WARN_UTF8, - "Malformed UTF-8 character (unexpected non-continuation byte 0x%02"UVxf" after byte 0x%02"UVxf")", + "Malformed UTF-8 character (unexpected non-continuation byte 0x%02"UVxf" after start byte 0x%02"UVxf")", (UV)s[1], uv); goto malformed; } @@ -276,10 +276,11 @@ Perl_utf8_to_uv(pTHX_ U8* s, STRLEN curlen, STRLEN* retlen, U32 flags) ouv = uv; while (len--) { - if ((*s & 0xc0) != 0x80) { + if (!UTF8_IS_CONTINUATION(*s) && + !(flags & UTF8_ALLOW_NON_CONTINUATION)) { if (dowarn) Perl_warner(aTHX_ WARN_UTF8, - "Malformed UTF-8 character (unexpected continuation byte 0x%02x)", + "Malformed UTF-8 character (unexpected non-continuation byte 0x%02x)", *s); goto malformed; } @@ -297,14 +298,14 @@ Perl_utf8_to_uv(pTHX_ U8* s, STRLEN curlen, STRLEN* retlen, U32 flags) ouv = uv; } - if ((uv >= 0xd800 && uv <= 0xdfff) && + if (UNICODE_IS_SURROGATE(uv) && !(flags & UTF8_ALLOW_SURROGATE)) { if (dowarn) Perl_warner(aTHX_ WARN_UTF8, "Malformed UTF-8 character (UTF-16 surrogate 0x%04"UVxf")", uv); goto malformed; - } else if ((uv == 0xfffe) && + } else if (UNICODE_IS_BYTE_ORDER_MARK(uv) && !(flags & UTF8_ALLOW_BOM)) { if (dowarn) Perl_warner(aTHX_ WARN_UTF8, @@ -318,7 +319,7 @@ Perl_utf8_to_uv(pTHX_ U8* s, STRLEN curlen, STRLEN* retlen, U32 flags) "Malformed UTF-8 character (%d byte%s, need %d)", expectlen, expectlen == 1 ? "": "s", UNISKIP(uv)); goto malformed; - } else if ((uv == 0xffff) && + } else if (UNICODE_IS_ILLEGAL(uv) && !(flags & UTF8_ALLOW_FFFF)) { if (dowarn) Perl_warner(aTHX_ WARN_UTF8, @@ -340,7 +341,7 @@ malformed: if (retlen) *retlen = expectlen; - return UNICODE_REPLACEMENT_CHARACTER; + return UNICODE_REPLACEMENT; } /* @@ -46,10 +46,26 @@ END_EXTERN_C #define UTF8_ALLOW_ANY 0x00ff #define UTF8_CHECK_ONLY 0x0100 +#define UNICODE_SURROGATE_FIRST 0xd800 +#define UNICODE_SURROGATE_LAST 0xdfff +#define UNICODE_REPLACEMENT 0xfffd +#define UNICODE_BYTER_ORDER_MARK 0xfffe +#define UNICODE_ILLEGAL 0xffff + +#define UNICODE_IS_SURROGATE(c) ((c) >= UNICODE_SURROGATE_FIRST && \ + (c) <= UNICODE_SURROGATE_LAST) +#define UNICODE_IS_REPLACEMENT(c) ((c) == UNICODE_REPLACMENT) +#define UNICODE_IS_BYTE_ORDER_MARK(c) ((c) == UNICODE_BYTER_ORDER_MARK) +#define UNICODE_IS_ILLEGAL(c) ((c) == UNICODE_ILLEGAL) + #define UTF8SKIP(s) PL_utf8skip[*(U8*)s] #define UTF8_QUAD_MAX UINT64_C(0x1000000000) +#define UTF8_IS_ASCII(c) ((c) < 0x80) +#define UTF8_IS_START(c) ((c) >= 0xc0 && ((c) <= 0xfd)) +#define UTF8_IS_CONTINUATION(c) ((c) >= 0x80 && ((c) <= 0xbf)) + #ifdef HAS_QUAD #define UNISKIP(uv) ( (uv) < 0x80 ? 1 : \ (uv) < 0x800 ? 2 : \ @@ -68,7 +84,6 @@ END_EXTERN_C (uv) < 0x80000000 ? 6 : 7 ) #endif -#define UNICODE_REPLACEMENT_CHARACTER 0xfffd /* * Note: we try to be careful never to call the isXXX_utf8() functions |