diff options
-rw-r--r-- | pp.c | 12 |
-rwxr-xr-x | t/op/join.t | 19 |
-rw-r--r-- | utf8.c | 6 |
-rw-r--r-- | utf8.h | 6 |
4 files changed, 23 insertions, 20 deletions
@@ -2942,17 +2942,11 @@ PP(pp_sprintf) PP(pp_ord) { djSP; dTARGET; - UV value; - SV *tmpsv = POPs; + SV *argsv = POPs; STRLEN len; - U8 *tmps = (U8*)SvPVx(tmpsv, len); - STRLEN retlen; + U8 *s = (U8*)SvPVx(argsv, len); - if ((*tmps & 0x80) && DO_UTF8(tmpsv)) - value = utf8_to_uv(tmps, len, &retlen, 0); - else - value = (UV)(*tmps & 255); - XPUSHu(value); + XPUSHu(DO_UTF8(argsv) ? utf8_to_uv_simple(s, 0) : (*s & 0xff)); RETURN; } diff --git a/t/op/join.t b/t/op/join.t index eea9add850..4cbe692b80 100755 --- a/t/op/join.t +++ b/t/op/join.t @@ -46,21 +46,32 @@ if ($f eq 'baeak') {print "ok 6\n";} else {print "# '$f'\nnot ok 6\n";} }; { my $s = join("", chr(1234),chr(255)); - print "not " unless length($s) == 2; + print "not " unless length($s) == 2 && + ord(substr($s,0,1)) == 1234 && + ord(substr($s,1,1)) == 255; print "ok 11\n"; } { my $s = join(chr(2345), chr(1234),chr(255)); - print "not " unless length($s) == 3; + print "not " unless length($s) == 3 && + ord(substr($s,0,1)) == 1234 && + ord(substr($s,1,1)) == 2345 && + ord(substr($s,2,1)) == 255; print "ok 12\n"; } { my $s = join(chr(2345), chr(1234),chr(3456)); - print "not " unless length($s) == 3; + print "not " unless length($s) == 3 && + ord(substr($s,0,1)) == 1234 && + ord(substr($s,1,1)) == 2345 && + ord(substr($s,2,1)) == 3456; print "ok 13\n"; } { my $s = join(chr(255), chr(1234),chr(2345)); - print "not " unless length($s) == 3; + print "not " unless length($s) == 3 && + ord(substr($s,0,1)) == 1234 && + ord(substr($s,1,1)) == 255 && + ord(substr($s,2,1)) == 2345; print "ok 14\n"; } @@ -183,8 +183,7 @@ Perl_is_utf8_string(pTHX_ U8 *s, STRLEN len) Returns the character value of the first character in the string C<s> which is assumed to be in UTF8 encoding and no longer than C<curlen>; -C<retlen> will be set to the length, in bytes, of that character, -and the pointer C<s> will be advanced to the end of the character. +C<retlen> will be set to the length, in bytes, of that character. 
If C<s> does not point to a well-formed UTF8 character, the behaviour is dependent on the value of C<flags>: if it contains UTF8_CHECK_ONLY, @@ -351,8 +350,7 @@ malformed: Returns the character value of the first character in the string C<s> which is assumed to be in UTF8 encoding; C<retlen> will be set to the -length, in bytes, of that character, and the pointer C<s> will be -advanced to the end of the character. +length, in bytes, of that character. If C<s> does not point to a well-formed UTF8 character, zero is returned and retlen is set, if possible, to -1. @@ -69,10 +69,10 @@ END_EXTERN_C #define UTF8_CONTINUATION_MASK ((U8)0x3f) #define UTF8_ACCUMULATION_SHIFT 6 -#define UTF8_ACCUMULATE(old, new) ((old) << UTF8_ACCUMULATION_SHIFT | ((new) & UTF8_CONTINUATION_MASK)) +#define UTF8_ACCUMULATE(old, new) ((old) << UTF8_ACCUMULATION_SHIFT | (((U8)new) & UTF8_CONTINUATION_MASK)) -#define UTF8_EIGHT_BIT_HI(c) ( (((U8)c)>>6) |0xc0) -#define UTF8_EIGHT_BIT_LO(c) (((((U8)c)>>6)&0x3f)|0x80) +#define UTF8_EIGHT_BIT_HI(c) ( (((U8)(c))>>6) |0xc0) +#define UTF8_EIGHT_BIT_LO(c) (((((U8)(c)) )&0x3f)|0x80) #ifdef HAS_QUAD #define UNISKIP(uv) ( (uv) < 0x80 ? 1 : \ |