summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--pp.c12
-rwxr-xr-xt/op/join.t19
-rw-r--r--utf8.c6
-rw-r--r--utf8.h6
4 files changed, 23 insertions, 20 deletions
diff --git a/pp.c b/pp.c
index eac753208b..11506977b2 100644
--- a/pp.c
+++ b/pp.c
@@ -2942,17 +2942,11 @@ PP(pp_sprintf)
PP(pp_ord)
{
djSP; dTARGET;
- UV value;
- SV *tmpsv = POPs;
+ SV *argsv = POPs;
STRLEN len;
- U8 *tmps = (U8*)SvPVx(tmpsv, len);
- STRLEN retlen;
+ U8 *s = (U8*)SvPVx(argsv, len);
- if ((*tmps & 0x80) && DO_UTF8(tmpsv))
- value = utf8_to_uv(tmps, len, &retlen, 0);
- else
- value = (UV)(*tmps & 255);
- XPUSHu(value);
+ XPUSHu(DO_UTF8(argsv) ? utf8_to_uv_simple(s, 0) : (*s & 0xff));
RETURN;
}
diff --git a/t/op/join.t b/t/op/join.t
index eea9add850..4cbe692b80 100755
--- a/t/op/join.t
+++ b/t/op/join.t
@@ -46,21 +46,32 @@ if ($f eq 'baeak') {print "ok 6\n";} else {print "# '$f'\nnot ok 6\n";}
};
{ my $s = join("", chr(1234),chr(255));
- print "not " unless length($s) == 2;
+ print "not " unless length($s) == 2 &&
+ ord(substr($s,0,1)) == 1234 &&
+ ord(substr($s,1,1)) == 255;
print "ok 11\n";
}
{ my $s = join(chr(2345), chr(1234),chr(255));
- print "not " unless length($s) == 3;
+ print "not " unless length($s) == 3 &&
+ ord(substr($s,0,1)) == 1234 &&
+ ord(substr($s,1,1)) == 2345 &&
+ ord(substr($s,2,1)) == 255;
print "ok 12\n";
}
{ my $s = join(chr(2345), chr(1234),chr(3456));
- print "not " unless length($s) == 3;
+ print "not " unless length($s) == 3 &&
+ ord(substr($s,0,1)) == 1234 &&
+ ord(substr($s,1,1)) == 2345 &&
+ ord(substr($s,2,1)) == 3456;
print "ok 13\n";
}
{ my $s = join(chr(255), chr(1234),chr(2345));
- print "not " unless length($s) == 3;
+ print "not " unless length($s) == 3 &&
+ ord(substr($s,0,1)) == 1234 &&
+ ord(substr($s,1,1)) == 255 &&
+ ord(substr($s,2,1)) == 2345;
print "ok 14\n";
}
diff --git a/utf8.c b/utf8.c
index 8d812ab3e5..24dc69297b 100644
--- a/utf8.c
+++ b/utf8.c
@@ -183,8 +183,7 @@ Perl_is_utf8_string(pTHX_ U8 *s, STRLEN len)
Returns the character value of the first character in the string C<s>
which is assumed to be in UTF8 encoding and no longer than C<curlen>;
-C<retlen> will be set to the length, in bytes, of that character,
-and the pointer C<s> will be advanced to the end of the character.
+C<retlen> will be set to the length, in bytes, of that character.
If C<s> does not point to a well-formed UTF8 character, the behaviour
is dependent on the value of C<flags>: if it contains UTF8_CHECK_ONLY,
@@ -351,8 +350,7 @@ malformed:
Returns the character value of the first character in the string C<s>
which is assumed to be in UTF8 encoding; C<retlen> will be set to the
-length, in bytes, of that character, and the pointer C<s> will be
-advanced to the end of the character.
+length, in bytes, of that character.
If C<s> does not point to a well-formed UTF8 character, zero is
returned and retlen is set, if possible, to -1.
diff --git a/utf8.h b/utf8.h
index e9598b8759..8d46aa9dfa 100644
--- a/utf8.h
+++ b/utf8.h
@@ -69,10 +69,10 @@ END_EXTERN_C
#define UTF8_CONTINUATION_MASK ((U8)0x3f)
#define UTF8_ACCUMULATION_SHIFT 6
-#define UTF8_ACCUMULATE(old, new) ((old) << UTF8_ACCUMULATION_SHIFT | ((new) & UTF8_CONTINUATION_MASK))
+#define UTF8_ACCUMULATE(old, new) ((old) << UTF8_ACCUMULATION_SHIFT | (((U8)new) & UTF8_CONTINUATION_MASK))
-#define UTF8_EIGHT_BIT_HI(c) ( (((U8)c)>>6) |0xc0)
-#define UTF8_EIGHT_BIT_LO(c) (((((U8)c)>>6)&0x3f)|0x80)
+#define UTF8_EIGHT_BIT_HI(c) ( (((U8)(c))>>6) |0xc0)
+#define UTF8_EIGHT_BIT_LO(c) (((((U8)(c)) )&0x3f)|0x80)
#ifdef HAS_QUAD
#define UNISKIP(uv) ( (uv) < 0x80 ? 1 : \