summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSpider Boardman <spider@orb.nashua.nh.us>2000-09-07 22:21:02 -0400
committerJarkko Hietaniemi <jhi@iki.fi>2000-09-08 14:19:49 +0000
commit33b454808819084359e76a3f223a41b842c180b7 (patch)
tree7637e666df742859e20f1b79353fca4d01eb68d5
parentc5d572932fd269096c2a3545999d11f5ab9563b7 (diff)
downloadperl-33b454808819084359e76a3f223a41b842c180b7.tar.gz
Re-allow vec() for characters > 255.
Subject: [PATCH] Re: [ID 20000907.005] Not OK: perl v5.7.0 +devel-7030 on alpha-dec_osf-perlio 4.0f (UNINSTALLED)
Message-Id: <200009080621.CAA03409@leggy.zk3.dec.com>
p4raw-id: //depot/perl@7039
-rw-r--r--doop.c27
-rw-r--r--pod/perldelta.pod4
-rw-r--r--pod/perldiag.pod5
-rw-r--r--pod/perlfunc.pod9
-rwxr-xr-xt/op/vec.t7
5 files changed, 25 insertions, 27 deletions
diff --git a/doop.c b/doop.c
index 46ffc1b284..77c7324e31 100644
--- a/doop.c
+++ b/doop.c
@@ -537,8 +537,7 @@ Perl_do_sprintf(pTHX_ SV *sv, I32 len, SV **sarg)
SvTAINTED_on(sv);
}
-/* currently converts input to bytes if needed and croaks if a character
- > 255 is encountered */
+/* currently converts input to bytes if possible, but doesn't sweat failure */
UV
Perl_do_vecget(pTHX_ SV *sv, I32 offset, I32 size)
{
@@ -552,12 +551,7 @@ Perl_do_vecget(pTHX_ SV *sv, I32 offset, I32 size)
Perl_croak(aTHX_ "Illegal number of bits in vec");
if (SvUTF8(sv)) {
- if (Perl_utf8_to_bytes(aTHX_ (U8*) s, &srclen)) {
- SvUTF8_off(sv);
- SvCUR_set(sv, srclen);
- }
- else
- Perl_croak(aTHX_ "Character > 255 in vec()");
+ (void) Perl_sv_utf8_downgrade(aTHX_ sv, TRUE);
}
offset *= size; /* turn into bit offset */
@@ -681,8 +675,10 @@ Perl_do_vecget(pTHX_ SV *sv, I32 offset, I32 size)
return retnum;
}
-/* currently converts input to bytes if needed and croaks if a character
- > 255 is encountered */
+/* currently converts input to bytes if possible but doesn't sweat failures,
+ * although it does ensure that the string it clobbers is not marked as
+ * utf8-valid any more
+ */
void
Perl_do_vecset(pTHX_ SV *sv)
{
@@ -699,12 +695,11 @@ Perl_do_vecset(pTHX_ SV *sv)
return;
s = (unsigned char*)SvPV_force(targ, targlen);
if (SvUTF8(targ)) {
- if (Perl_utf8_to_bytes(aTHX_ (U8*) s, &targlen)) {
- /* SvUTF8_off(targ); SvPOK_only below ensures this */
- SvCUR_set(targ, targlen);
- }
- else
- Perl_croak(aTHX_ "Character > 255 in vec()");
+ /* This is handled by the SvPOK_only below...
+ if (!Perl_sv_utf8_downgrade(aTHX_ targ, TRUE))
+ SvUTF8_off(targ);
+ */
+ (void) Perl_sv_utf8_downgrade(aTHX_ targ, TRUE);
}
(void)SvPOK_only(targ);
diff --git a/pod/perldelta.pod b/pod/perldelta.pod
index 13e557e763..7c21f5fd3a 100644
--- a/pod/perldelta.pod
+++ b/pod/perldelta.pod
@@ -515,7 +515,9 @@ see pack('U0', ...)).
=item *
-vec() now refuses to deal with characters >255.
+vec() now tries to work with characters <= 255 when possible, but it leaves
+higher character values in place. In that case, if vec() was used to modify
+the string, it is no longer considered to be utf8-encoded.
=item *
diff --git a/pod/perldiag.pod b/pod/perldiag.pod
index 3e9f6331f9..63d7f999d1 100644
--- a/pod/perldiag.pod
+++ b/pod/perldiag.pod
@@ -1043,11 +1043,6 @@ references can be weakened.
with an assignment operator, which implies modifying the value itself.
Perhaps you need to copy the value to a temporary, and repeat that.
-=item Character > 255 in vec()
-
-(F) You applied the vec() function to a UTF8 string which contained
-a character > 255. vec() currently only operates on characters < 256.
-
=item chmod() mode argument is missing initial 0
(W chmod) A novice will sometimes say
diff --git a/pod/perlfunc.pod b/pod/perlfunc.pod
index d02b9ba437..323b83d247 100644
--- a/pod/perlfunc.pod
+++ b/pod/perlfunc.pod
@@ -5516,8 +5516,13 @@ If an element off the end of the string is written to, Perl will first
extend the string with sufficiently many zero bytes. It is an error
to try to write off the beginning of the string (i.e. negative OFFSET).
-The string must not contain any character with value > 255 (which
-can only happen if you're using UTF8 encoding).
+The string should not contain any character with a value > 255 (which
+can only happen if you're using UTF8 encoding).  If it does, it will be
+treated as a string that is not UTF8 encoded.  If the C<vec> is
+assigned to, other parts of your program will also no longer consider the
+string to be UTF8 encoded.  In other words, if you do have such characters
+in your string, vec() will operate on the actual byte string, and not the
+conceptual character string.
Strings created with C<vec> can also be manipulated with the logical
operators C<|>, C<&>, C<^>, and C<~>. These operators will assume a bit
diff --git a/t/op/vec.t b/t/op/vec.t
index b75bebfade..7fe0974770 100755
--- a/t/op/vec.t
+++ b/t/op/vec.t
@@ -57,13 +57,14 @@ $x = substr $foo, 1;
print "not " if vec($x, 0, 8) != 255;
print "ok 24\n";
eval { vec($foo, 1, 8) };
-print "not " unless $@ =~ /^Character > 255 in vec\(\) /;
+print "not " if $@;
print "ok 25\n";
eval { vec($foo, 1, 8) = 13 };
-print "not " unless $@ =~ /^Character > 255 in vec\(\) /;
+print "not " if $@;
print "ok 26\n";
-print "not " if $foo ne "\x{100}" . "\xff\xfe";
+print "not " if $foo ne "\xc4\x0d\xc3\xbf\xc3\xbe";
print "ok 27\n";
+$foo = "\x{100}" . "\xff\xfe";
$x = substr $foo, 1;
vec($x, 2, 4) = 7;
print "not " if $x ne "\xff\xf7";