Re-allow vec() for characters > 255.

Subject: [PATCH] Re: [ID 20000907.005] Not OK: perl v5.7.0 +devel-7030 on alpha-dec_osf-perlio 4.0f (UNINSTALLED)
Message-Id: <200009080621.CAA03409@leggy.zk3.dec.com>
p4raw-id: //depot/perl@7039
author: Spider Boardman <spider@orb.nashua.nh.us> 2000-09-07 22:21:02 -0400
committer: Jarkko Hietaniemi <jhi@iki.fi> 2000-09-08 14:19:49 +0000
commit: 33b454808819084359e76a3f223a41b842c180b7 (patch)
tree: 7637e666df742859e20f1b79353fca4d01eb68d5
parent: c5d572932fd269096c2a3545999d11f5ab9563b7 (diff)
download: perl-33b454808819084359e76a3f223a41b842c180b7.tar.gz
5 files changed, 25 insertions, 27 deletions
diff --git a/doop.c b/doop.c
index 46ffc1b284..77c7324e31 100644
--- a/doop.c
+++ b/doop.c
@@ -537,8 +537,7 @@ Perl_do_sprintf(pTHX_ SV *sv, I32 len, SV **sarg)
 	SvTAINTED_on(sv);
 }
 
-/* currently converts input to bytes if needed and croaks if a character
-   > 255 is encountered							*/
+/* tries to downgrade the input from utf8 to bytes; failure is ignored */
 UV
 Perl_do_vecget(pTHX_ SV *sv, I32 offset, I32 size)
 {
@@ -552,12 +551,7 @@ Perl_do_vecget(pTHX_ SV *sv, I32 offset, I32 size)
 	Perl_croak(aTHX_ "Illegal number of bits in vec");
 
     if (SvUTF8(sv)) {
-	if (Perl_utf8_to_bytes(aTHX_ (U8*) s, &srclen)) {
-	    SvUTF8_off(sv);
-	    SvCUR_set(sv, srclen);
-	}
-	else
-	    Perl_croak(aTHX_ "Character > 255 in vec()");
+	(void) Perl_sv_utf8_downgrade(aTHX_ sv, TRUE);
     }
 
     offset *= size;	/* turn into bit offset */
@@ -681,8 +675,10 @@ Perl_do_vecget(pTHX_ SV *sv, I32 offset, I32 size)
     return retnum;
 }
 
-/* currently converts input to bytes if needed and croaks if a character
-   > 255 is encountered							*/
+/* currently converts input to bytes if possible but doesn't sweat failures,
+ * although it does ensure that the string it clobbers is not marked as
+ * utf8-valid any more
+ */
 void
 Perl_do_vecset(pTHX_ SV *sv)
 {
@@ -699,12 +695,11 @@ Perl_do_vecset(pTHX_ SV *sv)
 	return;
     s = (unsigned char*)SvPV_force(targ, targlen);
     if (SvUTF8(targ)) {
-	if (Perl_utf8_to_bytes(aTHX_ (U8*) s, &targlen)) {
-	/*  SvUTF8_off(targ);   SvPOK_only below ensures this  */
-	    SvCUR_set(targ, targlen);
-	}
-	else
-	    Perl_croak(aTHX_ "Character > 255 in vec()");
+	/* This is handled by the SvPOK_only below...
+	if (!Perl_sv_utf8_downgrade(aTHX_ targ, TRUE))
+	    SvUTF8_off(targ);
+	 */
+	(void) Perl_sv_utf8_downgrade(aTHX_ targ, TRUE);
     }
 
     (void)SvPOK_only(targ);
diff --git a/pod/perldelta.pod b/pod/perldelta.pod
index 13e557e763..7c21f5fd3a 100644
--- a/pod/perldelta.pod
+++ b/pod/perldelta.pod
@@ -515,7 +515,9 @@ see pack('U0', ...)).
 
 =item *
 
-vec() now refuses to deal with characters >255.
+vec() now tries to work with characters <= 255 when possible; a string that
+contains higher character values is left as it is.  In that case, if vec()
+is used to modify the string, it is no longer considered to be utf8-encoded.
 
 =item *
 
diff --git a/pod/perldiag.pod b/pod/perldiag.pod
index 3e9f6331f9..63d7f999d1 100644
--- a/pod/perldiag.pod
+++ b/pod/perldiag.pod
@@ -1043,11 +1043,6 @@ references can be weakened.
 with an assignment operator, which implies modifying the value itself.
 Perhaps you need to copy the value to a temporary, and repeat that.
 
-=item Character > 255 in vec()
-
-(F) You applied the vec() function to a UTF8 string which contained
-a character > 255.   vec() currently only operates on characters < 256.
-
 =item chmod() mode argument is missing initial 0
 
 (W chmod) A novice will sometimes say
diff --git a/pod/perlfunc.pod b/pod/perlfunc.pod
index d02b9ba437..323b83d247 100644
--- a/pod/perlfunc.pod
+++ b/pod/perlfunc.pod
@@ -5516,8 +5516,13 @@ If an element off the end of the string is written to, Perl will first
 extend the string with sufficiently many zero bytes.   It is an error
 to try to write off the beginning of the string (i.e. negative OFFSET).
 
-The string must not contain any character with value > 255 (which
-can only happen if you're using UTF8 encoding).
+The string should not contain any character with a value > 255 (which
+can only happen if you're using UTF8 encoding).  If it does, the string
+will be treated as if it were not UTF8 encoded.  If the C<vec> is then
+assigned to, other parts of your program will also no longer consider the
+string to be UTF8 encoded.  In other words, if you do have such characters
+in your string, C<vec> will operate on the actual byte string, and not the
+conceptual character string.
 
 Strings created with C<vec> can also be manipulated with the logical
 operators C<|>, C<&>, C<^>, and C<~>.  These operators will assume a bit
diff --git a/t/op/vec.t b/t/op/vec.t
index b75bebfade..7fe0974770 100755
--- a/t/op/vec.t
+++ b/t/op/vec.t
@@ -57,13 +57,14 @@ $x = substr $foo, 1;
 print "not " if vec($x, 0, 8) != 255;
 print "ok 24\n";
 eval { vec($foo, 1, 8) };
-print "not " unless $@ =~ /^Character > 255 in vec\(\) /;
+print "not " if $@;
 print "ok 25\n";
 eval { vec($foo, 1, 8) = 13 };
-print "not " unless $@ =~ /^Character > 255 in vec\(\) /;
+print "not " if $@;
 print "ok 26\n";
-print "not " if $foo ne "\x{100}" . "\xff\xfe";
+print "not " if $foo ne "\xc4\x0d\xc3\xbf\xc3\xbe";
 print "ok 27\n";
+$foo = "\x{100}" . "\xff\xfe";
 $x = substr $foo, 1;
 vec($x, 2, 4) = 7;
 print "not " if $x ne "\xff\xf7";
author	Spider Boardman <spider@orb.nashua.nh.us>	2000-09-07 22:21:02 -0400
committer	Jarkko Hietaniemi <jhi@iki.fi>	2000-09-08 14:19:49 +0000
commit	33b454808819084359e76a3f223a41b842c180b7 (patch)
tree	7637e666df742859e20f1b79353fca4d01eb68d5
parent	c5d572932fd269096c2a3545999d11f5ab9563b7 (diff)
download	perl-33b454808819084359e76a3f223a41b842c180b7.tar.gz