Add a new function newSVpvn_flags(), which takes a third parameter of

flag bits. Right now the only flag bit is SVf_UTF8, which will call SvUTF8_on() on the new SV for you. Provide a wrapper newSVpvn_utf8(), which takes a boolean, and passes in SVf_UTF8 if that is true. Refactor the core to use it where possible. It makes the source code clearer and smaller, but seems to be swings and roundabouts on object code size. p4raw-id: //depot/perl@32807
author: Nicholas Clark <nick@ccl4.org> 2008-01-02 23:41:21 +0000
committer: Nicholas Clark <nick@ccl4.org> 2008-01-02 23:41:21 +0000
commit: 740cce10afff4bec3346f61ab3d0f7bfa424948c (patch)
tree: 83c031300e2326e8d8ae399e088c4d3a42472a13
parent: e350b669f3dadb9da757b62a20659cbc7eca2190 (diff)
download: perl-740cce10afff4bec3346f61ab3d0f7bfa424948c.tar.gz
14 files changed, 119 insertions, 77 deletions
diff --git a/embed.fnc b/embed.fnc
index c041296261..d36e2fdc83 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -590,6 +590,7 @@ Apda	|SV*	|newSVuv	|UV u
 Apda	|SV*	|newSVnv	|NV n
 Apda	|SV*	|newSVpv	|NULLOK const char* s|STRLEN len
 Apda	|SV*	|newSVpvn	|NULLOK const char* s|STRLEN len
+Apda	|SV*	|newSVpvn_flags	|NULLOK const char* s|STRLEN len|U32 flags
 Apda	|SV*	|newSVhek	|NULLOK const HEK *hek
 Apda	|SV*	|newSVpvn_share	|NULLOK const char* s|I32 len|U32 hash
 Afpda	|SV*	|newSVpvf	|NN const char* pat|...
diff --git a/embed.h b/embed.h
index ba248710a6..d99785c969 100644
--- a/embed.h
+++ b/embed.h
@@ -572,7 +572,10 @@
 #define newSVnv			Perl_newSVnv
 #define newSVpv			Perl_newSVpv
 #define newSVpvn		Perl_newSVpvn
+#define newSVpvn_flags		Perl_newSVpvn_flags
+#ifdef PERL_CORE
 #define newSVhek		Perl_newSVhek
+#endif
 #define newSVpvn_share		Perl_newSVpvn_share
 #define newSVpvf		Perl_newSVpvf
 #define vnewSVpvf		Perl_vnewSVpvf
@@ -2865,7 +2868,10 @@
 #define newSVnv(a)		Perl_newSVnv(aTHX_ a)
 #define newSVpv(a,b)		Perl_newSVpv(aTHX_ a,b)
 #define newSVpvn(a,b)		Perl_newSVpvn(aTHX_ a,b)
+#define newSVpvn_flags(a,b,c)	Perl_newSVpvn_flags(aTHX_ a,b,c)
+#ifdef PERL_CORE
 #define newSVhek(a)		Perl_newSVhek(aTHX_ a)
+#endif
 #define newSVpvn_share(a,b,c)	Perl_newSVpvn_share(aTHX_ a,b,c)
 #define vnewSVpvf(a,b)		Perl_vnewSVpvf(aTHX_ a,b)
 #define newSVrv(a,b)		Perl_newSVrv(aTHX_ a,b)
diff --git a/hv.c b/hv.c
index 9523fb0b28..63e10497ce 100644
--- a/hv.c
+++ b/hv.c
@@ -350,9 +350,8 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen,
 		SV* obj = mg->mg_obj;
 
 		if (!keysv) {
-		    keysv = sv_2mortal(newSVpvn(key, klen));
-		    if (flags & HVhek_UTF8)
-			SvUTF8_on(keysv);
+		    keysv = sv_2mortal(newSVpvn_utf8(key, klen,
+						     flags & HVhek_UTF8));
 		}
 		
 		mg->mg_obj = keysv;         /* pass key */
@@ -391,11 +390,8 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen,
 		/* FIXME should be able to skimp on the HE/HEK here when
 		   HV_FETCH_JUST_SV is true.  */
 		if (!keysv) {
-		    keysv = newSVpvn(key, klen);
-		    if (is_utf8) {
-			SvUTF8_on(keysv);
-		    }
-		} else {
+		    keysv = newSVpvn_utf8(key, klen, is_utf8);
+  		} else {
 		    keysv = newSVsv(keysv);
 		}
                 sv = sv_newmortal();
@@ -472,8 +468,7 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen,
 
 		if (keysv || is_utf8) {
 		    if (!keysv) {
-			keysv = newSVpvn(key, klen);
-			SvUTF8_on(keysv);
+			keysv = newSVpvn_utf8(key, klen, TRUE);
 		    } else {
 			keysv = newSVsv(keysv);
 		    }
@@ -515,8 +510,7 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen,
 		const bool save_taint = PL_tainted;
 		if (keysv || is_utf8) {
 		    if (!keysv) {
-			keysv = newSVpvn(key, klen);
-			SvUTF8_on(keysv);
+			keysv = newSVpvn_utf8(key, klen, TRUE);
 		    }
 		    if (PL_tainting)
 			PL_tainted = SvTAINTED(keysv);
diff --git a/perl.c b/perl.c
index e0bc0e7fce..82412d2f79 100644
--- a/perl.c
+++ b/perl.c
@@ -2481,8 +2481,7 @@ Perl_get_cvn_flags(pTHX_ const char *name, STRLEN len, I32 flags)
      * It has the same effect as "sub name;", i.e. just a forward
      * declaration! */
     if ((flags & ~GV_NOADD_MASK) && !GvCVu(gv)) {
-	SV *const sv = newSVpvn(name,len);
-	SvFLAGS(sv) |= flags & SVf_UTF8;
+	SV *const sv = newSVpvn_flags(name, len, flags & SVf_UTF8);
     	return newSUB(start_subparse(FALSE, 0),
 		      newSVOP(OP_CONST, 0, sv),
 		      NULL, NULL);
diff --git a/pod/perlapi.pod b/pod/perlapi.pod
index df9572a87f..b7cb7b30d0 100644
--- a/pod/perlapi.pod
+++ b/pod/perlapi.pod
@@ -3918,6 +3918,17 @@ incremented.
 =for hackers
 Found in file sv.h
 
+=item newSVpvn_utf8
+X<newSVpvn_utf8>
+
+Creates a new SV and copies a string into it.  If utf8 is true, calls
+C<SvUTF8_on> on the new SV.  Implemented as a wrapper around C<newSVpvn_flags>.
+
+	SV*	newSVpvn_utf8(NULLOK const char* s, STRLEN len, U32 utf8)
+
+=for hackers
+Found in file sv.h
+
 =item SvCUR
 X<SvCUR>
 
@@ -5137,6 +5148,25 @@ C<len> bytes long.  If the C<s> argument is NULL the new SV will be undefined.
 =for hackers
 Found in file sv.c
 
+=item newSVpvn_flags
+X<newSVpvn_flags>
+
+Creates a new SV and copies a string into it.  The reference count for the
+SV is set to 1.  Note that if C<len> is zero, Perl will create a zero length
+string.  You are responsible for ensuring that the source string is at least
+C<len> bytes long.  If the C<s> argument is NULL the new SV will be undefined.
+Currently the only flag bit accepted is SVf_UTF8. If this is set, then it
+will be set on the new SV. C<newSVpvn_utf8()> is a convenience wrapper for
+this function, defined as
+
+    #define newSVpvn_utf8(s, len, u)			\
+	newSVpvn_flags((s), (len), (u) ? SVf_UTF8 : 0)
+
+	SV*	newSVpvn_flags(const char* s, STRLEN len, U32 flags)
+
+=for hackers
+Found in file sv.c
+
 =item newSVpvn_share
 X<newSVpvn_share>
 
diff --git a/pp.c b/pp.c
index 78e0e367c6..3ac702db9a 100644
--- a/pp.c
+++ b/pp.c
@@ -3312,9 +3312,7 @@ PP(pp_index)
 	   Otherwise I need to avoid calls to sv_pos_u2b(), which (dangerously)
 	   will trigger magic and overloading again, as will fbm_instr()
 	*/
-	big = sv_2mortal(newSVpvn(big_p, biglen));
-	if (big_utf8)
-	    SvUTF8_on(big);
+	big = sv_2mortal(newSVpvn_utf8(big_p, biglen, big_utf8));
 	big_p = SvPVX(big);
     }
     if (SvGAMAGIC(little) || (is_index && !SvOK(little))) {
@@ -3326,9 +3324,7 @@ PP(pp_index)
 	   This is all getting to messy. The API isn't quite clean enough,
 	   because data access has side effects.
 	*/
-	little = sv_2mortal(newSVpvn(little_p, llen));
-	if (little_utf8)
-	    SvUTF8_on(little);
+	little = sv_2mortal(newSVpvn_utf8(little_p, llen, little_utf8));
 	little_p = SvPVX(little);
     }
 
@@ -4755,11 +4751,9 @@ PP(pp_split)
 	    if (m >= strend)
 		break;
 
-	    dstr = newSVpvn(s, m-s);
+	    dstr = newSVpvn_utf8(s, m-s, do_utf8);
 	    if (make_mortal)
 		sv_2mortal(dstr);
-	    if (do_utf8)
-		(void)SvUTF8_on(dstr);
 	    XPUSHs(dstr);
 
 	    /* skip the whitespace found last */
@@ -4788,11 +4782,9 @@ PP(pp_split)
 	    m++;
 	    if (m >= strend)
 		break;
-	    dstr = newSVpvn(s, m-s);
+	    dstr = newSVpvn_utf8(s, m-s, do_utf8);
 	    if (make_mortal)
 		sv_2mortal(dstr);
-	    if (do_utf8)
-		(void)SvUTF8_on(dstr);
 	    XPUSHs(dstr);
 	    s = m;
 	}
@@ -4817,12 +4809,11 @@ PP(pp_split)
                 /* keep track of how many bytes we skip over */
                 m = s;
                 s += UTF8SKIP(s);
-                dstr = newSVpvn(m, s-m);
+                dstr = newSVpvn_utf8(m, s-m, TRUE);
 
                 if (make_mortal)
                     sv_2mortal(dstr);
 
-                (void)SvUTF8_on(dstr);
                 PUSHs(dstr);
 
                 if (s >= strend)
@@ -4859,11 +4850,9 @@ PP(pp_split)
 		    ;
 		if (m >= strend)
 		    break;
-		dstr = newSVpvn(s, m-s);
+		dstr = newSVpvn_utf8(s, m-s, do_utf8);
 		if (make_mortal)
 		    sv_2mortal(dstr);
-		if (do_utf8)
-		    (void)SvUTF8_on(dstr);
 		XPUSHs(dstr);
 		/* The rx->minlen is in characters but we want to step
 		 * s ahead by bytes. */
@@ -4878,11 +4867,9 @@ PP(pp_split)
 	      (m = fbm_instr((unsigned char*)s, (unsigned char*)strend,
 			     csv, multiline ? FBMrf_MULTILINE : 0)) )
 	    {
-		dstr = newSVpvn(s, m-s);
+		dstr = newSVpvn_utf8(s, m-s, do_utf8);
 		if (make_mortal)
 		    sv_2mortal(dstr);
-		if (do_utf8)
-		    (void)SvUTF8_on(dstr);
 		XPUSHs(dstr);
 		/* The rx->minlen is in characters but we want to step
 		 * s ahead by bytes. */
@@ -4913,11 +4900,9 @@ PP(pp_split)
 		strend = s + (strend - m);
 	    }
 	    m = RX_OFFS(rx)[0].start + orig;
-	    dstr = newSVpvn(s, m-s);
+	    dstr = newSVpvn_utf8(s, m-s, do_utf8);
 	    if (make_mortal)
 		sv_2mortal(dstr);
-	    if (do_utf8)
-		(void)SvUTF8_on(dstr);
 	    XPUSHs(dstr);
 	    if (RX_NPARENS(rx)) {
 		I32 i;
@@ -4929,14 +4914,12 @@ PP(pp_split)
 		       parens that didn't match -- they should be set to
 		       undef, not the empty string */
 		    if (m >= orig && s >= orig) {
-			dstr = newSVpvn(s, m-s);
+			dstr = newSVpvn_utf8(s, m-s, do_utf8);
 		    }
 		    else
 			dstr = &PL_sv_undef;  /* undef, not "" */
 		    if (make_mortal)
 			sv_2mortal(dstr);
-		    if (do_utf8)
-			(void)SvUTF8_on(dstr);
 		    XPUSHs(dstr);
 		}
 	    }
@@ -4951,11 +4934,9 @@ PP(pp_split)
     /* keep field after final delim? */
     if (s < strend || (iters && origlimit)) {
         const STRLEN l = strend - s;
-	dstr = newSVpvn(s, l);
+	dstr = newSVpvn_utf8(s, l, do_utf8);
 	if (make_mortal)
 	    sv_2mortal(dstr);
-	if (do_utf8)
-	    (void)SvUTF8_on(dstr);
 	XPUSHs(dstr);
 	iters++;
     }
diff --git a/pp_hot.c b/pp_hot.c
index 9099c88529..bf8f2fb388 100644
--- a/pp_hot.c
+++ b/pp_hot.c
@@ -2278,10 +2278,8 @@ PP(pp_subst)
       have_a_cow:
 #endif
 	rxtainted |= RX_MATCH_TAINTED(rx);
-	dstr = newSVpvn(m, s-m);
+	dstr = newSVpvn_utf8(m, s-m, DO_UTF8(TARG));
 	SAVEFREESV(dstr);
-	if (DO_UTF8(TARG))
-	    SvUTF8_on(dstr);
 	PL_curpm = pm;
 	if (!c) {
 	    register PERL_CONTEXT *cx;
diff --git a/pp_pack.c b/pp_pack.c
index db8a94e45d..0d456bd569 100644
--- a/pp_pack.c
+++ b/pp_pack.c
@@ -2510,9 +2510,9 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist )
 			       by copying it to a temporary.  */
 			    STRLEN len;
 			    const char *const pv = SvPV_const(*beglist, len);
-			    SV *const temp = sv_2mortal(newSVpvn(pv, len));
-			    if (SvUTF8(*beglist))
-				SvUTF8_on(temp);
+			    SV *const temp
+				= sv_2mortal(newSVpvn_flags(pv, len,
+							    SvUTF8(*beglist)));
 			    *beglist = temp;
 			}
 			count = DO_UTF8(*beglist) ?
diff --git a/proto.h b/proto.h
index 668aea16e5..5bbb5935a3 100644
--- a/proto.h
+++ b/proto.h
@@ -1607,6 +1607,10 @@ PERL_CALLCONV SV*	Perl_newSVpvn(pTHX_ const char* s, STRLEN len)
 			__attribute__malloc__
 			__attribute__warn_unused_result__;
 
+PERL_CALLCONV SV*	Perl_newSVpvn_flags(pTHX_ const char* s, STRLEN len, U32 flags)
+			__attribute__malloc__
+			__attribute__warn_unused_result__;
+
 PERL_CALLCONV SV*	Perl_newSVhek(pTHX_ const HEK *hek)
 			__attribute__malloc__
 			__attribute__warn_unused_result__;
diff --git a/regcomp.c b/regcomp.c
index 8bd18940cf..e384ff5809 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -1232,10 +1232,9 @@ is the recommended Unicode-aware way of saying
         /* store the word for dumping */                        \
         SV* tmp;                                                \
         if (OP(noper) != NOTHING)                               \
-            tmp = newSVpvn(STRING(noper), STR_LEN(noper));      \
+            tmp = newSVpvn_utf8(STRING(noper), STR_LEN(noper), UTF);	\
         else                                                    \
-            tmp = newSVpvn( "", 0 );                            \
-        if ( UTF ) SvUTF8_on( tmp );                            \
+            tmp = newSVpvn_utf8( "", 0, UTF );			\
         av_push( trie_words, tmp );                             \
     });                                                         \
                                                                 \
@@ -3320,9 +3319,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
 			
 			l -= old;
 			/* Get the added string: */
-			last_str = newSVpvn(s  + old, l);
-			if (UTF)
-			    SvUTF8_on(last_str);
+			last_str = newSVpvn_utf8(s  + old, l, UTF);
 			if (deltanext == 0 && pos_before == b) {
 			    /* What was added is a constant string */
 			    if (mincount > 1) {
@@ -5256,10 +5253,8 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags) {
     }
 
     if ( flags ) {
-        SV* sv_name = sv_2mortal(Perl_newSVpvn(aTHX_ name_start,
-            (int)(RExC_parse - name_start)));
-	if (UTF)
-            SvUTF8_on(sv_name);
+        SV* sv_name = sv_2mortal(newSVpvn_utf8(name_start,
+            (int)(RExC_parse - name_start), UTF));
         if ( flags == REG_RSN_RETURN_NAME)
             return sv_name;
         else if (flags==REG_RSN_RETURN_DATA) {
@@ -8193,8 +8188,8 @@ parseit:
 
 				  if (!unicode_alternate)
 				      unicode_alternate = newAV();
-				  sv = newSVpvn((char*)foldbuf, foldlen);
-				  SvUTF8_on(sv);
+				  sv = newSVpvn_utf8((char*)foldbuf, foldlen,
+						     TRUE);
 				  av_push(unicode_alternate, sv);
 			      }
 			 }
diff --git a/sv.c b/sv.c
index 551d458fdd..2d73c7f38e 100644
--- a/sv.c
+++ b/sv.c
@@ -6042,8 +6042,7 @@ Perl_sv_eq(pTHX_ register SV *sv1, register SV *sv2)
 	 * invalidate pv1, so we may need to make a copy */
 	if (sv1 == sv2 && (SvTHINKFIRST(sv1) || SvGMAGICAL(sv1))) {
 	    pv1 = SvPV_const(sv1, cur1);
-	    sv1 = sv_2mortal(newSVpvn(pv1, cur1));
-	    if (SvUTF8(sv2)) SvUTF8_on(sv1);
+	    sv1 = sv_2mortal(newSVpvn_flags(pv1, cur1, SvUTF8(sv2)));
 	}
 	pv1 = SvPV_const(sv1, cur1);
     }
@@ -7068,6 +7067,37 @@ Perl_newSVpvn(pTHX_ const char *s, STRLEN len)
     return sv;
 }
 
+/*
+=for apidoc newSVpvn_flags
+
+Creates a new SV and copies a string into it.  The reference count for the
+SV is set to 1.  Note that if C<len> is zero, Perl will create a zero length
+string.  You are responsible for ensuring that the source string is at least
+C<len> bytes long.  If the C<s> argument is NULL the new SV will be undefined.
+Currently the only flag bit accepted is SVf_UTF8. If this is set, then it
+will be set on the new SV. C<newSVpvn_utf8()> is a convenience wrapper for
+this function, defined as
+
+    #define newSVpvn_utf8(s, len, u)			\
+	newSVpvn_flags((s), (len), (u) ? SVf_UTF8 : 0)
+
+=cut
+*/
+
+SV *
+Perl_newSVpvn_flags(pTHX_ const char *s, STRLEN len, U32 flags)
+{
+    dVAR;
+    register SV *sv;
+
+    /* All the flags we don't support must be zero.
+       And we're new code so I'm going to assert this from the start.  */
+    assert(!(flags & ~SVf_UTF8));
+    new_SV(sv);
+    sv_setpvn(sv,s,len);
+    SvFLAGS(sv) |= flags;
+    return sv;
+}
 
 /*
 =for apidoc newSVhek
diff --git a/sv.h b/sv.h
index df42dcfcc8..9d0851c084 100644
--- a/sv.h
+++ b/sv.h
@@ -1911,6 +1911,17 @@ struct clone_params {
 };
 
 /*
+=for apidoc Am|SV*|newSVpvn_utf8|NULLOK const char* s|STRLEN len|U32 utf8
+
+Creates a new SV and copies a string into it.  If utf8 is true, calls
+C<SvUTF8_on> on the new SV.  Implemented as a wrapper around C<newSVpvn_flags>.
+
+=cut
+*/
+
+#define newSVpvn_utf8(s, len, u) newSVpvn_flags((s), (len), (u) ? SVf_UTF8 : 0)
+
+/*
  * Local variables:
  * c-indentation-style: bsd
  * c-basic-offset: 4
diff --git a/toke.c b/toke.c
index 04190c3b45..410e4d6148 100644
--- a/toke.c
+++ b/toke.c
@@ -1347,9 +1347,9 @@ STATIC SV *
 S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len)
 {
     dVAR;
-    SV * const sv = newSVpvn(start,len);
-    if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len))
-	SvUTF8_on(sv);
+    SV * const sv = newSVpvn_utf8(start, len,
+				  UTF && !IN_BYTES
+				  && is_utf8_string((const U8*)start, len));
     return sv;
 }
 
@@ -1570,9 +1570,7 @@ S_tokeq(pTHX_ SV *sv)
 	goto finish;
     d = s;
     if ( PL_hints & HINT_NEW_STRING ) {
-	pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len));
-	if (SvUTF8(sv))
-	    SvUTF8_on(pv);
+	pv = sv_2mortal(newSVpvn_flags(SvPVX_const(pv), len, SvUTF8(sv)));
     }
     while (s < send) {
 	if (*s == '\\') {
@@ -1639,9 +1637,7 @@ S_sublex_start(pTHX)
 	    /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
 	    STRLEN len;
 	    const char * const p = SvPV_const(sv, len);
-	    SV * const nsv = newSVpvn(p, len);
-	    if (SvUTF8(sv))
-		SvUTF8_on(nsv);
+	    SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
 	    SvREFCNT_dec(sv);
 	    sv = nsv;
 	}
@@ -6347,9 +6343,7 @@ Perl_yylex(pTHX)
 			    for (; !isSPACE(*d) && len; --len, ++d)
 				/**/;
 			}
-			sv = newSVpvn(b, d-b);
-			if (DO_UTF8(PL_lex_stuff))
-			    SvUTF8_on(sv);
+			sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
 			words = append_elem(OP_LIST, words,
 					    newSVOP(OP_CONST, 0, tokeq(sv)));
 		    }
diff --git a/util.c b/util.c
index 1710e6f6e3..6c7e338e83 100644
--- a/util.c
+++ b/util.c
@@ -1270,8 +1270,7 @@ S_vdie_common(pTHX_ const char *message, STRLEN msglen, I32 utf8, bool warn)
 	    *hook = NULL;
 	}
 	if (warn || message) {
-	    msg = newSVpvn(message, msglen);
-	    SvFLAGS(msg) |= utf8;
+	    msg = newSVpvn_flags(message, msglen, utf8);
 	    SvREADONLY_on(msg);
 	    SAVEFREESV(msg);
 	}
author	Nicholas Clark <nick@ccl4.org>	2008-01-02 23:41:21 +0000
committer	Nicholas Clark <nick@ccl4.org>	2008-01-02 23:41:21 +0000
commit	740cce10afff4bec3346f61ab3d0f7bfa424948c (patch)
tree	83c031300e2326e8d8ae399e088c4d3a42472a13
parent	e350b669f3dadb9da757b62a20659cbc7eca2190 (diff)
download	perl-740cce10afff4bec3346f61ab3d0f7bfa424948c.tar.gz