diff options
author | Nicholas Clark <nick@ccl4.org> | 2008-01-02 23:41:21 +0000 |
---|---|---|
committer | Nicholas Clark <nick@ccl4.org> | 2008-01-02 23:41:21 +0000 |
commit | 740cce10afff4bec3346f61ab3d0f7bfa424948c (patch) | |
tree | 83c031300e2326e8d8ae399e088c4d3a42472a13 | |
parent | e350b669f3dadb9da757b62a20659cbc7eca2190 (diff) | |
download | perl-740cce10afff4bec3346f61ab3d0f7bfa424948c.tar.gz |
Add a new function newSVpvn_flags(), which takes a third parameter of
flag bits. Right now the only flag bit is SVf_UTF8, which will call
SvUTF8_on() on the new SV for you. Provide a wrapper newSVpvn_utf8(),
which takes a boolean, and passes in SVf_UTF8 if that is true.
Refactor the core to use it where possible. It makes the source code
clearer and smaller, but seems to be swings and roundabouts on object
code size.
p4raw-id: //depot/perl@32807
-rw-r--r-- | embed.fnc | 1 | ||||
-rw-r--r-- | embed.h | 6 | ||||
-rw-r--r-- | hv.c | 18 | ||||
-rw-r--r-- | perl.c | 3 | ||||
-rw-r--r-- | pod/perlapi.pod | 30 | ||||
-rw-r--r-- | pp.c | 39 | ||||
-rw-r--r-- | pp_hot.c | 4 | ||||
-rw-r--r-- | pp_pack.c | 6 | ||||
-rw-r--r-- | proto.h | 4 | ||||
-rw-r--r-- | regcomp.c | 19 | ||||
-rw-r--r-- | sv.c | 34 | ||||
-rw-r--r-- | sv.h | 11 | ||||
-rw-r--r-- | toke.c | 18 | ||||
-rw-r--r-- | util.c | 3 |
14 files changed, 119 insertions, 77 deletions
@@ -590,6 +590,7 @@ Apda |SV* |newSVuv |UV u Apda |SV* |newSVnv |NV n Apda |SV* |newSVpv |NULLOK const char* s|STRLEN len Apda |SV* |newSVpvn |NULLOK const char* s|STRLEN len +Apda |SV* |newSVpvn_flags |NULLOK const char* s|STRLEN len|U32 flags Apda |SV* |newSVhek |NULLOK const HEK *hek Apda |SV* |newSVpvn_share |NULLOK const char* s|I32 len|U32 hash Afpda |SV* |newSVpvf |NN const char* pat|... @@ -572,7 +572,10 @@ #define newSVnv Perl_newSVnv #define newSVpv Perl_newSVpv #define newSVpvn Perl_newSVpvn +#define newSVpvn_flags Perl_newSVpvn_flags +#ifdef PERL_CORE #define newSVhek Perl_newSVhek +#endif #define newSVpvn_share Perl_newSVpvn_share #define newSVpvf Perl_newSVpvf #define vnewSVpvf Perl_vnewSVpvf @@ -2865,7 +2868,10 @@ #define newSVnv(a) Perl_newSVnv(aTHX_ a) #define newSVpv(a,b) Perl_newSVpv(aTHX_ a,b) #define newSVpvn(a,b) Perl_newSVpvn(aTHX_ a,b) +#define newSVpvn_flags(a,b,c) Perl_newSVpvn_flags(aTHX_ a,b,c) +#ifdef PERL_CORE #define newSVhek(a) Perl_newSVhek(aTHX_ a) +#endif #define newSVpvn_share(a,b,c) Perl_newSVpvn_share(aTHX_ a,b,c) #define vnewSVpvf(a,b) Perl_vnewSVpvf(aTHX_ a,b) #define newSVrv(a,b) Perl_newSVrv(aTHX_ a,b) @@ -350,9 +350,8 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, SV* obj = mg->mg_obj; if (!keysv) { - keysv = sv_2mortal(newSVpvn(key, klen)); - if (flags & HVhek_UTF8) - SvUTF8_on(keysv); + keysv = sv_2mortal(newSVpvn_utf8(key, klen, + flags & HVhek_UTF8)); } mg->mg_obj = keysv; /* pass key */ @@ -391,11 +390,8 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, /* FIXME should be able to skimp on the HE/HEK here when HV_FETCH_JUST_SV is true. */ if (!keysv) { - keysv = newSVpvn(key, klen); - if (is_utf8) { - SvUTF8_on(keysv); - } - } else { + keysv = newSVpvn_utf8(key, klen, is_utf8); + } else { keysv = newSVsv(keysv); } sv = sv_newmortal(); @@ -472,8 +468,7 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, if (keysv || is_utf8) { if (!keysv) { - keysv = newSVpvn(key, klen); - SvUTF8_on(keysv); + keysv = newSVpvn_utf8(key, klen, TRUE); } else { keysv = newSVsv(keysv); } @@ -515,8 +510,7 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, const bool save_taint = PL_tainted; if (keysv || is_utf8) { if (!keysv) { - keysv = newSVpvn(key, klen); - SvUTF8_on(keysv); + keysv = newSVpvn_utf8(key, klen, TRUE); } if (PL_tainting) PL_tainted = SvTAINTED(keysv); @@ -2481,8 +2481,7 @@ Perl_get_cvn_flags(pTHX_ const char *name, STRLEN len, I32 flags) * It has the same effect as "sub name;", i.e. just a forward * declaration! */ if ((flags & ~GV_NOADD_MASK) && !GvCVu(gv)) { - SV *const sv = newSVpvn(name,len); - SvFLAGS(sv) |= flags & SVf_UTF8; + SV *const sv = newSVpvn_flags(name, len, flags & SVf_UTF8); return newSUB(start_subparse(FALSE, 0), newSVOP(OP_CONST, 0, sv), NULL, NULL); diff --git a/pod/perlapi.pod b/pod/perlapi.pod index df9572a87f..b7cb7b30d0 100644 --- a/pod/perlapi.pod +++ b/pod/perlapi.pod @@ -3918,6 +3918,17 @@ incremented. =for hackers Found in file sv.h +=item newSVpvn_utf8 +X<newSVpvn_utf8> + +Creates a new SV and copies a string into it. If utf8 is true, calls +C<SvUTF8_on> on the new SV. Implemented as a wrapper around C<newSVpvn_flags>. + + SV* newSVpvn_utf8(NULLOK const char* s, STRLEN len, U32 utf8) + +=for hackers +Found in file sv.h + =item SvCUR X<SvCUR> @@ -5137,6 +5148,25 @@ C<len> bytes long. If the C<s> argument is NULL the new SV will be undefined. =for hackers Found in file sv.c +=item newSVpvn_flags +X<newSVpvn_flags> + +Creates a new SV and copies a string into it. The reference count for the +SV is set to 1. Note that if C<len> is zero, Perl will create a zero length +string. You are responsible for ensuring that the source string is at least +C<len> bytes long. If the C<s> argument is NULL the new SV will be undefined. +Currently the only flag bit accepted is SVf_UTF8. If this is set, then it +will be set on the new SV. C<newSVpvn_utf8()> is a convenience wrapper for +this function, defined as + + #define newSVpvn_utf8(s, len, u) \ + newSVpvn_flags((s), (len), (u) ? SVf_UTF8 : 0) + + SV* newSVpvn_flags(const char* s, STRLEN len, U32 flags) + +=for hackers +Found in file sv.c + =item newSVpvn_share X<newSVpvn_share> @@ -3312,9 +3312,7 @@ PP(pp_index) Otherwise I need to avoid calls to sv_pos_u2b(), which (dangerously) will trigger magic and overloading again, as will fbm_instr() */ - big = sv_2mortal(newSVpvn(big_p, biglen)); - if (big_utf8) - SvUTF8_on(big); + big = sv_2mortal(newSVpvn_utf8(big_p, biglen, big_utf8)); big_p = SvPVX(big); } if (SvGAMAGIC(little) || (is_index && !SvOK(little))) { @@ -3326,9 +3324,7 @@ PP(pp_index) This is all getting to messy. The API isn't quite clean enough, because data access has side effects. */ - little = sv_2mortal(newSVpvn(little_p, llen)); - if (little_utf8) - SvUTF8_on(little); + little = sv_2mortal(newSVpvn_utf8(little_p, llen, little_utf8)); little_p = SvPVX(little); } @@ -4755,11 +4751,9 @@ PP(pp_split) if (m >= strend) break; - dstr = newSVpvn(s, m-s); + dstr = newSVpvn_utf8(s, m-s, do_utf8); if (make_mortal) sv_2mortal(dstr); - if (do_utf8) - (void)SvUTF8_on(dstr); XPUSHs(dstr); /* skip the whitespace found last */ @@ -4788,11 +4782,9 @@ PP(pp_split) m++; if (m >= strend) break; - dstr = newSVpvn(s, m-s); + dstr = newSVpvn_utf8(s, m-s, do_utf8); if (make_mortal) sv_2mortal(dstr); - if (do_utf8) - (void)SvUTF8_on(dstr); XPUSHs(dstr); s = m; } @@ -4817,12 +4809,11 @@ PP(pp_split) /* keep track of how many bytes we skip over */ m = s; s += UTF8SKIP(s); - dstr = newSVpvn(m, s-m); + dstr = newSVpvn_utf8(m, s-m, TRUE); if (make_mortal) sv_2mortal(dstr); - (void)SvUTF8_on(dstr); PUSHs(dstr); if (s >= strend) @@ -4859,11 +4850,9 @@ PP(pp_split) ; if (m >= strend) break; - dstr = newSVpvn(s, m-s); + dstr = newSVpvn_utf8(s, m-s, do_utf8); if (make_mortal) sv_2mortal(dstr); - if (do_utf8) - (void)SvUTF8_on(dstr); XPUSHs(dstr); /* The rx->minlen is in characters but we want to step * s ahead by bytes. */ @@ -4878,11 +4867,9 @@ PP(pp_split) (m = fbm_instr((unsigned char*)s, (unsigned char*)strend, csv, multiline ? FBMrf_MULTILINE : 0)) ) { - dstr = newSVpvn(s, m-s); + dstr = newSVpvn_utf8(s, m-s, do_utf8); if (make_mortal) sv_2mortal(dstr); - if (do_utf8) - (void)SvUTF8_on(dstr); XPUSHs(dstr); /* The rx->minlen is in characters but we want to step * s ahead by bytes. */ @@ -4913,11 +4900,9 @@ PP(pp_split) strend = s + (strend - m); } m = RX_OFFS(rx)[0].start + orig; - dstr = newSVpvn(s, m-s); + dstr = newSVpvn_utf8(s, m-s, do_utf8); if (make_mortal) sv_2mortal(dstr); - if (do_utf8) - (void)SvUTF8_on(dstr); XPUSHs(dstr); if (RX_NPARENS(rx)) { I32 i; @@ -4929,14 +4914,12 @@ PP(pp_split) parens that didn't match -- they should be set to undef, not the empty string */ if (m >= orig && s >= orig) { - dstr = newSVpvn(s, m-s); + dstr = newSVpvn_utf8(s, m-s, do_utf8); } else dstr = &PL_sv_undef; /* undef, not "" */ if (make_mortal) sv_2mortal(dstr); - if (do_utf8) - (void)SvUTF8_on(dstr); XPUSHs(dstr); } } @@ -4951,11 +4934,9 @@ PP(pp_split) /* keep field after final delim? */ if (s < strend || (iters && origlimit)) { const STRLEN l = strend - s; - dstr = newSVpvn(s, l); + dstr = newSVpvn_utf8(s, l, do_utf8); if (make_mortal) sv_2mortal(dstr); - if (do_utf8) - (void)SvUTF8_on(dstr); XPUSHs(dstr); iters++; } @@ -2278,10 +2278,8 @@ PP(pp_subst) have_a_cow: #endif rxtainted |= RX_MATCH_TAINTED(rx); - dstr = newSVpvn(m, s-m); + dstr = newSVpvn_utf8(m, s-m, DO_UTF8(TARG)); SAVEFREESV(dstr); - if (DO_UTF8(TARG)) - SvUTF8_on(dstr); PL_curpm = pm; if (!c) { register PERL_CONTEXT *cx; @@ -2510,9 +2510,9 @@ S_pack_rec(pTHX_ SV *cat, tempsym_t* symptr, SV **beglist, SV **endlist ) by copying it to a temporary. */ STRLEN len; const char *const pv = SvPV_const(*beglist, len); - SV *const temp = sv_2mortal(newSVpvn(pv, len)); - if (SvUTF8(*beglist)) - SvUTF8_on(temp); + SV *const temp + = sv_2mortal(newSVpvn_flags(pv, len, + SvUTF8(*beglist))); *beglist = temp; } count = DO_UTF8(*beglist) ? @@ -1607,6 +1607,10 @@ PERL_CALLCONV SV* Perl_newSVpvn(pTHX_ const char* s, STRLEN len) __attribute__malloc__ __attribute__warn_unused_result__; +PERL_CALLCONV SV* Perl_newSVpvn_flags(pTHX_ const char* s, STRLEN len, U32 flags) + __attribute__malloc__ + __attribute__warn_unused_result__; + PERL_CALLCONV SV* Perl_newSVhek(pTHX_ const HEK *hek) __attribute__malloc__ __attribute__warn_unused_result__; @@ -1232,10 +1232,9 @@ is the recommended Unicode-aware way of saying /* store the word for dumping */ \ SV* tmp; \ if (OP(noper) != NOTHING) \ - tmp = newSVpvn(STRING(noper), STR_LEN(noper)); \ + tmp = newSVpvn_utf8(STRING(noper), STR_LEN(noper), UTF); \ else \ - tmp = newSVpvn( "", 0 ); \ - if ( UTF ) SvUTF8_on( tmp ); \ + tmp = newSVpvn_utf8( "", 0, UTF ); \ av_push( trie_words, tmp ); \ }); \ \ @@ -3320,9 +3319,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, l -= old; /* Get the added string: */ - last_str = newSVpvn(s + old, l); - if (UTF) - SvUTF8_on(last_str); + last_str = newSVpvn_utf8(s + old, l, UTF); if (deltanext == 0 && pos_before == b) { /* What was added is a constant string */ if (mincount > 1) { @@ -5256,10 +5253,8 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags) { } if ( flags ) { - SV* sv_name = sv_2mortal(Perl_newSVpvn(aTHX_ name_start, - (int)(RExC_parse - name_start))); - if (UTF) - SvUTF8_on(sv_name); + SV* sv_name = sv_2mortal(newSVpvn_utf8(name_start, + (int)(RExC_parse - name_start), UTF)); if ( flags == REG_RSN_RETURN_NAME) return sv_name; else if (flags==REG_RSN_RETURN_DATA) { @@ -8193,8 +8188,8 @@ parseit: if (!unicode_alternate) unicode_alternate = newAV(); - sv = newSVpvn((char*)foldbuf, foldlen); - SvUTF8_on(sv); + sv = newSVpvn_utf8((char*)foldbuf, foldlen, + TRUE); av_push(unicode_alternate, sv); } } @@ -6042,8 +6042,7 @@ Perl_sv_eq(pTHX_ register SV *sv1, register SV *sv2) * invalidate pv1, so we may need to make a copy */ if (sv1 == sv2 && (SvTHINKFIRST(sv1) || SvGMAGICAL(sv1))) { pv1 = SvPV_const(sv1, cur1); - sv1 = sv_2mortal(newSVpvn(pv1, cur1)); - if (SvUTF8(sv2)) SvUTF8_on(sv1); + sv1 = sv_2mortal(newSVpvn_flags(pv1, cur1, SvUTF8(sv2))); } pv1 = SvPV_const(sv1, cur1); } @@ -7068,6 +7067,37 @@ Perl_newSVpvn(pTHX_ const char *s, STRLEN len) return sv; } +/* +=for apidoc newSVpvn_flags + +Creates a new SV and copies a string into it. The reference count for the +SV is set to 1. Note that if C<len> is zero, Perl will create a zero length +string. You are responsible for ensuring that the source string is at least +C<len> bytes long. If the C<s> argument is NULL the new SV will be undefined. +Currently the only flag bit accepted is SVf_UTF8. If this is set, then it +will be set on the new SV. C<newSVpvn_utf8()> is a convenience wrapper for +this function, defined as + + #define newSVpvn_utf8(s, len, u) \ + newSVpvn_flags((s), (len), (u) ? SVf_UTF8 : 0) + +=cut +*/ + +SV * +Perl_newSVpvn_flags(pTHX_ const char *s, STRLEN len, U32 flags) +{ + dVAR; + register SV *sv; + + /* All the flags we don't support must be zero. + And we're new code so I'm going to assert this from the start. */ + assert(!(flags & ~SVf_UTF8)); + new_SV(sv); + sv_setpvn(sv,s,len); + SvFLAGS(sv) |= flags; + return sv; +} /* =for apidoc newSVhek @@ -1911,6 +1911,17 @@ struct clone_params { }; /* +=for apidoc Am|SV*|newSVpvn_utf8|NULLOK const char* s|STRLEN len|U32 utf8 + +Creates a new SV and copies a string into it. If utf8 is true, calls +C<SvUTF8_on> on the new SV. Implemented as a wrapper around C<newSVpvn_flags>. + +=cut +*/ + +#define newSVpvn_utf8(s, len, u) newSVpvn_flags((s), (len), (u) ? SVf_UTF8 : 0) + +/* * Local variables: * c-indentation-style: bsd * c-basic-offset: 4 @@ -1347,9 +1347,9 @@ STATIC SV * S_newSV_maybe_utf8(pTHX_ const char *start, STRLEN len) { dVAR; - SV * const sv = newSVpvn(start,len); - if (UTF && !IN_BYTES && is_utf8_string((const U8*)start, len)) - SvUTF8_on(sv); + SV * const sv = newSVpvn_utf8(start, len, + UTF && !IN_BYTES + && is_utf8_string((const U8*)start, len)); return sv; } @@ -1570,9 +1570,7 @@ S_tokeq(pTHX_ SV *sv) goto finish; d = s; if ( PL_hints & HINT_NEW_STRING ) { - pv = sv_2mortal(newSVpvn(SvPVX_const(pv), len)); - if (SvUTF8(sv)) - SvUTF8_on(pv); + pv = sv_2mortal(newSVpvn_flags(SvPVX_const(pv), len, SvUTF8(sv))); } while (s < send) { if (*s == '\\') { @@ -1639,9 +1637,7 @@ S_sublex_start(pTHX) /* Overloaded constants, nothing fancy: Convert to SVt_PV: */ STRLEN len; const char * const p = SvPV_const(sv, len); - SV * const nsv = newSVpvn(p, len); - if (SvUTF8(sv)) - SvUTF8_on(nsv); + SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv)); SvREFCNT_dec(sv); sv = nsv; } @@ -6347,9 +6343,7 @@ Perl_yylex(pTHX) for (; !isSPACE(*d) && len; --len, ++d) /**/; } - sv = newSVpvn(b, d-b); - if (DO_UTF8(PL_lex_stuff)) - SvUTF8_on(sv); + sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff)); words = append_elem(OP_LIST, words, newSVOP(OP_CONST, 0, tokeq(sv))); } @@ -1270,8 +1270,7 @@ S_vdie_common(pTHX_ const char *message, STRLEN msglen, I32 utf8, bool warn) *hook = NULL; } if (warn || message) { - msg = newSVpvn(message, msglen); - SvFLAGS(msg) |= utf8; + msg = newSVpvn_flags(message, msglen, utf8); SvREADONLY_on(msg); SAVEFREESV(msg); } |