#line 2 "op.c" /* op.c * * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others * * You may distribute under the terms of either the GNU General Public * License or the Artistic License, as specified in the README file. * */ /* * 'You see: Mr. Drogo, he married poor Miss Primula Brandybuck. She was * our Mr. Bilbo's first cousin on the mother's side (her mother being the * youngest of the Old Took's daughters); and Mr. Drogo was his second * cousin. So Mr. Frodo is his first *and* second cousin, once removed * either way, as the saying is, if you follow me.' --the Gaffer * * [p.23 of _The Lord of the Rings_, I/i: "A Long-Expected Party"] */ /* This file contains the functions that create, manipulate and optimize * the OP structures that hold a compiled perl program. * * Note that during the build of miniperl, a temporary copy of this file * is made, called opmini.c. * * A Perl program is compiled into a tree of OP nodes. Each op contains: * * structural OP pointers to its children and siblings (op_sibling, * op_first etc) that define the tree structure; * * execution order OP pointers (op_next, plus sometimes op_other, * op_lastop etc) that define the execution sequence plus variants; * * a pointer to the C "pp" function that would execute the op; * * any data specific to that op. * For example, an OP_CONST op points to the pp_const() function and to an * SV containing the constant value. When pp_const() is executed, its job * is to push that SV onto the stack. * * OPs are mainly created by the newFOO() functions, which are mainly * called from the parser (in perly.y) as the code is parsed. For example * the Perl code $a + $b * $c would cause the equivalent of the following * to be called (oversimplifying a bit): * * newBINOP(OP_ADD, flags, * newSVREF($a), * newBINOP(OP_MULTIPLY, flags, newSVREF($b), newSVREF($c)) * ) * * As the parser reduces low-level rules, it creates little op subtrees; * as higher-level rules are resolved, these subtrees get joined together * as branches on a bigger subtree, until eventually a top-level rule like * a subroutine definition is reduced, at which point there is one large * parse tree left. * * The execution order pointers (op_next) are generated as the subtrees * are joined together. Consider this sub-expression: A*B + C/D: at the * point when it's just been parsed, the op tree looks like: * * [+] * | * [*]------[/] * | | * A---B C---D * * with the intended execution order being: * * [PREV] => A => B => [*] => C => D => [/] => [+] => [NEXT] * * At this point all the nodes' op_next pointers will have been set, * except that: * * we don't know what the [NEXT] node will be yet; * * we don't know what the [PREV] node will be yet, but when it gets * created and needs its op_next set, it needs to be set to point to * A, which is non-obvious. * To handle both those cases, we temporarily set the top node's * op_next to point to the first node to be executed in this subtree (A in * this case). This means that initially a subtree's op_next chain, * starting from the top node, will visit each node in execution sequence * then point back at the top node. * When we embed this subtree in a larger tree, its top op_next is used * to get the start node, then is set to point to its new neighbour. 
 * For example the two separate [*],A,B and [/],C,D subtrees would
 * initially have had:
 *     [*] => A; A => B; B => [*]
 * and
 *     [/] => C; C => D; D => [/]
 * When these two subtrees were joined together to make the [+] subtree,
 * [+]'s op_next was set to [*]'s op_next, i.e. A; then [*]'s op_next was
 * set to point to [/]'s op_next, i.e. C.
 *
 * This op_next linking is done by the LINKLIST() macro and its underlying
 * op_linklist() function. Given a top-level op, if its op_next is
 * non-null, it's already been linked, so leave it. Otherwise link it with
 * its children as described above, possibly recursively if any of the
 * children have a null op_next.
 *
 * In summary: given a subtree, its top-level node's op_next will either
 * be:
 *   NULL: the subtree hasn't been LINKLIST()ed yet;
 *   fake: points to the start op for this subtree;
 *   real: once the subtree has been embedded into a larger tree
 */

/* Here's an older description from Larry.

Perl's compiler is essentially a 3-pass compiler with interleaved phases:

    A bottom-up pass
    A top-down pass
    An execution-order pass

The bottom-up pass is represented by all the "newOP" routines and the
ck_ routines.  The bottom-upness is actually driven by yacc.  So at the
point that a ck_ routine fires, we have no idea what the context is,
either upward in the syntax tree, or either forward or backward in the
execution order.  (The bottom-up parser builds that part of the execution
order it knows about, but if you follow the "next" links around, you'll
find it's actually a closed loop through the top level node.)

Whenever the bottom-up parser gets to a node that supplies context to its
components, it invokes that portion of the top-down pass that applies to
that part of the subtree (and marks the top node as processed, so if a
node further up supplies context, it doesn't have to take the plunge
again).  As a particular subcase of this, as the new node is built, it
takes all the closed execution loops of its subcomponents and links them
into a new closed loop for the higher level node.  But it's still not the
real execution order.

The actual execution order is not known till we get a grammar reduction
to a top-level unit like a subroutine or file that will be called by
"name" rather than via a "next" pointer.  At that point, we can call into
peep() to do that code's portion of the 3rd pass.  It has to be
recursive, but it's recursive on basic blocks, not on tree nodes.
*/

/* To implement user lexical pragmas, there needs to be a way at run time to
   get the compile time state of %^H for that block.  Storing %^H in every
   block (or even COP) would be very expensive, so a different approach is
   taken.  The (running) state of %^H is serialised into a tree of HE-like
   structs.  Stores into %^H are chained onto the current leaf as a struct
   refcounted_he * with the key and the value.  Deletes from %^H are saved
   with a value of PL_sv_placeholder.  The state of %^H at any point can be
   turned back into a regular HV by walking back up the tree from that
   point's leaf, ignoring any key you've already seen (placeholder or not),
   storing the rest into the HV structure, then removing the placeholders.
   Hence memory is only used to store the %^H deltas from the enclosing COP,
   rather than the entire %^H on each COP.

   To cause actions on %^H to write out the serialisation records, it has
   magic type 'H'.  This magic (itself) does nothing, but its presence
   causes the values to gain magic type 'h', which has entries for set and
   clear.  C<Perl_magic_sethint> updates C<PL_compiling.cop_hints_hash> with
   a store record, with deletes written by C<Perl_magic_clearhint>.
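   (As an illustrative sketch only: code that holds a COP and wants that
   block's compile-time %^H back as an ordinary hash can use the cophh API,
   e.g.

       HV *hints = cophh_2hv(CopHINTHASH_get(cop), 0);

   where "cop" stands for whatever COP the caller is looking at.)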
C saves the current C on the save stack, so that it will be correctly restored when any inner compiling scope is exited. */ #include "EXTERN.h" #define PERL_IN_OP_C #include "perl.h" #include "keywords.h" #include "feature.h" #include "regcomp.h" #include "invlist_inline.h" #define CALL_PEEP(o) PL_peepp(aTHX_ o) #define CALL_RPEEP(o) PL_rpeepp(aTHX_ o) #define CALL_OPFREEHOOK(o) if (PL_opfreehook) PL_opfreehook(aTHX_ o) static const char array_passed_to_stat[] = "Array passed to stat will be coerced to a scalar"; /* remove any leading "empty" ops from the op_next chain whose first * node's address is stored in op_p. Store the updated address of the * first node in op_p. */ STATIC void S_prune_chain_head(OP** op_p) { while (*op_p && ( (*op_p)->op_type == OP_NULL || (*op_p)->op_type == OP_SCOPE || (*op_p)->op_type == OP_SCALAR || (*op_p)->op_type == OP_LINESEQ) ) *op_p = (*op_p)->op_next; } /* See the explanatory comments above struct opslab in op.h. */ #ifdef PERL_DEBUG_READONLY_OPS # define PERL_SLAB_SIZE 128 # define PERL_MAX_SLAB_SIZE 4096 # include #endif #ifndef PERL_SLAB_SIZE # define PERL_SLAB_SIZE 64 #endif #ifndef PERL_MAX_SLAB_SIZE # define PERL_MAX_SLAB_SIZE 2048 #endif /* rounds up to nearest pointer */ #define SIZE_TO_PSIZE(x) (((x) + sizeof(I32 *) - 1)/sizeof(I32 *)) #define DIFF(o,p) ((size_t)((I32 **)(p) - (I32**)(o))) /* requires double parens and aTHX_ */ #define DEBUG_S_warn(args) \ DEBUG_S( \ PerlIO_printf(Perl_debug_log, "%s", SvPVx_nolen(Perl_mess args)) \ ) /* malloc a new op slab (suitable for attaching to PL_compcv). * sz is in units of pointers */ static OPSLAB * S_new_slab(pTHX_ OPSLAB *head, size_t sz) { OPSLAB *slab; /* opslot_offset is only U16 */ assert(sz < U16_MAX); #ifdef PERL_DEBUG_READONLY_OPS slab = (OPSLAB *) mmap(0, sz * sizeof(I32 *), PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0); DEBUG_m(PerlIO_printf(Perl_debug_log, "mapped %lu at %p\n", (unsigned long) sz, slab)); if (slab == MAP_FAILED) { perror("mmap failed"); abort(); } #else slab = (OPSLAB *)PerlMemShared_calloc(sz, sizeof(I32 *)); #endif slab->opslab_size = (U16)sz; #ifndef WIN32 /* The context is unused in non-Windows */ PERL_UNUSED_CONTEXT; #endif slab->opslab_free_space = sz - DIFF(slab, &slab->opslab_slots); slab->opslab_head = head ? head : slab; DEBUG_S_warn((aTHX_ "allocated new op slab sz 0x%x, %p, head slab %p", (unsigned int)slab->opslab_size, (void*)slab, (void*)(slab->opslab_head))); return slab; } /* Returns a sz-sized block of memory (suitable for holding an op) from * a free slot in the chain of op slabs attached to PL_compcv. * Allocates a new slab if necessary. * if PL_compcv isn't compiling, malloc() instead. */ void * Perl_Slab_Alloc(pTHX_ size_t sz) { OPSLAB *head_slab; /* first slab in the chain */ OPSLAB *slab2; OPSLOT *slot; OP *o; size_t opsz; /* We only allocate ops from the slab during subroutine compilation. We find the slab via PL_compcv, hence that must be non-NULL. It could also be pointing to a subroutine which is now fully set up (CvROOT() pointing to the top of the optree for that sub), or a subroutine which isn't using the slab allocator. If our sanity checks aren't met, don't use a slab, but allocate the OP directly from the heap. */ if (!PL_compcv || CvROOT(PL_compcv) || (CvSTART(PL_compcv) && !CvSLABBED(PL_compcv))) { o = (OP*)PerlMemShared_calloc(1, sz); goto gotit; } /* While the subroutine is under construction, the slabs are accessed via CvSTART(), to avoid needing to expand PVCV by one pointer for something unneeded at runtime. 
Once a subroutine is constructed, the slabs are accessed via CvROOT(). So if CvSTART() is NULL, no slab has been allocated yet. See the commit message for 8be227ab5eaa23f2 for more details. */ if (!CvSTART(PL_compcv)) { CvSTART(PL_compcv) = (OP *)(head_slab = S_new_slab(aTHX_ NULL, PERL_SLAB_SIZE)); CvSLABBED_on(PL_compcv); head_slab->opslab_refcnt = 2; /* one for the CV; one for the new OP */ } else ++(head_slab = (OPSLAB *)CvSTART(PL_compcv))->opslab_refcnt; opsz = SIZE_TO_PSIZE(sz); sz = opsz + OPSLOT_HEADER_P; /* The slabs maintain a free list of OPs. In particular, constant folding will free up OPs, so it makes sense to re-use them where possible. A freed up slot is used in preference to a new allocation. */ if (head_slab->opslab_freed) { OP **too = &head_slab->opslab_freed; o = *too; DEBUG_S_warn((aTHX_ "found free op at %p, slab %p, head slab %p", (void*)o, (I32**)OpSLOT(o) - OpSLOT(o)->opslot_offset, (void*)head_slab)); while (o && OpSLOT(o)->opslot_size < sz) { DEBUG_S_warn((aTHX_ "Alas! too small")); o = *(too = &o->op_next); if (o) { DEBUG_S_warn((aTHX_ "found another free op at %p", (void*)o)); } } if (o) { DEBUG_S_warn((aTHX_ "realloced op at %p, slab %p, head slab %p", (void*)o, (I32**)OpSLOT(o) - OpSLOT(o)->opslot_offset, (void*)head_slab)); *too = o->op_next; Zero(o, opsz, I32 *); o->op_slabbed = 1; goto gotit; } } #define INIT_OPSLOT(s) \ slot->opslot_offset = DIFF(slab2, slot) ; \ slot->opslot_size = s; \ slab2->opslab_free_space -= s; \ o = &slot->opslot_op; \ o->op_slabbed = 1 /* The partially-filled slab is next in the chain. */ slab2 = head_slab->opslab_next ? head_slab->opslab_next : head_slab; if (slab2->opslab_free_space < sz) { /* Remaining space is too small. */ /* If we can fit a BASEOP, add it to the free chain, so as not to waste it. */ if (slab2->opslab_free_space >= SIZE_TO_PSIZE(sizeof(OP)) + OPSLOT_HEADER_P) { slot = &slab2->opslab_slots; INIT_OPSLOT(slab2->opslab_free_space); o->op_type = OP_FREED; o->op_next = head_slab->opslab_freed; head_slab->opslab_freed = o; } /* Create a new slab. Make this one twice as big. */ slab2 = S_new_slab(aTHX_ head_slab, slab2->opslab_size > PERL_MAX_SLAB_SIZE / 2 ? 
PERL_MAX_SLAB_SIZE : slab2->opslab_size * 2); slab2->opslab_next = head_slab->opslab_next; head_slab->opslab_next = slab2; } assert(slab2->opslab_size >= sz); /* Create a new op slot */ slot = (OPSLOT *) ((I32 **)&slab2->opslab_slots + slab2->opslab_free_space - sz); assert(slot >= &slab2->opslab_slots); INIT_OPSLOT(sz); DEBUG_S_warn((aTHX_ "allocating op at %p, slab %p, head slab %p", (void*)o, (void*)slab2, (void*)head_slab)); gotit: /* moresib == 0, op_sibling == 0 implies a solitary unattached op */ assert(!o->op_moresib); assert(!o->op_sibparent); return (void *)o; } #undef INIT_OPSLOT #ifdef PERL_DEBUG_READONLY_OPS void Perl_Slab_to_ro(pTHX_ OPSLAB *slab) { PERL_ARGS_ASSERT_SLAB_TO_RO; if (slab->opslab_readonly) return; slab->opslab_readonly = 1; for (; slab; slab = slab->opslab_next) { /*DEBUG_U(PerlIO_printf(Perl_debug_log,"mprotect ->ro %lu at %p\n", (unsigned long) slab->opslab_size, slab));*/ if (mprotect(slab, slab->opslab_size * sizeof(I32 *), PROT_READ)) Perl_warn(aTHX_ "mprotect for %p %lu failed with %d", slab, (unsigned long)slab->opslab_size, errno); } } void Perl_Slab_to_rw(pTHX_ OPSLAB *const slab) { OPSLAB *slab2; PERL_ARGS_ASSERT_SLAB_TO_RW; if (!slab->opslab_readonly) return; slab2 = slab; for (; slab2; slab2 = slab2->opslab_next) { /*DEBUG_U(PerlIO_printf(Perl_debug_log,"mprotect ->rw %lu at %p\n", (unsigned long) size, slab2));*/ if (mprotect((void *)slab2, slab2->opslab_size * sizeof(I32 *), PROT_READ|PROT_WRITE)) { Perl_warn(aTHX_ "mprotect RW for %p %lu failed with %d", slab, (unsigned long)slab2->opslab_size, errno); } } slab->opslab_readonly = 0; } #else # define Slab_to_rw(op) NOOP #endif /* This cannot possibly be right, but it was copied from the old slab allocator, to which it was originally added, without explanation, in commit 083fcd5. */ #ifdef NETWARE # define PerlMemShared PerlMem #endif /* make freed ops die if they're inadvertently executed */ #ifdef DEBUGGING static OP * S_pp_freed(pTHX) { DIE(aTHX_ "panic: freed op 0x%p called\n", PL_op); } #endif /* Return the block of memory used by an op to the free list of * the OP slab associated with that op. */ void Perl_Slab_Free(pTHX_ void *op) { OP * const o = (OP *)op; OPSLAB *slab; PERL_ARGS_ASSERT_SLAB_FREE; #ifdef DEBUGGING o->op_ppaddr = S_pp_freed; #endif if (!o->op_slabbed) { if (!o->op_static) PerlMemShared_free(op); return; } slab = OpSLAB(o); /* If this op is already freed, our refcount will get screwy. */ assert(o->op_type != OP_FREED); o->op_type = OP_FREED; o->op_next = slab->opslab_freed; slab->opslab_freed = o; DEBUG_S_warn((aTHX_ "freeing op at %p, slab %p, head slab %p", (void*)o, (I32**)OpSLOT(o) - OpSLOT(o)->opslot_offset, (void*)slab)); OpslabREFCNT_dec_padok(slab); } void Perl_opslab_free_nopad(pTHX_ OPSLAB *slab) { const bool havepad = !!PL_comppad; PERL_ARGS_ASSERT_OPSLAB_FREE_NOPAD; if (havepad) { ENTER; PAD_SAVE_SETNULLPAD(); } opslab_free(slab); if (havepad) LEAVE; } /* Free a chain of OP slabs. Should only be called after all ops contained * in it have been freed. At this point, its reference count should be 1, * because OpslabREFCNT_dec() skips doing rc-- when it detects that rc == 1, * and just directly calls opslab_free(). * (Note that the reference count which PL_compcv held on the slab should * have been removed once compilation of the sub was complete). 
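 *
 * As an illustrative sketch only, the caller-side pattern implied by the
 * above (the real check lives in the OpslabREFCNT_dec() family of macros
 * mentioned above, not here) is:
 *
 *     if (slab->opslab_refcnt == 1)
 *         opslab_free(slab);        - last reference: free the whole chain
 *     else
 *         slab->opslab_refcnt--;    - others still hold it: just decrement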
* * */ void Perl_opslab_free(pTHX_ OPSLAB *slab) { OPSLAB *slab2; PERL_ARGS_ASSERT_OPSLAB_FREE; PERL_UNUSED_CONTEXT; DEBUG_S_warn((aTHX_ "freeing slab %p", (void*)slab)); assert(slab->opslab_refcnt == 1); do { slab2 = slab->opslab_next; #ifdef DEBUGGING slab->opslab_refcnt = ~(size_t)0; #endif #ifdef PERL_DEBUG_READONLY_OPS DEBUG_m(PerlIO_printf(Perl_debug_log, "Deallocate slab at %p\n", (void*)slab)); if (munmap(slab, slab->opslab_size * sizeof(I32 *))) { perror("munmap failed"); abort(); } #else PerlMemShared_free(slab); #endif slab = slab2; } while (slab); } /* like opslab_free(), but first calls op_free() on any ops in the slab * not marked as OP_FREED */ void Perl_opslab_force_free(pTHX_ OPSLAB *slab) { OPSLAB *slab2; #ifdef DEBUGGING size_t savestack_count = 0; #endif PERL_ARGS_ASSERT_OPSLAB_FORCE_FREE; slab2 = slab; do { OPSLOT *slot = (OPSLOT*) ((I32**)&slab2->opslab_slots + slab2->opslab_free_space); OPSLOT *end = (OPSLOT*) ((I32**)slab2 + slab2->opslab_size); for (; slot < end; slot = (OPSLOT*) ((I32**)slot + slot->opslot_size) ) { if (slot->opslot_op.op_type != OP_FREED && !(slot->opslot_op.op_savefree #ifdef DEBUGGING && ++savestack_count #endif ) ) { assert(slot->opslot_op.op_slabbed); op_free(&slot->opslot_op); if (slab->opslab_refcnt == 1) goto free; } } } while ((slab2 = slab2->opslab_next)); /* > 1 because the CV still holds a reference count. */ if (slab->opslab_refcnt > 1) { /* still referenced by the savestack */ #ifdef DEBUGGING assert(savestack_count == slab->opslab_refcnt-1); #endif /* Remove the CV’s reference count. */ slab->opslab_refcnt--; return; } free: opslab_free(slab); } #ifdef PERL_DEBUG_READONLY_OPS OP * Perl_op_refcnt_inc(pTHX_ OP *o) { if(o) { OPSLAB *const slab = o->op_slabbed ? OpSLAB(o) : NULL; if (slab && slab->opslab_readonly) { Slab_to_rw(slab); ++o->op_targ; Slab_to_ro(slab); } else { ++o->op_targ; } } return o; } PADOFFSET Perl_op_refcnt_dec(pTHX_ OP *o) { PADOFFSET result; OPSLAB *const slab = o->op_slabbed ? OpSLAB(o) : NULL; PERL_ARGS_ASSERT_OP_REFCNT_DEC; if (slab && slab->opslab_readonly) { Slab_to_rw(slab); result = --o->op_targ; Slab_to_ro(slab); } else { result = --o->op_targ; } return result; } #endif /* * In the following definition, the ", (OP*)0" is just to make the compiler * think the expression is of the right type: croak actually does a Siglongjmp. */ #define CHECKOP(type,o) \ ((PL_op_mask && PL_op_mask[type]) \ ? 
( op_free((OP*)o), \ Perl_croak(aTHX_ "'%s' trapped by operation mask", PL_op_desc[type]), \ (OP*)0 ) \ : PL_check[type](aTHX_ (OP*)o)) #define RETURN_UNLIMITED_NUMBER (PERL_INT_MAX / 2) #define OpTYPE_set(o,type) \ STMT_START { \ o->op_type = (OPCODE)type; \ o->op_ppaddr = PL_ppaddr[type]; \ } STMT_END STATIC OP * S_no_fh_allowed(pTHX_ OP *o) { PERL_ARGS_ASSERT_NO_FH_ALLOWED; yyerror(Perl_form(aTHX_ "Missing comma after first argument to %s function", OP_DESC(o))); return o; } STATIC OP * S_too_few_arguments_pv(pTHX_ OP *o, const char* name, U32 flags) { PERL_ARGS_ASSERT_TOO_FEW_ARGUMENTS_PV; yyerror_pv(Perl_form(aTHX_ "Not enough arguments for %s", name), flags); return o; } STATIC OP * S_too_many_arguments_pv(pTHX_ OP *o, const char *name, U32 flags) { PERL_ARGS_ASSERT_TOO_MANY_ARGUMENTS_PV; yyerror_pv(Perl_form(aTHX_ "Too many arguments for %s", name), flags); return o; } STATIC void S_bad_type_pv(pTHX_ I32 n, const char *t, const OP *o, const OP *kid) { PERL_ARGS_ASSERT_BAD_TYPE_PV; yyerror_pv(Perl_form(aTHX_ "Type of arg %d to %s must be %s (not %s)", (int)n, PL_op_desc[(o)->op_type], t, OP_DESC(kid)), 0); } /* remove flags var, its unused in all callers, move to to right end since gv and kid are always the same */ STATIC void S_bad_type_gv(pTHX_ I32 n, GV *gv, const OP *kid, const char *t) { SV * const namesv = cv_name((CV *)gv, NULL, 0); PERL_ARGS_ASSERT_BAD_TYPE_GV; yyerror_pv(Perl_form(aTHX_ "Type of arg %d to %" SVf " must be %s (not %s)", (int)n, SVfARG(namesv), t, OP_DESC(kid)), SvUTF8(namesv)); } STATIC void S_no_bareword_allowed(pTHX_ OP *o) { PERL_ARGS_ASSERT_NO_BAREWORD_ALLOWED; qerror(Perl_mess(aTHX_ "Bareword \"%" SVf "\" not allowed while \"strict subs\" in use", SVfARG(cSVOPo_sv))); o->op_private &= ~OPpCONST_STRICT; /* prevent warning twice about the same OP */ } /* "register" allocation */ PADOFFSET Perl_allocmy(pTHX_ const char *const name, const STRLEN len, const U32 flags) { PADOFFSET off; const bool is_our = (PL_parser->in_my == KEY_our); PERL_ARGS_ASSERT_ALLOCMY; if (flags & ~SVf_UTF8) Perl_croak(aTHX_ "panic: allocmy illegal flag bits 0x%" UVxf, (UV)flags); /* complain about "my $" etc etc */ if ( len && !( is_our || isALPHA(name[1]) || ( (flags & SVf_UTF8) && isIDFIRST_utf8_safe((U8 *)name+1, name + len)) || (name[1] == '_' && len > 2))) { const char * const type = PL_parser->in_my == KEY_sigvar ? "subroutine signature" : PL_parser->in_my == KEY_state ? "\"state\"" : "\"my\""; if (!(flags & SVf_UTF8 && UTF8_IS_START(name[1])) && isASCII(name[1]) && (!isPRINT(name[1]) || strchr("\t\n\r\f", name[1]))) { /* diag_listed_as: Can't use global %s in %s */ yyerror(Perl_form(aTHX_ "Can't use global %c^%c%.*s in %s", name[0], toCTRL(name[1]), (int)(len - 2), name + 2, type)); } else { yyerror_pv(Perl_form(aTHX_ "Can't use global %.*s in %s", (int) len, name, type), flags & SVf_UTF8); } } /* allocate a spare slot and store the name in that slot */ off = pad_add_name_pvn(name, len, (is_our ? padadd_OUR : PL_parser->in_my == KEY_state ? padadd_STATE : 0), PL_parser->in_my_stash, (is_our /* $_ is always in main::, even with our */ ? (PL_curstash && !memEQs(name,len,"$_") ? 
PL_curstash : PL_defstash) : NULL ) ); /* anon sub prototypes contains state vars should always be cloned, * otherwise the state var would be shared between anon subs */ if (PL_parser->in_my == KEY_state && CvANON(PL_compcv)) CvCLONE_on(PL_compcv); return off; } /* =head1 Optree Manipulation Functions =for apidoc alloccopstash Available only under threaded builds, this function allocates an entry in C for the stash passed to it. =cut */ #ifdef USE_ITHREADS PADOFFSET Perl_alloccopstash(pTHX_ HV *hv) { PADOFFSET off = 0, o = 1; bool found_slot = FALSE; PERL_ARGS_ASSERT_ALLOCCOPSTASH; if (PL_stashpad[PL_stashpadix] == hv) return PL_stashpadix; for (; o < PL_stashpadmax; ++o) { if (PL_stashpad[o] == hv) return PL_stashpadix = o; if (!PL_stashpad[o] || SvTYPE(PL_stashpad[o]) != SVt_PVHV) found_slot = TRUE, off = o; } if (!found_slot) { Renew(PL_stashpad, PL_stashpadmax + 10, HV *); Zero(PL_stashpad + PL_stashpadmax, 10, HV *); off = PL_stashpadmax; PL_stashpadmax += 10; } PL_stashpad[PL_stashpadix = off] = hv; return off; } #endif /* free the body of an op without examining its contents. * Always use this rather than FreeOp directly */ static void S_op_destroy(pTHX_ OP *o) { FreeOp(o); } /* Destructor */ /* =for apidoc op_free Free an op and its children. Only use this when an op is no longer linked to from any optree. =cut */ void Perl_op_free(pTHX_ OP *o) { dVAR; OPCODE type; OP *top_op = o; OP *next_op = o; bool went_up = FALSE; /* whether we reached the current node by following the parent pointer from a child, and so have already seen this node */ if (!o || o->op_type == OP_FREED) return; if (o->op_private & OPpREFCOUNTED) { /* if base of tree is refcounted, just decrement */ switch (o->op_type) { case OP_LEAVESUB: case OP_LEAVESUBLV: case OP_LEAVEEVAL: case OP_LEAVE: case OP_SCOPE: case OP_LEAVEWRITE: { PADOFFSET refcnt; OP_REFCNT_LOCK; refcnt = OpREFCNT_dec(o); OP_REFCNT_UNLOCK; if (refcnt) { /* Need to find and remove any pattern match ops from * the list we maintain for reset(). */ find_and_forget_pmops(o); return; } } break; default: break; } } while (next_op) { o = next_op; /* free child ops before ourself, (then free ourself "on the * way back up") */ if (!went_up && o->op_flags & OPf_KIDS) { next_op = cUNOPo->op_first; continue; } /* find the next node to visit, *then* free the current node * (can't rely on o->op_* fields being valid after o has been * freed) */ /* The next node to visit will be either the sibling, or the * parent if no siblings left, or NULL if we've worked our way * back up to the top node in the tree */ next_op = (o == top_op) ? NULL : o->op_sibparent; went_up = cBOOL(!OpHAS_SIBLING(o)); /* parents are already visited */ /* Now process the current node */ /* Though ops may be freed twice, freeing the op after its slab is a big no-no. */ assert(!o->op_slabbed || OpSLAB(o)->opslab_refcnt != ~(size_t)0); /* During the forced freeing of ops after compilation failure, kidops may be freed before their parents. */ if (!o || o->op_type == OP_FREED) continue; type = o->op_type; /* an op should only ever acquire op_private flags that we know about. * If this fails, you may need to fix something in regen/op_private. * Don't bother testing if: * * the op_ppaddr doesn't match the op; someone may have * overridden the op and be doing strange things with it; * * we've errored, as op flags are often left in an * inconsistent state then. 
Note that an error when * compiling the main program leaves PL_parser NULL, so * we can't spot faults in the main code, only * evaled/required code */ #ifdef DEBUGGING if ( o->op_ppaddr == PL_ppaddr[type] && PL_parser && !PL_parser->error_count) { assert(!(o->op_private & ~PL_op_private_valid[type])); } #endif /* Call the op_free hook if it has been set. Do it now so that it's called * at the right time for refcounted ops, but still before all of the kids * are freed. */ CALL_OPFREEHOOK(o); if (type == OP_NULL) type = (OPCODE)o->op_targ; if (o->op_slabbed) Slab_to_rw(OpSLAB(o)); /* COP* is not cleared by op_clear() so that we may track line * numbers etc even after null() */ if (type == OP_NEXTSTATE || type == OP_DBSTATE) { cop_free((COP*)o); } op_clear(o); FreeOp(o); if (PL_op == o) PL_op = NULL; } } /* S_op_clear_gv(): free a GV attached to an OP */ STATIC #ifdef USE_ITHREADS void S_op_clear_gv(pTHX_ OP *o, PADOFFSET *ixp) #else void S_op_clear_gv(pTHX_ OP *o, SV**svp) #endif { GV *gv = (o->op_type == OP_GV || o->op_type == OP_GVSV || o->op_type == OP_MULTIDEREF) #ifdef USE_ITHREADS && PL_curpad ? ((GV*)PAD_SVl(*ixp)) : NULL; #else ? (GV*)(*svp) : NULL; #endif /* It's possible during global destruction that the GV is freed before the optree. Whilst the SvREFCNT_inc is happy to bump from 0 to 1 on a freed SV, the corresponding SvREFCNT_dec from 1 to 0 will trigger an assertion failure, because the entry to sv_clear checks that the scalar is not already freed. A check of for !SvIS_FREED(gv) turns out to be invalid, because during global destruction the reference count can be forced down to zero (with SVf_BREAK set). In which case raising to 1 and then dropping to 0 triggers cleanup before it should happen. I *think* that this might actually be a general, systematic, weakness of the whole idea of SVf_BREAK, in that code *is* allowed to raise and lower references during global destruction, so any *valid* code that happens to do this during global destruction might well trigger premature cleanup. */ bool still_valid = gv && SvREFCNT(gv); if (still_valid) SvREFCNT_inc_simple_void(gv); #ifdef USE_ITHREADS if (*ixp > 0) { pad_swipe(*ixp, TRUE); *ixp = 0; } #else SvREFCNT_dec(*svp); *svp = NULL; #endif if (still_valid) { int try_downgrade = SvREFCNT(gv) == 2; SvREFCNT_dec_NN(gv); if (try_downgrade) gv_try_downgrade(gv); } } void Perl_op_clear(pTHX_ OP *o) { dVAR; PERL_ARGS_ASSERT_OP_CLEAR; switch (o->op_type) { case OP_NULL: /* Was holding old type, if any. */ /* FALLTHROUGH */ case OP_ENTERTRY: case OP_ENTEREVAL: /* Was holding hints. */ case OP_ARGDEFELEM: /* Was holding signature index. 
*/ o->op_targ = 0; break; default: if (!(o->op_flags & OPf_REF) || !OP_IS_STAT(o->op_type)) break; /* FALLTHROUGH */ case OP_GVSV: case OP_GV: case OP_AELEMFAST: #ifdef USE_ITHREADS S_op_clear_gv(aTHX_ o, &(cPADOPx(o)->op_padix)); #else S_op_clear_gv(aTHX_ o, &(cSVOPx(o)->op_sv)); #endif break; case OP_METHOD_REDIR: case OP_METHOD_REDIR_SUPER: #ifdef USE_ITHREADS if (cMETHOPx(o)->op_rclass_targ) { pad_swipe(cMETHOPx(o)->op_rclass_targ, 1); cMETHOPx(o)->op_rclass_targ = 0; } #else SvREFCNT_dec(cMETHOPx(o)->op_rclass_sv); cMETHOPx(o)->op_rclass_sv = NULL; #endif /* FALLTHROUGH */ case OP_METHOD_NAMED: case OP_METHOD_SUPER: SvREFCNT_dec(cMETHOPx(o)->op_u.op_meth_sv); cMETHOPx(o)->op_u.op_meth_sv = NULL; #ifdef USE_ITHREADS if (o->op_targ) { pad_swipe(o->op_targ, 1); o->op_targ = 0; } #endif break; case OP_CONST: case OP_HINTSEVAL: SvREFCNT_dec(cSVOPo->op_sv); cSVOPo->op_sv = NULL; #ifdef USE_ITHREADS /** Bug #15654 Even if op_clear does a pad_free for the target of the op, pad_free doesn't actually remove the sv that exists in the pad; instead it lives on. This results in that it could be reused as a target later on when the pad was reallocated. **/ if(o->op_targ) { pad_swipe(o->op_targ,1); o->op_targ = 0; } #endif break; case OP_DUMP: case OP_GOTO: case OP_NEXT: case OP_LAST: case OP_REDO: if (o->op_flags & (OPf_SPECIAL|OPf_STACKED|OPf_KIDS)) break; /* FALLTHROUGH */ case OP_TRANS: case OP_TRANSR: if ( (o->op_type == OP_TRANS || o->op_type == OP_TRANSR) && (o->op_private & OPpTRANS_USE_SVOP)) { #ifdef USE_ITHREADS if (cPADOPo->op_padix > 0) { pad_swipe(cPADOPo->op_padix, TRUE); cPADOPo->op_padix = 0; } #else SvREFCNT_dec(cSVOPo->op_sv); cSVOPo->op_sv = NULL; #endif } else { PerlMemShared_free(cPVOPo->op_pv); cPVOPo->op_pv = NULL; } break; case OP_SUBST: op_free(cPMOPo->op_pmreplrootu.op_pmreplroot); goto clear_pmop; case OP_SPLIT: if ( (o->op_private & OPpSPLIT_ASSIGN) /* @array = split */ && !(o->op_flags & OPf_STACKED)) /* @{expr} = split */ { if (o->op_private & OPpSPLIT_LEX) pad_free(cPMOPo->op_pmreplrootu.op_pmtargetoff); else #ifdef USE_ITHREADS pad_swipe(cPMOPo->op_pmreplrootu.op_pmtargetoff, TRUE); #else SvREFCNT_dec(MUTABLE_SV(cPMOPo->op_pmreplrootu.op_pmtargetgv)); #endif } /* FALLTHROUGH */ case OP_MATCH: case OP_QR: clear_pmop: if (!(cPMOPo->op_pmflags & PMf_CODELIST_PRIVATE)) op_free(cPMOPo->op_code_list); cPMOPo->op_code_list = NULL; forget_pmop(cPMOPo); cPMOPo->op_pmreplrootu.op_pmreplroot = NULL; /* we use the same protection as the "SAFE" version of the PM_ macros * here since sv_clean_all might release some PMOPs * after PL_regex_padav has been cleared * and the clearing of PL_regex_padav needs to * happen before sv_clean_all */ #ifdef USE_ITHREADS if(PL_regex_pad) { /* We could be in destruction */ const IV offset = (cPMOPo)->op_pmoffset; ReREFCNT_dec(PM_GETRE(cPMOPo)); PL_regex_pad[offset] = &PL_sv_undef; sv_catpvn_nomg(PL_regex_pad[0], (const char *)&offset, sizeof(offset)); } #else ReREFCNT_dec(PM_GETRE(cPMOPo)); PM_SETRE(cPMOPo, NULL); #endif break; case OP_ARGCHECK: PerlMemShared_free(cUNOP_AUXo->op_aux); break; case OP_MULTICONCAT: { UNOP_AUX_item *aux = cUNOP_AUXo->op_aux; /* aux[PERL_MULTICONCAT_IX_PLAIN_PV] and/or * aux[PERL_MULTICONCAT_IX_UTF8_PV] point to plain and/or * utf8 shared strings */ char *p1 = aux[PERL_MULTICONCAT_IX_PLAIN_PV].pv; char *p2 = aux[PERL_MULTICONCAT_IX_UTF8_PV].pv; if (p1) PerlMemShared_free(p1); if (p2 && p1 != p2) PerlMemShared_free(p2); PerlMemShared_free(aux); } break; case OP_MULTIDEREF: { UNOP_AUX_item *items = cUNOP_AUXo->op_aux; UV 
actions = items->uv; bool last = 0; bool is_hash = FALSE; while (!last) { switch (actions & MDEREF_ACTION_MASK) { case MDEREF_reload: actions = (++items)->uv; continue; case MDEREF_HV_padhv_helem: is_hash = TRUE; /* FALLTHROUGH */ case MDEREF_AV_padav_aelem: pad_free((++items)->pad_offset); goto do_elem; case MDEREF_HV_gvhv_helem: is_hash = TRUE; /* FALLTHROUGH */ case MDEREF_AV_gvav_aelem: #ifdef USE_ITHREADS S_op_clear_gv(aTHX_ o, &((++items)->pad_offset)); #else S_op_clear_gv(aTHX_ o, &((++items)->sv)); #endif goto do_elem; case MDEREF_HV_gvsv_vivify_rv2hv_helem: is_hash = TRUE; /* FALLTHROUGH */ case MDEREF_AV_gvsv_vivify_rv2av_aelem: #ifdef USE_ITHREADS S_op_clear_gv(aTHX_ o, &((++items)->pad_offset)); #else S_op_clear_gv(aTHX_ o, &((++items)->sv)); #endif goto do_vivify_rv2xv_elem; case MDEREF_HV_padsv_vivify_rv2hv_helem: is_hash = TRUE; /* FALLTHROUGH */ case MDEREF_AV_padsv_vivify_rv2av_aelem: pad_free((++items)->pad_offset); goto do_vivify_rv2xv_elem; case MDEREF_HV_pop_rv2hv_helem: case MDEREF_HV_vivify_rv2hv_helem: is_hash = TRUE; /* FALLTHROUGH */ do_vivify_rv2xv_elem: case MDEREF_AV_pop_rv2av_aelem: case MDEREF_AV_vivify_rv2av_aelem: do_elem: switch (actions & MDEREF_INDEX_MASK) { case MDEREF_INDEX_none: last = 1; break; case MDEREF_INDEX_const: if (is_hash) { #ifdef USE_ITHREADS /* see RT #15654 */ pad_swipe((++items)->pad_offset, 1); #else SvREFCNT_dec((++items)->sv); #endif } else items++; break; case MDEREF_INDEX_padsv: pad_free((++items)->pad_offset); break; case MDEREF_INDEX_gvsv: #ifdef USE_ITHREADS S_op_clear_gv(aTHX_ o, &((++items)->pad_offset)); #else S_op_clear_gv(aTHX_ o, &((++items)->sv)); #endif break; } if (actions & MDEREF_FLAG_last) last = 1; is_hash = FALSE; break; default: assert(0); last = 1; break; } /* switch */ actions >>= MDEREF_SHIFT; } /* while */ /* start of malloc is at op_aux[-1], where the length is * stored */ PerlMemShared_free(cUNOP_AUXo->op_aux - 1); } break; } if (o->op_targ > 0) { pad_free(o->op_targ); o->op_targ = 0; } } STATIC void S_cop_free(pTHX_ COP* cop) { PERL_ARGS_ASSERT_COP_FREE; CopFILE_free(cop); if (! specialWARN(cop->cop_warnings)) PerlMemShared_free(cop->cop_warnings); cophh_free(CopHINTHASH_get(cop)); if (PL_curcop == cop) PL_curcop = NULL; } STATIC void S_forget_pmop(pTHX_ PMOP *const o) { HV * const pmstash = PmopSTASH(o); PERL_ARGS_ASSERT_FORGET_PMOP; if (pmstash && !SvIS_FREED(pmstash) && SvMAGICAL(pmstash)) { MAGIC * const mg = mg_find((const SV *)pmstash, PERL_MAGIC_symtab); if (mg) { PMOP **const array = (PMOP**) mg->mg_ptr; U32 count = mg->mg_len / sizeof(PMOP**); U32 i = count; while (i--) { if (array[i] == o) { /* Found it. Move the entry at the end to overwrite it. */ array[i] = array[--count]; mg->mg_len = count * sizeof(PMOP**); /* Could realloc smaller at this point always, but probably not worth it. Probably worth free()ing if we're the last. 
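                       (Illustrative walk-through, not extra logic: if
                        mg_ptr held [P0, P1, P2] and o == P1, the code
                        above copies P2 into slot 1 and shrinks mg_len to
                        two entries; the swap-with-last removal is fine
                        because nothing relies on the ordering of this
                        array.)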
                        */
                    if(!count) {
                        Safefree(mg->mg_ptr);
                        mg->mg_ptr = NULL;
                    }
                    break;
                }
            }
        }
    }
    if (PL_curpm == o)
        PL_curpm = NULL;
}


STATIC void
S_find_and_forget_pmops(pTHX_ OP *o)
{
    OP* top_op = o;

    PERL_ARGS_ASSERT_FIND_AND_FORGET_PMOPS;

    while (1) {
        switch (o->op_type) {
        case OP_SUBST:
        case OP_SPLIT:
        case OP_MATCH:
        case OP_QR:
            forget_pmop((PMOP*)o);
        }

        if (o->op_flags & OPf_KIDS) {
            o = cUNOPo->op_first;
            continue;
        }

        while (1) {
            if (o == top_op)
                return; /* at top; no parents/siblings to try */
            if (OpHAS_SIBLING(o)) {
                o = o->op_sibparent; /* process next sibling */
                break;
            }
            o = o->op_sibparent; /*try parent's next sibling */
        }
    }
}

/*
=for apidoc op_null

Neutralizes an op when it is no longer needed, but is still linked to from
other ops.

=cut
*/

void
Perl_op_null(pTHX_ OP *o)
{
    dVAR;

    PERL_ARGS_ASSERT_OP_NULL;

    if (o->op_type == OP_NULL)
        return;
    op_clear(o);
    o->op_targ = o->op_type;
    OpTYPE_set(o, OP_NULL);
}

void
Perl_op_refcnt_lock(pTHX)
  PERL_TSA_ACQUIRE(PL_op_mutex)
{
#ifdef USE_ITHREADS
    dVAR;
#endif
    PERL_UNUSED_CONTEXT;
    OP_REFCNT_LOCK;
}

void
Perl_op_refcnt_unlock(pTHX)
  PERL_TSA_RELEASE(PL_op_mutex)
{
#ifdef USE_ITHREADS
    dVAR;
#endif
    PERL_UNUSED_CONTEXT;
    OP_REFCNT_UNLOCK;
}

/*
=for apidoc op_sibling_splice

A general function for editing the structure of an existing chain of
op_sibling nodes.  By analogy with the perl-level C<splice> function, allows
you to delete zero or more sequential nodes, replacing them with zero or
more different nodes.  Performs the necessary op_first/op_last housekeeping
on the parent node and op_sibling manipulation on the children.  The last
deleted node will be marked as the last node by updating the
op_sibling/op_sibparent or op_moresib field as appropriate.

Note that op_next is not manipulated, and nodes are not freed; that is the
responsibility of the caller.  It also won't create a new list op for an
empty list etc; use higher-level functions like op_append_elem() for that.

C<parent> is the parent node of the sibling chain.  It may be passed as
C<NULL> if the splicing doesn't affect the first or last op in the chain.

C<start> is the node preceding the first node to be spliced.  Node(s)
following it will be deleted, and ops will be inserted after it.  If it is
C<NULL>, the first node onwards is deleted, and nodes are inserted at the
beginning.

C<del_count> is the number of nodes to delete.  If zero, no nodes are
deleted.  If -1 or greater than or equal to the number of remaining kids,
all remaining kids are deleted.

C<insert> is the first of a chain of nodes to be inserted in place of the
nodes.  If C<NULL>, no nodes are inserted.

The head of the chain of deleted ops is returned, or C<NULL> if no ops were
deleted.

For example:

    action                       before      after         returns
    ------                       -----       -----         -------

                                  P           P
    splice(P, A, 2, X-Y-Z)        |           |             B-C
                                  A-B-C-D     A-X-Y-Z-D

                                  P           P
    splice(P, NULL, 1, X-Y)       |           |             A
                                  A-B-C-D     X-Y-B-C-D

                                  P           P
    splice(P, NULL, 3, NULL)      |           |             A-B-C
                                  A-B-C-D     D

                                  P           P
    splice(P, B, 0, X-Y)          |           |             NULL
                                  A-B-C-D     A-B-X-Y-C-D

For lower-level direct manipulation of C<op_sibparent> and C<op_moresib>,
see C<L</OpMORESIB_set>>, C<L</OpLASTSIB_set>>, C<L</OpMAYBESIB_set>>.
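As an illustrative sketch of intended usage (not code taken from this
file), detaching and then freeing the first child of C<parent> could be
written as:

    OP *kid = op_sibling_splice(parent, NULL, 1, NULL);
    if (kid)
        op_free(kid);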
=cut */ OP * Perl_op_sibling_splice(OP *parent, OP *start, int del_count, OP* insert) { OP *first; OP *rest; OP *last_del = NULL; OP *last_ins = NULL; if (start) first = OpSIBLING(start); else if (!parent) goto no_parent; else first = cLISTOPx(parent)->op_first; assert(del_count >= -1); if (del_count && first) { last_del = first; while (--del_count && OpHAS_SIBLING(last_del)) last_del = OpSIBLING(last_del); rest = OpSIBLING(last_del); OpLASTSIB_set(last_del, NULL); } else rest = first; if (insert) { last_ins = insert; while (OpHAS_SIBLING(last_ins)) last_ins = OpSIBLING(last_ins); OpMAYBESIB_set(last_ins, rest, NULL); } else insert = rest; if (start) { OpMAYBESIB_set(start, insert, NULL); } else { assert(parent); cLISTOPx(parent)->op_first = insert; if (insert) parent->op_flags |= OPf_KIDS; else parent->op_flags &= ~OPf_KIDS; } if (!rest) { /* update op_last etc */ U32 type; OP *lastop; if (!parent) goto no_parent; /* ought to use OP_CLASS(parent) here, but that can't handle * ex-foo OP_NULL ops. Also note that XopENTRYCUSTOM() can't * either */ type = parent->op_type; if (type == OP_CUSTOM) { dTHX; type = XopENTRYCUSTOM(parent, xop_class); } else { if (type == OP_NULL) type = parent->op_targ; type = PL_opargs[type] & OA_CLASS_MASK; } lastop = last_ins ? last_ins : start ? start : NULL; if ( type == OA_BINOP || type == OA_LISTOP || type == OA_PMOP || type == OA_LOOP ) cLISTOPx(parent)->op_last = lastop; if (lastop) OpLASTSIB_set(lastop, parent); } return last_del ? first : NULL; no_parent: Perl_croak_nocontext("panic: op_sibling_splice(): NULL parent"); } /* =for apidoc op_parent Returns the parent OP of C, if it has a parent. Returns C otherwise. =cut */ OP * Perl_op_parent(OP *o) { PERL_ARGS_ASSERT_OP_PARENT; while (OpHAS_SIBLING(o)) o = OpSIBLING(o); return o->op_sibparent; } /* replace the sibling following start with a new UNOP, which becomes * the parent of the original sibling; e.g. * * op_sibling_newUNOP(P, A, unop-args...) * * P P * | becomes | * A-B-C A-U-C * | * B * * where U is the new UNOP. * * parent and start args are the same as for op_sibling_splice(); * type and flags args are as newUNOP(). * * Returns the new UNOP. */ STATIC OP * S_op_sibling_newUNOP(pTHX_ OP *parent, OP *start, I32 type, I32 flags) { OP *kid, *newop; kid = op_sibling_splice(parent, start, 1, NULL); newop = newUNOP(type, flags, kid); op_sibling_splice(parent, start, 0, newop); return newop; } /* lowest-level newLOGOP-style function - just allocates and populates * the struct. Higher-level stuff should be done by S_new_logop() / * newLOGOP(). This function exists mainly to avoid op_first assignment * being spread throughout this file. */ LOGOP * Perl_alloc_LOGOP(pTHX_ I32 type, OP *first, OP* other) { dVAR; LOGOP *logop; OP *kid = first; NewOp(1101, logop, 1, LOGOP); OpTYPE_set(logop, type); logop->op_first = first; logop->op_other = other; if (first) logop->op_flags = OPf_KIDS; while (kid && OpHAS_SIBLING(kid)) kid = OpSIBLING(kid); if (kid) OpLASTSIB_set(kid, (OP*)logop); return logop; } /* Contextualizers */ /* =for apidoc op_contextualize Applies a syntactic context to an op tree representing an expression. C is the op tree, and C must be C, C, or C to specify the context to apply. The modified op tree is returned. 
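For instance (an illustrative sketch only, not code quoted from the core),
a caller that has just built the expression op for the right-hand side of a
scalar assignment might apply scalar context with:

    o = op_contextualize(o, G_SCALAR);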
=cut */ OP * Perl_op_contextualize(pTHX_ OP *o, I32 context) { PERL_ARGS_ASSERT_OP_CONTEXTUALIZE; switch (context) { case G_SCALAR: return scalar(o); case G_ARRAY: return list(o); case G_VOID: return scalarvoid(o); default: Perl_croak(aTHX_ "panic: op_contextualize bad context %ld", (long) context); } } /* =for apidoc op_linklist This function is the implementation of the L macro. It should not be called directly. =cut */ OP * Perl_op_linklist(pTHX_ OP *o) { OP **prevp; OP *kid; OP * top_op = o; PERL_ARGS_ASSERT_OP_LINKLIST; while (1) { /* Descend down the tree looking for any unprocessed subtrees to * do first */ if (!o->op_next) { if (o->op_flags & OPf_KIDS) { o = cUNOPo->op_first; continue; } o->op_next = o; /* leaf node; link to self initially */ } /* if we're at the top level, there either weren't any children * to process, or we've worked our way back to the top. */ if (o == top_op) return o->op_next; /* o is now processed. Next, process any sibling subtrees */ if (OpHAS_SIBLING(o)) { o = OpSIBLING(o); continue; } /* Done all the subtrees at this level. Go back up a level and * link the parent in with all its (processed) children. */ o = o->op_sibparent; assert(!o->op_next); prevp = &(o->op_next); kid = (o->op_flags & OPf_KIDS) ? cUNOPo->op_first : NULL; while (kid) { *prevp = kid->op_next; prevp = &(kid->op_next); kid = OpSIBLING(kid); } *prevp = o; } } static OP * S_scalarkids(pTHX_ OP *o) { if (o && o->op_flags & OPf_KIDS) { OP *kid; for (kid = cLISTOPo->op_first; kid; kid = OpSIBLING(kid)) scalar(kid); } return o; } STATIC OP * S_scalarboolean(pTHX_ OP *o) { PERL_ARGS_ASSERT_SCALARBOOLEAN; if ((o->op_type == OP_SASSIGN && cBINOPo->op_first->op_type == OP_CONST && !(cBINOPo->op_first->op_flags & OPf_SPECIAL)) || (o->op_type == OP_NOT && cUNOPo->op_first->op_type == OP_SASSIGN && cBINOPx(cUNOPo->op_first)->op_first->op_type == OP_CONST && !(cBINOPx(cUNOPo->op_first)->op_first->op_flags & OPf_SPECIAL))) { if (ckWARN(WARN_SYNTAX)) { const line_t oldline = CopLINE(PL_curcop); if (PL_parser && PL_parser->copline != NOLINE) { /* This ensures that warnings are reported at the first line of the conditional, not the last. */ CopLINE_set(PL_curcop, PL_parser->copline); } Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Found = in conditional, should be =="); CopLINE_set(PL_curcop, oldline); } } return scalar(o); } static SV * S_op_varname_subscript(pTHX_ const OP *o, int subscript_type) { assert(o); assert(o->op_type == OP_PADAV || o->op_type == OP_RV2AV || o->op_type == OP_PADHV || o->op_type == OP_RV2HV); { const char funny = o->op_type == OP_PADAV || o->op_type == OP_RV2AV ? 
'@' : '%'; if (o->op_type == OP_RV2AV || o->op_type == OP_RV2HV) { GV *gv; if (cUNOPo->op_first->op_type != OP_GV || !(gv = cGVOPx_gv(cUNOPo->op_first))) return NULL; return varname(gv, funny, 0, NULL, 0, subscript_type); } return varname(MUTABLE_GV(PL_compcv), funny, o->op_targ, NULL, 0, subscript_type); } } static SV * S_op_varname(pTHX_ const OP *o) { return S_op_varname_subscript(aTHX_ o, 1); } static void S_op_pretty(pTHX_ const OP *o, SV **retsv, const char **retpv) { /* or not so pretty :-) */ if (o->op_type == OP_CONST) { *retsv = cSVOPo_sv; if (SvPOK(*retsv)) { SV *sv = *retsv; *retsv = sv_newmortal(); pv_pretty(*retsv, SvPVX_const(sv), SvCUR(sv), 32, NULL, NULL, PERL_PV_PRETTY_DUMP |PERL_PV_ESCAPE_UNI_DETECT); } else if (!SvOK(*retsv)) *retpv = "undef"; } else *retpv = "..."; } static void S_scalar_slice_warning(pTHX_ const OP *o) { OP *kid; const bool h = o->op_type == OP_HSLICE || (o->op_type == OP_NULL && o->op_targ == OP_HSLICE); const char lbrack = h ? '{' : '['; const char rbrack = h ? '}' : ']'; SV *name; SV *keysv = NULL; /* just to silence compiler warnings */ const char *key = NULL; if (!(o->op_private & OPpSLICEWARNING)) return; if (PL_parser && PL_parser->error_count) /* This warning can be nonsensical when there is a syntax error. */ return; kid = cLISTOPo->op_first; kid = OpSIBLING(kid); /* get past pushmark */ /* weed out false positives: any ops that can return lists */ switch (kid->op_type) { case OP_BACKTICK: case OP_GLOB: case OP_READLINE: case OP_MATCH: case OP_RV2AV: case OP_EACH: case OP_VALUES: case OP_KEYS: case OP_SPLIT: case OP_LIST: case OP_SORT: case OP_REVERSE: case OP_ENTERSUB: case OP_CALLER: case OP_LSTAT: case OP_STAT: case OP_READDIR: case OP_SYSTEM: case OP_TMS: case OP_LOCALTIME: case OP_GMTIME: case OP_ENTEREVAL: return; } /* Don't warn if we have a nulled list either. */ if (kid->op_type == OP_NULL && kid->op_targ == OP_LIST) return; assert(OpSIBLING(kid)); name = S_op_varname(aTHX_ OpSIBLING(kid)); if (!name) /* XS module fiddling with the op tree */ return; S_op_pretty(aTHX_ kid, &keysv, &key); assert(SvPOK(name)); sv_chop(name,SvPVX(name)+1); if (key) /* diag_listed_as: Scalar value @%s[%s] better written as $%s[%s] */ Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Scalar value @%" SVf "%c%s%c better written as $%" SVf "%c%s%c", SVfARG(name), lbrack, key, rbrack, SVfARG(name), lbrack, key, rbrack); else /* diag_listed_as: Scalar value @%s[%s] better written as $%s[%s] */ Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "Scalar value @%" SVf "%c%" SVf "%c better written as $%" SVf "%c%" SVf "%c", SVfARG(name), lbrack, SVfARG(keysv), rbrack, SVfARG(name), lbrack, SVfARG(keysv), rbrack); } /* apply scalar context to the o subtree */ OP * Perl_scalar(pTHX_ OP *o) { OP * top_op = o; while (1) { OP *next_kid = NULL; /* what op (if any) to process next */ OP *kid; /* assumes no premature commitment */ if (!o || (PL_parser && PL_parser->error_count) || (o->op_flags & OPf_WANT) || o->op_type == OP_RETURN) { goto do_next; } o->op_flags = (o->op_flags & ~OPf_WANT) | OPf_WANT_SCALAR; switch (o->op_type) { case OP_REPEAT: scalar(cBINOPo->op_first); /* convert what initially looked like a list repeat into a * scalar repeat, e.g. 
$s = (1) x $n */ if (o->op_private & OPpREPEAT_DOLIST) { kid = cLISTOPx(cUNOPo->op_first)->op_first; assert(kid->op_type == OP_PUSHMARK); if (OpHAS_SIBLING(kid) && !OpHAS_SIBLING(OpSIBLING(kid))) { op_null(cLISTOPx(cUNOPo->op_first)->op_first); o->op_private &=~ OPpREPEAT_DOLIST; } } break; case OP_OR: case OP_AND: case OP_COND_EXPR: /* impose scalar context on everything except the condition */ next_kid = OpSIBLING(cUNOPo->op_first); break; default: if (o->op_flags & OPf_KIDS) next_kid = cUNOPo->op_first; /* do all kids */ break; /* the children of these ops are usually a list of statements, * except the leaves, whose first child is a corresponding enter */ case OP_SCOPE: case OP_LINESEQ: case OP_LIST: kid = cLISTOPo->op_first; goto do_kids; case OP_LEAVE: case OP_LEAVETRY: kid = cLISTOPo->op_first; scalar(kid); kid = OpSIBLING(kid); do_kids: while (kid) { OP *sib = OpSIBLING(kid); /* Apply void context to all kids except the last, which * is scalar (ignoring a trailing ex-nextstate in determining * if it's the last kid). E.g. * $scalar = do { void; void; scalar } * Except that 'when's are always scalar, e.g. * $scalar = do { given(..) { * when (..) { scalar } * when (..) { scalar } * ... * }} */ if (!sib || ( !OpHAS_SIBLING(sib) && sib->op_type == OP_NULL && ( sib->op_targ == OP_NEXTSTATE || sib->op_targ == OP_DBSTATE ) ) ) { /* tail call optimise calling scalar() on the last kid */ next_kid = kid; goto do_next; } else if (kid->op_type == OP_LEAVEWHEN) scalar(kid); else scalarvoid(kid); kid = sib; } NOT_REACHED; /* NOTREACHED */ break; case OP_SORT: Perl_ck_warner(aTHX_ packWARN(WARN_VOID), "Useless use of sort in scalar context"); break; case OP_KVHSLICE: case OP_KVASLICE: { /* Warn about scalar context */ const char lbrack = o->op_type == OP_KVHSLICE ? '{' : '['; const char rbrack = o->op_type == OP_KVHSLICE ? '}' : ']'; SV *name; SV *keysv; const char *key = NULL; /* This warning can be nonsensical when there is a syntax error. */ if (PL_parser && PL_parser->error_count) break; if (!ckWARN(WARN_SYNTAX)) break; kid = cLISTOPo->op_first; kid = OpSIBLING(kid); /* get past pushmark */ assert(OpSIBLING(kid)); name = S_op_varname(aTHX_ OpSIBLING(kid)); if (!name) /* XS module fiddling with the op tree */ break; S_op_pretty(aTHX_ kid, &keysv, &key); assert(SvPOK(name)); sv_chop(name,SvPVX(name)+1); if (key) /* diag_listed_as: %%s[%s] in scalar context better written as $%s[%s] */ Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "%%%" SVf "%c%s%c in scalar context better written " "as $%" SVf "%c%s%c", SVfARG(name), lbrack, key, rbrack, SVfARG(name), lbrack, key, rbrack); else /* diag_listed_as: %%s[%s] in scalar context better written as $%s[%s] */ Perl_warner(aTHX_ packWARN(WARN_SYNTAX), "%%%" SVf "%c%" SVf "%c in scalar context better " "written as $%" SVf "%c%" SVf "%c", SVfARG(name), lbrack, SVfARG(keysv), rbrack, SVfARG(name), lbrack, SVfARG(keysv), rbrack); } } /* switch */ /* If next_kid is set, someone in the code above wanted us to process * that kid and all its remaining siblings. 
Otherwise, work our way * back up the tree */ do_next: while (!next_kid) { if (o == top_op) return top_op; /* at top; no parents/siblings to try */ if (OpHAS_SIBLING(o)) next_kid = o->op_sibparent; else { o = o->op_sibparent; /*try parent's next sibling */ switch (o->op_type) { case OP_SCOPE: case OP_LINESEQ: case OP_LIST: case OP_LEAVE: case OP_LEAVETRY: /* should really restore PL_curcop to its old value, but * setting it to PL_compiling is better than do nothing */ PL_curcop = &PL_compiling; } } } o = next_kid; } /* while */ } /* apply void context to the optree arg */ OP * Perl_scalarvoid(pTHX_ OP *arg) { dVAR; OP *kid; SV* sv; OP *o = arg; PERL_ARGS_ASSERT_SCALARVOID; while (1) { U8 want; SV *useless_sv = NULL; const char* useless = NULL; OP * next_kid = NULL; if (o->op_type == OP_NEXTSTATE || o->op_type == OP_DBSTATE || (o->op_type == OP_NULL && (o->op_targ == OP_NEXTSTATE || o->op_targ == OP_DBSTATE))) PL_curcop = (COP*)o; /* for warning below */ /* assumes no premature commitment */ want = o->op_flags & OPf_WANT; if ((want && want != OPf_WANT_SCALAR) || (PL_parser && PL_parser->error_count) || o->op_type == OP_RETURN || o->op_type == OP_REQUIRE || o->op_type == OP_LEAVEWHEN) { goto get_next_op; } if ((o->op_private & OPpTARGET_MY) && (PL_opargs[o->op_type] & OA_TARGLEX))/* OPp share the meaning */ { /* newASSIGNOP has already applied scalar context, which we leave, as if this op is inside SASSIGN. */ goto get_next_op; } o->op_flags = (o->op_flags & ~OPf_WANT) | OPf_WANT_VOID; switch (o->op_type) { default: if (!(PL_opargs[o->op_type] & OA_FOLDCONST)) break; /* FALLTHROUGH */ case OP_REPEAT: if (o->op_flags & OPf_STACKED) break; if (o->op_type == OP_REPEAT) scalar(cBINOPo->op_first); goto func_ops; case OP_CONCAT: if ((o->op_flags & OPf_STACKED) && !(o->op_private & OPpCONCAT_NESTED)) break; goto func_ops; case OP_SUBSTR: if (o->op_private == 4) break; /* FALLTHROUGH */ case OP_WANTARRAY: case OP_GV: case OP_SMARTMATCH: case OP_AV2ARYLEN: case OP_REF: case OP_REFGEN: case OP_SREFGEN: case OP_DEFINED: case OP_HEX: case OP_OCT: case OP_LENGTH: case OP_VEC: case OP_INDEX: case OP_RINDEX: case OP_SPRINTF: case OP_KVASLICE: case OP_KVHSLICE: case OP_UNPACK: case OP_PACK: case OP_JOIN: case OP_LSLICE: case OP_ANONLIST: case OP_ANONHASH: case OP_SORT: case OP_REVERSE: case OP_RANGE: case OP_FLIP: case OP_FLOP: case OP_CALLER: case OP_FILENO: case OP_EOF: case OP_TELL: case OP_GETSOCKNAME: case OP_GETPEERNAME: case OP_READLINK: case OP_TELLDIR: case OP_GETPPID: case OP_GETPGRP: case OP_GETPRIORITY: case OP_TIME: case OP_TMS: case OP_LOCALTIME: case OP_GMTIME: case OP_GHBYNAME: case OP_GHBYADDR: case OP_GHOSTENT: case OP_GNBYNAME: case OP_GNBYADDR: case OP_GNETENT: case OP_GPBYNAME: case OP_GPBYNUMBER: case OP_GPROTOENT: case OP_GSBYNAME: case OP_GSBYPORT: case OP_GSERVENT: case OP_GPWNAM: case OP_GPWUID: case OP_GGRNAM: case OP_GGRGID: case OP_GETLOGIN: case OP_PROTOTYPE: case OP_RUNCV: func_ops: useless = OP_DESC(o); break; case OP_GVSV: case OP_PADSV: case OP_PADAV: case OP_PADHV: case OP_PADANY: case OP_AELEM: case OP_AELEMFAST: case OP_AELEMFAST_LEX: case OP_ASLICE: case OP_HELEM: case OP_HSLICE: if (!(o->op_private & (OPpLVAL_INTRO|OPpOUR_INTRO))) /* Otherwise it's "Useless use of grep iterator" */ useless = OP_DESC(o); break; case OP_SPLIT: if (!(o->op_private & OPpSPLIT_ASSIGN)) useless = OP_DESC(o); break; case OP_NOT: kid = cUNOPo->op_first; if (kid->op_type != OP_MATCH && kid->op_type != OP_SUBST && kid->op_type != OP_TRANS && kid->op_type != OP_TRANSR) { goto func_ops; } useless 
= "negative pattern binding (!~)"; break; case OP_SUBST: if (cPMOPo->op_pmflags & PMf_NONDESTRUCT) useless = "non-destructive substitution (s///r)"; break; case OP_TRANSR: useless = "non-destructive transliteration (tr///r)"; break; case OP_RV2GV: case OP_RV2SV: case OP_RV2AV: case OP_RV2HV: if (!(o->op_private & (OPpLVAL_INTRO|OPpOUR_INTRO)) && (!OpHAS_SIBLING(o) || OpSIBLING(o)->op_type != OP_READLINE)) useless = "a variable"; break; case OP_CONST: sv = cSVOPo_sv; if (cSVOPo->op_private & OPpCONST_STRICT) no_bareword_allowed(o); else { if (ckWARN(WARN_VOID)) { NV nv; /* don't warn on optimised away booleans, eg * use constant Foo, 5; Foo || print; */ if (cSVOPo->op_private & OPpCONST_SHORTCIRCUIT) useless = NULL; /* the constants 0 and 1 are permitted as they are conventionally used as dummies in constructs like 1 while some_condition_with_side_effects; */ else if (SvNIOK(sv) && ((nv = SvNV(sv)) == 0.0 || nv == 1.0)) useless = NULL; else if (SvPOK(sv)) { SV * const dsv = newSVpvs(""); useless_sv = Perl_newSVpvf(aTHX_ "a constant (%s)", pv_pretty(dsv, SvPVX_const(sv), SvCUR(sv), 32, NULL, NULL, PERL_PV_PRETTY_DUMP | PERL_PV_ESCAPE_NOCLEAR | PERL_PV_ESCAPE_UNI_DETECT)); SvREFCNT_dec_NN(dsv); } else if (SvOK(sv)) { useless_sv = Perl_newSVpvf(aTHX_ "a constant (%" SVf ")", SVfARG(sv)); } else useless = "a constant (undef)"; } } op_null(o); /* don't execute or even remember it */ break; case OP_POSTINC: OpTYPE_set(o, OP_PREINC); /* pre-increment is faster */ break; case OP_POSTDEC: OpTYPE_set(o, OP_PREDEC); /* pre-decrement is faster */ break; case OP_I_POSTINC: OpTYPE_set(o, OP_I_PREINC); /* pre-increment is faster */ break; case OP_I_POSTDEC: OpTYPE_set(o, OP_I_PREDEC); /* pre-decrement is faster */ break; case OP_SASSIGN: { OP *rv2gv; UNOP *refgen, *rv2cv; LISTOP *exlist; if ((o->op_private & ~OPpASSIGN_BACKWARDS) != 2) break; rv2gv = ((BINOP *)o)->op_last; if (!rv2gv || rv2gv->op_type != OP_RV2GV) break; refgen = (UNOP *)((BINOP *)o)->op_first; if (!refgen || (refgen->op_type != OP_REFGEN && refgen->op_type != OP_SREFGEN)) break; exlist = (LISTOP *)refgen->op_first; if (!exlist || exlist->op_type != OP_NULL || exlist->op_targ != OP_LIST) break; if (exlist->op_first->op_type != OP_PUSHMARK && exlist->op_first != exlist->op_last) break; rv2cv = (UNOP*)exlist->op_last; if (rv2cv->op_type != OP_RV2CV) break; assert ((rv2gv->op_private & OPpDONT_INIT_GV) == 0); assert ((o->op_private & OPpASSIGN_CV_TO_GV) == 0); assert ((rv2cv->op_private & OPpMAY_RETURN_CONSTANT) == 0); o->op_private |= OPpASSIGN_CV_TO_GV; rv2gv->op_private |= OPpDONT_INIT_GV; rv2cv->op_private |= OPpMAY_RETURN_CONSTANT; break; } case OP_AASSIGN: { inplace_aassign(o); break; } case OP_OR: case OP_AND: kid = cLOGOPo->op_first; if (kid->op_type == OP_NOT && (kid->op_flags & OPf_KIDS)) { if (o->op_type == OP_AND) { OpTYPE_set(o, OP_OR); } else { OpTYPE_set(o, OP_AND); } op_null(kid); } /* FALLTHROUGH */ case OP_DOR: case OP_COND_EXPR: case OP_ENTERGIVEN: case OP_ENTERWHEN: next_kid = OpSIBLING(cUNOPo->op_first); break; case OP_NULL: if (o->op_flags & OPf_STACKED) break; /* FALLTHROUGH */ case OP_NEXTSTATE: case OP_DBSTATE: case OP_ENTERTRY: case OP_ENTER: if (!(o->op_flags & OPf_KIDS)) break; /* FALLTHROUGH */ case OP_SCOPE: case OP_LEAVE: case OP_LEAVETRY: case OP_LEAVELOOP: case OP_LINESEQ: case OP_LEAVEGIVEN: case OP_LEAVEWHEN: kids: next_kid = cLISTOPo->op_first; break; case OP_LIST: /* If the first kid after pushmark is something that the padrange optimisation would reject, then null the list and the pushmark. 
*/ if ((kid = cLISTOPo->op_first)->op_type == OP_PUSHMARK && ( !(kid = OpSIBLING(kid)) || ( kid->op_type != OP_PADSV && kid->op_type != OP_PADAV && kid->op_type != OP_PADHV) || kid->op_private & ~OPpLVAL_INTRO || !(kid = OpSIBLING(kid)) || ( kid->op_type != OP_PADSV && kid->op_type != OP_PADAV && kid->op_type != OP_PADHV) || kid->op_private & ~OPpLVAL_INTRO) ) { op_null(cUNOPo->op_first); /* NULL the pushmark */ op_null(o); /* NULL the list */ } goto kids; case OP_ENTEREVAL: scalarkids(o); break; case OP_SCALAR: scalar(o); break; } if (useless_sv) { /* mortalise it, in case warnings are fatal. */ Perl_ck_warner(aTHX_ packWARN(WARN_VOID), "Useless use of %" SVf " in void context", SVfARG(sv_2mortal(useless_sv))); } else if (useless) { Perl_ck_warner(aTHX_ packWARN(WARN_VOID), "Useless use of %s in void context", useless); } get_next_op: /* if a kid hasn't been nominated to process, continue with the * next sibling, or if no siblings left, go back to the parent's * siblings and so on */ while (!next_kid) { if (o == arg) return arg; /* at top; no parents/siblings to try */ if (OpHAS_SIBLING(o)) next_kid = o->op_sibparent; else o = o->op_sibparent; /*try parent's next sibling */ } o = next_kid; } return arg; } static OP * S_listkids(pTHX_ OP *o) { if (o && o->op_flags & OPf_KIDS) { OP *kid; for (kid = cLISTOPo->op_first; kid; kid = OpSIBLING(kid)) list(kid); } return o; } /* apply list context to the o subtree */ OP * Perl_list(pTHX_ OP *o) { OP * top_op = o; while (1) { OP *next_kid = NULL; /* what op (if any) to process next */ OP *kid; /* assumes no premature commitment */ if (!o || (o->op_flags & OPf_WANT) || (PL_parser && PL_parser->error_count) || o->op_type == OP_RETURN) { goto do_next; } if ((o->op_private & OPpTARGET_MY) && (PL_opargs[o->op_type] & OA_TARGLEX))/* OPp share the meaning */ { goto do_next; /* As if inside SASSIGN */ } o->op_flags = (o->op_flags & ~OPf_WANT) | OPf_WANT_LIST; switch (o->op_type) { case OP_REPEAT: if (o->op_private & OPpREPEAT_DOLIST && !(o->op_flags & OPf_STACKED)) { list(cBINOPo->op_first); kid = cBINOPo->op_last; /* optimise away (.....) x 1 */ if (kid->op_type == OP_CONST && SvIOK(kSVOP_sv) && SvIVX(kSVOP_sv) == 1) { op_null(o); /* repeat */ op_null(cUNOPx(cBINOPo->op_first)->op_first);/* pushmark */ /* const (rhs): */ op_free(op_sibling_splice(o, cBINOPo->op_first, 1, NULL)); } } break; case OP_OR: case OP_AND: case OP_COND_EXPR: /* impose list context on everything except the condition */ next_kid = OpSIBLING(cUNOPo->op_first); break; default: if (!(o->op_flags & OPf_KIDS)) break; /* possibly flatten 1..10 into a constant array */ if (!o->op_next && cUNOPo->op_first->op_type == OP_FLOP) { list(cBINOPo->op_first); gen_constant_list(o); goto do_next; } next_kid = cUNOPo->op_first; /* do all kids */ break; case OP_LIST: if (cLISTOPo->op_first->op_type == OP_PUSHMARK) { op_null(cUNOPo->op_first); /* NULL the pushmark */ op_null(o); /* NULL the list */ } if (o->op_flags & OPf_KIDS) next_kid = cUNOPo->op_first; /* do all kids */ break; /* the children of these ops are usually a list of statements, * except the leaves, whose first child is a corresponding enter */ case OP_SCOPE: case OP_LINESEQ: kid = cLISTOPo->op_first; goto do_kids; case OP_LEAVE: case OP_LEAVETRY: kid = cLISTOPo->op_first; list(kid); kid = OpSIBLING(kid); do_kids: while (kid) { OP *sib = OpSIBLING(kid); /* Apply void context to all kids except the last, which * is list. E.g. * @a = do { void; void; list } * Except that 'when's are always list context, e.g. * @a = do { given(..) 
{ * when (..) { list } * when (..) { list } * ... * }} */ if (!sib) { /* tail call optimise calling list() on the last kid */ next_kid = kid; goto do_next; } else if (kid->op_type == OP_LEAVEWHEN) list(kid); else scalarvoid(kid); kid = sib; } NOT_REACHED; /* NOTREACHED */ break; } /* If next_kid is set, someone in the code above wanted us to process * that kid and all its remaining siblings. Otherwise, work our way * back up the tree */ do_next: while (!next_kid) { if (o == top_op) return top_op; /* at top; no parents/siblings to try */ if (OpHAS_SIBLING(o)) next_kid = o->op_sibparent; else { o = o->op_sibparent; /*try parent's next sibling */ switch (o->op_type) { case OP_SCOPE: case OP_LINESEQ: case OP_LIST: case OP_LEAVE: case OP_LEAVETRY: /* should really restore PL_curcop to its old value, but * setting it to PL_compiling is better than do nothing */ PL_curcop = &PL_compiling; } } } o = next_kid; } /* while */ } static OP * S_scalarseq(pTHX_ OP *o) { if (o) { const OPCODE type = o->op_type; if (type == OP_LINESEQ || type == OP_SCOPE || type == OP_LEAVE || type == OP_LEAVETRY) { OP *kid, *sib; for (kid = cLISTOPo->op_first; kid; kid = sib) { if ((sib = OpSIBLING(kid)) && ( OpHAS_SIBLING(sib) || sib->op_type != OP_NULL || ( sib->op_targ != OP_NEXTSTATE && sib->op_targ != OP_DBSTATE ))) { scalarvoid(kid); } } PL_curcop = &PL_compiling; } o->op_flags &= ~OPf_PARENS; if (PL_hints & HINT_BLOCK_SCOPE) o->op_flags |= OPf_PARENS; } else o = newOP(OP_STUB, 0); return o; } STATIC OP * S_modkids(pTHX_ OP *o, I32 type) { if (o && o->op_flags & OPf_KIDS) { OP *kid; for (kid = cLISTOPo->op_first; kid; kid = OpSIBLING(kid)) op_lvalue(kid, type); } return o; } /* for a helem/hslice/kvslice, if its a fixed hash, croak on invalid * const fields. Also, convert CONST keys to HEK-in-SVs. * rop is the op that retrieves the hash; * key_op is the first key * real if false, only check (and possibly croak); don't update op */ STATIC void S_check_hash_fields_and_hekify(pTHX_ UNOP *rop, SVOP *key_op, int real) { PADNAME *lexname; GV **fields; bool check_fields; /* find the padsv corresponding to $lex->{} or @{$lex}{} */ if (rop) { if (rop->op_first->op_type == OP_PADSV) /* @$hash{qw(keys here)} */ rop = (UNOP*)rop->op_first; else { /* @{$hash}{qw(keys here)} */ if (rop->op_first->op_type == OP_SCOPE && cLISTOPx(rop->op_first)->op_last->op_type == OP_PADSV) { rop = (UNOP*)cLISTOPx(rop->op_first)->op_last; } else rop = NULL; } } lexname = NULL; /* just to silence compiler warnings */ fields = NULL; /* just to silence compiler warnings */ check_fields = rop && (lexname = padnamelist_fetch(PL_comppad_name, rop->op_targ), SvPAD_TYPED(lexname)) && (fields = (GV**)hv_fetchs(PadnameTYPE(lexname), "FIELDS", FALSE)) && isGV(*fields) && GvHV(*fields); for (; key_op; key_op = (SVOP*)OpSIBLING(key_op)) { SV **svp, *sv; if (key_op->op_type != OP_CONST) continue; svp = cSVOPx_svp(key_op); /* make sure it's not a bareword under strict subs */ if (key_op->op_private & OPpCONST_BARE && key_op->op_private & OPpCONST_STRICT) { no_bareword_allowed((OP*)key_op); } /* Make the CONST have a shared SV */ if ( !SvIsCOW_shared_hash(sv = *svp) && SvTYPE(sv) < SVt_PVMG && SvOK(sv) && !SvROK(sv) && real) { SSize_t keylen; const char * const key = SvPV_const(sv, *(STRLEN*)&keylen); SV *nsv = newSVpvn_share(key, SvUTF8(sv) ? 
-keylen : keylen, 0); SvREFCNT_dec_NN(sv); *svp = nsv; } if ( check_fields && !hv_fetch_ent(GvHV(*fields), *svp, FALSE, 0)) { Perl_croak(aTHX_ "No such class field \"%" SVf "\" " "in variable %" PNf " of type %" HEKf, SVfARG(*svp), PNfARG(lexname), HEKfARG(HvNAME_HEK(PadnameTYPE(lexname)))); } } } /* info returned by S_sprintf_is_multiconcatable() */ struct sprintf_ismc_info { SSize_t nargs; /* num of args to sprintf (not including the format) */ char *start; /* start of raw format string */ char *end; /* bytes after end of raw format string */ STRLEN total_len; /* total length (in bytes) of format string, not including '%s' and half of '%%' */ STRLEN variant; /* number of bytes by which total_len_p would grow if upgraded to utf8 */ bool utf8; /* whether the format is utf8 */ }; /* is the OP_SPRINTF o suitable for converting into a multiconcat op? * i.e. its format argument is a const string with only '%s' and '%%' * formats, and the number of args is known, e.g. * sprintf "a=%s f=%s", $a[0], scalar(f()); * but not * sprintf "i=%d a=%s f=%s", $i, @a, f(); * * If successful, the sprintf_ismc_info struct pointed to by info will be * populated. */ STATIC bool S_sprintf_is_multiconcatable(pTHX_ OP *o,struct sprintf_ismc_info *info) { OP *pm, *constop, *kid; SV *sv; char *s, *e, *p; SSize_t nargs, nformats; STRLEN cur, total_len, variant; bool utf8; /* if sprintf's behaviour changes, die here so that someone * can decide whether to enhance this function or skip optimising * under those new circumstances */ assert(!(o->op_flags & OPf_STACKED)); assert(!(PL_opargs[OP_SPRINTF] & OA_TARGLEX)); assert(!(o->op_private & ~OPpARG4_MASK)); pm = cUNOPo->op_first; if (pm->op_type != OP_PUSHMARK) /* weird coreargs stuff */ return FALSE; constop = OpSIBLING(pm); if (!constop || constop->op_type != OP_CONST) return FALSE; sv = cSVOPx_sv(constop); if (SvMAGICAL(sv) || !SvPOK(sv)) return FALSE; s = SvPV(sv, cur); e = s + cur; /* Scan format for %% and %s and work out how many %s there are. * Abandon if other format types are found. */ nformats = 0; total_len = 0; variant = 0; for (p = s; p < e; p++) { if (*p != '%') { total_len++; if (!UTF8_IS_INVARIANT(*p)) variant++; continue; } p++; if (p >= e) return FALSE; /* lone % at end gives "Invalid conversion" */ if (*p == '%') total_len++; else if (*p == 's') nformats++; else return FALSE; } if (!nformats || nformats > PERL_MULTICONCAT_MAXARG) return FALSE; utf8 = cBOOL(SvUTF8(sv)); if (utf8) variant = 0; /* scan args; they must all be in scalar cxt */ nargs = 0; kid = OpSIBLING(constop); while (kid) { if ((kid->op_flags & OPf_WANT) != OPf_WANT_SCALAR) return FALSE; nargs++; kid = OpSIBLING(kid); } if (nargs != nformats) return FALSE; /* e.g. sprintf("%s%s", $a); */ info->nargs = nargs; info->start = s; info->end = e; info->total_len = total_len; info->variant = variant; info->utf8 = utf8; return TRUE; } /* S_maybe_multiconcat(): * * given an OP_STRINGIFY, OP_SASSIGN, OP_CONCAT or OP_SPRINTF op, possibly * convert it (and its children) into an OP_MULTICONCAT. See the code * comments just before pp_multiconcat() for the full details of what * OP_MULTICONCAT supports. * * Basically we're looking for an optree with a chain of OP_CONCATS down * the LHS (or an OP_SPRINTF), with possibly an OP_SASSIGN, and/or * OP_STRINGIFY, and/or OP_CONCAT acting as '.=' at its head, e.g. 
* * $x = "$a$b-$c" * * looks like * * SASSIGN * | * STRINGIFY -- PADSV[$x] * | * | * ex-PUSHMARK -- CONCAT/S * | * CONCAT/S -- PADSV[$d] * | * CONCAT -- CONST["-"] * | * PADSV[$a] -- PADSV[$b] * * Note that at this stage the OP_SASSIGN may have already been optimised * away with OPpTARGET_MY set on the OP_STRINGIFY or OP_CONCAT. */ STATIC void S_maybe_multiconcat(pTHX_ OP *o) { dVAR; OP *lastkidop; /* the right-most of any kids unshifted onto o */ OP *topop; /* the top-most op in the concat tree (often equals o, unless there are assign/stringify ops above it */ OP *parentop; /* the parent op of topop (or itself if no parent) */ OP *targmyop; /* the op (if any) with the OPpTARGET_MY flag */ OP *targetop; /* the op corresponding to target=... or target.=... */ OP *stringop; /* the OP_STRINGIFY op, if any */ OP *nextop; /* used for recreating the op_next chain without consts */ OP *kid; /* general-purpose op pointer */ UNOP_AUX_item *aux; UNOP_AUX_item *lenp; char *const_str, *p; struct sprintf_ismc_info sprintf_info; /* store info about each arg in args[]; * toparg is the highest used slot; argp is a general * pointer to args[] slots */ struct { void *p; /* initially points to const sv (or null for op); later, set to SvPV(constsv), with ... */ STRLEN len; /* ... len set to SvPV(..., len) */ } *argp, *toparg, args[PERL_MULTICONCAT_MAXARG*2 + 1]; SSize_t nargs = 0; SSize_t nconst = 0; SSize_t nadjconst = 0; /* adjacent consts - may be demoted to args */ STRLEN variant; bool utf8 = FALSE; bool kid_is_last = FALSE; /* most args will be the RHS kid of a concat op; the last-processed arg will the LHS of one, as args are processed in reverse order */ U8 stacked_last = 0; /* whether the last seen concat op was STACKED */ STRLEN total_len = 0; /* sum of the lengths of the const segments */ U8 flags = 0; /* what will become the op_flags and ... */ U8 private_flags = 0; /* ... op_private of the multiconcat op */ bool is_sprintf = FALSE; /* we're optimising an sprintf */ bool is_targable = FALSE; /* targetop is an OPpTARGET_MY candidate */ bool prev_was_const = FALSE; /* previous arg was a const */ /* ----------------------------------------------------------------- * Phase 1: * * Examine the optree non-destructively to determine whether it's * suitable to be converted into an OP_MULTICONCAT. Accumulate * information about the optree in args[]. */ argp = args; targmyop = NULL; targetop = NULL; stringop = NULL; topop = o; parentop = o; assert( o->op_type == OP_SASSIGN || o->op_type == OP_CONCAT || o->op_type == OP_SPRINTF || o->op_type == OP_STRINGIFY); Zero(&sprintf_info, 1, struct sprintf_ismc_info); /* first see if, at the top of the tree, there is an assign, * append and/or stringify */ if (topop->op_type == OP_SASSIGN) { /* expr = ..... */ if (o->op_ppaddr != PL_ppaddr[OP_SASSIGN]) return; if (o->op_private & (OPpASSIGN_BACKWARDS|OPpASSIGN_CV_TO_GV)) return; assert(!(o->op_private & ~OPpARG2_MASK)); /* barf on unknown flags */ parentop = topop; topop = cBINOPo->op_first; targetop = OpSIBLING(topop); if (!targetop) /* probably some sort of syntax error */ return; } else if ( topop->op_type == OP_CONCAT && (topop->op_flags & OPf_STACKED) && (!(topop->op_private & OPpCONCAT_NESTED)) ) { /* expr .= ..... */ /* OPpTARGET_MY shouldn't be able to be set here. 
If it is, * decide what to do about it */ assert(!(o->op_private & OPpTARGET_MY)); /* barf on unknown flags */ assert(!(o->op_private & ~(OPpARG2_MASK|OPpTARGET_MY))); private_flags |= OPpMULTICONCAT_APPEND; targetop = cBINOPo->op_first; parentop = topop; topop = OpSIBLING(targetop); /* $x .= gets optimised to rcatline instead */ if (topop->op_type == OP_READLINE) return; } if (targetop) { /* Can targetop (the LHS) if it's a padsv, be be optimised * away and use OPpTARGET_MY instead? */ if ( (targetop->op_type == OP_PADSV) && !(targetop->op_private & OPpDEREF) && !(targetop->op_private & OPpPAD_STATE) /* we don't support 'my $x .= ...' */ && ( o->op_type == OP_SASSIGN || !(targetop->op_private & OPpLVAL_INTRO)) ) is_targable = TRUE; } if (topop->op_type == OP_STRINGIFY) { if (topop->op_ppaddr != PL_ppaddr[OP_STRINGIFY]) return; stringop = topop; /* barf on unknown flags */ assert(!(o->op_private & ~(OPpARG4_MASK|OPpTARGET_MY))); if ((topop->op_private & OPpTARGET_MY)) { if (o->op_type == OP_SASSIGN) return; /* can't have two assigns */ targmyop = topop; } private_flags |= OPpMULTICONCAT_STRINGIFY; parentop = topop; topop = cBINOPx(topop)->op_first; assert(OP_TYPE_IS_OR_WAS_NN(topop, OP_PUSHMARK)); topop = OpSIBLING(topop); } if (topop->op_type == OP_SPRINTF) { if (topop->op_ppaddr != PL_ppaddr[OP_SPRINTF]) return; if (S_sprintf_is_multiconcatable(aTHX_ topop, &sprintf_info)) { nargs = sprintf_info.nargs; total_len = sprintf_info.total_len; variant = sprintf_info.variant; utf8 = sprintf_info.utf8; is_sprintf = TRUE; private_flags |= OPpMULTICONCAT_FAKE; toparg = argp; /* we have an sprintf op rather than a concat optree. * Skip most of the code below which is associated with * processing that optree. We also skip phase 2, determining * whether its cost effective to optimise, since for sprintf, * multiconcat is *always* faster */ goto create_aux; } /* note that even if the sprintf itself isn't multiconcatable, * the expression as a whole may be, e.g. in * $x .= sprintf("%d",...) * the sprintf op will be left as-is, but the concat/S op may * be upgraded to multiconcat */ } else if (topop->op_type == OP_CONCAT) { if (topop->op_ppaddr != PL_ppaddr[OP_CONCAT]) return; if ((topop->op_private & OPpTARGET_MY)) { if (o->op_type == OP_SASSIGN || targmyop) return; /* can't have two assigns */ targmyop = topop; } } /* Is it safe to convert a sassign/stringify/concat op into * a multiconcat? */ assert((PL_opargs[OP_SASSIGN] & OA_CLASS_MASK) == OA_BINOP); assert((PL_opargs[OP_CONCAT] & OA_CLASS_MASK) == OA_BINOP); assert((PL_opargs[OP_STRINGIFY] & OA_CLASS_MASK) == OA_LISTOP); assert((PL_opargs[OP_SPRINTF] & OA_CLASS_MASK) == OA_LISTOP); STATIC_ASSERT_STMT( STRUCT_OFFSET(BINOP, op_last) == STRUCT_OFFSET(UNOP_AUX, op_aux)); STATIC_ASSERT_STMT( STRUCT_OFFSET(LISTOP, op_last) == STRUCT_OFFSET(UNOP_AUX, op_aux)); /* Now scan the down the tree looking for a series of * CONCAT/OPf_STACKED ops on the LHS (with the last one not * stacked). For example this tree: * * | * CONCAT/STACKED * | * CONCAT/STACKED -- EXPR5 * | * CONCAT/STACKED -- EXPR4 * | * CONCAT -- EXPR3 * | * EXPR1 -- EXPR2 * * corresponds to an expression like * * (EXPR1 . EXPR2 . EXPR3 . EXPR4 . EXPR5) * * Record info about each EXPR in args[]: in particular, whether it is * a stringifiable OP_CONST and if so what the const sv is. * * The reason why the last concat can't be STACKED is the difference * between * * ((($a .= $a) .= $a) .= $a) .= $a * * and * $a . $a . $a . $a . 
$a * * The main difference between the optrees for those two constructs * is the presence of the last STACKED. As well as modifying $a, * the former sees the changed $a between each concat, so if $s is * initially 'a', the first returns 'a' x 16, while the latter returns * 'a' x 5. And pp_multiconcat can't handle that kind of thing. */ kid = topop; for (;;) { OP *argop; SV *sv; bool last = FALSE; if ( kid->op_type == OP_CONCAT && !kid_is_last ) { OP *k1, *k2; k1 = cUNOPx(kid)->op_first; k2 = OpSIBLING(k1); /* shouldn't happen except maybe after compile err? */ if (!k2) return; /* avoid turning (A . B . ($lex = C) ...) into (A . B . C ...) */ if (kid->op_private & OPpTARGET_MY) kid_is_last = TRUE; stacked_last = (kid->op_flags & OPf_STACKED); if (!stacked_last) kid_is_last = TRUE; kid = k1; argop = k2; } else { argop = kid; last = TRUE; } if ( nargs + nadjconst > PERL_MULTICONCAT_MAXARG - 2 || (argp - args + 1) > (PERL_MULTICONCAT_MAXARG*2 + 1) - 2) { /* At least two spare slots are needed to decompose both * concat args. If there are no slots left, continue to * examine the rest of the optree, but don't push new values * on args[]. If the optree as a whole is legal for conversion * (in particular that the last concat isn't STACKED), then * the first PERL_MULTICONCAT_MAXARG elements of the optree * can be converted into an OP_MULTICONCAT now, with the first * child of that op being the remainder of the optree - * which may itself later be converted to a multiconcat op * too. */ if (last) { /* the last arg is the rest of the optree */ argp++->p = NULL; nargs++; } } else if ( argop->op_type == OP_CONST && ((sv = cSVOPx_sv(argop))) /* defer stringification until runtime of 'constant' * things that might stringify variantly, e.g. the radix * point of NVs, or overloaded RVs */ && (SvPOK(sv) || SvIOK(sv)) && (!SvGMAGICAL(sv)) ) { if (argop->op_private & OPpCONST_STRICT) no_bareword_allowed(argop); argp++->p = sv; utf8 |= cBOOL(SvUTF8(sv)); nconst++; if (prev_was_const) /* this const may be demoted back to a plain arg later; * make sure we have enough arg slots left */ nadjconst++; prev_was_const = !prev_was_const; } else { argp++->p = NULL; nargs++; prev_was_const = FALSE; } if (last) break; } toparg = argp - 1; if (stacked_last) return; /* we don't support ((A.=B).=C)...) */ /* look for two adjacent consts and don't fold them together: * $o . "a" . "b" * should do * $o->concat("a")->concat("b") * rather than * $o->concat("ab") * (but $o .= "a" . "b" should still fold) */ { bool seen_nonconst = FALSE; for (argp = toparg; argp >= args; argp--) { if (argp->p == NULL) { seen_nonconst = TRUE; continue; } if (!seen_nonconst) continue; if (argp[1].p) { /* both previous and current arg were constants; * leave the current OP_CONST as-is */ argp->p = NULL; nconst--; nargs++; } } } /* ----------------------------------------------------------------- * Phase 2: * * At this point we have determined that the optree *can* be converted * into a multiconcat. Having gathered all the evidence, we now decide * whether it *should*. */ /* we need at least one concat action, e.g.: * * Y . Z * X = Y . Z * X .= Y * * otherwise we could be doing something like $x = "foo", which * if treated as as a concat, would fail to COW. 
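     * As a rough worked example of how the test just below counts these
     * (illustrative snippets; cases that pass are still subject to the
     * heuristics further down):
     *
     *     $x = "foo";    # nargs 0 + nconst 1 + no append == 1: return
     *     $y . $z;       # nargs 2 + nconst 0 + no append == 2: carry on
     *     $x .= $y;      # nargs 1 + nconst 0 + append    == 2: carry on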
*/ if (nargs + nconst + cBOOL(private_flags & OPpMULTICONCAT_APPEND) < 2) return; /* Benchmarking seems to indicate that we gain if: * * we optimise at least two actions into a single multiconcat * (e.g concat+concat, sassign+concat); * * or if we can eliminate at least 1 OP_CONST; * * or if we can eliminate a padsv via OPpTARGET_MY */ if ( /* eliminated at least one OP_CONST */ nconst >= 1 /* eliminated an OP_SASSIGN */ || o->op_type == OP_SASSIGN /* eliminated an OP_PADSV */ || (!targmyop && is_targable) ) /* definitely a net gain to optimise */ goto optimise; /* ... if not, what else? */ /* special-case '$lex1 = expr . $lex1' (where expr isn't lex1): * multiconcat is faster (due to not creating a temporary copy of * $lex1), whereas for a general $lex1 = $lex2 . $lex3, concat is * faster. */ if ( nconst == 0 && nargs == 2 && targmyop && topop->op_type == OP_CONCAT ) { PADOFFSET t = targmyop->op_targ; OP *k1 = cBINOPx(topop)->op_first; OP *k2 = cBINOPx(topop)->op_last; if ( k2->op_type == OP_PADSV && k2->op_targ == t && ( k1->op_type != OP_PADSV || k1->op_targ != t) ) goto optimise; } /* need at least two concats */ if (nargs + nconst + cBOOL(private_flags & OPpMULTICONCAT_APPEND) < 3) return; /* ----------------------------------------------------------------- * Phase 3: * * At this point the optree has been verified as ok to be optimised * into an OP_MULTICONCAT. Now start changing things. */ optimise: /* stringify all const args and determine utf8ness */ variant = 0; for (argp = args; argp <= toparg; argp++) { SV *sv = (SV*)argp->p; if (!sv) continue; /* not a const op */ if (utf8 && !SvUTF8(sv)) sv_utf8_upgrade_nomg(sv); argp->p = SvPV_nomg(sv, argp->len); total_len += argp->len; /* see if any strings would grow if converted to utf8 */ if (!utf8) { variant += variant_under_utf8_count((U8 *) argp->p, (U8 *) argp->p + argp->len); } } /* create and populate aux struct */ create_aux: aux = (UNOP_AUX_item*)PerlMemShared_malloc( sizeof(UNOP_AUX_item) * ( PERL_MULTICONCAT_HEADER_SIZE + ((nargs + 1) * (variant ? 2 : 1)) ) ); const_str = (char *)PerlMemShared_malloc(total_len ? total_len : 1); /* Extract all the non-const expressions from the concat tree then * dispose of the old tree, e.g. convert the tree from this: * * o => SASSIGN * | * STRINGIFY -- TARGET * | * ex-PUSHMARK -- CONCAT * | * CONCAT -- EXPR5 * | * CONCAT -- EXPR4 * | * CONCAT -- EXPR3 * | * EXPR1 -- EXPR2 * * * to: * * o => MULTICONCAT * | * ex-PUSHMARK -- EXPR1 -- EXPR2 -- EXPR3 -- EXPR4 -- EXPR5 -- TARGET * * except that if EXPRi is an OP_CONST, it's discarded. * * During the conversion process, EXPR ops are stripped from the tree * and unshifted onto o. Finally, any of o's remaining original * childen are discarded and o is converted into an OP_MULTICONCAT. * * In this middle of this, o may contain both: unshifted args on the * left, and some remaining original args on the right. lastkidop * is set to point to the right-most unshifted arg to delineate * between the two sets. */ if (is_sprintf) { /* create a copy of the format with the %'s removed, and record * the sizes of the const string segments in the aux struct */ char *q, *oldq; lenp = aux + PERL_MULTICONCAT_IX_LENGTHS; p = sprintf_info.start; q = const_str; oldq = q; for (; p < sprintf_info.end; p++) { if (*p == '%') { p++; if (*p != '%') { (lenp++)->ssize = q - oldq; oldq = q; continue; } } *q++ = *p; } lenp->ssize = q - oldq; assert((STRLEN)(q - const_str) == total_len); /* Attach all the args (i.e. 
the kids of the sprintf) to o (which * may or may not be topop) The pushmark and const ops need to be * kept in case they're an op_next entry point. */ lastkidop = cLISTOPx(topop)->op_last; kid = cUNOPx(topop)->op_first; /* pushmark */ op_null(kid); op_null(OpSIBLING(kid)); /* const */ if (o != topop) { kid = op_sibling_splice(topop, NULL, -1, NULL); /* cut all args */ op_sibling_splice(o, NULL, 0, kid); /* and attach to o */ lastkidop->op_next = o; } } else { p = const_str; lenp = aux + PERL_MULTICONCAT_IX_LENGTHS; lenp->ssize = -1; /* Concatenate all const strings into const_str. * Note that args[] contains the RHS args in reverse order, so * we scan args[] from top to bottom to get constant strings * in L-R order */ for (argp = toparg; argp >= args; argp--) { if (!argp->p) /* not a const op */ (++lenp)->ssize = -1; else { STRLEN l = argp->len; Copy(argp->p, p, l, char); p += l; if (lenp->ssize == -1) lenp->ssize = l; else lenp->ssize += l; } } kid = topop; nextop = o; lastkidop = NULL; for (argp = args; argp <= toparg; argp++) { /* only keep non-const args, except keep the first-in-next-chain * arg no matter what it is (but nulled if OP_CONST), because it * may be the entry point to this subtree from the previous * op_next. */ bool last = (argp == toparg); OP *prev; /* set prev to the sibling *before* the arg to be cut out, * e.g. when cutting EXPR: * * | * kid= CONCAT * | * prev= CONCAT -- EXPR * | */ if (argp == args && kid->op_type != OP_CONCAT) { /* in e.g. '$x .= f(1)' there's no RHS concat tree * so the expression to be cut isn't kid->op_last but * kid itself */ OP *o1, *o2; /* find the op before kid */ o1 = NULL; o2 = cUNOPx(parentop)->op_first; while (o2 && o2 != kid) { o1 = o2; o2 = OpSIBLING(o2); } assert(o2 == kid); prev = o1; kid = parentop; } else if (kid == o && lastkidop) prev = last ? lastkidop : OpSIBLING(lastkidop); else prev = last ? NULL : cUNOPx(kid)->op_first; if (!argp->p || last) { /* cut RH op */ OP *aop = op_sibling_splice(kid, prev, 1, NULL); /* and unshift to front of o */ op_sibling_splice(o, NULL, 0, aop); /* record the right-most op added to o: later we will * free anything to the right of it */ if (!lastkidop) lastkidop = aop; aop->op_next = nextop; if (last) { if (argp->p) /* null the const at start of op_next chain */ op_null(aop); } else if (prev) nextop = prev->op_next; } /* the last two arguments are both attached to the same concat op */ if (argp < toparg - 1) kid = prev; } } /* Populate the aux struct */ aux[PERL_MULTICONCAT_IX_NARGS].ssize = nargs; aux[PERL_MULTICONCAT_IX_PLAIN_PV].pv = utf8 ? NULL : const_str; aux[PERL_MULTICONCAT_IX_PLAIN_LEN].ssize = utf8 ? 0 : total_len; aux[PERL_MULTICONCAT_IX_UTF8_PV].pv = const_str; aux[PERL_MULTICONCAT_IX_UTF8_LEN].ssize = total_len; /* if variant > 0, calculate a variant const string and lengths where * the utf8 version of the string will take 'variant' more bytes than * the plain one. */ if (variant) { char *p = const_str; STRLEN ulen = total_len + variant; UNOP_AUX_item *lens = aux + PERL_MULTICONCAT_IX_LENGTHS; UNOP_AUX_item *ulens = lens + (nargs + 1); char *up = (char*)PerlMemShared_malloc(ulen); SSize_t n; aux[PERL_MULTICONCAT_IX_UTF8_PV].pv = up; aux[PERL_MULTICONCAT_IX_UTF8_LEN].ssize = ulen; for (n = 0; n < (nargs + 1); n++) { SSize_t i; char * orig_up = up; for (i = (lens++)->ssize; i > 0; i--) { U8 c = *p++; append_utf8_from_native_byte(c, (U8**)&up); } (ulens++)->ssize = (i < 0) ? 
i : up - orig_up; } } if (stringop) { /* if there was a top(ish)-level OP_STRINGIFY, we need to keep * that op's first child - an ex-PUSHMARK - because the op_next of * the previous op may point to it (i.e. it's the entry point for * the o optree) */ OP *pmop = (stringop == o) ? op_sibling_splice(o, lastkidop, 1, NULL) : op_sibling_splice(stringop, NULL, 1, NULL); assert(OP_TYPE_IS_OR_WAS_NN(pmop, OP_PUSHMARK)); op_sibling_splice(o, NULL, 0, pmop); if (!lastkidop) lastkidop = pmop; } /* Optimise * target = A.B.C... * target .= A.B.C... */ if (targetop) { assert(!targmyop); if (o->op_type == OP_SASSIGN) { /* Move the target subtree from being the last of o's children * to being the last of o's preserved children. * Note the difference between 'target = ...' and 'target .= ...': * for the former, target is executed last; for the latter, * first. */ kid = OpSIBLING(lastkidop); op_sibling_splice(o, kid, 1, NULL); /* cut target op */ op_sibling_splice(o, lastkidop, 0, targetop); /* and paste */ lastkidop->op_next = kid->op_next; lastkidop = targetop; } else { /* Move the target subtree from being the first of o's * original children to being the first of *all* o's children. */ if (lastkidop) { op_sibling_splice(o, lastkidop, 1, NULL); /* cut target op */ op_sibling_splice(o, NULL, 0, targetop); /* and paste*/ } else { /* if the RHS of .= doesn't contain a concat (e.g. * $x .= "foo"), it gets missed by the "strip ops from the * tree and add to o" loop earlier */ assert(topop->op_type != OP_CONCAT); if (stringop) { /* in e.g. $x .= "$y", move the $y expression * from being a child of OP_STRINGIFY to being the * second child of the OP_CONCAT */ assert(cUNOPx(stringop)->op_first == topop); op_sibling_splice(stringop, NULL, 1, NULL); op_sibling_splice(o, cUNOPo->op_first, 0, topop); } assert(topop == OpSIBLING(cBINOPo->op_first)); if (toparg->p) op_null(topop); lastkidop = topop; } } if (is_targable) { /* optimise * my $lex = A.B.C... * $lex = A.B.C... * $lex .= A.B.C... * The original padsv op is kept but nulled in case it's the * entry point for the optree (which it will be for * '$lex .= ... ' */ private_flags |= OPpTARGET_MY; private_flags |= (targetop->op_private & OPpLVAL_INTRO); o->op_targ = targetop->op_targ; targetop->op_targ = 0; op_null(targetop); } else flags |= OPf_STACKED; } else if (targmyop) { private_flags |= OPpTARGET_MY; if (o != targmyop) { o->op_targ = targmyop->op_targ; targmyop->op_targ = 0; } } /* detach the emaciated husk of the sprintf/concat optree and free it */ for (;;) { kid = op_sibling_splice(o, lastkidop, 1, NULL); if (!kid) break; op_free(kid); } /* and convert o into a multiconcat */ o->op_flags = (flags|OPf_KIDS|stacked_last |(o->op_flags & (OPf_WANT|OPf_PARENS))); o->op_private = private_flags; o->op_type = OP_MULTICONCAT; o->op_ppaddr = PL_ppaddr[OP_MULTICONCAT]; cUNOP_AUXo->op_aux = aux; } /* do all the final processing on an optree (e.g. running the peephole * optimiser on it), then attach it to cv (if cv is non-null) */ static void S_process_optree(pTHX_ CV *cv, OP *optree, OP* start) { OP **startp; /* XXX for some reason, evals, require and main optrees are * never attached to their CV; instead they just hang off * PL_main_root + PL_main_start or PL_eval_root + PL_eval_start * and get manually freed when appropriate */ if (cv) startp = &CvSTART(cv); else startp = PL_in_eval? 
&PL_eval_start : &PL_main_start; *startp = start; optree->op_private |= OPpREFCOUNTED; OpREFCNT_set(optree, 1); optimize_optree(optree); CALL_PEEP(*startp); finalize_optree(optree); S_prune_chain_head(startp); if (cv) { /* now that optimizer has done its work, adjust pad values */ pad_tidy(optree->op_type == OP_LEAVEWRITE ? padtidy_FORMAT : CvCLONE(cv) ? padtidy_SUBCLONE : padtidy_SUB); } } /* =for apidoc optimize_optree This function applies some optimisations to the optree in top-down order. It is called before the peephole optimizer, which processes ops in execution order. Note that finalize_optree() also does a top-down scan, but is called *after* the peephole optimizer. =cut */ void Perl_optimize_optree(pTHX_ OP* o) { PERL_ARGS_ASSERT_OPTIMIZE_OPTREE; ENTER; SAVEVPTR(PL_curcop); optimize_op(o); LEAVE; } /* helper for optimize_optree() which optimises one op then recurses * to optimise any children. */ STATIC void S_optimize_op(pTHX_ OP* o) { OP *top_op = o; PERL_ARGS_ASSERT_OPTIMIZE_OP; while (1) { OP * next_kid = NULL; assert(o->op_type != OP_FREED); switch (o->op_type) { case OP_NEXTSTATE: case OP_DBSTATE: PL_curcop = ((COP*)o); /* for warnings */ break; case OP_CONCAT: case OP_SASSIGN: case OP_STRINGIFY: case OP_SPRINTF: S_maybe_multiconcat(aTHX_ o); break; case OP_SUBST: if (cPMOPo->op_pmreplrootu.op_pmreplroot) { /* we can't assume that op_pmreplroot->op_sibparent == o * and that it is thus possible to walk back up the tree * past op_pmreplroot. So, although we try to avoid * recursing through op trees, do it here. After all, * there are unlikely to be many nested s///e's within * the replacement part of a s///e. */ optimize_op(cPMOPo->op_pmreplrootu.op_pmreplroot); } break; default: break; } if (o->op_flags & OPf_KIDS) next_kid = cUNOPo->op_first; /* if a kid hasn't been nominated to process, continue with the * next sibling, or if no siblings left, go back to the parent's * siblings and so on */ while (!next_kid) { if (o == top_op) return; /* at top; no parents/siblings to try */ if (OpHAS_SIBLING(o)) next_kid = o->op_sibparent; else o = o->op_sibparent; /*try parent's next sibling */ } /* this label not yet used. Goto here if any code above sets * next-kid get_next_op: */ o = next_kid; } } /* =for apidoc finalize_optree This function finalizes the optree. Should be called directly after the complete optree is built. It does some additional checking which can't be done in the normal Cxxx functions and makes the tree thread-safe. =cut */ void Perl_finalize_optree(pTHX_ OP* o) { PERL_ARGS_ASSERT_FINALIZE_OPTREE; ENTER; SAVEVPTR(PL_curcop); finalize_op(o); LEAVE; } #ifdef USE_ITHREADS /* Relocate sv to the pad for thread safety. * Despite being a "constant", the SV is written to, * for reference counts, sv_upgrade() etc. */ PERL_STATIC_INLINE void S_op_relocate_sv(pTHX_ SV** svp, PADOFFSET* targp) { PADOFFSET ix; PERL_ARGS_ASSERT_OP_RELOCATE_SV; if (!*svp) return; ix = pad_alloc(OP_CONST, SVf_READONLY); SvREFCNT_dec(PAD_SVl(ix)); PAD_SETSV(ix, *svp); /* XXX I don't know how this isn't readonly already. */ if (!SvIsCOW(PAD_SVl(ix))) SvREADONLY_on(PAD_SVl(ix)); *svp = NULL; *targp = ix; } #endif /* =for apidoc traverse_op_tree Return the next op in a depth-first traversal of the op tree, returning NULL when the traversal is complete. The initial call must supply the root of the tree as both top and o. For now it's static, but it may be exposed to the API in the future. 
=cut */ STATIC OP* S_traverse_op_tree(pTHX_ OP *top, OP *o) { OP *sib; PERL_ARGS_ASSERT_TRAVERSE_OP_TREE; if ((o->op_flags & OPf_KIDS) && cUNOPo->op_first) { return cUNOPo->op_first; } else if ((sib = OpSIBLING(o))) { return sib; } else { OP *parent = o->op_sibparent; assert(!(o->op_moresib)); while (parent && parent != top) { OP *sib = OpSIBLING(parent); if (sib) return sib; parent = parent->op_sibparent; } return NULL; } } STATIC void S_finalize_op(pTHX_ OP* o) { OP * const top = o; PERL_ARGS_ASSERT_FINALIZE_OP; do { assert(o->op_type != OP_FREED); switch (o->op_type) { case OP_NEXTSTATE: case OP_DBSTATE: PL_curcop = ((COP*)o); /* for warnings */ break; case OP_EXEC: if (OpHAS_SIBLING(o)) { OP *sib = OpSIBLING(o); if (( sib->op_type == OP_NEXTSTATE || sib->op_type == OP_DBSTATE) && ckWARN(WARN_EXEC) && OpHAS_SIBLING(sib)) { const OPCODE type = OpSIBLING(sib)->op_type; if (type != OP_EXIT && type != OP_WARN && type != OP_DIE) { const line_t oldline = CopLINE(PL_curcop); CopLINE_set(PL_curcop, CopLINE((COP*)sib)); Perl_warner(aTHX_ packWARN(WARN_EXEC), "Statement unlikely to be reached"); Perl_warner(aTHX_ packWARN(WARN_EXEC), "\t(Maybe you meant system() when you said exec()?)\n"); CopLINE_set(PL_curcop, oldline); } } } break; case OP_GV: if ((o->op_private & OPpEARLY_CV) && ckWARN(WARN_PROTOTYPE)) { GV * const gv = cGVOPo_gv; if (SvTYPE(gv) == SVt_PVGV && GvCV(gv) && SvPVX_const(GvCV(gv))) { /* XXX could check prototype here instead of just carping */ SV * const sv = sv_newmortal(); gv_efullname3(sv, gv, NULL); Perl_warner(aTHX_ packWARN(WARN_PROTOTYPE), "%" SVf "() called too early to check prototype", SVfARG(sv)); } } break; case OP_CONST: if (cSVOPo->op_private & OPpCONST_STRICT) no_bareword_allowed(o); #ifdef USE_ITHREADS /* FALLTHROUGH */ case OP_HINTSEVAL: op_relocate_sv(&cSVOPo->op_sv, &o->op_targ); #endif break; #ifdef USE_ITHREADS /* Relocate all the METHOP's SVs to the pad for thread safety. */ case OP_METHOD_NAMED: case OP_METHOD_SUPER: case OP_METHOD_REDIR: case OP_METHOD_REDIR_SUPER: op_relocate_sv(&cMETHOPx(o)->op_u.op_meth_sv, &o->op_targ); break; #endif case OP_HELEM: { UNOP *rop; SVOP *key_op; OP *kid; if ((key_op = cSVOPx(((BINOP*)o)->op_last))->op_type != OP_CONST) break; rop = (UNOP*)((BINOP*)o)->op_first; goto check_keys; case OP_HSLICE: S_scalar_slice_warning(aTHX_ o); /* FALLTHROUGH */ case OP_KVHSLICE: kid = OpSIBLING(cLISTOPo->op_first); if (/* I bet there's always a pushmark... */ OP_TYPE_ISNT_AND_WASNT_NN(kid, OP_LIST) && OP_TYPE_ISNT_NN(kid, OP_CONST)) { break; } key_op = (SVOP*)(kid->op_type == OP_CONST ? kid : OpSIBLING(kLISTOP->op_first)); rop = (UNOP*)((LISTOP*)o)->op_last; check_keys: if (o->op_private & OPpLVAL_INTRO || rop->op_type != OP_RV2HV) rop = NULL; S_check_hash_fields_and_hekify(aTHX_ rop, key_op, 1); break; } case OP_NULL: if (o->op_targ != OP_HSLICE && o->op_targ != OP_ASLICE) break; /* FALLTHROUGH */ case OP_ASLICE: S_scalar_slice_warning(aTHX_ o); break; case OP_SUBST: { if (cPMOPo->op_pmreplrootu.op_pmreplroot) finalize_op(cPMOPo->op_pmreplrootu.op_pmreplroot); break; } default: break; } #ifdef DEBUGGING if (o->op_flags & OPf_KIDS) { OP *kid; /* check that op_last points to the last sibling, and that * the last op_sibling/op_sibparent field points back to the * parent, and that the only ops with KIDS are those which are * entitled to them */ U32 type = o->op_type; U32 family; bool has_last; if (type == OP_NULL) { type = o->op_targ; /* ck_glob creates a null UNOP with ex-type GLOB * (which is a list op. 
So pretend it wasn't a listop */ if (type == OP_GLOB) type = OP_NULL; } family = PL_opargs[type] & OA_CLASS_MASK; has_last = ( family == OA_BINOP || family == OA_LISTOP || family == OA_PMOP || family == OA_LOOP ); assert( has_last /* has op_first and op_last, or ... ... has (or may have) op_first: */ || family == OA_UNOP || family == OA_UNOP_AUX || family == OA_LOGOP || family == OA_BASEOP_OR_UNOP || family == OA_FILESTATOP || family == OA_LOOPEXOP || family == OA_METHOP || type == OP_CUSTOM || type == OP_NULL /* new_logop does this */ ); for (kid = cUNOPo->op_first; kid; kid = OpSIBLING(kid)) { if (!OpHAS_SIBLING(kid)) { if (has_last) assert(kid == cLISTOPo->op_last); assert(kid->op_sibparent == o); } } } #endif } while (( o = traverse_op_tree(top, o)) != NULL); } static void S_mark_padname_lvalue(pTHX_ PADNAME *pn) { CV *cv = PL_compcv; PadnameLVALUE_on(pn); while (PadnameOUTER(pn) && PARENT_PAD_INDEX(pn)) { cv = CvOUTSIDE(cv); /* RT #127786: cv can be NULL due to an eval within the DB package * called from an anon sub - anon subs don't have CvOUTSIDE() set * unless they contain an eval, but calling eval within DB * pretends the eval was done in the caller's scope. */ if (!cv) break; assert(CvPADLIST(cv)); pn = PadlistNAMESARRAY(CvPADLIST(cv))[PARENT_PAD_INDEX(pn)]; assert(PadnameLEN(pn)); PadnameLVALUE_on(pn); } } static bool S_vivifies(const OPCODE type) { switch(type) { case OP_RV2AV: case OP_ASLICE: case OP_RV2HV: case OP_KVASLICE: case OP_RV2SV: case OP_HSLICE: case OP_AELEMFAST: case OP_KVHSLICE: case OP_HELEM: case OP_AELEM: return 1; } return 0; } /* apply lvalue reference (aliasing) context to the optree o. * E.g. in * \($x,$y) = (...) * o would be the list ($x,$y) and type would be OP_AASSIGN. * It may descend and apply this to children too, for example in * \( $cond ? $x, $y) = (...) 
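 * Roughly speaking (an illustrative sketch, not an exhaustive list), the
 * leaf ops reached here are retyped in place, e.g. under
 * "use feature 'refaliasing'":
 *
 *     \$x      = \$y;      # the OP_PADSV for $x is retyped to OP_LVREF
 *     \(@a)    = (...);    # a parenthesized @a is retyped to OP_LVAVREF
 *     \@a[0,1] = (...);    # an array slice is retyped to OP_LVREFSLICE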
*/ static void S_lvref(pTHX_ OP *o, I32 type) { dVAR; OP *kid; OP * top_op = o; while (1) { switch (o->op_type) { case OP_COND_EXPR: o = OpSIBLING(cUNOPo->op_first); continue; case OP_PUSHMARK: goto do_next; case OP_RV2AV: if (cUNOPo->op_first->op_type != OP_GV) goto badref; o->op_flags |= OPf_STACKED; if (o->op_flags & OPf_PARENS) { if (o->op_private & OPpLVAL_INTRO) { yyerror(Perl_form(aTHX_ "Can't modify reference to " "localized parenthesized array in list assignment")); goto do_next; } slurpy: OpTYPE_set(o, OP_LVAVREF); o->op_private &= OPpLVAL_INTRO|OPpPAD_STATE; o->op_flags |= OPf_MOD|OPf_REF; goto do_next; } o->op_private |= OPpLVREF_AV; goto checkgv; case OP_RV2CV: kid = cUNOPo->op_first; if (kid->op_type == OP_NULL) kid = cUNOPx(OpSIBLING(kUNOP->op_first)) ->op_first; o->op_private = OPpLVREF_CV; if (kid->op_type == OP_GV) o->op_flags |= OPf_STACKED; else if (kid->op_type == OP_PADCV) { o->op_targ = kid->op_targ; kid->op_targ = 0; op_free(cUNOPo->op_first); cUNOPo->op_first = NULL; o->op_flags &=~ OPf_KIDS; } else goto badref; break; case OP_RV2HV: if (o->op_flags & OPf_PARENS) { parenhash: yyerror(Perl_form(aTHX_ "Can't modify reference to " "parenthesized hash in list assignment")); goto do_next; } o->op_private |= OPpLVREF_HV; /* FALLTHROUGH */ case OP_RV2SV: checkgv: if (cUNOPo->op_first->op_type != OP_GV) goto badref; o->op_flags |= OPf_STACKED; break; case OP_PADHV: if (o->op_flags & OPf_PARENS) goto parenhash; o->op_private |= OPpLVREF_HV; /* FALLTHROUGH */ case OP_PADSV: PAD_COMPNAME_GEN_set(o->op_targ, PERL_INT_MAX); break; case OP_PADAV: PAD_COMPNAME_GEN_set(o->op_targ, PERL_INT_MAX); if (o->op_flags & OPf_PARENS) goto slurpy; o->op_private |= OPpLVREF_AV; break; case OP_AELEM: case OP_HELEM: o->op_private |= OPpLVREF_ELEM; o->op_flags |= OPf_STACKED; break; case OP_ASLICE: case OP_HSLICE: OpTYPE_set(o, OP_LVREFSLICE); o->op_private &= OPpLVAL_INTRO; goto do_next; case OP_NULL: if (o->op_flags & OPf_SPECIAL) /* do BLOCK */ goto badref; else if (!(o->op_flags & OPf_KIDS)) goto do_next; /* the code formerly only recursed into the first child of * a non ex-list OP_NULL. if we ever encounter such a null op with * more than one child, need to decide whether its ok to process * *all* its kids or not */ assert(o->op_targ == OP_LIST || !(OpHAS_SIBLING(cBINOPo->op_first))); /* FALLTHROUGH */ case OP_LIST: o = cLISTOPo->op_first; continue; case OP_STUB: if (o->op_flags & OPf_PARENS) goto do_next; /* FALLTHROUGH */ default: badref: /* diag_listed_as: Can't modify reference to %s in %s assignment */ yyerror(Perl_form(aTHX_ "Can't modify reference to %s in %s", o->op_type == OP_NULL && o->op_flags & OPf_SPECIAL ? "do block" : OP_DESC(o), PL_op_desc[type])); goto do_next; } OpTYPE_set(o, OP_LVREF); o->op_private &= OPpLVAL_INTRO|OPpLVREF_ELEM|OPpLVREF_TYPE|OPpPAD_STATE; if (type == OP_ENTERLOOP) o->op_private |= OPpLVREF_ITER; do_next: while (1) { if (o == top_op) return; /* at top; no parents/siblings to try */ if (OpHAS_SIBLING(o)) { o = o->op_sibparent; break; } o = o->op_sibparent; /*try parent's next sibling */ } } /* while */ } PERL_STATIC_INLINE bool S_potential_mod_type(I32 type) { /* Types that only potentially result in modification. */ return type == OP_GREPSTART || type == OP_ENTERSUB || type == OP_REFGEN || type == OP_LEAVESUBLV; } /* =for apidoc op_lvalue Propagate lvalue ("modifiable") context to an op and its children. 
C represents the context type, roughly based on the type of op that would do the modifying, although C is represented by C, because it has no op type of its own (it is signalled by a flag on the lvalue op). This function detects things that can't be modified, such as C<$x+1>, and generates errors for them. For example, C<$x+1 = 2> would cause it to be called with an op of type C and a C argument of C. It also flags things that need to behave specially in an lvalue context, such as C<$$x = 5> which might have to vivify a reference in C<$x>. =cut Perl_op_lvalue_flags() is a non-API lower-level interface to op_lvalue(). The flags param has these bits: OP_LVALUE_NO_CROAK: return rather than croaking on error */ OP * Perl_op_lvalue_flags(pTHX_ OP *o, I32 type, U32 flags) { dVAR; OP *top_op = o; if (!o || (PL_parser && PL_parser->error_count)) return o; while (1) { OP *kid; /* -1 = error on localize, 0 = ignore localize, 1 = ok to localize */ int localize = -1; OP *next_kid = NULL; if ((o->op_private & OPpTARGET_MY) && (PL_opargs[o->op_type] & OA_TARGLEX))/* OPp share the meaning */ { goto do_next; } /* elements of a list might be in void context because the list is in scalar context or because they are attribute sub calls */ if ((o->op_flags & OPf_WANT) == OPf_WANT_VOID) goto do_next; if (type == OP_PRTF || type == OP_SPRINTF) type = OP_ENTERSUB; switch (o->op_type) { case OP_UNDEF: PL_modcount++; goto do_next; case OP_STUB: if ((o->op_flags & OPf_PARENS)) break; goto nomod; case OP_ENTERSUB: if ((type == OP_UNDEF || type == OP_REFGEN || type == OP_LOCK) && !(o->op_flags & OPf_STACKED)) { OpTYPE_set(o, OP_RV2CV); /* entersub => rv2cv */ assert(cUNOPo->op_first->op_type == OP_NULL); op_null(((LISTOP*)cUNOPo->op_first)->op_first);/* disable pushmark */ break; } else { /* lvalue subroutine call */ o->op_private |= OPpLVAL_INTRO; PL_modcount = RETURN_UNLIMITED_NUMBER; if (S_potential_mod_type(type)) { o->op_private |= OPpENTERSUB_INARGS; break; } else { /* Compile-time error message: */ OP *kid = cUNOPo->op_first; CV *cv; GV *gv; SV *namesv; if (kid->op_type != OP_PUSHMARK) { if (kid->op_type != OP_NULL || kid->op_targ != OP_LIST) Perl_croak(aTHX_ "panic: unexpected lvalue entersub " "args: type/targ %ld:%" UVuf, (long)kid->op_type, (UV)kid->op_targ); kid = kLISTOP->op_first; } while (OpHAS_SIBLING(kid)) kid = OpSIBLING(kid); if (!(kid->op_type == OP_NULL && kid->op_targ == OP_RV2CV)) { break; /* Postpone until runtime */ } kid = kUNOP->op_first; if (kid->op_type == OP_NULL && kid->op_targ == OP_RV2SV) kid = kUNOP->op_first; if (kid->op_type == OP_NULL) Perl_croak(aTHX_ "Unexpected constant lvalue entersub " "entry via type/targ %ld:%" UVuf, (long)kid->op_type, (UV)kid->op_targ); if (kid->op_type != OP_GV) { break; } gv = kGVOP_gv; cv = isGV(gv) ? GvCV(gv) : SvROK(gv) && SvTYPE(SvRV(gv)) == SVt_PVCV ? MUTABLE_CV(SvRV(gv)) : NULL; if (!cv) break; if (CvLVALUE(cv)) break; if (flags & OP_LVALUE_NO_CROAK) return NULL; namesv = cv_name(cv, NULL, 0); yyerror_pv(Perl_form(aTHX_ "Can't modify non-lvalue " "subroutine call of &%" SVf " in %s", SVfARG(namesv), PL_op_desc[type]), SvUTF8(namesv)); goto do_next; } } /* FALLTHROUGH */ default: nomod: if (flags & OP_LVALUE_NO_CROAK) return NULL; /* grep, foreach, subcalls, refgen */ if (S_potential_mod_type(type)) break; yyerror(Perl_form(aTHX_ "Can't modify %s in %s", (o->op_type == OP_NULL && (o->op_flags & OPf_SPECIAL) ? "do block" : OP_DESC(o)), type ? 
PL_op_desc[type] : "local")); goto do_next; case OP_PREINC: case OP_PREDEC: case OP_POW: case OP_MULTIPLY: case OP_DIVIDE: case OP_MODULO: case OP_ADD: case OP_SUBTRACT: case OP_CONCAT: case OP_LEFT_SHIFT: case OP_RIGHT_SHIFT: case OP_BIT_AND: case OP_BIT_XOR: case OP_BIT_OR: case OP_I_MULTIPLY: case OP_I_DIVIDE: case OP_I_MODULO: case OP_I_ADD: case OP_I_SUBTRACT: if (!(o->op_flags & OPf_STACKED)) goto nomod; PL_modcount++; break; case OP_REPEAT: if (o->op_flags & OPf_STACKED) { PL_modcount++; break; } if (!(o->op_private & OPpREPEAT_DOLIST)) goto nomod; else { const I32 mods = PL_modcount; /* we recurse rather than iterate here because we need to * calculate and use the delta applied to PL_modcount by the * first child. So in something like * ($x, ($y) x 3) = split; * split knows that 4 elements are wanted */ modkids(cBINOPo->op_first, type); if (type != OP_AASSIGN) goto nomod; kid = cBINOPo->op_last; if (kid->op_type == OP_CONST && SvIOK(kSVOP_sv)) { const IV iv = SvIV(kSVOP_sv); if (PL_modcount != RETURN_UNLIMITED_NUMBER) PL_modcount = mods + (PL_modcount - mods) * (iv < 0 ? 0 : iv); } else PL_modcount = RETURN_UNLIMITED_NUMBER; } break; case OP_COND_EXPR: localize = 1; next_kid = OpSIBLING(cUNOPo->op_first); break; case OP_RV2AV: case OP_RV2HV: if (type == OP_REFGEN && o->op_flags & OPf_PARENS) { PL_modcount = RETURN_UNLIMITED_NUMBER; /* Treat \(@foo) like ordinary list, but still mark it as modi- fiable since some contexts need to know. */ o->op_flags |= OPf_MOD; goto do_next; } /* FALLTHROUGH */ case OP_RV2GV: if (scalar_mod_type(o, type)) goto nomod; ref(cUNOPo->op_first, o->op_type); /* FALLTHROUGH */ case OP_ASLICE: case OP_HSLICE: localize = 1; /* FALLTHROUGH */ case OP_AASSIGN: /* Do not apply the lvsub flag for rv2[ah]v in scalar context. */ if (type == OP_LEAVESUBLV && ( (o->op_type != OP_RV2AV && o->op_type != OP_RV2HV) || (o->op_flags & OPf_WANT) != OPf_WANT_SCALAR )) o->op_private |= OPpMAYBE_LVSUB; /* FALLTHROUGH */ case OP_NEXTSTATE: case OP_DBSTATE: PL_modcount = RETURN_UNLIMITED_NUMBER; break; case OP_KVHSLICE: case OP_KVASLICE: case OP_AKEYS: if (type == OP_LEAVESUBLV) o->op_private |= OPpMAYBE_LVSUB; goto nomod; case OP_AVHVSWITCH: if (type == OP_LEAVESUBLV && (o->op_private & OPpAVHVSWITCH_MASK) + OP_EACH == OP_KEYS) o->op_private |= OPpMAYBE_LVSUB; goto nomod; case OP_AV2ARYLEN: PL_hints |= HINT_BLOCK_SCOPE; if (type == OP_LEAVESUBLV) o->op_private |= OPpMAYBE_LVSUB; PL_modcount++; break; case OP_RV2SV: ref(cUNOPo->op_first, o->op_type); localize = 1; /* FALLTHROUGH */ case OP_GV: PL_hints |= HINT_BLOCK_SCOPE; /* FALLTHROUGH */ case OP_SASSIGN: case OP_ANDASSIGN: case OP_ORASSIGN: case OP_DORASSIGN: PL_modcount++; break; case OP_AELEMFAST: case OP_AELEMFAST_LEX: localize = -1; PL_modcount++; break; case OP_PADAV: case OP_PADHV: PL_modcount = RETURN_UNLIMITED_NUMBER; if (type == OP_REFGEN && o->op_flags & OPf_PARENS) { /* Treat \(@foo) like ordinary list, but still mark it as modi- fiable since some contexts need to know. 
*/ o->op_flags |= OPf_MOD; goto do_next; } if (scalar_mod_type(o, type)) goto nomod; if ((o->op_flags & OPf_WANT) != OPf_WANT_SCALAR && type == OP_LEAVESUBLV) o->op_private |= OPpMAYBE_LVSUB; /* FALLTHROUGH */ case OP_PADSV: PL_modcount++; if (!type) /* local() */ Perl_croak(aTHX_ "Can't localize lexical variable %" PNf, PNfARG(PAD_COMPNAME(o->op_targ))); if (!(o->op_private & OPpLVAL_INTRO) || ( type != OP_SASSIGN && type != OP_AASSIGN && PadnameIsSTATE(PAD_COMPNAME_SV(o->op_targ)) )) S_mark_padname_lvalue(aTHX_ PAD_COMPNAME_SV(o->op_targ)); break; case OP_PUSHMARK: localize = 0; break; case OP_KEYS: if (type != OP_LEAVESUBLV && !scalar_mod_type(NULL, type)) goto nomod; goto lvalue_func; case OP_SUBSTR: if (o->op_private == 4) /* don't allow 4 arg substr as lvalue */ goto nomod; /* FALLTHROUGH */ case OP_POS: case OP_VEC: lvalue_func: if (type == OP_LEAVESUBLV) o->op_private |= OPpMAYBE_LVSUB; if (o->op_flags & OPf_KIDS && OpHAS_SIBLING(cBINOPo->op_first)) { /* we recurse rather than iterate here because the child * needs to be processed with a different 'type' parameter */ /* substr and vec */ /* If this op is in merely potential (non-fatal) modifiable context, then apply OP_ENTERSUB context to the kid op (to avoid croaking). Other- wise pass this op’s own type so the correct op is mentioned in error messages. */ op_lvalue(OpSIBLING(cBINOPo->op_first), S_potential_mod_type(type) ? (I32)OP_ENTERSUB : o->op_type); } break; case OP_AELEM: case OP_HELEM: ref(cBINOPo->op_first, o->op_type); if (type == OP_ENTERSUB && !(o->op_private & (OPpLVAL_INTRO | OPpDEREF))) o->op_private |= OPpLVAL_DEFER; if (type == OP_LEAVESUBLV) o->op_private |= OPpMAYBE_LVSUB; localize = 1; PL_modcount++; break; case OP_LEAVE: case OP_LEAVELOOP: o->op_private |= OPpLVALUE; /* FALLTHROUGH */ case OP_SCOPE: case OP_ENTER: case OP_LINESEQ: localize = 0; if (o->op_flags & OPf_KIDS) next_kid = cLISTOPo->op_last; break; case OP_NULL: localize = 0; if (o->op_flags & OPf_SPECIAL) /* do BLOCK */ goto nomod; else if (!(o->op_flags & OPf_KIDS)) break; if (o->op_targ != OP_LIST) { OP *sib = OpSIBLING(cLISTOPo->op_first); /* OP_TRANS and OP_TRANSR with argument have a weird optree * that looks like * * null * arg * trans * * compared with things like OP_MATCH which have the argument * as a child: * * match * arg * * so handle specially to correctly get "Can't modify" croaks etc */ if (sib && (sib->op_type == OP_TRANS || sib->op_type == OP_TRANSR)) { /* this should trigger a "Can't modify transliteration" err */ op_lvalue(sib, type); } next_kid = cBINOPo->op_first; /* we assume OP_NULLs which aren't ex-list have no more than 2 * children. 
If this assumption is wrong, increase the scan * limit below */ assert( !OpHAS_SIBLING(next_kid) || !OpHAS_SIBLING(OpSIBLING(next_kid))); break; } /* FALLTHROUGH */ case OP_LIST: localize = 0; next_kid = cLISTOPo->op_first; break; case OP_COREARGS: goto do_next; case OP_AND: case OP_OR: if (type == OP_LEAVESUBLV || !S_vivifies(cLOGOPo->op_first->op_type)) next_kid = cLOGOPo->op_first; else if (type == OP_LEAVESUBLV || !S_vivifies(OpSIBLING(cLOGOPo->op_first)->op_type)) next_kid = OpSIBLING(cLOGOPo->op_first); goto nomod; case OP_SREFGEN: if (type == OP_NULL) { /* local */ local_refgen: if (!FEATURE_MYREF_IS_ENABLED) Perl_croak(aTHX_ "The experimental declared_refs " "feature is not enabled"); Perl_ck_warner_d(aTHX_ packWARN(WARN_EXPERIMENTAL__DECLARED_REFS), "Declaring references is experimental"); next_kid = cUNOPo->op_first; goto do_next; } if (type != OP_AASSIGN && type != OP_SASSIGN && type != OP_ENTERLOOP) goto nomod; /* Don’t bother applying lvalue context to the ex-list. */ kid = cUNOPx(cUNOPo->op_first)->op_first; assert (!OpHAS_SIBLING(kid)); goto kid_2lvref; case OP_REFGEN: if (type == OP_NULL) /* local */ goto local_refgen; if (type != OP_AASSIGN) goto nomod; kid = cUNOPo->op_first; kid_2lvref: { const U8 ec = PL_parser ? PL_parser->error_count : 0; S_lvref(aTHX_ kid, type); if (!PL_parser || PL_parser->error_count == ec) { if (!FEATURE_REFALIASING_IS_ENABLED) Perl_croak(aTHX_ "Experimental aliasing via reference not enabled"); Perl_ck_warner_d(aTHX_ packWARN(WARN_EXPERIMENTAL__REFALIASING), "Aliasing via reference is experimental"); } } if (o->op_type == OP_REFGEN) op_null(cUNOPx(cUNOPo->op_first)->op_first); /* pushmark */ op_null(o); goto do_next; case OP_SPLIT: if ((o->op_private & OPpSPLIT_ASSIGN)) { /* This is actually @array = split. */ PL_modcount = RETURN_UNLIMITED_NUMBER; break; } goto nomod; case OP_SCALAR: op_lvalue(cUNOPo->op_first, OP_ENTERSUB); goto nomod; } /* [20011101.069 (#7861)] File test operators interpret OPf_REF to mean that their argument is a filehandle; thus \stat(".") should not set it. AMS 20011102 */ if (type == OP_REFGEN && OP_IS_STAT(o->op_type)) goto do_next; if (type != OP_LEAVESUBLV) o->op_flags |= OPf_MOD; if (type == OP_AASSIGN || type == OP_SASSIGN) o->op_flags |= OPf_SPECIAL |(o->op_type == OP_ENTERSUB ? 
0 : OPf_REF); else if (!type) { /* local() */ switch (localize) { case 1: o->op_private |= OPpLVAL_INTRO; o->op_flags &= ~OPf_SPECIAL; PL_hints |= HINT_BLOCK_SCOPE; break; case 0: break; case -1: Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Useless localization of %s", OP_DESC(o)); } } else if (type != OP_GREPSTART && type != OP_ENTERSUB && type != OP_LEAVESUBLV && o->op_type != OP_ENTERSUB) o->op_flags |= OPf_REF; do_next: while (!next_kid) { if (o == top_op) return top_op; /* at top; no parents/siblings to try */ if (OpHAS_SIBLING(o)) { next_kid = o->op_sibparent; if (!OpHAS_SIBLING(next_kid)) { /* a few node types don't recurse into their second child */ OP *parent = next_kid->op_sibparent; I32 ptype = parent->op_type; if ( (ptype == OP_NULL && parent->op_targ != OP_LIST) || ( (ptype == OP_AND || ptype == OP_OR) && (type != OP_LEAVESUBLV && S_vivifies(next_kid->op_type)) ) ) { /*try parent's next sibling */ o = parent; next_kid = NULL; } } } else o = o->op_sibparent; /*try parent's next sibling */ } o = next_kid; } /* while */ } STATIC bool S_scalar_mod_type(const OP *o, I32 type) { switch (type) { case OP_POS: case OP_SASSIGN: if (o && o->op_type == OP_RV2GV) return FALSE; /* FALLTHROUGH */ case OP_PREINC: case OP_PREDEC: case OP_POSTINC: case OP_POSTDEC: case OP_I_PREINC: case OP_I_PREDEC: case OP_I_POSTINC: case OP_I_POSTDEC: case OP_POW: case OP_MULTIPLY: case OP_DIVIDE: case OP_MODULO: case OP_REPEAT: case OP_ADD: case OP_SUBTRACT: case OP_I_MULTIPLY: case OP_I_DIVIDE: case OP_I_MODULO: case OP_I_ADD: case OP_I_SUBTRACT: case OP_LEFT_SHIFT: case OP_RIGHT_SHIFT: case OP_BIT_AND: case OP_BIT_XOR: case OP_BIT_OR: case OP_NBIT_AND: case OP_NBIT_XOR: case OP_NBIT_OR: case OP_SBIT_AND: case OP_SBIT_XOR: case OP_SBIT_OR: case OP_CONCAT: case OP_SUBST: case OP_TRANS: case OP_TRANSR: case OP_READ: case OP_SYSREAD: case OP_RECV: case OP_ANDASSIGN: case OP_ORASSIGN: case OP_DORASSIGN: case OP_VEC: case OP_SUBSTR: return TRUE; default: return FALSE; } } STATIC bool S_is_handle_constructor(const OP *o, I32 numargs) { PERL_ARGS_ASSERT_IS_HANDLE_CONSTRUCTOR; switch (o->op_type) { case OP_PIPE_OP: case OP_SOCKPAIR: if (numargs == 2) return TRUE; /* FALLTHROUGH */ case OP_SYSOPEN: case OP_OPEN: case OP_SELECT: /* XXX c.f. SelectSaver.pm */ case OP_SOCKET: case OP_OPEN_DIR: case OP_ACCEPT: if (numargs == 1) return TRUE; /* FALLTHROUGH */ default: return FALSE; } } static OP * S_refkids(pTHX_ OP *o, I32 type) { if (o && o->op_flags & OPf_KIDS) { OP *kid; for (kid = cLISTOPo->op_first; kid; kid = OpSIBLING(kid)) ref(kid, type); } return o; } /* Apply reference (autovivification) context to the subtree at o. * For example in * push @{expression}, ....; * o will be the head of 'expression' and type will be OP_RV2AV. * It marks the op o (or a suitable child) as autovivifying, e.g. by * setting OPf_MOD. * For OP_RV2AV/OP_PADAV and OP_RV2HV/OP_PADHV sets OPf_REF too if * set_op_ref is true. * * Also calls scalar(o). */ OP * Perl_doref(pTHX_ OP *o, I32 type, bool set_op_ref) { dVAR; OP * top_op = o; PERL_ARGS_ASSERT_DOREF; if (PL_parser && PL_parser->error_count) return o; while (1) { switch (o->op_type) { case OP_ENTERSUB: if ((type == OP_EXISTS || type == OP_DEFINED) && !(o->op_flags & OPf_STACKED)) { OpTYPE_set(o, OP_RV2CV); /* entersub => rv2cv */ assert(cUNOPo->op_first->op_type == OP_NULL); /* disable pushmark */ op_null(((LISTOP*)cUNOPo->op_first)->op_first); o->op_flags |= OPf_SPECIAL; } else if (type == OP_RV2SV || type == OP_RV2AV || type == OP_RV2HV){ o->op_private |= (type == OP_RV2AV ? 
OPpDEREF_AV : type == OP_RV2HV ? OPpDEREF_HV : OPpDEREF_SV); o->op_flags |= OPf_MOD; } break; case OP_COND_EXPR: o = OpSIBLING(cUNOPo->op_first); continue; case OP_RV2SV: if (type == OP_DEFINED) o->op_flags |= OPf_SPECIAL; /* don't create GV */ /* FALLTHROUGH */ case OP_PADSV: if (type == OP_RV2SV || type == OP_RV2AV || type == OP_RV2HV) { o->op_private |= (type == OP_RV2AV ? OPpDEREF_AV : type == OP_RV2HV ? OPpDEREF_HV : OPpDEREF_SV); o->op_flags |= OPf_MOD; } if (o->op_flags & OPf_KIDS) { type = o->op_type; o = cUNOPo->op_first; continue; } break; case OP_RV2AV: case OP_RV2HV: if (set_op_ref) o->op_flags |= OPf_REF; /* FALLTHROUGH */ case OP_RV2GV: if (type == OP_DEFINED) o->op_flags |= OPf_SPECIAL; /* don't create GV */ type = o->op_type; o = cUNOPo->op_first; continue; case OP_PADAV: case OP_PADHV: if (set_op_ref) o->op_flags |= OPf_REF; break; case OP_SCALAR: case OP_NULL: if (!(o->op_flags & OPf_KIDS) || type == OP_DEFINED) break; o = cBINOPo->op_first; continue; case OP_AELEM: case OP_HELEM: if (type == OP_RV2SV || type == OP_RV2AV || type == OP_RV2HV) { o->op_private |= (type == OP_RV2AV ? OPpDEREF_AV : type == OP_RV2HV ? OPpDEREF_HV : OPpDEREF_SV); o->op_flags |= OPf_MOD; } type = o->op_type; o = cBINOPo->op_first; continue;; case OP_SCOPE: case OP_LEAVE: set_op_ref = FALSE; /* FALLTHROUGH */ case OP_ENTER: case OP_LIST: if (!(o->op_flags & OPf_KIDS)) break; o = cLISTOPo->op_last; continue; default: break; } /* switch */ while (1) { if (o == top_op) return scalar(top_op); /* at top; no parents/siblings to try */ if (OpHAS_SIBLING(o)) { o = o->op_sibparent; /* Normally skip all siblings and go straight to the parent; * the only op that requires two children to be processed * is OP_COND_EXPR */ if (!OpHAS_SIBLING(o) && o->op_sibparent->op_type == OP_COND_EXPR) break; continue; } o = o->op_sibparent; /*try parent's next sibling */ } } /* while */ } STATIC OP * S_dup_attrlist(pTHX_ OP *o) { OP *rop; PERL_ARGS_ASSERT_DUP_ATTRLIST; /* An attrlist is either a simple OP_CONST or an OP_LIST with kids, * where the first kid is OP_PUSHMARK and the remaining ones * are OP_CONST. We need to push the OP_CONST values. */ if (o->op_type == OP_CONST) rop = newSVOP(OP_CONST, o->op_flags, SvREFCNT_inc_NN(cSVOPo->op_sv)); else { assert((o->op_type == OP_LIST) && (o->op_flags & OPf_KIDS)); rop = NULL; for (o = cLISTOPo->op_first; o; o = OpSIBLING(o)) { if (o->op_type == OP_CONST) rop = op_append_elem(OP_LIST, rop, newSVOP(OP_CONST, o->op_flags, SvREFCNT_inc_NN(cSVOPo->op_sv))); } } return rop; } STATIC void S_apply_attrs(pTHX_ HV *stash, SV *target, OP *attrs) { PERL_ARGS_ASSERT_APPLY_ATTRS; { SV * const stashsv = newSVhek(HvNAME_HEK(stash)); /* fake up C */ #define ATTRSMODULE "attributes" #define ATTRSMODULE_PM "attributes.pm" Perl_load_module( aTHX_ PERL_LOADMOD_IMPORT_OPS, newSVpvs(ATTRSMODULE), NULL, op_prepend_elem(OP_LIST, newSVOP(OP_CONST, 0, stashsv), op_prepend_elem(OP_LIST, newSVOP(OP_CONST, 0, newRV(target)), dup_attrlist(attrs)))); } } STATIC void S_apply_attrs_my(pTHX_ HV *stash, OP *target, OP *attrs, OP **imopsp) { OP *pack, *imop, *arg; SV *meth, *stashsv, **svp; PERL_ARGS_ASSERT_APPLY_ATTRS_MY; if (!attrs) return; assert(target->op_type == OP_PADSV || target->op_type == OP_PADHV || target->op_type == OP_PADAV); /* Ensure that attributes.pm is loaded. */ /* Don't force the C if we don't need it. 
*/ svp = hv_fetchs(GvHVn(PL_incgv), ATTRSMODULE_PM, FALSE); if (svp && *svp != &PL_sv_undef) NOOP; /* already in %INC */ else Perl_load_module(aTHX_ PERL_LOADMOD_NOIMPORT, newSVpvs(ATTRSMODULE), NULL); /* Need package name for method call. */ pack = newSVOP(OP_CONST, 0, newSVpvs(ATTRSMODULE)); /* Build up the real arg-list. */ stashsv = newSVhek(HvNAME_HEK(stash)); arg = newOP(OP_PADSV, 0); arg->op_targ = target->op_targ; arg = op_prepend_elem(OP_LIST, newSVOP(OP_CONST, 0, stashsv), op_prepend_elem(OP_LIST, newUNOP(OP_REFGEN, 0, arg), dup_attrlist(attrs))); /* Fake up a method call to import */ meth = newSVpvs_share("import"); imop = op_convert_list(OP_ENTERSUB, OPf_STACKED|OPf_SPECIAL|OPf_WANT_VOID, op_append_elem(OP_LIST, op_prepend_elem(OP_LIST, pack, arg), newMETHOP_named(OP_METHOD_NAMED, 0, meth))); /* Combine the ops. */ *imopsp = op_append_elem(OP_LIST, *imopsp, imop); } /* =notfor apidoc apply_attrs_string Attempts to apply a list of attributes specified by the C<attrstr> and C<len> arguments to the subroutine identified by the C<cv> argument which is expected to be associated with the package identified by the C<stashpv> argument (see L<attributes>). It gets this wrong, though, in that it does not correctly identify the boundaries of the individual attribute specifications within C<attrstr>. This is not really intended for the public API, but has to be listed here for systems such as AIX which need an explicit export list for symbols. (It's called from XS code in support of the C<ATTRS:> keyword from F<xsubpp>.) Patches to fix it to respect attribute syntax properly would be welcome. =cut */ void Perl_apply_attrs_string(pTHX_ const char *stashpv, CV *cv, const char *attrstr, STRLEN len) { OP *attrs = NULL; PERL_ARGS_ASSERT_APPLY_ATTRS_STRING; if (!len) { len = strlen(attrstr); } while (len) { for (; isSPACE(*attrstr) && len; --len, ++attrstr) ; if (len) { const char * const sstr = attrstr; for (; !isSPACE(*attrstr) && len; --len, ++attrstr) ; attrs = op_append_elem(OP_LIST, attrs, newSVOP(OP_CONST, 0, newSVpvn(sstr, attrstr-sstr))); } } Perl_load_module(aTHX_ PERL_LOADMOD_IMPORT_OPS, newSVpvs(ATTRSMODULE), NULL, op_prepend_elem(OP_LIST, newSVOP(OP_CONST, 0, newSVpv(stashpv,0)), op_prepend_elem(OP_LIST, newSVOP(OP_CONST, 0, newRV(MUTABLE_SV(cv))), attrs))); } STATIC void S_move_proto_attr(pTHX_ OP **proto, OP **attrs, const GV * name, bool curstash) { OP *new_proto = NULL; STRLEN pvlen; char *pv; OP *o; PERL_ARGS_ASSERT_MOVE_PROTO_ATTR; if (!*attrs) return; o = *attrs; if (o->op_type == OP_CONST) { pv = SvPV(cSVOPo_sv, pvlen); if (memBEGINs(pv, pvlen, "prototype(")) { SV * const tmpsv = newSVpvn_flags(pv + 10, pvlen - 11, SvUTF8(cSVOPo_sv)); SV ** const tmpo = cSVOPx_svp(o); SvREFCNT_dec(cSVOPo_sv); *tmpo = tmpsv; new_proto = o; *attrs = NULL; } } else if (o->op_type == OP_LIST) { OP * lasto; assert(o->op_flags & OPf_KIDS); lasto = cLISTOPo->op_first; assert(lasto->op_type == OP_PUSHMARK); for (o = OpSIBLING(lasto); o; o = OpSIBLING(o)) { if (o->op_type == OP_CONST) { pv = SvPV(cSVOPo_sv, pvlen); if (memBEGINs(pv, pvlen, "prototype(")) { SV * const tmpsv = newSVpvn_flags(pv + 10, pvlen - 11, SvUTF8(cSVOPo_sv)); SV ** const tmpo = cSVOPx_svp(o); SvREFCNT_dec(cSVOPo_sv); *tmpo = tmpsv; if (new_proto && ckWARN(WARN_MISC)) { STRLEN new_len; const char * newp = SvPV(cSVOPo_sv, new_len); Perl_warner(aTHX_ packWARN(WARN_MISC), "Attribute prototype(%" UTF8f ") discards earlier prototype attribute in same sub", UTF8fARG(SvUTF8(cSVOPo_sv), new_len, newp)); op_free(new_proto); } else if (new_proto) op_free(new_proto); new_proto = o; /* excise new_proto
from the list */ op_sibling_splice(*attrs, lasto, 1, NULL); o = lasto; continue; } } lasto = o; } /* If the list is now just the PUSHMARK, scrap the whole thing; otherwise attributes.xs would get pulled in with no real need */ if (!OpHAS_SIBLING(cLISTOPx(*attrs)->op_first)) { op_free(*attrs); *attrs = NULL; } } if (new_proto) { SV *svname; if (isGV(name)) { svname = sv_newmortal(); gv_efullname3(svname, name, NULL); } else if (SvPOK(name) && *SvPVX((SV *)name) == '&') svname = newSVpvn_flags(SvPVX((SV *)name)+1, SvCUR(name)-1, SvUTF8(name)|SVs_TEMP); else svname = (SV *)name; if (ckWARN(WARN_ILLEGALPROTO)) (void)validate_proto(svname, cSVOPx_sv(new_proto), TRUE, curstash); if (*proto && ckWARN(WARN_PROTOTYPE)) { STRLEN old_len, new_len; const char * oldp = SvPV(cSVOPx_sv(*proto), old_len); const char * newp = SvPV(cSVOPx_sv(new_proto), new_len); if (curstash && svname == (SV *)name && !memchr(SvPVX(svname), ':', SvCUR(svname))) { svname = sv_2mortal(newSVsv(PL_curstname)); sv_catpvs(svname, "::"); sv_catsv(svname, (SV *)name); } Perl_warner(aTHX_ packWARN(WARN_PROTOTYPE), "Prototype '%" UTF8f "' overridden by attribute 'prototype(%" UTF8f ")'" " in %" SVf, UTF8fARG(SvUTF8(cSVOPx_sv(*proto)), old_len, oldp), UTF8fARG(SvUTF8(cSVOPx_sv(new_proto)), new_len, newp), SVfARG(svname)); } if (*proto) op_free(*proto); *proto = new_proto; } } static void S_cant_declare(pTHX_ OP *o) { if (o->op_type == OP_NULL && (o->op_flags & (OPf_SPECIAL|OPf_KIDS)) == OPf_KIDS) o = cUNOPo->op_first; yyerror(Perl_form(aTHX_ "Can't declare %s in \"%s\"", o->op_type == OP_NULL && o->op_flags & OPf_SPECIAL ? "do block" : OP_DESC(o), PL_parser->in_my == KEY_our ? "our" : PL_parser->in_my == KEY_state ? "state" : "my")); } STATIC OP * S_my_kid(pTHX_ OP *o, OP *attrs, OP **imopsp) { I32 type; const bool stately = PL_parser && PL_parser->in_my == KEY_state; PERL_ARGS_ASSERT_MY_KID; if (!o || (PL_parser && PL_parser->error_count)) return o; type = o->op_type; if (OP_TYPE_IS_OR_WAS(o, OP_LIST)) { OP *kid; for (kid = cLISTOPo->op_first; kid; kid = OpSIBLING(kid)) my_kid(kid, attrs, imopsp); return o; } else if (type == OP_UNDEF || type == OP_STUB) { return o; } else if (type == OP_RV2SV || /* "our" declaration */ type == OP_RV2AV || type == OP_RV2HV) { if (cUNOPo->op_first->op_type != OP_GV) { /* MJD 20011224 */ S_cant_declare(aTHX_ o); } else if (attrs) { GV * const gv = cGVOPx_gv(cUNOPo->op_first); assert(PL_parser); PL_parser->in_my = FALSE; PL_parser->in_my_stash = NULL; apply_attrs(GvSTASH(gv), (type == OP_RV2SV ? GvSVn(gv) : type == OP_RV2AV ? MUTABLE_SV(GvAVn(gv)) : type == OP_RV2HV ? MUTABLE_SV(GvHVn(gv)) : MUTABLE_SV(gv)), attrs); } o->op_private |= OPpOUR_INTRO; return o; } else if (type == OP_REFGEN || type == OP_SREFGEN) { if (!FEATURE_MYREF_IS_ENABLED) Perl_croak(aTHX_ "The experimental declared_refs " "feature is not enabled"); Perl_ck_warner_d(aTHX_ packWARN(WARN_EXPERIMENTAL__DECLARED_REFS), "Declaring references is experimental"); /* Kid is a nulled OP_LIST, handled above. 
*/ my_kid(cUNOPo->op_first, attrs, imopsp); return o; } else if (type != OP_PADSV && type != OP_PADAV && type != OP_PADHV && type != OP_PUSHMARK) { S_cant_declare(aTHX_ o); return o; } else if (attrs && type != OP_PUSHMARK) { HV *stash; assert(PL_parser); PL_parser->in_my = FALSE; PL_parser->in_my_stash = NULL; /* check for C when deciding package */ stash = PAD_COMPNAME_TYPE(o->op_targ); if (!stash) stash = PL_curstash; apply_attrs_my(stash, o, attrs, imopsp); } o->op_flags |= OPf_MOD; o->op_private |= OPpLVAL_INTRO; if (stately) o->op_private |= OPpPAD_STATE; return o; } OP * Perl_my_attrs(pTHX_ OP *o, OP *attrs) { OP *rops; int maybe_scalar = 0; PERL_ARGS_ASSERT_MY_ATTRS; /* [perl #17376]: this appears to be premature, and results in code such as C< our(%x); > executing in list mode rather than void mode */ #if 0 if (o->op_flags & OPf_PARENS) list(o); else maybe_scalar = 1; #else maybe_scalar = 1; #endif if (attrs) SAVEFREEOP(attrs); rops = NULL; o = my_kid(o, attrs, &rops); if (rops) { if (maybe_scalar && o->op_type == OP_PADSV) { o = scalar(op_append_list(OP_LIST, rops, o)); o->op_private |= OPpLVAL_INTRO; } else { /* The listop in rops might have a pushmark at the beginning, which will mess up list assignment. */ LISTOP * const lrops = (LISTOP *)rops; /* for brevity */ if (rops->op_type == OP_LIST && lrops->op_first && lrops->op_first->op_type == OP_PUSHMARK) { OP * const pushmark = lrops->op_first; /* excise pushmark */ op_sibling_splice(rops, NULL, 1, NULL); op_free(pushmark); } o = op_append_list(OP_LIST, o, rops); } } PL_parser->in_my = FALSE; PL_parser->in_my_stash = NULL; return o; } OP * Perl_sawparens(pTHX_ OP *o) { PERL_UNUSED_CONTEXT; if (o) o->op_flags |= OPf_PARENS; return o; } OP * Perl_bind_match(pTHX_ I32 type, OP *left, OP *right) { OP *o; bool ismatchop = 0; const OPCODE ltype = left->op_type; const OPCODE rtype = right->op_type; PERL_ARGS_ASSERT_BIND_MATCH; if ( (ltype == OP_RV2AV || ltype == OP_RV2HV || ltype == OP_PADAV || ltype == OP_PADHV) && ckWARN(WARN_MISC)) { const char * const desc = PL_op_desc[( rtype == OP_SUBST || rtype == OP_TRANS || rtype == OP_TRANSR ) ? (int)rtype : OP_MATCH]; const bool isary = ltype == OP_RV2AV || ltype == OP_PADAV; SV * const name = S_op_varname(aTHX_ left); if (name) Perl_warner(aTHX_ packWARN(WARN_MISC), "Applying %s to %" SVf " will act on scalar(%" SVf ")", desc, SVfARG(name), SVfARG(name)); else { const char * const sample = (isary ? 
"@array" : "%hash"); Perl_warner(aTHX_ packWARN(WARN_MISC), "Applying %s to %s will act on scalar(%s)", desc, sample, sample); } } if (rtype == OP_CONST && cSVOPx(right)->op_private & OPpCONST_BARE && cSVOPx(right)->op_private & OPpCONST_STRICT) { no_bareword_allowed(right); } /* !~ doesn't make sense with /r, so error on it for now */ if (rtype == OP_SUBST && (cPMOPx(right)->op_pmflags & PMf_NONDESTRUCT) && type == OP_NOT) /* diag_listed_as: Using !~ with %s doesn't make sense */ yyerror("Using !~ with s///r doesn't make sense"); if (rtype == OP_TRANSR && type == OP_NOT) /* diag_listed_as: Using !~ with %s doesn't make sense */ yyerror("Using !~ with tr///r doesn't make sense"); ismatchop = (rtype == OP_MATCH || rtype == OP_SUBST || rtype == OP_TRANS || rtype == OP_TRANSR) && !(right->op_flags & OPf_SPECIAL); if (ismatchop && right->op_private & OPpTARGET_MY) { right->op_targ = 0; right->op_private &= ~OPpTARGET_MY; } if (!(right->op_flags & OPf_STACKED) && !right->op_targ && ismatchop) { if (left->op_type == OP_PADSV && !(left->op_private & OPpLVAL_INTRO)) { right->op_targ = left->op_targ; op_free(left); o = right; } else { right->op_flags |= OPf_STACKED; if (rtype != OP_MATCH && rtype != OP_TRANSR && ! (rtype == OP_TRANS && right->op_private & OPpTRANS_IDENTICAL) && ! (rtype == OP_SUBST && (cPMOPx(right)->op_pmflags & PMf_NONDESTRUCT))) left = op_lvalue(left, rtype); if (right->op_type == OP_TRANS || right->op_type == OP_TRANSR) o = newBINOP(OP_NULL, OPf_STACKED, scalar(left), right); else o = op_prepend_elem(rtype, scalar(left), right); } if (type == OP_NOT) return newUNOP(OP_NOT, 0, scalar(o)); return o; } else return bind_match(type, left, pmruntime(newPMOP(OP_MATCH, 0), right, NULL, 0, 0)); } OP * Perl_invert(pTHX_ OP *o) { if (!o) return NULL; return newUNOP(OP_NOT, OPf_SPECIAL, scalar(o)); } /* =for apidoc op_scope Wraps up an op tree with some additional ops so that at runtime a dynamic scope will be created. The original ops run in the new dynamic scope, and then, provided that they exit normally, the scope will be unwound. The additional ops used to create and unwind the dynamic scope will normally be an C/C pair, but a C op may be used instead if the ops are simple enough to not need the full dynamic scope structure. =cut */ OP * Perl_op_scope(pTHX_ OP *o) { dVAR; if (o) { if (o->op_flags & OPf_PARENS || PERLDB_NOOPT || TAINTING_get) { o = op_prepend_elem(OP_LINESEQ, newOP(OP_ENTER, (o->op_flags & OPf_WANT)), o); OpTYPE_set(o, OP_LEAVE); } else if (o->op_type == OP_LINESEQ) { OP *kid; OpTYPE_set(o, OP_SCOPE); kid = ((LISTOP*)o)->op_first; if (kid->op_type == OP_NEXTSTATE || kid->op_type == OP_DBSTATE) { op_null(kid); /* The following deals with things like 'do {1 for 1}' */ kid = OpSIBLING(kid); if (kid && (kid->op_type == OP_NEXTSTATE || kid->op_type == OP_DBSTATE)) op_null(kid); } } else o = newLISTOP(OP_SCOPE, 0, o, NULL); } return o; } OP * Perl_op_unscope(pTHX_ OP *o) { if (o && o->op_type == OP_LINESEQ) { OP *kid = cLISTOPo->op_first; for(; kid; kid = OpSIBLING(kid)) if (kid->op_type == OP_NEXTSTATE || kid->op_type == OP_DBSTATE) op_null(kid); } return o; } /* =for apidoc block_start Handles compile-time scope entry. Arranges for hints to be restored on block exit and also handles pad sequence numbers to make lexical variables scope right. Returns a savestack index for use with C. 
=cut */ int Perl_block_start(pTHX_ int full) { const int retval = PL_savestack_ix; PL_compiling.cop_seq = PL_cop_seqmax; COP_SEQMAX_INC; pad_block_start(full); SAVEHINTS(); PL_hints &= ~HINT_BLOCK_SCOPE; SAVECOMPILEWARNINGS(); PL_compiling.cop_warnings = DUP_WARNINGS(PL_compiling.cop_warnings); SAVEI32(PL_compiling.cop_seq); PL_compiling.cop_seq = 0; CALL_BLOCK_HOOKS(bhk_start, full); return retval; } /* =for apidoc block_end Handles compile-time scope exit. C<floor> is the savestack index returned by C<block_start>, and C<seq> is the body of the block. Returns the block, possibly modified. =cut */ OP* Perl_block_end(pTHX_ I32 floor, OP *seq) { const int needblockscope = PL_hints & HINT_BLOCK_SCOPE; OP* retval = scalarseq(seq); OP *o; /* XXX Is the null PL_parser check necessary here? */ assert(PL_parser); /* Let’s find out under debugging builds. */ if (PL_parser && PL_parser->parsed_sub) { o = newSTATEOP(0, NULL, NULL); op_null(o); retval = op_append_elem(OP_LINESEQ, retval, o); } CALL_BLOCK_HOOKS(bhk_pre_end, &retval); LEAVE_SCOPE(floor); if (needblockscope) PL_hints |= HINT_BLOCK_SCOPE; /* propagate out */ o = pad_leavemy(); if (o) { /* pad_leavemy has created a sequence of introcv ops for all my subs declared in the block. We have to replicate that list with clonecv ops, to deal with this situation: sub { my sub s1; my sub s2; sub s1 { state sub foo { \&s2 } } }->() Originally, I was going to have introcv clone the CV and turn off the stale flag. Since &s1 is declared before &s2, the introcv op for &s1 is executed (on sub entry) before the one for &s2. But the &foo sub inside &s1 (which is cloned when &s1 is cloned, since it is a state sub) closes over &s2 and expects to see it in its outer CV’s pad. If the introcv op clones &s1, then &s2 is still marked stale. Since &s1 is not active, and &foo closes over &s1’s implicit entry for &s2, we get a ‘Variable will not stay shared’ warning. Because it is the same stub that will be used when the introcv op for &s2 is executed, closing over it is safe. Hence, we have to turn off the stale flag on all lexical subs in the block before we clone any of them. Hence, having introcv clone the sub cannot work. So we create a list of ops like this: lineseq | +-- introcv | +-- introcv | +-- introcv | . . . | +-- clonecv | +-- clonecv | +-- clonecv | . . . */ OP *kid = o->op_flags & OPf_KIDS ? cLISTOPo->op_first : o; OP * const last = o->op_flags & OPf_KIDS ? cLISTOPo->op_last : o; for (;; kid = OpSIBLING(kid)) { OP *newkid = newOP(OP_CLONECV, 0); newkid->op_targ = kid->op_targ; o = op_append_elem(OP_LINESEQ, o, newkid); if (kid == last) break; } retval = op_prepend_elem(OP_LINESEQ, o, retval); } CALL_BLOCK_HOOKS(bhk_post_end, &retval); return retval; } /* =head1 Compile-time scope hooks =for apidoc blockhook_register Register a set of hooks to be called when the Perl lexical scope changes at compile time. See L<perlguts/"Compile-time scope hooks">. =cut */ void Perl_blockhook_register(pTHX_ BHK *hk) { PERL_ARGS_ASSERT_BLOCKHOOK_REGISTER; Perl_av_create_and_push(aTHX_ &PL_blockhooks, newSViv(PTR2IV(hk))); } void Perl_newPROG(pTHX_ OP *o) { OP *start; PERL_ARGS_ASSERT_NEWPROG; if (PL_in_eval) { PERL_CONTEXT *cx; I32 i; if (PL_eval_root) return; PL_eval_root = newUNOP(OP_LEAVEEVAL, ((PL_in_eval & EVAL_KEEPERR) ?
OPf_SPECIAL : 0), o); cx = CX_CUR(); assert(CxTYPE(cx) == CXt_EVAL); if ((cx->blk_gimme & G_WANT) == G_VOID) scalarvoid(PL_eval_root); else if ((cx->blk_gimme & G_WANT) == G_ARRAY) list(PL_eval_root); else scalar(PL_eval_root); start = op_linklist(PL_eval_root); PL_eval_root->op_next = 0; i = PL_savestack_ix; SAVEFREEOP(o); ENTER; S_process_optree(aTHX_ NULL, PL_eval_root, start); LEAVE; PL_savestack_ix = i; } else { if (o->op_type == OP_STUB) { /* This block is entered if nothing is compiled for the main program. This will be the case for a genuinely empty main program, or one which only has BEGIN blocks etc, so already run and freed. Historically (5.000) the guard above was !o. However, commit f8a08f7b8bd67b28 (Jun 2001), integrated to blead as c71fccf11fde0068, changed perly.y so that newPROG() is now called with the output of block_end(), which returns a new OP_STUB for the case of an empty optree. ByteLoader (and maybe other things) also take this path, because they set up PL_main_start and PL_main_root directly, without generating an optree. If parsing of the main program aborts (due to parse errors, or due to BEGIN or similar calling exit), then newPROG() isn't even called, and hence this code path and its cleanups are skipped. This shouldn't make a difference: * a non-zero return from perl_parse is a failure, and perl_destruct() should be called immediately. * however, if exit(0) is called during the parse, then perl_parse() returns 0, and perl_run() is called. As PL_main_start will be NULL, perl_run() will return promptly, and the exit code will remain 0. */ PL_comppad_name = 0; PL_compcv = 0; S_op_destroy(aTHX_ o); return; } PL_main_root = op_scope(sawparens(scalarvoid(o))); PL_curcop = &PL_compiling; start = LINKLIST(PL_main_root); PL_main_root->op_next = 0; S_process_optree(aTHX_ NULL, PL_main_root, start); if (!PL_parser->error_count) /* on error, leave CV slabbed so that ops left lying around * will be cleaned up. Else unslab */ cv_forget_slab(PL_compcv); PL_compcv = 0; /* Register with debugger */ if (PERLDB_INTER) { CV * const cv = get_cvs("DB::postponed", 0); if (cv) { dSP; PUSHMARK(SP); XPUSHs(MUTABLE_SV(CopFILEGV(&PL_compiling))); PUTBACK; call_sv(MUTABLE_SV(cv), G_DISCARD); } } } } OP * Perl_localize(pTHX_ OP *o, I32 lex) { PERL_ARGS_ASSERT_LOCALIZE; if (o->op_flags & OPf_PARENS) /* [perl #17376]: this appears to be premature, and results in code such as C< our(%x); > executing in list mode rather than void mode */ #if 0 list(o); #else NOOP; #endif else { if ( PL_parser->bufptr > PL_parser->oldbufptr && PL_parser->bufptr[-1] == ',' && ckWARN(WARN_PARENTHESIS)) { char *s = PL_parser->bufptr; bool sigil = FALSE; /* some heuristics to detect a potential error */ while (*s && (strchr(", \t\n", *s))) s++; while (1) { if (*s && (strchr("@$%", *s) || (!lex && *s == '*')) && *++s && (isWORDCHAR(*s) || UTF8_IS_CONTINUED(*s))) { s++; sigil = TRUE; while (*s && (isWORDCHAR(*s) || UTF8_IS_CONTINUED(*s))) s++; while (*s && (strchr(", \t\n", *s))) s++; } else break; } if (sigil && (*s == ';' || *s == '=')) { Perl_warner(aTHX_ packWARN(WARN_PARENTHESIS), "Parentheses missing around \"%s\" list", lex ? (PL_parser->in_my == KEY_our ? "our" : PL_parser->in_my == KEY_state ?
"state" : "my") : "local"); } } } if (lex) o = my(o); else o = op_lvalue(o, OP_NULL); /* a bit kludgey */ PL_parser->in_my = FALSE; PL_parser->in_my_stash = NULL; return o; } OP * Perl_jmaybe(pTHX_ OP *o) { PERL_ARGS_ASSERT_JMAYBE; if (o->op_type == OP_LIST) { OP * const o2 = newSVREF(newGVOP(OP_GV, 0, gv_fetchpvs(";", GV_ADD|GV_NOTQUAL, SVt_PV))); o = op_convert_list(OP_JOIN, 0, op_prepend_elem(OP_LIST, o2, o)); } return o; } PERL_STATIC_INLINE OP * S_op_std_init(pTHX_ OP *o) { I32 type = o->op_type; PERL_ARGS_ASSERT_OP_STD_INIT; if (PL_opargs[type] & OA_RETSCALAR) scalar(o); if (PL_opargs[type] & OA_TARGET && !o->op_targ) o->op_targ = pad_alloc(type, SVs_PADTMP); return o; } PERL_STATIC_INLINE OP * S_op_integerize(pTHX_ OP *o) { I32 type = o->op_type; PERL_ARGS_ASSERT_OP_INTEGERIZE; /* integerize op. */ if ((PL_opargs[type] & OA_OTHERINT) && (PL_hints & HINT_INTEGER)) { dVAR; o->op_ppaddr = PL_ppaddr[++(o->op_type)]; } if (type == OP_NEGATE) /* XXX might want a ck_negate() for this */ cUNOPo->op_first->op_private &= ~OPpCONST_STRICT; return o; } /* This function exists solely to provide a scope to limit setjmp/longjmp() messing with auto variables. */ PERL_STATIC_INLINE int S_fold_constants_eval(pTHX) { int ret = 0; dJMPENV; JMPENV_PUSH(ret); if (ret == 0) { CALLRUNOPS(aTHX); } JMPENV_POP; return ret; } static OP * S_fold_constants(pTHX_ OP *const o) { dVAR; OP *curop; OP *newop; I32 type = o->op_type; bool is_stringify; SV *sv = NULL; int ret = 0; OP *old_next; SV * const oldwarnhook = PL_warnhook; SV * const olddiehook = PL_diehook; COP not_compiling; U8 oldwarn = PL_dowarn; I32 old_cxix; PERL_ARGS_ASSERT_FOLD_CONSTANTS; if (!(PL_opargs[type] & OA_FOLDCONST)) goto nope; switch (type) { case OP_UCFIRST: case OP_LCFIRST: case OP_UC: case OP_LC: case OP_FC: #ifdef USE_LOCALE_CTYPE if (IN_LC_COMPILETIME(LC_CTYPE)) goto nope; #endif break; case OP_SLT: case OP_SGT: case OP_SLE: case OP_SGE: case OP_SCMP: #ifdef USE_LOCALE_COLLATE if (IN_LC_COMPILETIME(LC_COLLATE)) goto nope; #endif break; case OP_SPRINTF: /* XXX what about the numeric ops? 
*/ #ifdef USE_LOCALE_NUMERIC if (IN_LC_COMPILETIME(LC_NUMERIC)) goto nope; #endif break; case OP_PACK: if (!OpHAS_SIBLING(cLISTOPo->op_first) || OpSIBLING(cLISTOPo->op_first)->op_type != OP_CONST) goto nope; { SV * const sv = cSVOPx_sv(OpSIBLING(cLISTOPo->op_first)); if (!SvPOK(sv) || SvGMAGICAL(sv)) goto nope; { const char *s = SvPVX_const(sv); while (s < SvEND(sv)) { if (isALPHA_FOLD_EQ(*s, 'p')) goto nope; s++; } } } break; case OP_REPEAT: if (o->op_private & OPpREPEAT_DOLIST) goto nope; break; case OP_SREFGEN: if (cUNOPx(cUNOPo->op_first)->op_first->op_type != OP_CONST || SvPADTMP(cSVOPx_sv(cUNOPx(cUNOPo->op_first)->op_first))) goto nope; } if (PL_parser && PL_parser->error_count) goto nope; /* Don't try to run w/ errors */ for (curop = LINKLIST(o); curop != o; curop = LINKLIST(curop)) { switch (curop->op_type) { case OP_CONST: if ( (curop->op_private & OPpCONST_BARE) && (curop->op_private & OPpCONST_STRICT)) { no_bareword_allowed(curop); goto nope; } /* FALLTHROUGH */ case OP_LIST: case OP_SCALAR: case OP_NULL: case OP_PUSHMARK: /* Foldable; move to next op in list */ break; default: /* No other op types are considered foldable */ goto nope; } } curop = LINKLIST(o); old_next = o->op_next; o->op_next = 0; PL_op = curop; old_cxix = cxstack_ix; create_eval_scope(NULL, G_FAKINGEVAL); /* Verify that we don't need to save it: */ assert(PL_curcop == &PL_compiling); StructCopy(&PL_compiling, &not_compiling, COP); PL_curcop = &not_compiling; /* The above ensures that we run with all the correct hints of the currently compiling COP, but that IN_PERL_RUNTIME is true. */ assert(IN_PERL_RUNTIME); PL_warnhook = PERL_WARNHOOK_FATAL; PL_diehook = NULL; /* Effective $^W=1. */ if ( ! (PL_dowarn & G_WARN_ALL_MASK)) PL_dowarn |= G_WARN_ON; ret = S_fold_constants_eval(aTHX); switch (ret) { case 0: sv = *(PL_stack_sp--); if (o->op_targ && sv == PAD_SV(o->op_targ)) { /* grab pad temp? */ pad_swipe(o->op_targ, FALSE); } else if (SvTEMP(sv)) { /* grab mortal temp? */ SvREFCNT_inc_simple_void(sv); SvTEMP_off(sv); } else { assert(SvIMMORTAL(sv)); } break; case 3: /* Something tried to die. Abandon constant folding. */ /* Pretend the error never happened. */ CLEAR_ERRSV(); o->op_next = old_next; break; default: /* Don't expect 1 (setjmp failed) or 2 (something called my_exit) */ PL_warnhook = oldwarnhook; PL_diehook = olddiehook; /* XXX note that this croak may fail as we've already blown away * the stack - eg any nested evals */ Perl_croak(aTHX_ "panic: fold_constants JMPENV_PUSH returned %d", ret); } PL_dowarn = oldwarn; PL_warnhook = oldwarnhook; PL_diehook = olddiehook; PL_curcop = &PL_compiling; /* if we croaked, depending on how we croaked the eval scope * may or may not have already been popped */ if (cxstack_ix > old_cxix) { assert(cxstack_ix == old_cxix + 1); assert(CxTYPE(CX_CUR()) == CXt_EVAL); delete_eval_scope(); } if (ret) goto nope; /* OP_STRINGIFY and constant folding are used to implement qq. Here the constant folding is an implementation detail that we want to hide. If the stringify op is itself already marked folded, however, then it is actually a folded join. */ is_stringify = type == OP_STRINGIFY && !o->op_folded; op_free(o); assert(sv); if (is_stringify) SvPADTMP_off(sv); else if (!SvIMMORTAL(sv)) { SvPADTMP_on(sv); SvREADONLY_on(sv); } newop = newSVOP(OP_CONST, 0, MUTABLE_SV(sv)); if (!is_stringify) newop->op_folded = 1; return newop; nope: return o; } /* convert a constant range in list context into an OP_RV2AV, OP_CONST pair; * the constant value being an AV holding the flattened range.
*/ static void S_gen_constant_list(pTHX_ OP *o) { dVAR; OP *curop, *old_next; SV * const oldwarnhook = PL_warnhook; SV * const olddiehook = PL_diehook; COP *old_curcop; U8 oldwarn = PL_dowarn; SV **svp; AV *av; I32 old_cxix; COP not_compiling; int ret = 0; dJMPENV; bool op_was_null; list(o); if (PL_parser && PL_parser->error_count) return; /* Don't attempt to run with errors */ curop = LINKLIST(o); old_next = o->op_next; o->op_next = 0; op_was_null = o->op_type == OP_NULL; if (op_was_null) /* b3698342565fb462291fba4b432cfcd05b6eb4e1 */ o->op_type = OP_CUSTOM; CALL_PEEP(curop); if (op_was_null) o->op_type = OP_NULL; S_prune_chain_head(&curop); PL_op = curop; old_cxix = cxstack_ix; create_eval_scope(NULL, G_FAKINGEVAL); old_curcop = PL_curcop; StructCopy(old_curcop, &not_compiling, COP); PL_curcop = &not_compiling; /* The above ensures that we run with all the correct hints of the current COP, but that IN_PERL_RUNTIME is true. */ assert(IN_PERL_RUNTIME); PL_warnhook = PERL_WARNHOOK_FATAL; PL_diehook = NULL; JMPENV_PUSH(ret); /* Effective $^W=1. */ if ( ! (PL_dowarn & G_WARN_ALL_MASK)) PL_dowarn |= G_WARN_ON; switch (ret) { case 0: #if defined DEBUGGING && !defined DEBUGGING_RE_ONLY PL_curstackinfo->si_stack_hwm = 0; /* stop valgrind complaining */ #endif Perl_pp_pushmark(aTHX); CALLRUNOPS(aTHX); PL_op = curop; assert (!(curop->op_flags & OPf_SPECIAL)); assert(curop->op_type == OP_RANGE); Perl_pp_anonlist(aTHX); break; case 3: CLEAR_ERRSV(); o->op_next = old_next; break; default: JMPENV_POP; PL_warnhook = oldwarnhook; PL_diehook = olddiehook; Perl_croak(aTHX_ "panic: gen_constant_list JMPENV_PUSH returned %d", ret); } JMPENV_POP; PL_dowarn = oldwarn; PL_warnhook = oldwarnhook; PL_diehook = olddiehook; PL_curcop = old_curcop; if (cxstack_ix > old_cxix) { assert(cxstack_ix == old_cxix + 1); assert(CxTYPE(CX_CUR()) == CXt_EVAL); delete_eval_scope(); } if (ret) return; OpTYPE_set(o, OP_RV2AV); o->op_flags &= ~OPf_REF; /* treat \(1..2) like an ordinary list */ o->op_flags |= OPf_PARENS; /* and flatten \(1..2,3) */ o->op_opt = 0; /* needs to be revisited in rpeep() */ av = (AV *)SvREFCNT_inc_NN(*PL_stack_sp--); /* replace subtree with an OP_CONST */ curop = ((UNOP*)o)->op_first; op_sibling_splice(o, NULL, -1, newSVOP(OP_CONST, 0, (SV *)av)); op_free(curop); if (AvFILLp(av) != -1) for (svp = AvARRAY(av) + AvFILLp(av); svp >= AvARRAY(av); --svp) { SvPADTMP_on(*svp); SvREADONLY_on(*svp); } LINKLIST(o); list(o); return; } /* =head1 Optree Manipulation Functions */ /* List constructors */ /* =for apidoc op_append_elem Append an item to the list of ops contained directly within a list-type op, returning the lengthened list. C<first> is the list-type op, and C<last> is the op to append to the list. C<type> specifies the intended opcode for the list. If C<first> is not already a list of the right type, it will be upgraded into one. If either C<first> or C<last> is null, the other is returned unchanged. =cut */ OP * Perl_op_append_elem(pTHX_ I32 type, OP *first, OP *last) { if (!first) return last; if (!last) return first; if (first->op_type != (unsigned)type || (type == OP_LIST && (first->op_flags & OPf_PARENS))) { return newLISTOP(type, 0, first, last); } op_sibling_splice(first, ((LISTOP*)first)->op_last, 0, last); first->op_flags |= OPf_KIDS; return first; } /* =for apidoc op_append_list Concatenate the lists of ops contained directly within two list-type ops, returning the combined list. C<first> and C<last> are the list-type ops to concatenate. C<type> specifies the intended opcode for the list.
If either C<first> or C<last> is not already a list of the right type, it will be upgraded into one. If either C<first> or C<last> is null, the other is returned unchanged. =cut */ OP * Perl_op_append_list(pTHX_ I32 type, OP *first, OP *last) { if (!first) return last; if (!last) return first; if (first->op_type != (unsigned)type) return op_prepend_elem(type, first, last); if (last->op_type != (unsigned)type) return op_append_elem(type, first, last); OpMORESIB_set(((LISTOP*)first)->op_last, ((LISTOP*)last)->op_first); ((LISTOP*)first)->op_last = ((LISTOP*)last)->op_last; OpLASTSIB_set(((LISTOP*)first)->op_last, first); first->op_flags |= (last->op_flags & OPf_KIDS); S_op_destroy(aTHX_ last); return first; } /* =for apidoc op_prepend_elem Prepend an item to the list of ops contained directly within a list-type op, returning the lengthened list. C<first> is the op to prepend to the list, and C<last> is the list-type op. C<type> specifies the intended opcode for the list. If C<last> is not already a list of the right type, it will be upgraded into one. If either C<first> or C<last> is null, the other is returned unchanged. =cut */ OP * Perl_op_prepend_elem(pTHX_ I32 type, OP *first, OP *last) { if (!first) return last; if (!last) return first; if (last->op_type == (unsigned)type) { if (type == OP_LIST) { /* already a PUSHMARK there */ /* insert 'first' after pushmark */ op_sibling_splice(last, cLISTOPx(last)->op_first, 0, first); if (!(first->op_flags & OPf_PARENS)) last->op_flags &= ~OPf_PARENS; } else op_sibling_splice(last, NULL, 0, first); last->op_flags |= OPf_KIDS; return last; } return newLISTOP(type, 0, first, last); } /* =for apidoc op_convert_list Converts C<o> into a list op if it is not one already, and then converts it into the specified C<type>, calling its check function, allocating a target if it needs one, and folding constants. A list-type op is usually constructed one kid at a time via C<newLISTOP>, C<op_prepend_elem> and C<op_append_elem>. Then finally it is passed to C<op_convert_list> to make it the right type. =cut */ OP * Perl_op_convert_list(pTHX_ I32 type, I32 flags, OP *o) { dVAR; if (type < 0) type = -type, flags |= OPf_SPECIAL; if (!o || o->op_type != OP_LIST) o = force_list(o, 0); else { o->op_flags &= ~OPf_WANT; o->op_private &= ~OPpLVAL_INTRO; } if (!(PL_opargs[type] & OA_MARK)) op_null(cLISTOPo->op_first); else { OP * const kid2 = OpSIBLING(cLISTOPo->op_first); if (kid2 && kid2->op_type == OP_COREARGS) { op_null(cLISTOPo->op_first); kid2->op_private |= OPpCOREARGS_PUSHMARK; } } if (type != OP_SPLIT) /* At this point o is a LISTOP, but OP_SPLIT is a PMOP; let * ck_split() create a real PMOP and leave the op's type as listop * for now. Otherwise op_free() etc will crash. */ OpTYPE_set(o, type); o->op_flags |= flags; if (flags & OPf_FOLDED) o->op_folded = 1; o = CHECKOP(type, o); if (o->op_type != (unsigned)type) return o; return fold_constants(op_integerize(op_std_init(o))); } /* Constructors */ /* =head1 Optree construction =for apidoc newNULLLIST Constructs, checks, and returns a new C<stub> op, which represents an empty list expression. =cut */ OP * Perl_newNULLLIST(pTHX) { return newOP(OP_STUB, 0); } /* promote o and any siblings to be a list if it's not already; i.e. * * o - A - B * * becomes * * list * | * pushmark - o - A - B * * If nullit is true, the list op is nulled.
*/ static OP * S_force_list(pTHX_ OP *o, bool nullit) { if (!o || o->op_type != OP_LIST) { OP *rest = NULL; if (o) { /* manually detach any siblings then add them back later */ rest = OpSIBLING(o); OpLASTSIB_set(o, NULL); } o = newLISTOP(OP_LIST, 0, o, NULL); if (rest) op_sibling_splice(o, cLISTOPo->op_last, 0, rest); } if (nullit) op_null(o); return o; } /* =for apidoc newLISTOP Constructs, checks, and returns an op of any list type. C<type> is the opcode. C<flags> gives the eight bits of C<op_flags>, except that C<OPf_KIDS> will be set automatically if required. C<first> and C<last> supply up to two ops to be direct children of the list op; they are consumed by this function and become part of the constructed op tree. For most list operators, the check function expects all the kid ops to be present already, so calling C<newLISTOP(OP_JOIN, ...)> (e.g.) is not appropriate. What you want to do in that case is create an op of type C<OP_LIST>, append more children to it, and then call L</op_convert_list>. See L</op_convert_list> for more information. =cut */ OP * Perl_newLISTOP(pTHX_ I32 type, I32 flags, OP *first, OP *last) { dVAR; LISTOP *listop; /* Note that allocating an OP_PUSHMARK can die under Safe.pm if * pushmark is banned. So do it now while existing ops are in a * consistent state, in case they suddenly get freed */ OP* const pushop = type == OP_LIST ? newOP(OP_PUSHMARK, 0) : NULL; assert((PL_opargs[type] & OA_CLASS_MASK) == OA_LISTOP || type == OP_CUSTOM); NewOp(1101, listop, 1, LISTOP); OpTYPE_set(listop, type); if (first || last) flags |= OPf_KIDS; listop->op_flags = (U8)flags; if (!last && first) last = first; else if (!first && last) first = last; else if (first) OpMORESIB_set(first, last); listop->op_first = first; listop->op_last = last; if (pushop) { OpMORESIB_set(pushop, first); listop->op_first = pushop; listop->op_flags |= OPf_KIDS; if (!last) listop->op_last = pushop; } if (listop->op_last) OpLASTSIB_set(listop->op_last, (OP*)listop); return CHECKOP(type, listop); } /* =for apidoc newOP Constructs, checks, and returns an op of any base type (any type that has no extra fields). C<type> is the opcode. C<flags> gives the eight bits of C<op_flags>, and, shifted up eight bits, the eight bits of C<op_private>. =cut */ OP * Perl_newOP(pTHX_ I32 type, I32 flags) { dVAR; OP *o; if (type == -OP_ENTEREVAL) { type = OP_ENTEREVAL; flags |= OPpEVAL_BYTES<<8; } assert((PL_opargs[type] & OA_CLASS_MASK) == OA_BASEOP || (PL_opargs[type] & OA_CLASS_MASK) == OA_BASEOP_OR_UNOP || (PL_opargs[type] & OA_CLASS_MASK) == OA_FILESTATOP || (PL_opargs[type] & OA_CLASS_MASK) == OA_LOOPEXOP); NewOp(1101, o, 1, OP); OpTYPE_set(o, type); o->op_flags = (U8)flags; o->op_next = o; o->op_private = (U8)(0 | (flags >> 8)); if (PL_opargs[type] & OA_RETSCALAR) scalar(o); if (PL_opargs[type] & OA_TARGET) o->op_targ = pad_alloc(type, SVs_PADTMP); return CHECKOP(type, o); } /* =for apidoc newUNOP Constructs, checks, and returns an op of any unary type. C<type> is the opcode. C<flags> gives the eight bits of C<op_flags>, except that C<OPf_KIDS> will be set automatically if required, and, shifted up eight bits, the eight bits of C<op_private>, except that the bit with value 1 is automatically set. C<first> supplies an optional op to be the direct child of the unary op; it is consumed by this function and becomes part of the constructed op tree.
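For example, a negated match is built elsewhere in this file by wrapping an existing subtree in a C<not> op; the same pattern would apply to any unary type (a minimal sketch; C<kid> stands for a previously constructed OP* and is not a name used here):

    OP *notop = newUNOP(OP_NOT, 0, scalar(kid));   /* kid is consumed */
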
=cut */ OP * Perl_newUNOP(pTHX_ I32 type, I32 flags, OP *first) { dVAR; UNOP *unop; if (type == -OP_ENTEREVAL) { type = OP_ENTEREVAL; flags |= OPpEVAL_BYTES<<8; } assert((PL_opargs[type] & OA_CLASS_MASK) == OA_UNOP || (PL_opargs[type] & OA_CLASS_MASK) == OA_BASEOP_OR_UNOP || (PL_opargs[type] & OA_CLASS_MASK) == OA_FILESTATOP || (PL_opargs[type] & OA_CLASS_MASK) == OA_LOOPEXOP || type == OP_SASSIGN || type == OP_ENTERTRY || type == OP_CUSTOM || type == OP_NULL ); if (!first) first = newOP(OP_STUB, 0); if (PL_opargs[type] & OA_MARK) first = force_list(first, 1); NewOp(1101, unop, 1, UNOP); OpTYPE_set(unop, type); unop->op_first = first; unop->op_flags = (U8)(flags | OPf_KIDS); unop->op_private = (U8)(1 | (flags >> 8)); if (!OpHAS_SIBLING(first)) /* true unless weird syntax error */ OpLASTSIB_set(first, (OP*)unop); unop = (UNOP*) CHECKOP(type, unop); if (unop->op_next) return (OP*)unop; return fold_constants(op_integerize(op_std_init((OP *) unop))); } /* =for apidoc newUNOP_AUX Similar to C<newUNOP>, but creates an C<UNOP_AUX> struct instead, with C<op_aux> initialised to C<aux> =cut */ OP * Perl_newUNOP_AUX(pTHX_ I32 type, I32 flags, OP *first, UNOP_AUX_item *aux) { dVAR; UNOP_AUX *unop; assert((PL_opargs[type] & OA_CLASS_MASK) == OA_UNOP_AUX || type == OP_CUSTOM); NewOp(1101, unop, 1, UNOP_AUX); unop->op_type = (OPCODE)type; unop->op_ppaddr = PL_ppaddr[type]; unop->op_first = first; unop->op_flags = (U8)(flags | (first ? OPf_KIDS : 0)); unop->op_private = (U8)((first ? 1 : 0) | (flags >> 8)); unop->op_aux = aux; if (first && !OpHAS_SIBLING(first)) /* true unless weird syntax error */ OpLASTSIB_set(first, (OP*)unop); unop = (UNOP_AUX*) CHECKOP(type, unop); return op_std_init((OP *) unop); } /* =for apidoc newMETHOP Constructs, checks, and returns an op of method type with a method name evaluated at runtime. C<type> is the opcode. C<flags> gives the eight bits of C<op_flags>, except that C<OPf_KIDS> will be set automatically, and, shifted up eight bits, the eight bits of C<op_private>, except that the bit with value 1 is automatically set. C<dynamic_meth> supplies an op which evaluates the method name; it is consumed by this function and becomes part of the constructed op tree. Supported optypes: C<OP_METHOD>. =cut */ static OP* S_newMETHOP_internal(pTHX_ I32 type, I32 flags, OP* dynamic_meth, SV* const_meth) { dVAR; METHOP *methop; assert((PL_opargs[type] & OA_CLASS_MASK) == OA_METHOP || type == OP_CUSTOM); NewOp(1101, methop, 1, METHOP); if (dynamic_meth) { if (PL_opargs[type] & OA_MARK) dynamic_meth = force_list(dynamic_meth, 1); methop->op_flags = (U8)(flags | OPf_KIDS); methop->op_u.op_first = dynamic_meth; methop->op_private = (U8)(1 | (flags >> 8)); if (!OpHAS_SIBLING(dynamic_meth)) OpLASTSIB_set(dynamic_meth, (OP*)methop); } else { assert(const_meth); methop->op_flags = (U8)(flags & ~OPf_KIDS); methop->op_u.op_meth_sv = const_meth; methop->op_private = (U8)(0 | (flags >> 8)); methop->op_next = (OP*)methop; } #ifdef USE_ITHREADS methop->op_rclass_targ = 0; #else methop->op_rclass_sv = NULL; #endif OpTYPE_set(methop, type); return CHECKOP(type, methop); } OP * Perl_newMETHOP (pTHX_ I32 type, I32 flags, OP* dynamic_meth) { PERL_ARGS_ASSERT_NEWMETHOP; return newMETHOP_internal(type, flags, dynamic_meth, NULL); } /* =for apidoc newMETHOP_named Constructs, checks, and returns an op of method type with a constant method name. C<type> is the opcode. C<flags> gives the eight bits of C<op_flags>, and, shifted up eight bits, the eight bits of C<op_private>. C<const_meth> supplies a constant method name; it must be a shared COW string. Supported optypes: C<OP_METHOD_NAMED>.
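For example, a named C<import> method op can be built from a shared string, much as the attribute-handling code earlier in this file does (a minimal sketch of that same usage):

    OP *meth = newMETHOP_named(OP_METHOD_NAMED, 0, newSVpvs_share("import"));
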
=cut */ OP * Perl_newMETHOP_named (pTHX_ I32 type, I32 flags, SV* const_meth) { PERL_ARGS_ASSERT_NEWMETHOP_NAMED; return newMETHOP_internal(type, flags, NULL, const_meth); } /* =for apidoc newBINOP Constructs, checks, and returns an op of any binary type. C<type> is the opcode. C<flags> gives the eight bits of C<op_flags>, except that C<OPf_KIDS> will be set automatically, and, shifted up eight bits, the eight bits of C<op_private>, except that the bit with value 1 or 2 is automatically set as required. C<first> and C<last> supply up to two ops to be the direct children of the binary op; they are consumed by this function and become part of the constructed op tree. =cut */ OP * Perl_newBINOP(pTHX_ I32 type, I32 flags, OP *first, OP *last) { dVAR; BINOP *binop; ASSUME((PL_opargs[type] & OA_CLASS_MASK) == OA_BINOP || type == OP_NULL || type == OP_CUSTOM); NewOp(1101, binop, 1, BINOP); if (!first) first = newOP(OP_NULL, 0); OpTYPE_set(binop, type); binop->op_first = first; binop->op_flags = (U8)(flags | OPf_KIDS); if (!last) { last = first; binop->op_private = (U8)(1 | (flags >> 8)); } else { binop->op_private = (U8)(2 | (flags >> 8)); OpMORESIB_set(first, last); } if (!OpHAS_SIBLING(last)) /* true unless weird syntax error */ OpLASTSIB_set(last, (OP*)binop); binop->op_last = OpSIBLING(binop->op_first); if (binop->op_last) OpLASTSIB_set(binop->op_last, (OP*)binop); binop = (BINOP*)CHECKOP(type, binop); if (binop->op_next || binop->op_type != (OPCODE)type) return (OP*)binop; return fold_constants(op_integerize(op_std_init((OP *)binop))); } void Perl_invmap_dump(pTHX_ SV* invlist, UV *map) { const char indent[] = " "; UV len = _invlist_len(invlist); UV * array = invlist_array(invlist); UV i; PERL_ARGS_ASSERT_INVMAP_DUMP; for (i = 0; i < len; i++) { UV start = array[i]; UV end = (i + 1 < len) ? array[i+1] - 1 : IV_MAX; PerlIO_printf(Perl_debug_log, "%s[%" UVuf "] 0x%04" UVXf, indent, i, start); if (end == IV_MAX) { PerlIO_printf(Perl_debug_log, " .. INFTY"); } else if (end != start) { PerlIO_printf(Perl_debug_log, " .. 0x%04" UVXf, end); } else { PerlIO_printf(Perl_debug_log, " "); } PerlIO_printf(Perl_debug_log, "\t"); if (map[i] == TR_UNLISTED) { PerlIO_printf(Perl_debug_log, "TR_UNLISTED\n"); } else if (map[i] == TR_SPECIAL_HANDLING) { PerlIO_printf(Perl_debug_log, "TR_SPECIAL_HANDLING\n"); } else { PerlIO_printf(Perl_debug_log, "0x%04" UVXf "\n", map[i]); } } } /* Given an OP_TRANS / OP_TRANSR op o, plus OP_CONST ops expr and repl * containing the search and replacement strings, assemble into * a translation table attached as o->op_pv. * Free expr and repl. * It expects the toker to have already set the * OPpTRANS_COMPLEMENT * OPpTRANS_SQUASH * OPpTRANS_DELETE * flags as appropriate; this function may add * OPpTRANS_USE_SVOP * OPpTRANS_CAN_FORCE_UTF8 * OPpTRANS_IDENTICAL * OPpTRANS_GROWS * flags */ static OP * S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) { /* This function compiles a tr///, from data gathered from toke.c, into a * form suitable for use by do_trans() in doop.c at runtime. * * It first normalizes the data, while discarding extraneous inputs; then * writes out the compiled data. The normalization allows for complete * analysis, and avoids some false negatives and positives earlier versions * of this code had. * * The normalization form is an inversion map (described below in detail). * This is essentially the compiled form for tr///'s that require UTF-8, * and it's easy to use it to write the 257-byte table for tr///'s that * don't need UTF-8.
That table is identical to what's been in use for * many perl versions, except that it doesn't handle some edge cases that * it used to, involving code points above 255. The UTF-8 form now handles * these. (This could be changed with extra coding should it shown to be * desirable.) * * If the complement (/c) option is specified, the lhs string (tstr) is * parsed into an inversion list. Complementing these is trivial. Then a * complemented tstr is built from that, and used thenceforth. This hides * the fact that it was complemented from almost all successive code. * * One of the important characteristics to know about the input is whether * the transliteration may be done in place, or does a temporary need to be * allocated, then copied. If the replacement for every character in every * possible string takes up no more bytes than the the character it * replaces, then it can be edited in place. Otherwise the replacement * could "grow", depending on the strings being processed. Some inputs * won't grow, and might even shrink under /d, but some inputs could grow, * so we have to assume any given one might grow. On very long inputs, the * temporary could eat up a lot of memory, so we want to avoid it if * possible. For non-UTF-8 inputs, everything is single-byte, so can be * edited in place, unless there is something in the pattern that could * force it into UTF-8. The inversion map makes it feasible to determine * this. Previous versions of this code pretty much punted on determining * if UTF-8 could be edited in place. Now, this code is rigorous in making * that determination. * * Another characteristic we need to know is whether the lhs and rhs are * identical. If so, and no other flags are present, the only effect of * the tr/// is to count the characters present in the input that are * mentioned in the lhs string. The implementation of that is easier and * runs faster than the more general case. Normalizing here allows for * accurate determination of this. Previously there were false negatives * possible. * * Instead of 'transliterated', the comments here use 'unmapped' for the * characters that are left unchanged by the operation; otherwise they are * 'mapped' * * The lhs of the tr/// is here referred to as the t side. * The rhs of the tr/// is here referred to as the r side. */ SV * const tstr = ((SVOP*)expr)->op_sv; SV * const rstr = ((SVOP*)repl)->op_sv; STRLEN tlen; STRLEN rlen; const U8 * t0 = (U8*)SvPV_const(tstr, tlen); const U8 * r0 = (U8*)SvPV_const(rstr, rlen); const U8 * t = t0; const U8 * r = r0; Size_t t_count = 0, r_count = 0; /* Number of characters in search and replacement lists */ /* khw thinks some of the private flags for this op are quaintly named. * OPpTRANS_GROWS for example is TRUE if the replacement for some lhs * character when represented in UTF-8 is longer than the original * character's UTF-8 representation */ const bool complement = cBOOL(o->op_private & OPpTRANS_COMPLEMENT); const bool squash = cBOOL(o->op_private & OPpTRANS_SQUASH); const bool del = cBOOL(o->op_private & OPpTRANS_DELETE); /* Set to true if there is some character < 256 in the lhs that maps to > * 255. If so, a non-UTF-8 match string can be forced into requiring to be * in UTF-8 by a tr/// operation. */ bool can_force_utf8 = FALSE; /* What is the maximum expansion factor in UTF-8 transliterations. If a * 2-byte UTF-8 encoded character is to be replaced by a 3-byte one, its * expansion factor is 1.5. 
This number is used at runtime to calculate * how much space to allocate for non-inplace transliterations. Without * this number, the worst case is 14, which is extremely unlikely to happen * in real life, and would require significant memory overhead. */ NV max_expansion = 1.; SSize_t t_range_count, r_range_count, min_range_count; UV* t_array; SV* t_invlist; UV* r_map; UV r_cp, t_cp; IV t_cp_end = -1; UV r_cp_end; Size_t len; AV* invmap; UV final_map = TR_UNLISTED; /* The final character in the replacement list, updated as we go along. Initialize to something illegal */ bool rstr_utf8 = cBOOL(SvUTF8(rstr)); bool tstr_utf8 = cBOOL(SvUTF8(tstr)); const U8* tend = t + tlen; const U8* rend = r + rlen; SV * inverted_tstr = NULL; Size_t i; unsigned int pass2; /* This routine implements detection of a transliteration having a longer * UTF-8 representation than its source, by partitioning all the possible * code points of the platform into equivalence classes of the same UTF-8 * byte length in the first pass. As it constructs the mappings, it carves * these up into smaller chunks, but doesn't merge any together. This * makes it easy to find the instances it's looking for. A second pass is * done after this has been determined which merges things together to * shrink the table for runtime. For ASCII platforms, the table is * trivial, given below, and uses the fundamental characteristics of UTF-8 * to construct the values. For EBCDIC, it isn't so, and we rely on a * table constructed by the perl script that generates these kinds of * things */ #ifndef EBCDIC UV PL_partition_by_byte_length[] = { 0, 0x80, (32 * (1UL << ( UTF_ACCUMULATION_SHIFT))), (32 * (1UL << ( UTF_ACCUMULATION_SHIFT))), (16 * (1UL << (2 * UTF_ACCUMULATION_SHIFT))), ( 8 * (1UL << (3 * UTF_ACCUMULATION_SHIFT))), ( 4 * (1UL << (4 * UTF_ACCUMULATION_SHIFT))), ( 2 * (1UL << (5 * UTF_ACCUMULATION_SHIFT))) # ifdef UV_IS_QUAD , ( ((UV) 1U << (6 * UTF_ACCUMULATION_SHIFT))) # endif #endif }; PERL_ARGS_ASSERT_PMTRANS; PL_hints |= HINT_BLOCK_SCOPE; /* If /c, the search list is sorted and complemented. This is now done by * creating an inversion list from it, and then trivially inverting that. * The previous implementation used qsort, but creating the list * automatically keeps it sorted as we go along */ if (complement) { UV start, end; SV * inverted_tlist = _new_invlist(tlen); Size_t temp_len; while (t < tend) { /* Non-utf8 strings don't have ranges, so each character is listed * out */ if (! tstr_utf8) { inverted_tlist = add_cp_to_invlist(inverted_tlist, *t); t++; } else { /* But UTF-8 strings have been parsed in toke.c to have * ranges if appropriate. */ UV t_cp; Size_t t_char_len; /* Get the first character */ t_cp = valid_utf8_to_uvchr(t, &t_char_len); t += t_char_len; /* If the next byte indicates that this wasn't the first * element of a range, the range is just this one */ if (t >= tend || *t != RANGE_INDICATOR) { inverted_tlist = add_cp_to_invlist(inverted_tlist, t_cp); } else { /* Otherwise, ignore the indicator byte, and get the final element, and add the whole range */ t++; t_cp_end = valid_utf8_to_uvchr(t, &t_char_len); t += t_char_len; inverted_tlist = _add_range_to_invlist(inverted_tlist, t_cp, t_cp_end); } } } /* End of parse through tstr */ /* The inversion list is done; now invert it */ _invlist_invert(inverted_tlist); /* Now go through the inverted list and create a new tstr for the rest * of the routine to use. 
Since the UTF-8 version can have ranges, and * can be much more compact than the non-UTF-8 version, we create the * string in UTF-8 even if not necessary. (This is just an intermediate * value that gets thrown away anyway.) */ invlist_iterinit(inverted_tlist); inverted_tstr = newSVpvs(""); while (invlist_iternext(inverted_tlist, &start, &end)) { U8 temp[UTF8_MAXBYTES]; U8 * temp_end_pos; /* IV_MAX keeps things from going out of bounds */ start = MIN(IV_MAX, start); end = MIN(IV_MAX, end); temp_end_pos = uvchr_to_utf8(temp, start); sv_catpvn(inverted_tstr, (char *) temp, temp_end_pos - temp); if (start != end) { Perl_sv_catpvf(aTHX_ inverted_tstr, "%c", RANGE_INDICATOR); temp_end_pos = uvchr_to_utf8(temp, end); sv_catpvn(inverted_tstr, (char *) temp, temp_end_pos - temp); } } /* Set up so the remainder of the routine uses this complement, instead * of the actual input */ t0 = t = (U8*)SvPV_const(inverted_tstr, temp_len); tend = t0 + temp_len; tstr_utf8 = TRUE; SvREFCNT_dec_NN(inverted_tlist); } /* For non-/d, an empty rhs means to use the lhs */ if (rlen == 0 && ! del) { r0 = t0; rend = tend; rstr_utf8 = tstr_utf8; } t_invlist = _new_invlist(1); /* Parse the (potentially adjusted) input, creating the inversion map. * This is done in two passes. The first pass is to determine if the * transliteration can be done in place. The inversion map it creates * could be used, but generally would be larger and slower to run than the * output of the second pass, which starts with a more compact table and * allows more ranges to be merged */ for (pass2 = 0; pass2 < 2; pass2++) { /* Initialize to a single range */ t_invlist = _add_range_to_invlist(t_invlist, 0, UV_MAX); /* In the second pass, we just have the single range */ if (pass2) { len = 1; t_array = invlist_array(t_invlist); } else { /* But in the first pass, the lhs is partitioned such that the * number of UTF-8 bytes required to represent a code point in each * partition is the same as the number for any other code point in * that partion. We copy the pre-compiled partion. */ len = C_ARRAY_LENGTH(PL_partition_by_byte_length); invlist_extend(t_invlist, len); t_array = invlist_array(t_invlist); Copy(PL_partition_by_byte_length, t_array, len, UV); invlist_set_len(t_invlist, len, *(get_invlist_offset_addr(t_invlist))); Newx(r_map, len + 1, UV); } /* And the mapping of each of the ranges is initialized. Initially, * everything is TR_UNLISTED. */ for (i = 0; i < len; i++) { r_map[i] = TR_UNLISTED; } t = t0; t_count = 0; r = r0; r_count = 0; t_range_count = r_range_count = 0; /* Now go through the search list constructing an inversion map. The * input is not necessarily in any particular order. Making it an * inversion map orders it, potentially simplifying, and makes it easy * to deal with at run time. This is the only place in core that * generates an inversion map; if others were introduced, it might be * better to create general purpose routines to handle them. * (Inversion maps are created in perl in other places.) * * An inversion map consists of two parallel arrays. One is * essentially an inversion list: an ordered list of code points such * that each element gives the first code point of a range of * consecutive code points that map to the element in the other array * that has the same index as this one (in other words, the * corresponding element). Thus the range extends up to (but not * including) the code point given by the next higher element. 
In a * true inversion map, the corresponding element in the other array * gives the mapping of the first code point in the range, with the * understanding that the next higher code point in the inversion * list's range will map to the next higher code point in the map. * * So if at element [i], let's say we have: * * t_invlist r_map * [i] A a * * This means that A => a, B => b, C => c.... Let's say that the * situation is such that: * * [i+1] L -1 * * This means the sequence that started at [i] stops at K => k. This * illustrates that you need to look at the next element to find where * a sequence stops. Except, the highest element in the inversion list * begins a range that is understood to extend to the platform's * infinity. * * This routine modifies traditional inversion maps to reserve two * mappings: * * TR_UNLISTED (or -1) indicates that the no code point in the range * is listed in the tr/// searchlist. At runtime, these are * always passed through unchanged. In the inversion map, all * points in the range are mapped to -1, instead of increasing, * like the 'L' in the example above. * * We start the parse with every code point mapped to this, and as * we parse and find ones that are listed in the search list, we * carve out ranges as we go along that override that. * * TR_SPECIAL_HANDLING (or -2) indicates that every code point in the * range needs special handling. Again, all code points in the * range are mapped to -2, instead of increasing. * * Under /d this value means the code point should be deleted from * the transliteration when encountered. * * Otherwise, it marks that every code point in the range is to * map to the final character in the replacement list. This * happens only when the replacement list is shorter than the * search one, so there are things in the search list that have no * correspondence in the replacement list. For example, in * tr/a-z/A/, 'A' is the final value, and the inversion map * generated for this would be like this: * \0 => -1 * a => A * b-z => -2 * z+1 => -1 * 'A' appears once, then the remainder of the range maps to -2. * The use of -2 isn't strictly necessary, as an inversion map is * capable of representing this situation, but not nearly so * compactly, and this is actually quite commonly encountered. * Indeed, the original design of this code used a full inversion * map for this. But things like * tr/\0-\x{FFFF}/A/ * generated huge data structures, slowly, and the execution was * also slow. So the current scheme was implemented. * * So, if the next element in our example is: * * [i+2] Q q * * Then all of L, M, N, O, and P map to TR_UNLISTED. If the next * elements are * * [i+3] R z * [i+4] S TR_UNLISTED * * Then Q => q; R => z; and S => TR_UNLISTED. If [i+4] (the 'S') is * the final element in the arrays, every code point from S to infinity * maps to TR_UNLISTED. * */ /* Finish up range started in what otherwise would * have been the final iteration */ while (t < tend || t_range_count > 0) { bool adjacent_to_range_above = FALSE; bool adjacent_to_range_below = FALSE; bool merge_with_range_above = FALSE; bool merge_with_range_below = FALSE; SSize_t i, span, invmap_range_length_remaining; /* If we are in the middle of processing a range in the 'target' * side, the previous iteration has set us up. Otherwise, look at * the next character in the search list */ if (t_range_count <= 0) { if (! tstr_utf8) { /* Here, not in the middle of a range, and not UTF-8. 
The * next code point is the single byte where we're at */ t_cp = *t; t_range_count = 1; t++; } else { Size_t t_char_len; /* Here, not in the middle of a range, and is UTF-8. The * next code point is the next UTF-8 char in the input. We * know the input is valid, because the toker constructed * it */ t_cp = valid_utf8_to_uvchr(t, &t_char_len); t += t_char_len; /* UTF-8 strings (only) have been parsed in toke.c to have * ranges. See if the next byte indicates that this was * the first element of a range. If so, get the final * element and calculate the range size. If not, the range * size is 1 */ if (t < tend && *t == RANGE_INDICATOR) { t++; t_range_count = valid_utf8_to_uvchr(t, &t_char_len) - t_cp + 1; t += t_char_len; } else { t_range_count = 1; } } /* Count the total number of listed code points * */ t_count += t_range_count; } /* Similarly, get the next character in the replacement list */ if (r_range_count <= 0) { if (r >= rend) { /* But if we've exhausted the rhs, there is nothing to map * to, except the special handling one, and we make the * range the same size as the lhs one. */ r_cp = TR_SPECIAL_HANDLING; r_range_count = t_range_count; } else { if (! rstr_utf8) { r_cp = *r; r_range_count = 1; r++; } else { Size_t r_char_len; r_cp = valid_utf8_to_uvchr(r, &r_char_len); r += r_char_len; if (r < rend && *r == RANGE_INDICATOR) { r++; r_range_count = valid_utf8_to_uvchr(r, &r_char_len) - r_cp + 1; r += r_char_len; } else { r_range_count = 1; } } if (r_cp == TR_SPECIAL_HANDLING) { r_range_count = t_range_count; } /* This is the final character so far */ final_map = r_cp + r_range_count - 1; r_count += r_range_count; } } /* Here, we have the next things ready in both sides. They are * potentially ranges. We try to process as big a chunk as * possible at once, but the lhs and rhs must be synchronized, so * things like tr/A-Z/a-ij-z/ will need to be processed in 2 chunks * */ min_range_count = MIN(t_range_count, r_range_count); /* Search the inversion list for the entry that contains the input * code point . The inversion map was initialized to cover the * entire range of possible inputs, so this should not fail. So * the return value is the index into the list's array of the range * that contains , that is, 'i' such that array[i] <= cp < * array[i+1] */ i = _invlist_search(t_invlist, t_cp); assert(i >= 0); /* Here, the data structure might look like: * * index t r Meaning * [i-1] J j # J-L => j-l * [i] M -1 # M => default; as do N, O, P, Q * [i+1] R x # R => x, S => x+1, T => x+2 * [i+2] U y # U => y, V => y+1, ... * ... * [-1] Z -1 # Z => default; as do Z+1, ... infinity * * where 'x' and 'y' above are not to be taken literally. * * The maximum chunk we can handle in this loop iteration, is the * smallest of the three components: the lhs 't_', the rhs 'r_', * and the remainder of the range in element [i]. (In pass 1, that * range will have everything in it be of the same class; we can't * cross into another class.) 'min_range_count' already contains * the smallest of the first two values. The final one is * irrelevant if the map is to the special indicator */ invmap_range_length_remaining = ((Size_t) i + 1 < len) ? 
t_array[i+1] - t_cp : IV_MAX - t_cp; span = MAX(1, MIN(min_range_count, invmap_range_length_remaining)); /* The end point of this chunk is where we are, plus the span, but * never larger than the platform's infinity */ t_cp_end = MIN(IV_MAX, t_cp + span - 1); if (r_cp == TR_SPECIAL_HANDLING) { r_cp_end = TR_SPECIAL_HANDLING; } else { r_cp_end = MIN(IV_MAX, r_cp + span - 1); /* If something on the lhs is below 256, and something on the * rhs is above, there is a potential mapping here across that * boundary. Indeed the only way there isn't is if both sides * start at the same point. That means they both cross at the * same time. But otherwise one crosses before the other */ if (t_cp < 256 && r_cp_end > 255 && r_cp != t_cp) { can_force_utf8 = TRUE; } } /* If a character appears in the search list more than once, the * 2nd and succeeding occurrences are ignored, so only do this * range if haven't already processed this character. (The range * has been set up so that all members in it will be of the same * ilk) */ if (r_map[i] == TR_UNLISTED) { /* This is the first definition for this chunk, hence is valid * and needs to be processed. Here and in the comments below, * we use the above sample data. The t_cp chunk must be any * contiguous subset of M, N, O, P, and/or Q. * * In the first pass, the t_invlist has been partitioned so * that all elements in any single range have the same number * of bytes in their UTF-8 representations. And the r space is * either a single byte, or a range of strictly monotonically * increasing code points. So the final element in the range * will be represented by no fewer bytes than the initial one. * That means that if the final code point in the t range has * at least as many bytes as the final code point in the r, * then all code points in the t range have at least as many * bytes as their corresponding r range element. But if that's * not true, the transliteration of at least the final code * point grows in length. As an example, suppose we had * tr/\x{fff0}-\x{fff1}/\x{ffff}-\x{10000}/ * The UTF-8 for all but 10000 occupies 3 bytes on ASCII * platforms. We have deliberately set up the data structure * so that any range in the lhs gets split into chunks for * processing, such that every code point in a chunk has the * same number of UTF-8 bytes. We only have to check the final * code point in the rhs against any code point in the lhs. */ if ( ! pass2 && r_cp_end != TR_SPECIAL_HANDLING && UVCHR_SKIP(t_cp_end) < UVCHR_SKIP(r_cp_end)) { /* Consider tr/\xCB/\X{E000}/. The maximum expansion * factor is 1 byte going to 3 if the lhs is not UTF-8, but * 2 bytes going to 3 if it is in UTF-8. We could pass two * different values so doop could choose based on the * UTF-8ness of the target. But khw thinks (perhaps * wrongly) that is overkill. It is used only to make sure * we malloc enough space. If no target string can force * the result to be UTF-8, then we don't have to worry * about this */ NV t_size = (can_force_utf8 && t_cp < 256) ? 
1 : UVCHR_SKIP(t_cp_end); NV ratio = UVCHR_SKIP(r_cp_end) / t_size; o->op_private |= OPpTRANS_GROWS; /* Now that we know it grows, we can keep track of the * largest ratio */ if (ratio > max_expansion) { max_expansion = ratio; } } /* The very first range is marked as adjacent to the * non-existent range below it, as it causes things to "just * work" (TradeMark) * * If the lowest code point in this chunk is M, it adjoins the * J-L range */ if (t_cp == t_array[i]) { adjacent_to_range_below = TRUE; /* And if the map has the same offset from the beginning of * the range as does this new code point (or both are for * TR_SPECIAL_HANDLING), this chunk can be completely * merged with the range below. EXCEPT, in the first pass, * we don't merge ranges whose UTF-8 byte representations * have different lengths, so that we can more easily * detect if a replacement is longer than the source, that * is if it 'grows'. But in the 2nd pass, there's no * reason to not merge */ if ( (i > 0 && ( pass2 || UVCHR_SKIP(t_array[i-1]) == UVCHR_SKIP(t_cp))) && ( ( r_cp == TR_SPECIAL_HANDLING && r_map[i-1] == TR_SPECIAL_HANDLING) || ( r_cp != TR_SPECIAL_HANDLING && r_cp - r_map[i-1] == t_cp - t_array[i-1]))) { merge_with_range_below = TRUE; } } /* Similarly, if the highest code point in this chunk is 'Q', * it adjoins the range above, and if the map is suitable, can * be merged with it */ if ( t_cp_end >= IV_MAX - 1 || ( (Size_t) i + 1 < len && (Size_t) t_cp_end + 1 == t_array[i+1])) { adjacent_to_range_above = TRUE; if ((Size_t) i + 1 < len) if ( ( pass2 || UVCHR_SKIP(t_cp) == UVCHR_SKIP(t_array[i+1])) && ( ( r_cp == TR_SPECIAL_HANDLING && r_map[i+1] == (UV) TR_SPECIAL_HANDLING) || ( r_cp != TR_SPECIAL_HANDLING && r_cp_end == r_map[i+1] - 1))) { merge_with_range_above = TRUE; } } if (merge_with_range_below && merge_with_range_above) { /* Here the new chunk looks like M => m, ... Q => q; and * the range above is like R => r, .... Thus, the [i-1] * and [i+1] ranges should be seamlessly melded so the * result looks like * * [i-1] J j # J-T => j-t * [i] U y # U => y, V => y+1, ... * ... * [-1] Z -1 # Z => default; as do Z+1, ... infinity */ Move(t_array + i + 2, t_array + i, len - i - 2, UV); Move(r_map + i + 2, r_map + i, len - i - 2, UV); len -= 2; invlist_set_len(t_invlist, len, *(get_invlist_offset_addr(t_invlist))); } else if (merge_with_range_below) { /* Here the new chunk looks like M => m, .... But either * (or both) it doesn't extend all the way up through Q; or * the range above doesn't start with R => r. */ if (! adjacent_to_range_above) { /* In the first case, let's say the new chunk extends * through O. We then want: * * [i-1] J j # J-O => j-o * [i] P -1 # P => -1, Q => -1 * [i+1] R x # R => x, S => x+1, T => x+2 * [i+2] U y # U => y, V => y+1, ... * ... * [-1] Z -1 # Z => default; as do Z+1, ... * infinity */ t_array[i] = t_cp_end + 1; r_map[i] = TR_UNLISTED; } else { /* Adjoins the range above, but can't merge with it (because 'x' is not the next map after q) */ /* * [i-1] J j # J-Q => j-q * [i] R x # R => x, S => x+1, T => x+2 * [i+1] U y # U => y, V => y+1, ... * ... * [-1] Z -1 # Z => default; as do Z+1, ... * infinity */ Move(t_array + i + 1, t_array + i, len - i - 1, UV); Move(r_map + i + 1, r_map + i, len - i - 1, UV); len--; invlist_set_len(t_invlist, len, *(get_invlist_offset_addr(t_invlist))); } } else if (merge_with_range_above) { /* Here the new chunk ends with Q => q, and the range above * must start with R => r, so the two can be merged. 
But * either (or both) the new chunk doesn't extend all the * way down to M; or the mapping of the final code point * range below isn't m */ if (! adjacent_to_range_below) { /* In the first case, let's assume the new chunk starts * with P => p. Then, because it's merge-able with the * range above, that range must be R => r. We want: * * [i-1] J j # J-L => j-l * [i] M -1 # M => -1, N => -1 * [i+1] P p # P-T => p-t * [i+2] U y # U => y, V => y+1, ... * ... * [-1] Z -1 # Z => default; as do Z+1, ... * infinity */ t_array[i+1] = t_cp; r_map[i+1] = r_cp; } else { /* Adjoins the range below, but can't merge with it */ /* * [i-1] J j # J-L => j-l * [i] M x # M-T => x-5 .. x+2 * [i+1] U y # U => y, V => y+1, ... * ... * [-1] Z -1 # Z => default; as do Z+1, ... * infinity */ Move(t_array + i + 1, t_array + i, len - i - 1, UV); Move(r_map + i + 1, r_map + i, len - i - 1, UV); len--; t_array[i] = t_cp; r_map[i] = r_cp; invlist_set_len(t_invlist, len, *(get_invlist_offset_addr(t_invlist))); } } else if (adjacent_to_range_below && adjacent_to_range_above) { /* The new chunk completely fills the gap between the * ranges on either side, but can't merge with either of * them. * * [i-1] J j # J-L => j-l * [i] M z # M => z, N => z+1 ... Q => z+4 * [i+1] R x # R => x, S => x+1, T => x+2 * [i+2] U y # U => y, V => y+1, ... * ... * [-1] Z -1 # Z => default; as do Z+1, ... infinity */ r_map[i] = r_cp; } else if (adjacent_to_range_below) { /* The new chunk adjoins the range below, but not the range * above, and can't merge. Let's assume the chunk ends at * O. * * [i-1] J j # J-L => j-l * [i] M z # M => z, N => z+1, O => z+2 * [i+1] P -1 # P => -1, Q => -1 * [i+2] R x # R => x, S => x+1, T => x+2 * [i+3] U y # U => y, V => y+1, ... * ... * [-w] Z -1 # Z => default; as do Z+1, ... infinity */ invlist_extend(t_invlist, len + 1); t_array = invlist_array(t_invlist); Renew(r_map, len + 1, UV); Move(t_array + i + 1, t_array + i + 2, len - i - 1, UV); Move(r_map + i + 1, r_map + i + 2, len - i - 1, UV); r_map[i] = r_cp; t_array[i+1] = t_cp_end + 1; r_map[i+1] = TR_UNLISTED; len++; invlist_set_len(t_invlist, len, *(get_invlist_offset_addr(t_invlist))); } else if (adjacent_to_range_above) { /* The new chunk adjoins the range above, but not the range * below, and can't merge. Let's assume the new chunk * starts at O * * [i-1] J j # J-L => j-l * [i] M -1 # M => default, N => default * [i+1] O z # O => z, P => z+1, Q => z+2 * [i+2] R x # R => x, S => x+1, T => x+2 * [i+3] U y # U => y, V => y+1, ... * ... * [-1] Z -1 # Z => default; as do Z+1, ... infinity */ invlist_extend(t_invlist, len + 1); t_array = invlist_array(t_invlist); Renew(r_map, len + 1, UV); Move(t_array + i + 1, t_array + i + 2, len - i - 1, UV); Move(r_map + i + 1, r_map + i + 2, len - i - 1, UV); t_array[i+1] = t_cp; r_map[i+1] = r_cp; len++; invlist_set_len(t_invlist, len, *(get_invlist_offset_addr(t_invlist))); } else { /* The new chunk adjoins neither the range above, nor the * range below. Lets assume it is N..P => n..p * * [i-1] J j # J-L => j-l * [i] M -1 # M => default * [i+1] N n # N..P => n..p * [i+2] Q -1 # Q => default * [i+3] R x # R => x, S => x+1, T => x+2 * [i+4] U y # U => y, V => y+1, ... * ... * [-1] Z -1 # Z => default; as do Z+1, ... 
infinity */ invlist_extend(t_invlist, len + 2); t_array = invlist_array(t_invlist); Renew(r_map, len + 2, UV); Move(t_array + i + 1, t_array + i + 2 + 1, len - i - (2 - 1), UV); Move(r_map + i + 1, r_map + i + 2 + 1, len - i - (2 - 1), UV); len += 2; invlist_set_len(t_invlist, len, *(get_invlist_offset_addr(t_invlist))); t_array[i+1] = t_cp; r_map[i+1] = r_cp; t_array[i+2] = t_cp_end + 1; r_map[i+2] = TR_UNLISTED; } } /* End of this chunk needs to be processed */ /* Done with this chunk. */ t_cp += span; if (t_cp >= IV_MAX) { break; } t_range_count -= span; if (r_cp != TR_SPECIAL_HANDLING) { r_cp += span; r_range_count -= span; } else { r_range_count = 0; } } /* End of loop through the search list */ /* We don't need an exact count, but we do need to know if there is * anything left over in the replacement list. So, just assume it's * one byte per character */ if (rend > r) { r_count++; } } /* End of passes */ SvREFCNT_dec(inverted_tstr); /* We now have normalized the input into an inversion map. * * See if the lhs and rhs are equivalent. If so, this tr/// is a no-op * except for the count, and streamlined runtime code can be used */ if (!del && !squash) { /* They are identical if they point to same address, or if everything * maps to UNLISTED or to itself. This catches things that not looking * at the normalized inversion map doesn't catch, like tr/aa/ab/ or * tr/\x{100}-\x{104}/\x{100}-\x{102}\x{103}-\x{104} */ if (r0 != t0) { for (i = 0; i < len; i++) { if (r_map[i] != TR_UNLISTED && r_map[i] != t_array[i]) { goto done_identical_check; } } } /* Here have gone through entire list, and didn't find any * non-identical mappings */ o->op_private |= OPpTRANS_IDENTICAL; done_identical_check: ; } t_array = invlist_array(t_invlist); /* If has components above 255, we generally need to use the inversion map * implementation */ if ( can_force_utf8 || ( len > 0 && t_array[len-1] > 255 /* If the final range is 0x100-INFINITY and is a special * mapping, the table implementation can handle it */ && ! ( t_array[len-1] == 256 && ( r_map[len-1] == TR_UNLISTED || r_map[len-1] == TR_SPECIAL_HANDLING)))) { SV* r_map_sv; /* A UTF-8 op is generated, indicated by this flag. This op is an * sv_op */ o->op_private |= OPpTRANS_USE_SVOP; if (can_force_utf8) { o->op_private |= OPpTRANS_CAN_FORCE_UTF8; } /* The inversion map is pushed; first the list. */ invmap = MUTABLE_AV(newAV()); av_push(invmap, t_invlist); /* 2nd is the mapping */ r_map_sv = newSVpvn((char *) r_map, len * sizeof(UV)); av_push(invmap, r_map_sv); /* 3rd is the max possible expansion factor */ av_push(invmap, newSVnv(max_expansion)); /* Characters that are in the search list, but not in the replacement * list are mapped to the final character in the replacement list */ if (! del && r_count < t_count) { av_push(invmap, newSVuv(final_map)); } #ifdef USE_ITHREADS cPADOPo->op_padix = pad_alloc(OP_TRANS, SVf_READONLY); SvREFCNT_dec(PAD_SVl(cPADOPo->op_padix)); PAD_SETSV(cPADOPo->op_padix, (SV *) invmap); SvPADTMP_on(invmap); SvREADONLY_on(invmap); #else cSVOPo->op_sv = (SV *) invmap; #endif } else { OPtrans_map *tbl; Size_t i; /* The OPtrans_map struct already contains one slot; hence the -1. */ SSize_t struct_size = sizeof(OPtrans_map) + (256 - 1 + 1)*sizeof(short); /* Non-utf8 case: set o->op_pv to point to a simple 256+ entry lookup * table. Entries with the value TR_UNMAPPED indicate chars not to be * translated, while TR_DELETE indicates a search char without a * corresponding replacement char under /d. 
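
 * As a rough, standalone sketch (not the real OPtrans_map layout; SK_UNMAPPED
 * below merely stands in for TR_UNMAPPED), this is how such a table drives a
 * non-UTF-8 tr/a-c/A-C/ at run time: each input byte indexes the table,
 * unmapped bytes pass through unchanged, and the extra trailing slot records
 * the final replacement character: */
#if 0
#include <stdio.h>

#define SK_UNMAPPED (-1)                /* stand-in for TR_UNMAPPED */

int main(void)
{
    short map[257];
    int i;

    for (i = 0; i < 257; i++)           /* default: pass through */
        map[i] = SK_UNMAPPED;
    for (i = 0; i < 3; i++)             /* tr/a-c/A-C/ */
        map['a' + i] = (short)('A' + i);
    map[256] = 'C';                     /* final repeating char */

    {
        const char *in = "cab!";
        char out[16];
        int n = 0;

        for (i = 0; in[i]; i++) {
            short to = map[(unsigned char)in[i]];
            out[n++] = (to == SK_UNMAPPED) ? in[i] : (char)to;
        }
        out[n] = '\0';
        printf("%s\n", out);            /* prints "CAB!" */
    }
    return 0;
}
#endif
/*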
* * In addition, an extra slot at the end is used to store the final * repeating char, or TR_R_EMPTY under an empty replacement list, or * TR_DELETE under /d; which makes the runtime code easier. */
/* Indicate this is an op_pv */ o->op_private &= ~OPpTRANS_USE_SVOP; tbl = (OPtrans_map*)PerlMemShared_calloc(struct_size, 1); tbl->size = 256; cPVOPo->op_pv = (char*)tbl;
for (i = 0; i < len; i++) { STATIC_ASSERT_DECL(TR_SPECIAL_HANDLING == TR_DELETE); short upper = i >= len - 1 ? 256 : t_array[i+1]; short to = r_map[i]; short j; bool do_increment = TRUE;
/* Any code points above our limit should be irrelevant */ if (t_array[i] >= tbl->size) break;
/* Set up the map */ if (to == (short) TR_SPECIAL_HANDLING && ! del) { to = final_map; do_increment = FALSE; } else if (to < 0) { do_increment = FALSE; }
/* Create a map for everything in this range. The value increases * except for the special cases */ for (j = t_array[i]; j < upper; j++) { tbl->map[j] = to; if (do_increment) to++; } }
tbl->map[tbl->size] = del ? (short) TR_DELETE : (short) rlen ? (short) final_map : (short) TR_R_EMPTY; SvREFCNT_dec(t_invlist);
#if 0 /* code that added excess above-255 chars at the end of the table, in case we ever want to not use the inversion map implementation for this */ ASSUME(j <= rlen); excess = rlen - j; if (excess) { /* More replacement chars than search chars: * store excess replacement chars at end of main table. */ struct_size += excess; tbl = (OPtrans_map*)PerlMemShared_realloc(tbl, struct_size + excess * sizeof(short)); tbl->size += excess; cPVOPo->op_pv = (char*)tbl; for (i = 0; i < excess; i++) tbl->map[i + 256] = r[j+i]; } else { /* no more replacement chars than search chars */ #endif }
Safefree(r_map);
if(del && rlen != 0 && r_count == t_count) { Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "Useless use of /d modifier in transliteration operator"); } else if(r_count > t_count) { Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "Replacement list is longer than search list"); }
op_free(expr); op_free(repl); return o; }
/* =for apidoc newPMOP Constructs, checks, and returns an op of any pattern matching type. C<type> is the opcode. C<flags> gives the eight bits of C<op_flags> and, shifted up eight bits, the eight bits of C<op_private>. =cut */
OP * Perl_newPMOP(pTHX_ I32 type, I32 flags) { dVAR; PMOP *pmop; assert((PL_opargs[type] & OA_CLASS_MASK) == OA_PMOP || type == OP_CUSTOM); NewOp(1101, pmop, 1, PMOP); OpTYPE_set(pmop, type); pmop->op_flags = (U8)flags; pmop->op_private = (U8)(0 | (flags >> 8)); if (PL_opargs[type] & OA_RETSCALAR) scalar((OP *)pmop);
if (PL_hints & HINT_RE_TAINT) pmop->op_pmflags |= PMf_RETAINT;
#ifdef USE_LOCALE_CTYPE if (IN_LC_COMPILETIME(LC_CTYPE)) { set_regex_charset(&(pmop->op_pmflags), REGEX_LOCALE_CHARSET); } else #endif if (IN_UNI_8_BIT) { set_regex_charset(&(pmop->op_pmflags), REGEX_UNICODE_CHARSET); }
if (PL_hints & HINT_RE_FLAGS) { SV *reflags = Perl_refcounted_he_fetch_pvn(aTHX_ PL_compiling.cop_hints_hash, STR_WITH_LEN("reflags"), 0, 0 ); if (reflags && SvOK(reflags)) pmop->op_pmflags |= SvIV(reflags); reflags = Perl_refcounted_he_fetch_pvn(aTHX_ PL_compiling.cop_hints_hash, STR_WITH_LEN("reflags_charset"), 0, 0 ); if (reflags && SvOK(reflags)) { set_regex_charset(&(pmop->op_pmflags), (regex_charset)SvIV(reflags)); } }
#ifdef USE_ITHREADS assert(SvPOK(PL_regex_pad[0])); if (SvCUR(PL_regex_pad[0])) { /* Pop off the "packed" IV from the end.
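 Element 0 of the regex pad
 * doubles as a free-slot list: when a pm op is freed elsewhere in the core,
 * its pad index is appended to PL_regex_pad[0]'s string buffer as a raw IV,
 * so allocating a slot here just pops the last IV back off (shrinking SvCUR)
 * instead of growing the pad.  A minimal standalone sketch of that "indices
 * packed into a byte buffer" idea, using hypothetical names (release_slot,
 * reuse_slot, MY_IV) and plain C instead of SVs: */
#if 0
#include <stdio.h>
#include <string.h>

typedef long MY_IV;                     /* stand-in for Perl's IV */

static char   freelist[64 * sizeof(MY_IV)];
static size_t freelist_cur = 0;         /* like SvCUR: bytes in use */

static void release_slot(MY_IV ix)      /* "push" a freed index */
{
    memcpy(freelist + freelist_cur, &ix, sizeof ix);
    freelist_cur += sizeof ix;
}

static int reuse_slot(MY_IV *ix)        /* "pop"; 0 if the list is empty */
{
    if (freelist_cur == 0)
        return 0;
    freelist_cur -= sizeof *ix;
    memcpy(ix, freelist + freelist_cur, sizeof *ix);
    return 1;
}

int main(void)
{
    MY_IV ix;
    release_slot(3);
    release_slot(7);
    while (reuse_slot(&ix))
        printf("reusing slot %ld\n", ix);   /* 7, then 3 */
    return 0;
}
#endif
/* End of aside; the real code below pops one such IV.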
*/ SV *const repointer_list = PL_regex_pad[0]; const char *p = SvEND(repointer_list) - sizeof(IV); const IV offset = *((IV*)p); assert(SvCUR(repointer_list) % sizeof(IV) == 0); SvEND_set(repointer_list, p); pmop->op_pmoffset = offset; /* This slot should be free, so assert this: */ assert(PL_regex_pad[offset] == &PL_sv_undef); } else { SV * const repointer = &PL_sv_undef; av_push(PL_regex_padav, repointer); pmop->op_pmoffset = av_tindex(PL_regex_padav); PL_regex_pad = AvARRAY(PL_regex_padav); } #endif return CHECKOP(type, pmop); } static void S_set_haseval(pTHX) { PADOFFSET i = 1; PL_cv_has_eval = 1; /* Any pad names in scope are potentially lvalues. */ for (; i < PadnamelistMAXNAMED(PL_comppad_name); i++) { PADNAME *pn = PAD_COMPNAME_SV(i); if (!pn || !PadnameLEN(pn)) continue; if (PadnameOUTER(pn) || PadnameIN_SCOPE(pn, PL_cop_seqmax)) S_mark_padname_lvalue(aTHX_ pn); } } /* Given some sort of match op o, and an expression expr containing a * pattern, either compile expr into a regex and attach it to o (if it's * constant), or convert expr into a runtime regcomp op sequence (if it's * not) * * Flags currently has 2 bits of meaning: * 1: isreg indicates that the pattern is part of a regex construct, eg * $x =~ /pattern/ or split /pattern/, as opposed to $x =~ $pattern or * split "pattern", which aren't. In the former case, expr will be a list * if the pattern contains more than one term (eg /a$b/). * 2: The pattern is for a split. * * When the pattern has been compiled within a new anon CV (for * qr/(?{...})/ ), then floor indicates the savestack level just before * the new sub was created * * tr/// is also handled. */ OP * Perl_pmruntime(pTHX_ OP *o, OP *expr, OP *repl, UV flags, I32 floor) { PMOP *pm; LOGOP *rcop; I32 repl_has_vars = 0; bool is_trans = (o->op_type == OP_TRANS || o->op_type == OP_TRANSR); bool is_compiletime; bool has_code; bool isreg = cBOOL(flags & 1); bool is_split = cBOOL(flags & 2); PERL_ARGS_ASSERT_PMRUNTIME; if (is_trans) { return pmtrans(o, expr, repl); } /* find whether we have any runtime or code elements; * at the same time, temporarily set the op_next of each DO block; * then when we LINKLIST, this will cause the DO blocks to be excluded * from the op_next chain (and from having LINKLIST recursively * applied to them). We fix up the DOs specially later */ is_compiletime = 1; has_code = 0; if (expr->op_type == OP_LIST) { OP *o; for (o = cLISTOPx(expr)->op_first; o; o = OpSIBLING(o)) { if (o->op_type == OP_NULL && (o->op_flags & OPf_SPECIAL)) { has_code = 1; assert(!o->op_next); if (UNLIKELY(!OpHAS_SIBLING(o))) { assert(PL_parser && PL_parser->error_count); /* This can happen with qr/ (?{(^{})/. Just fake up the op we were expecting to see, to avoid crashing elsewhere. */ op_sibling_splice(expr, o, 0, newSVOP(OP_CONST, 0, &PL_sv_no)); } o->op_next = OpSIBLING(o); } else if (o->op_type != OP_CONST && o->op_type != OP_PUSHMARK) is_compiletime = 0; } } else if (expr->op_type != OP_CONST) is_compiletime = 0; LINKLIST(expr); /* fix up DO blocks; treat each one as a separate little sub; * also, mark any arrays as LIST/REF */ if (expr->op_type == OP_LIST) { OP *o; for (o = cLISTOPx(expr)->op_first; o; o = OpSIBLING(o)) { if (o->op_type == OP_PADAV || o->op_type == OP_RV2AV) { assert( !(o->op_flags & OPf_WANT)); /* push the array rather than its contents. 
The regex * engine will retrieve and join the elements later */ o->op_flags |= (OPf_WANT_LIST | OPf_REF); continue; } if (!(o->op_type == OP_NULL && (o->op_flags & OPf_SPECIAL))) continue; o->op_next = NULL; /* undo temporary hack from above */ scalar(o); LINKLIST(o); if (cLISTOPo->op_first->op_type == OP_LEAVE) { LISTOP *leaveop = cLISTOPx(cLISTOPo->op_first); /* skip ENTER */ assert(leaveop->op_first->op_type == OP_ENTER); assert(OpHAS_SIBLING(leaveop->op_first)); o->op_next = OpSIBLING(leaveop->op_first); /* skip leave */ assert(leaveop->op_flags & OPf_KIDS); assert(leaveop->op_last->op_next == (OP*)leaveop); leaveop->op_next = NULL; /* stop on last op */ op_null((OP*)leaveop); } else { /* skip SCOPE */ OP *scope = cLISTOPo->op_first; assert(scope->op_type == OP_SCOPE); assert(scope->op_flags & OPf_KIDS); scope->op_next = NULL; /* stop on last op */ op_null(scope); } /* XXX optimize_optree() must be called on o before * CALL_PEEP(), as currently S_maybe_multiconcat() can't * currently cope with a peephole-optimised optree. * Calling optimize_optree() here ensures that condition * is met, but may mean optimize_optree() is applied * to the same optree later (where hopefully it won't do any * harm as it can't convert an op to multiconcat if it's * already been converted */ optimize_optree(o); /* have to peep the DOs individually as we've removed it from * the op_next chain */ CALL_PEEP(o); S_prune_chain_head(&(o->op_next)); if (is_compiletime) /* runtime finalizes as part of finalizing whole tree */ finalize_optree(o); } } else if (expr->op_type == OP_PADAV || expr->op_type == OP_RV2AV) { assert( !(expr->op_flags & OPf_WANT)); /* push the array rather than its contents. The regex * engine will retrieve and join the elements later */ expr->op_flags |= (OPf_WANT_LIST | OPf_REF); } PL_hints |= HINT_BLOCK_SCOPE; pm = (PMOP*)o; assert(floor==0 || (pm->op_pmflags & PMf_HAS_CV)); if (is_compiletime) { U32 rx_flags = pm->op_pmflags & RXf_PMf_COMPILETIME; regexp_engine const *eng = current_re_engine(); if (is_split) { /* make engine handle split ' ' specially */ pm->op_pmflags |= PMf_SPLIT; rx_flags |= RXf_SPLIT; } if (!has_code || !eng->op_comp) { /* compile-time simple constant pattern */ if ((pm->op_pmflags & PMf_HAS_CV) && !has_code) { /* whoops! we guessed that a qr// had a code block, but we * were wrong (e.g. /[(?{}]/ ). Throw away the PL_compcv * that isn't required now. Note that we have to be pretty * confident that nothing used that CV's pad while the * regex was parsed, except maybe op targets for \Q etc. * If there were any op targets, though, they should have * been stolen by constant folding. */ #ifdef DEBUGGING SSize_t i = 0; assert(PadnamelistMAXNAMED(PL_comppad_name) == 0); while (++i <= AvFILLp(PL_comppad)) { # ifdef USE_PAD_RESET /* under USE_PAD_RESET, pad swipe replaces a swiped * folded constant with a fresh padtmp */ assert(!PL_curpad[i] || SvPADTMP(PL_curpad[i])); # else assert(!PL_curpad[i]); # endif } #endif /* This LEAVE_SCOPE will restore PL_compcv to point to the * outer CV (the one whose slab holds the pm op). The * inner CV (which holds expr) will be freed later, once * all the entries on the parse stack have been popped on * return from this function. Which is why its safe to * call op_free(expr) below. */ LEAVE_SCOPE(floor); pm->op_pmflags &= ~PMf_HAS_CV; } /* Skip compiling if parser found an error for this pattern */ if (pm->op_pmflags & PMf_HAS_ERROR) { return o; } PM_SETRE(pm, eng->op_comp ? 
eng->op_comp(aTHX_ NULL, 0, expr, eng, NULL, NULL, rx_flags, pm->op_pmflags) : Perl_re_op_compile(aTHX_ NULL, 0, expr, eng, NULL, NULL, rx_flags, pm->op_pmflags) ); op_free(expr); } else { /* compile-time pattern that includes literal code blocks */ REGEXP* re; /* Skip compiling if parser found an error for this pattern */ if (pm->op_pmflags & PMf_HAS_ERROR) { return o; } re = eng->op_comp(aTHX_ NULL, 0, expr, eng, NULL, NULL, rx_flags, (pm->op_pmflags | ((PL_hints & HINT_RE_EVAL) ? PMf_USE_RE_EVAL : 0)) ); PM_SETRE(pm, re); if (pm->op_pmflags & PMf_HAS_CV) { CV *cv; /* this QR op (and the anon sub we embed it in) is never * actually executed. It's just a placeholder where we can * squirrel away expr in op_code_list without the peephole * optimiser etc processing it for a second time */ OP *qr = newPMOP(OP_QR, 0); ((PMOP*)qr)->op_code_list = expr; /* handle the implicit sub{} wrapped round the qr/(?{..})/ */ SvREFCNT_inc_simple_void(PL_compcv); cv = newATTRSUB(floor, 0, NULL, NULL, qr); ReANY(re)->qr_anoncv = cv; /* attach the anon CV to the pad so that * pad_fixup_inner_anons() can find it */ (void)pad_add_anon(cv, o->op_type); SvREFCNT_inc_simple_void(cv); } else { pm->op_code_list = expr; } } } else { /* runtime pattern: build chain of regcomp etc ops */ bool reglist; PADOFFSET cv_targ = 0; reglist = isreg && expr->op_type == OP_LIST; if (reglist) op_null(expr); if (has_code) { pm->op_code_list = expr; /* don't free op_code_list; its ops are embedded elsewhere too */ pm->op_pmflags |= PMf_CODELIST_PRIVATE; } if (is_split) /* make engine handle split ' ' specially */ pm->op_pmflags |= PMf_SPLIT; /* the OP_REGCMAYBE is a placeholder in the non-threaded case * to allow its op_next to be pointed past the regcomp and * preceding stacking ops; * OP_REGCRESET is there to reset taint before executing the * stacking ops */ if (pm->op_pmflags & PMf_KEEP || TAINTING_get) expr = newUNOP((TAINTING_get ? OP_REGCRESET : OP_REGCMAYBE),0,expr); if (pm->op_pmflags & PMf_HAS_CV) { /* we have a runtime qr with literal code. This means * that the qr// has been wrapped in a new CV, which * means that runtime consts, vars etc will have been compiled * against a new pad. So... we need to execute those ops * within the environment of the new CV. So wrap them in a call * to a new anon sub. i.e. for * * qr/a$b(?{...})/, * * we build an anon sub that looks like * * sub { "a", $b, '(?{...})' } * * and call it, passing the returned list to regcomp. * Or to put it another way, the list of ops that get executed * are: * * normal PMf_HAS_CV * ------ ------------------- * pushmark (for regcomp) * pushmark (for entersub) * anoncode * srefgen * entersub * regcreset regcreset * pushmark pushmark * const("a") const("a") * gvsv(b) gvsv(b) * const("(?{...})") const("(?{...})") * leavesub * regcomp regcomp */ SvREFCNT_inc_simple_void(PL_compcv); CvLVALUE_on(PL_compcv); /* these lines are just an unrolled newANONATTRSUB */ expr = newSVOP(OP_ANONCODE, 0, MUTABLE_SV(newATTRSUB(floor, 0, NULL, NULL, expr))); cv_targ = expr->op_targ; expr = newUNOP(OP_REFGEN, 0, expr); expr = list(force_list(newUNOP(OP_ENTERSUB, 0, scalar(expr)), 1)); } rcop = alloc_LOGOP(OP_REGCOMP, scalar(expr), o); rcop->op_flags |= ((PL_hints & HINT_RE_EVAL) ? OPf_SPECIAL : 0) | (reglist ? 
OPf_STACKED : 0); rcop->op_targ = cv_targ; /* /$x/ may cause an eval, since $x might be qr/(?{..})/ */ if (PL_hints & HINT_RE_EVAL) S_set_haseval(aTHX); /* establish postfix order */ if (expr->op_type == OP_REGCRESET || expr->op_type == OP_REGCMAYBE) { LINKLIST(expr); rcop->op_next = expr; ((UNOP*)expr)->op_first->op_next = (OP*)rcop; } else { rcop->op_next = LINKLIST(expr); expr->op_next = (OP*)rcop; } op_prepend_elem(o->op_type, scalar((OP*)rcop), o); } if (repl) { OP *curop = repl; bool konst; /* If we are looking at s//.../e with a single statement, get past the implicit do{}. */ if (curop->op_type == OP_NULL && curop->op_flags & OPf_KIDS && cUNOPx(curop)->op_first->op_type == OP_SCOPE && cUNOPx(curop)->op_first->op_flags & OPf_KIDS) { OP *sib; OP *kid = cUNOPx(cUNOPx(curop)->op_first)->op_first; if (kid->op_type == OP_NULL && (sib = OpSIBLING(kid)) && !OpHAS_SIBLING(sib)) curop = sib; } if (curop->op_type == OP_CONST) konst = TRUE; else if (( (curop->op_type == OP_RV2SV || curop->op_type == OP_RV2AV || curop->op_type == OP_RV2HV || curop->op_type == OP_RV2GV) && cUNOPx(curop)->op_first && cUNOPx(curop)->op_first->op_type == OP_GV ) || curop->op_type == OP_PADSV || curop->op_type == OP_PADAV || curop->op_type == OP_PADHV || curop->op_type == OP_PADANY) { repl_has_vars = 1; konst = TRUE; } else konst = FALSE; if (konst && !(repl_has_vars && (!PM_GETRE(pm) || !RX_PRELEN(PM_GETRE(pm)) || RX_EXTFLAGS(PM_GETRE(pm)) & RXf_EVAL_SEEN))) { pm->op_pmflags |= PMf_CONST; /* const for long enough */ op_prepend_elem(o->op_type, scalar(repl), o); } else { rcop = alloc_LOGOP(OP_SUBSTCONT, scalar(repl), o); rcop->op_private = 1; /* establish postfix order */ rcop->op_next = LINKLIST(repl); repl->op_next = (OP*)rcop; pm->op_pmreplrootu.op_pmreplroot = scalar((OP*)rcop); assert(!(pm->op_pmflags & PMf_ONCE)); pm->op_pmstashstartu.op_pmreplstart = LINKLIST(rcop); rcop->op_next = 0; } } return (OP*)pm; } /* =for apidoc newSVOP Constructs, checks, and returns an op of any type that involves an embedded SV. C is the opcode. C gives the eight bits of C. C gives the SV to embed in the op; this function takes ownership of one reference to it. =cut */ OP * Perl_newSVOP(pTHX_ I32 type, I32 flags, SV *sv) { dVAR; SVOP *svop; PERL_ARGS_ASSERT_NEWSVOP; assert((PL_opargs[type] & OA_CLASS_MASK) == OA_SVOP || (PL_opargs[type] & OA_CLASS_MASK) == OA_PVOP_OR_SVOP || (PL_opargs[type] & OA_CLASS_MASK) == OA_FILESTATOP || type == OP_CUSTOM); NewOp(1101, svop, 1, SVOP); OpTYPE_set(svop, type); svop->op_sv = sv; svop->op_next = (OP*)svop; svop->op_flags = (U8)flags; svop->op_private = (U8)(0 | (flags >> 8)); if (PL_opargs[type] & OA_RETSCALAR) scalar((OP*)svop); if (PL_opargs[type] & OA_TARGET) svop->op_targ = pad_alloc(type, SVs_PADTMP); return CHECKOP(type, svop); } /* =for apidoc newDEFSVOP Constructs and returns an op to access C<$_>. =cut */ OP * Perl_newDEFSVOP(pTHX) { return newSVREF(newGVOP(OP_GV, 0, PL_defgv)); } #ifdef USE_ITHREADS /* =for apidoc newPADOP Constructs, checks, and returns an op of any type that involves a reference to a pad element. C is the opcode. C gives the eight bits of C. A pad slot is automatically allocated, and is populated with C; this function takes ownership of one reference to it. This function only exists if Perl has been compiled to use ithreads. 
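
For example, C<newGVOP> (below) relies on this under ithreads, putting the GV
into a read-only pad slot rather than embedding the SV pointer directly in the
op:

    return newPADOP(type, flags, SvREFCNT_inc_simple_NN(gv));
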
=cut */ OP * Perl_newPADOP(pTHX_ I32 type, I32 flags, SV *sv) { dVAR; PADOP *padop; PERL_ARGS_ASSERT_NEWPADOP; assert((PL_opargs[type] & OA_CLASS_MASK) == OA_SVOP || (PL_opargs[type] & OA_CLASS_MASK) == OA_PVOP_OR_SVOP || (PL_opargs[type] & OA_CLASS_MASK) == OA_FILESTATOP || type == OP_CUSTOM); NewOp(1101, padop, 1, PADOP); OpTYPE_set(padop, type); padop->op_padix = pad_alloc(type, isGV(sv) ? SVf_READONLY : SVs_PADTMP); SvREFCNT_dec(PAD_SVl(padop->op_padix)); PAD_SETSV(padop->op_padix, sv); assert(sv); padop->op_next = (OP*)padop; padop->op_flags = (U8)flags; if (PL_opargs[type] & OA_RETSCALAR) scalar((OP*)padop); if (PL_opargs[type] & OA_TARGET) padop->op_targ = pad_alloc(type, SVs_PADTMP); return CHECKOP(type, padop); } #endif /* USE_ITHREADS */ /* =for apidoc newGVOP Constructs, checks, and returns an op of any type that involves an embedded reference to a GV. C is the opcode. C gives the eight bits of C. C identifies the GV that the op should reference; calling this function does not transfer ownership of any reference to it. =cut */ OP * Perl_newGVOP(pTHX_ I32 type, I32 flags, GV *gv) { PERL_ARGS_ASSERT_NEWGVOP; #ifdef USE_ITHREADS return newPADOP(type, flags, SvREFCNT_inc_simple_NN(gv)); #else return newSVOP(type, flags, SvREFCNT_inc_simple_NN(gv)); #endif } /* =for apidoc newPVOP Constructs, checks, and returns an op of any type that involves an embedded C-level pointer (PV). C is the opcode. C gives the eight bits of C. C supplies the C-level pointer. Depending on the op type, the memory referenced by C may be freed when the op is destroyed. If the op is of a freeing type, C must have been allocated using C. =cut */ OP * Perl_newPVOP(pTHX_ I32 type, I32 flags, char *pv) { dVAR; const bool utf8 = cBOOL(flags & SVf_UTF8); PVOP *pvop; flags &= ~SVf_UTF8; assert((PL_opargs[type] & OA_CLASS_MASK) == OA_PVOP_OR_SVOP || type == OP_RUNCV || type == OP_CUSTOM || (PL_opargs[type] & OA_CLASS_MASK) == OA_LOOPEXOP); NewOp(1101, pvop, 1, PVOP); OpTYPE_set(pvop, type); pvop->op_pv = pv; pvop->op_next = (OP*)pvop; pvop->op_flags = (U8)flags; pvop->op_private = utf8 ? 
OPpPV_IS_UTF8 : 0; if (PL_opargs[type] & OA_RETSCALAR) scalar((OP*)pvop); if (PL_opargs[type] & OA_TARGET) pvop->op_targ = pad_alloc(type, SVs_PADTMP); return CHECKOP(type, pvop); } void Perl_package(pTHX_ OP *o) { SV *const sv = cSVOPo->op_sv; PERL_ARGS_ASSERT_PACKAGE; SAVEGENERICSV(PL_curstash); save_item(PL_curstname); PL_curstash = (HV *)SvREFCNT_inc(gv_stashsv(sv, GV_ADD)); sv_setsv(PL_curstname, sv); PL_hints |= HINT_BLOCK_SCOPE; PL_parser->copline = NOLINE; op_free(o); } void Perl_package_version( pTHX_ OP *v ) { U32 savehints = PL_hints; PERL_ARGS_ASSERT_PACKAGE_VERSION; PL_hints &= ~HINT_STRICT_VARS; sv_setsv( GvSV(gv_fetchpvs("VERSION", GV_ADDMULTI, SVt_PV)), cSVOPx(v)->op_sv ); PL_hints = savehints; op_free(v); } void Perl_utilize(pTHX_ int aver, I32 floor, OP *version, OP *idop, OP *arg) { OP *pack; OP *imop; OP *veop; SV *use_version = NULL; PERL_ARGS_ASSERT_UTILIZE; if (idop->op_type != OP_CONST) Perl_croak(aTHX_ "Module name must be constant"); veop = NULL; if (version) { SV * const vesv = ((SVOP*)version)->op_sv; if (!arg && !SvNIOKp(vesv)) { arg = version; } else { OP *pack; SV *meth; if (version->op_type != OP_CONST || !SvNIOKp(vesv)) Perl_croak(aTHX_ "Version number must be a constant number"); /* Make copy of idop so we don't free it twice */ pack = newSVOP(OP_CONST, 0, newSVsv(((SVOP*)idop)->op_sv)); /* Fake up a method call to VERSION */ meth = newSVpvs_share("VERSION"); veop = op_convert_list(OP_ENTERSUB, OPf_STACKED|OPf_SPECIAL, op_append_elem(OP_LIST, op_prepend_elem(OP_LIST, pack, version), newMETHOP_named(OP_METHOD_NAMED, 0, meth))); } } /* Fake up an import/unimport */ if (arg && arg->op_type == OP_STUB) { imop = arg; /* no import on explicit () */ } else if (SvNIOKp(((SVOP*)idop)->op_sv)) { imop = NULL; /* use 5.0; */ if (aver) use_version = ((SVOP*)idop)->op_sv; else idop->op_private |= OPpCONST_NOVER; } else { SV *meth; /* Make copy of idop so we don't free it twice */ pack = newSVOP(OP_CONST, 0, newSVsv(((SVOP*)idop)->op_sv)); /* Fake up a method call to import/unimport */ meth = aver ? newSVpvs_share("import") : newSVpvs_share("unimport"); imop = op_convert_list(OP_ENTERSUB, OPf_STACKED|OPf_SPECIAL, op_append_elem(OP_LIST, op_prepend_elem(OP_LIST, pack, arg), newMETHOP_named(OP_METHOD_NAMED, 0, meth) )); } /* Fake up the BEGIN {}, which does its thing immediately. */ newATTRSUB(floor, newSVOP(OP_CONST, 0, newSVpvs_share("BEGIN")), NULL, NULL, op_append_elem(OP_LINESEQ, op_append_elem(OP_LINESEQ, newSTATEOP(0, NULL, newUNOP(OP_REQUIRE, 0, idop)), newSTATEOP(0, NULL, veop)), newSTATEOP(0, NULL, imop) )); if (use_version) { /* Enable the * feature bundle that corresponds to the required version. */ use_version = sv_2mortal(new_version(use_version)); S_enable_feature_bundle(aTHX_ use_version); /* If a version >= 5.11.0 is requested, strictures are on by default! */ if (vcmp(use_version, sv_2mortal(upg_version(newSVnv(5.011000), FALSE))) >= 0) { if (!(PL_hints & HINT_EXPLICIT_STRICT_REFS)) PL_hints |= HINT_STRICT_REFS; if (!(PL_hints & HINT_EXPLICIT_STRICT_SUBS)) PL_hints |= HINT_STRICT_SUBS; if (!(PL_hints & HINT_EXPLICIT_STRICT_VARS)) PL_hints |= HINT_STRICT_VARS; } /* otherwise they are off */ else { if (!(PL_hints & HINT_EXPLICIT_STRICT_REFS)) PL_hints &= ~HINT_STRICT_REFS; if (!(PL_hints & HINT_EXPLICIT_STRICT_SUBS)) PL_hints &= ~HINT_STRICT_SUBS; if (!(PL_hints & HINT_EXPLICIT_STRICT_VARS)) PL_hints &= ~HINT_STRICT_VARS; } } /* The "did you use incorrect case?" warning used to be here. 
* The problem is that on case-insensitive filesystems one * might get false positives for "use" (and "require"): * "use Strict" or "require CARP" will work. This causes * portability problems for the script: in case-strict * filesystems the script will stop working. * * The "incorrect case" warning checked whether "use Foo" * imported "Foo" to your namespace, but that is wrong, too: * there is no requirement nor promise in the language that * a Foo.pm should or would contain anything in package "Foo". * * There is very little Configure-wise that can be done, either: * the case-sensitivity of the build filesystem of Perl does not * help in guessing the case-sensitivity of the runtime environment. */ PL_hints |= HINT_BLOCK_SCOPE; PL_parser->copline = NOLINE; COP_SEQMAX_INC; /* Purely for B::*'s benefit */ } /* =head1 Embedding Functions =for apidoc load_module Loads the module whose name is pointed to by the string part of C. Note that the actual module name, not its filename, should be given. Eg, "Foo::Bar" instead of "Foo/Bar.pm". ver, if specified and not NULL, provides version semantics similar to C. The optional trailing arguments can be used to specify arguments to the module's C method, similar to C; their precise handling depends on the flags. The flags argument is a bitwise-ORed collection of any of C, C, or C (or 0 for no flags). If C is set, the module is loaded as if with an empty import list, as in C; this is the only circumstance in which the trailing optional arguments may be omitted entirely. Otherwise, if C is set, the trailing arguments must consist of exactly one C, containing the op tree that produces the relevant import arguments. Otherwise, the trailing arguments must all be C values that will be used as import arguments; and the list must be terminated with C<(SV*) NULL>. If neither C nor C is set, the trailing C pointer is needed even if no import arguments are desired. The reference count for each specified C argument is decremented. In addition, the C argument is modified. If C is set, the module is loaded as if with C rather than C. =cut */ void Perl_load_module(pTHX_ U32 flags, SV *name, SV *ver, ...) { va_list args; PERL_ARGS_ASSERT_LOAD_MODULE; va_start(args, ver); vload_module(flags, name, ver, &args); va_end(args); } #ifdef PERL_IMPLICIT_CONTEXT void Perl_load_module_nocontext(U32 flags, SV *name, SV *ver, ...) { dTHX; va_list args; PERL_ARGS_ASSERT_LOAD_MODULE_NOCONTEXT; va_start(args, ver); vload_module(flags, name, ver, &args); va_end(args); } #endif void Perl_vload_module(pTHX_ U32 flags, SV *name, SV *ver, va_list *args) { OP *veop, *imop; OP * modname; I32 floor; PERL_ARGS_ASSERT_VLOAD_MODULE; /* utilize() fakes up a BEGIN { require ..; import ... }, so make sure * that it has a PL_parser to play with while doing that, and also * that it doesn't mess with any existing parser, by creating a tmp * new parser with lex_start(). This won't actually be used for much, * since pp_require() will create another parser for the real work. * The ENTER/LEAVE pair protect callers from any side effects of use. * * start_subparse() creates a new PL_compcv. 
This means that any ops * allocated below will be allocated from that CV's op slab, and so * will be automatically freed if the utilise() fails */ ENTER; SAVEVPTR(PL_curcop); lex_start(NULL, NULL, LEX_START_SAME_FILTER); floor = start_subparse(FALSE, 0); modname = newSVOP(OP_CONST, 0, name); modname->op_private |= OPpCONST_BARE; if (ver) { veop = newSVOP(OP_CONST, 0, ver); } else veop = NULL; if (flags & PERL_LOADMOD_NOIMPORT) { imop = sawparens(newNULLLIST()); } else if (flags & PERL_LOADMOD_IMPORT_OPS) { imop = va_arg(*args, OP*); } else { SV *sv; imop = NULL; sv = va_arg(*args, SV*); while (sv) { imop = op_append_elem(OP_LIST, imop, newSVOP(OP_CONST, 0, sv)); sv = va_arg(*args, SV*); } } utilize(!(flags & PERL_LOADMOD_DENY), floor, veop, modname, imop); LEAVE; } PERL_STATIC_INLINE OP * S_new_entersubop(pTHX_ GV *gv, OP *arg) { return newUNOP(OP_ENTERSUB, OPf_STACKED, newLISTOP(OP_LIST, 0, arg, newUNOP(OP_RV2CV, 0, newGVOP(OP_GV, 0, gv)))); } OP * Perl_dofile(pTHX_ OP *term, I32 force_builtin) { OP *doop; GV *gv; PERL_ARGS_ASSERT_DOFILE; if (!force_builtin && (gv = gv_override("do", 2))) { doop = S_new_entersubop(aTHX_ gv, term); } else { doop = newUNOP(OP_DOFILE, 0, scalar(term)); } return doop; } /* =head1 Optree construction =for apidoc newSLICEOP Constructs, checks, and returns an C (list slice) op. C gives the eight bits of C, except that C will be set automatically, and, shifted up eight bits, the eight bits of C, except that the bit with value 1 or 2 is automatically set as required. C and C supply the parameters of the slice; they are consumed by this function and become part of the constructed op tree. =cut */ OP * Perl_newSLICEOP(pTHX_ I32 flags, OP *subscript, OP *listval) { return newBINOP(OP_LSLICE, flags, list(force_list(subscript, 1)), list(force_list(listval, 1)) ); } #define ASSIGN_SCALAR 0 #define ASSIGN_LIST 1 #define ASSIGN_REF 2 /* given the optree o on the LHS of an assignment, determine whether its: * ASSIGN_SCALAR $x = ... * ASSIGN_LIST ($x) = ... * ASSIGN_REF \$x = ... 
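
 * The classification is used by newASSIGNOP() below to pick the op for the
 * assignment itself: ASSIGN_LIST gives an OP_AASSIGN tree, ASSIGN_REF an
 * OP_REFASSIGN, and ASSIGN_SCALAR an OP_SASSIGN.  Roughly (illustrative
 * examples only):
 *
 *    ($x, @y) = (1, 2, 3);    ASSIGN_LIST    -> aassign
 *    \$x      = \$y;          ASSIGN_REF     -> refassign
 *    $x       = 42;           ASSIGN_SCALAR  -> sassign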
*/ STATIC I32 S_assignment_type(pTHX_ const OP *o) { unsigned type; U8 flags; U8 ret; if (!o) return ASSIGN_LIST; if (o->op_type == OP_SREFGEN) { OP * const kid = cUNOPx(cUNOPo->op_first)->op_first; type = kid->op_type; flags = o->op_flags | kid->op_flags; if (!(flags & OPf_PARENS) && (kid->op_type == OP_RV2AV || kid->op_type == OP_PADAV || kid->op_type == OP_RV2HV || kid->op_type == OP_PADHV )) return ASSIGN_REF; ret = ASSIGN_REF; } else { if ((o->op_type == OP_NULL) && (o->op_flags & OPf_KIDS)) o = cUNOPo->op_first; flags = o->op_flags; type = o->op_type; ret = ASSIGN_SCALAR; } if (type == OP_COND_EXPR) { OP * const sib = OpSIBLING(cLOGOPo->op_first); const I32 t = assignment_type(sib); const I32 f = assignment_type(OpSIBLING(sib)); if (t == ASSIGN_LIST && f == ASSIGN_LIST) return ASSIGN_LIST; if ((t == ASSIGN_LIST) ^ (f == ASSIGN_LIST)) yyerror("Assignment to both a list and a scalar"); return ASSIGN_SCALAR; } if (type == OP_LIST && (flags & OPf_WANT) == OPf_WANT_SCALAR && o->op_private & OPpLVAL_INTRO) return ret; if (type == OP_LIST || flags & OPf_PARENS || type == OP_RV2AV || type == OP_RV2HV || type == OP_ASLICE || type == OP_HSLICE || type == OP_KVASLICE || type == OP_KVHSLICE || type == OP_REFGEN) return ASSIGN_LIST; if (type == OP_PADAV || type == OP_PADHV) return ASSIGN_LIST; if (type == OP_RV2SV) return ret; return ret; } static OP * S_newONCEOP(pTHX_ OP *initop, OP *padop) { dVAR; const PADOFFSET target = padop->op_targ; OP *const other = newOP(OP_PADSV, padop->op_flags | ((padop->op_private & ~OPpLVAL_INTRO) << 8)); OP *const first = newOP(OP_NULL, 0); OP *const nullop = newCONDOP(0, first, initop, other); /* XXX targlex disabled for now; see ticket #124160 newCONDOP(0, first, S_maybe_targlex(aTHX_ initop), other); */ OP *const condop = first->op_next; OpTYPE_set(condop, OP_ONCE); other->op_targ = target; nullop->op_flags |= OPf_WANT_SCALAR; /* Store the initializedness of state vars in a separate pad entry. */ condop->op_targ = pad_add_name_pvn("$",1,padadd_NO_DUP_CHECK|padadd_STATE,0,0); /* hijacking PADSTALE for uninitialized state variables */ SvPADSTALE_on(PAD_SVl(condop->op_targ)); return nullop; } /* =for apidoc newASSIGNOP Constructs, checks, and returns an assignment op. C and C supply the parameters of the assignment; they are consumed by this function and become part of the constructed op tree. If C is C, C, or C, then a suitable conditional optree is constructed. If C is the opcode of a binary operator, such as C, then an op is constructed that performs the binary operation and assigns the result to the left argument. Either way, if C is non-zero then C has no effect. If C is zero, then a plain scalar or list assignment is constructed. Which type of assignment it is is automatically determined. C gives the eight bits of C, except that C will be set automatically, and, shifted up eight bits, the eight bits of C, except that the bit with value 1 or 2 is automatically set as required. 
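
For example, given optrees C<lhs> and C<rhs> for the two sides (an
illustrative sketch only; the real callers sit in the parser), a plain
assignment, an add-assignment, and a defined-or assignment would be
constructed as:

    newASSIGNOP(0, lhs, 0,            rhs)
    newASSIGNOP(0, lhs, OP_ADD,       rhs)
    newASSIGNOP(0, lhs, OP_DORASSIGN, rhs)

corresponding to the C<=>, C<+=>, and C<//=> operators respectively.
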
=cut */ OP * Perl_newASSIGNOP(pTHX_ I32 flags, OP *left, I32 optype, OP *right) { OP *o; I32 assign_type; if (optype) { if (optype == OP_ANDASSIGN || optype == OP_ORASSIGN || optype == OP_DORASSIGN) { right = scalar(right); return newLOGOP(optype, 0, op_lvalue(scalar(left), optype), newBINOP(OP_SASSIGN, OPpASSIGN_BACKWARDS<<8, right, right)); } else { return newBINOP(optype, OPf_STACKED, op_lvalue(scalar(left), optype), scalar(right)); } } if ((assign_type = assignment_type(left)) == ASSIGN_LIST) { OP *state_var_op = NULL; static const char no_list_state[] = "Initialization of state variables" " in list currently forbidden"; OP *curop; if (left->op_type == OP_ASLICE || left->op_type == OP_HSLICE) left->op_private &= ~ OPpSLICEWARNING; PL_modcount = 0; left = op_lvalue(left, OP_AASSIGN); curop = list(force_list(left, 1)); o = newBINOP(OP_AASSIGN, flags, list(force_list(right, 1)), curop); o->op_private = (U8)(0 | (flags >> 8)); if (OP_TYPE_IS_OR_WAS(left, OP_LIST)) { OP *lop = ((LISTOP*)left)->op_first, *vop, *eop; if (!(left->op_flags & OPf_PARENS) && lop->op_type == OP_PUSHMARK && (vop = OpSIBLING(lop)) && (vop->op_type == OP_PADAV || vop->op_type == OP_PADHV) && !(vop->op_flags & OPf_PARENS) && (vop->op_private & (OPpLVAL_INTRO|OPpPAD_STATE)) == (OPpLVAL_INTRO|OPpPAD_STATE) && (eop = OpSIBLING(vop)) && eop->op_type == OP_ENTERSUB && !OpHAS_SIBLING(eop)) { state_var_op = vop; } else { while (lop) { if ((lop->op_type == OP_PADSV || lop->op_type == OP_PADAV || lop->op_type == OP_PADHV || lop->op_type == OP_PADANY) && (lop->op_private & OPpPAD_STATE) ) yyerror(no_list_state); lop = OpSIBLING(lop); } } } else if ( (left->op_private & OPpLVAL_INTRO) && (left->op_private & OPpPAD_STATE) && ( left->op_type == OP_PADSV || left->op_type == OP_PADAV || left->op_type == OP_PADHV || left->op_type == OP_PADANY) ) { /* All single variable list context state assignments, hence state ($a) = ... (state $a) = ... state @a = ... state (@a) = ... (state @a) = ... state %a = ... state (%a) = ... (state %a) = ... */ if (left->op_flags & OPf_PARENS) yyerror(no_list_state); else state_var_op = left; } /* optimise @a = split(...) into: * @{expr}: split(..., @{expr}) (where @a is not flattened) * @a, my @a, local @a: split(...) (where @a is attached to * the split op itself) */ if ( right && right->op_type == OP_SPLIT /* don't do twice, e.g. 
@b = (@a = split) */ && !(right->op_private & OPpSPLIT_ASSIGN)) { OP *gvop = NULL; if ( ( left->op_type == OP_RV2AV && (gvop=((UNOP*)left)->op_first)->op_type==OP_GV) || left->op_type == OP_PADAV) { /* @pkg or @lex or local @pkg' or 'my @lex' */ OP *tmpop; if (gvop) { #ifdef USE_ITHREADS ((PMOP*)right)->op_pmreplrootu.op_pmtargetoff = cPADOPx(gvop)->op_padix; cPADOPx(gvop)->op_padix = 0; /* steal it */ #else ((PMOP*)right)->op_pmreplrootu.op_pmtargetgv = MUTABLE_GV(cSVOPx(gvop)->op_sv); cSVOPx(gvop)->op_sv = NULL; /* steal it */ #endif right->op_private |= left->op_private & OPpOUR_INTRO; } else { ((PMOP*)right)->op_pmreplrootu.op_pmtargetoff = left->op_targ; left->op_targ = 0; /* steal it */ right->op_private |= OPpSPLIT_LEX; } right->op_private |= left->op_private & OPpLVAL_INTRO; detach_split: tmpop = cUNOPo->op_first; /* to list (nulled) */ tmpop = ((UNOP*)tmpop)->op_first; /* to pushmark */ assert(OpSIBLING(tmpop) == right); assert(!OpHAS_SIBLING(right)); /* detach the split subtreee from the o tree, * then free the residual o tree */ op_sibling_splice(cUNOPo->op_first, tmpop, 1, NULL); op_free(o); /* blow off assign */ right->op_private |= OPpSPLIT_ASSIGN; right->op_flags &= ~OPf_WANT; /* "I don't know and I don't care." */ return right; } else if (left->op_type == OP_RV2AV) { /* @{expr} */ OP *pushop = cUNOPx(cBINOPo->op_last)->op_first; assert(OpSIBLING(pushop) == left); /* Detach the array ... */ op_sibling_splice(cBINOPo->op_last, pushop, 1, NULL); /* ... and attach it to the split. */ op_sibling_splice(right, cLISTOPx(right)->op_last, 0, left); right->op_flags |= OPf_STACKED; /* Detach split and expunge aassign as above. */ goto detach_split; } else if (PL_modcount < RETURN_UNLIMITED_NUMBER && ((LISTOP*)right)->op_last->op_type == OP_CONST) { /* convert split(...,0) to split(..., PL_modcount+1) */ SV ** const svp = &((SVOP*)((LISTOP*)right)->op_last)->op_sv; SV * const sv = *svp; if (SvIOK(sv) && SvIVX(sv) == 0) { if (right->op_private & OPpSPLIT_IMPLIM) { /* our own SV, created in ck_split */ SvREADONLY_off(sv); sv_setiv(sv, PL_modcount+1); } else { /* SV may belong to someone else */ SvREFCNT_dec(sv); *svp = newSViv(PL_modcount+1); } } } } if (state_var_op) o = S_newONCEOP(aTHX_ o, state_var_op); return o; } if (assign_type == ASSIGN_REF) return newBINOP(OP_REFASSIGN, flags, scalar(right), left); if (!right) right = newOP(OP_UNDEF, 0); if (right->op_type == OP_READLINE) { right->op_flags |= OPf_STACKED; return newBINOP(OP_NULL, flags, op_lvalue(scalar(left), OP_SASSIGN), scalar(right)); } else { o = newBINOP(OP_SASSIGN, flags, scalar(right), op_lvalue(scalar(left), OP_SASSIGN) ); } return o; } /* =for apidoc newSTATEOP Constructs a state op (COP). The state op is normally a C op, but will be a C op if debugging is enabled for currently-compiled code. The state op is populated from C (or C). If C