diff options
author | Yves Orton <demerphq@gmail.com> | 2006-12-29 22:45:51 +0100 |
---|---|---|
committer | Rafael Garcia-Suarez <rgarciasuarez@gmail.com> | 2007-01-04 14:30:02 +0000 |
commit | 44a2ac759eaf811ea851bdf9177a51bf9b95b5ce (patch) | |
tree | de91bdd2393df02ca17ddee82d06318cda92010e | |
parent | 1f2e791661e807f561a2d5dd8f2b6a4e339e444e (diff) | |
download | perl-44a2ac759eaf811ea851bdf9177a51bf9b95b5ce.tar.gz |
Re: [PATCH] Change implementation of %+ to use a proper tied hash interface and add support for %-
Message-ID: <9b18b3110612291245q792fe91cu69422d2b81bb4f0b@mail.gmail.com>
p4raw-id: //depot/perl@29682
-rw-r--r-- | MANIFEST | 1 | ||||
-rw-r--r-- | doop.c | 3 | ||||
-rw-r--r-- | dump.c | 50 | ||||
-rw-r--r-- | embed.fnc | 6 | ||||
-rw-r--r-- | embed.h | 10 | ||||
-rw-r--r-- | ext/re/lib/re/Tie/Hash/NamedCapture.pm | 111 | ||||
-rw-r--r-- | ext/re/re.pm | 46 | ||||
-rw-r--r-- | ext/re/re.xs | 196 | ||||
-rw-r--r-- | ext/re/t/re_funcs.t | 48 | ||||
-rw-r--r-- | global.sym | 2 | ||||
-rw-r--r-- | gv.c | 108 | ||||
-rw-r--r-- | hv.c | 105 | ||||
-rw-r--r-- | mg.c | 75 | ||||
-rw-r--r-- | perl.h | 5 | ||||
-rw-r--r-- | pod/perlapi.pod | 4 | ||||
-rw-r--r-- | pod/perlintern.pod | 4 | ||||
-rw-r--r-- | pod/perlvar.pod | 43 | ||||
-rw-r--r-- | proto.h | 12 | ||||
-rw-r--r-- | regcomp.c | 105 | ||||
-rw-r--r-- | sv.c | 3 | ||||
-rwxr-xr-x | t/op/pat.t | 21 |
21 files changed, 678 insertions, 280 deletions
@@ -980,6 +980,7 @@ ext/re/re_comp.h re extension wrapper for regcomp.h ext/re/re.pm re extension Perl module ext/re/re_top.h re extension symbol hiding header ext/re/re.xs re extension external subroutines +ext/re/lib/re/Tie/Hash/NamedCapture.pm Implements %- and %+ behaviour ext/re/t/lexical_debug.pl generate debug output for lexical re 'debug' ext/re/t/lexical_debug.t test that lexical re 'debug' works ext/re/t/re_funcs.t see if exportable funcs from re.pm work @@ -1434,8 +1434,7 @@ Perl_do_kv(pTHX) RETURN; } - if (! SvTIED_mg((SV*)keys, PERL_MAGIC_tied) - && ! SvTIED_mg((SV*)keys, PERL_MAGIC_regdata_names)) + if (! SvTIED_mg((SV*)keys, PERL_MAGIC_tied) ) { i = HvKEYS(keys); } @@ -192,6 +192,10 @@ sequence. Thus the output will either be a single char, an octal escape sequence, a special escape like C<\n> or a 3 or more digit hex value. +If PERL_PV_ESCAPE_RE is set then the escape char used will be a '%' and +not a '\\'. This is because regexes very often contain backslashed +sequences, whereas '%' is not a particularly common character in patterns. + Returns a pointer to the escaped text as held by dsv. =cut @@ -203,14 +207,16 @@ Perl_pv_escape( pTHX_ SV *dsv, char const * const str, const STRLEN count, const STRLEN max, STRLEN * const escaped, const U32 flags ) { - char dq = (flags & PERL_PV_ESCAPE_QUOTE) ? '"' : '\\'; - char octbuf[PV_ESCAPE_OCTBUFSIZE] = "\\123456789ABCDF"; + char esc = (flags & PERL_PV_ESCAPE_RE) ? '%' : '\\'; + char dq = (flags & PERL_PV_ESCAPE_QUOTE) ? '"' : esc; + char octbuf[PV_ESCAPE_OCTBUFSIZE] = "%123456789ABCDF"; STRLEN wrote = 0; /* chars written so far */ STRLEN chsize = 0; /* size of data to be written */ STRLEN readsize = 1; /* size of data just read */ bool isuni= flags & PERL_PV_ESCAPE_UNI ? 1 : 0; /* is this unicode */ const char *pv = str; const char *end = pv + count; /* end of string */ + octbuf[0] = esc; if (!flags & PERL_PV_ESCAPE_NOCLEAR) sv_setpvn(dsv, "", 0); @@ -228,42 +234,49 @@ Perl_pv_escape( pTHX_ SV *dsv, char const * const str, "%"UVxf, u); else chsize = my_snprintf( octbuf, PV_ESCAPE_OCTBUFSIZE, - "\\x{%"UVxf"}", u); + "%cx{%"UVxf"}", esc, u); } else if (flags & PERL_PV_ESCAPE_NOBACKSLASH) { chsize = 1; } else { - if ( (c == dq) || (c == '\\') || !isPRINT(c) ) { - chsize = 2; + if ( (c == dq) || (c == esc) || !isPRINT(c) ) { + chsize = 2; switch (c) { - case '\\' : octbuf[1] = '\\'; break; + + case '\\' : /* fallthrough */ + case '%' : if ( c == esc ) { + octbuf[1] = esc; + } else { + chsize = 1; + } + break; case '\v' : octbuf[1] = 'v'; break; case '\t' : octbuf[1] = 't'; break; case '\r' : octbuf[1] = 'r'; break; case '\n' : octbuf[1] = 'n'; break; case '\f' : octbuf[1] = 'f'; break; - case '"' : + case '"' : if ( dq == '"' ) octbuf[1] = '"'; else chsize = 1; - break; + break; default: if ( (pv < end) && isDIGIT((U8)*(pv+readsize)) ) chsize = my_snprintf( octbuf, PV_ESCAPE_OCTBUFSIZE, - "\\%03o", c); - else + "%c%03o", esc, c); + else chsize = my_snprintf( octbuf, PV_ESCAPE_OCTBUFSIZE, - "\\%o", c); + "%c%o", esc, c); } } else { - chsize=1; + chsize = 1; } - } - if ( max && (wrote + chsize > max) ) { - break; + } + if ( max && (wrote + chsize > max) ) { + break; } else if (chsize > 1) { - sv_catpvn(dsv, octbuf, chsize); - wrote += chsize; + sv_catpvn(dsv, octbuf, chsize); + wrote += chsize; } else { Perl_sv_catpvf( aTHX_ dsv, "%c", c); wrote++; @@ -308,7 +321,7 @@ Perl_pv_pretty( pTHX_ SV *dsv, char const * const str, const STRLEN count, const STRLEN max, char const * const start_color, char const * const end_color, const U32 flags ) { - U8 dq = (flags & PERL_PV_PRETTY_QUOTE) ? '"' : '\\'; + U8 dq = (flags & PERL_PV_PRETTY_QUOTE) ? '"' : '%'; STRLEN escaped; if ( dq == '"' ) @@ -1129,7 +1142,6 @@ static const struct { const char type; const char *name; } magic_names[] = { { PERL_MAGIC_sv, "sv(\\0)" }, { PERL_MAGIC_arylen, "arylen(#)" }, { PERL_MAGIC_rhash, "rhash(%)" }, - { PERL_MAGIC_regdata_names, "regdata_names(+)" }, { PERL_MAGIC_pos, "pos(.)" }, { PERL_MAGIC_symtab, "symtab(:)" }, { PERL_MAGIC_backref, "backref(<)" }, @@ -684,7 +684,8 @@ Ap |I32 |regexec_flags |NN regexp* prog|NN char* stringarg \ |NN char* strend|NN char* strbeg|I32 minend \ |NN SV* screamer|NULLOK void* data|U32 flags ApR |regnode*|regnext |NN regnode* p -Ep |SV*|reg_named_buff_sv |NN SV* namesv +EXp |SV*|reg_named_buff_get |NN SV* namesv|NULLOK const REGEXP * const from_re|U32 flags +EXp |SV*|reg_numbered_buff_get|I32 paren|NN const REGEXP * const rx|NULLOK SV* usesv|U32 flags Ep |void |regprop |NULLOK const regexp *prog|NN SV* sv|NN const regnode* o Ap |void |repeatcpy |NN char* to|NN const char* from|I32 len|I32 count ApP |char* |rninstr |NN const char* big|NN const char* bigend \ @@ -1100,7 +1101,8 @@ sR |I32 |do_trans_complex_utf8 |NN SV * const sv #if defined(PERL_IN_GV_C) || defined(PERL_DECL_PROT) s |void |gv_init_sv |NN GV *gv|I32 sv_type -s |void |require_errno |NN GV *gv +s |HV* |require_tie_mod|NN GV *gv|NN const char *varpv|NN SV* namesv \ + |NN const char *methpv|const U32 flags #endif : #if defined(PERL_IN_HV_C) || defined(PERL_IN_SV_C) || defined(PERL_DECL_PROT) @@ -692,7 +692,8 @@ #define regexec_flags Perl_regexec_flags #define regnext Perl_regnext #if defined(PERL_CORE) || defined(PERL_EXT) -#define reg_named_buff_sv Perl_reg_named_buff_sv +#define reg_named_buff_get Perl_reg_named_buff_get +#define reg_numbered_buff_get Perl_reg_numbered_buff_get #define regprop Perl_regprop #endif #define repeatcpy Perl_repeatcpy @@ -1098,7 +1099,7 @@ #if defined(PERL_IN_GV_C) || defined(PERL_DECL_PROT) #ifdef PERL_CORE #define gv_init_sv S_gv_init_sv -#define require_errno S_require_errno +#define require_tie_mod S_require_tie_mod #endif #endif #if defined(PERL_IN_HV_C) || defined(PERL_DECL_PROT) @@ -2904,7 +2905,8 @@ #define regexec_flags(a,b,c,d,e,f,g,h) Perl_regexec_flags(aTHX_ a,b,c,d,e,f,g,h) #define regnext(a) Perl_regnext(aTHX_ a) #if defined(PERL_CORE) || defined(PERL_EXT) -#define reg_named_buff_sv(a) Perl_reg_named_buff_sv(aTHX_ a) +#define reg_named_buff_get(a,b,c) Perl_reg_named_buff_get(aTHX_ a,b,c) +#define reg_numbered_buff_get(a,b,c,d) Perl_reg_numbered_buff_get(aTHX_ a,b,c,d) #define regprop(a,b,c) Perl_regprop(aTHX_ a,b,c) #endif #define repeatcpy(a,b,c,d) Perl_repeatcpy(aTHX_ a,b,c,d) @@ -3301,7 +3303,7 @@ #if defined(PERL_IN_GV_C) || defined(PERL_DECL_PROT) #ifdef PERL_CORE #define gv_init_sv(a,b) S_gv_init_sv(aTHX_ a,b) -#define require_errno(a) S_require_errno(aTHX_ a) +#define require_tie_mod(a,b,c,d,e) S_require_tie_mod(aTHX_ a,b,c,d,e) #endif #endif #ifdef PERL_CORE diff --git a/ext/re/lib/re/Tie/Hash/NamedCapture.pm b/ext/re/lib/re/Tie/Hash/NamedCapture.pm new file mode 100644 index 0000000000..a76c6ab7aa --- /dev/null +++ b/ext/re/lib/re/Tie/Hash/NamedCapture.pm @@ -0,0 +1,111 @@ +package re::Tie::Hash::NamedCapture; +use strict; +use warnings; +our $VERSION = "0.01"; +use re qw(is_regexp + regname + regnames + regnames_count + regnames_iterinit + regnames_iternext); + +sub TIEHASH { + my $classname = shift; + my $hash = {@_}; + + if ($hash->{re} && !is_regexp($hash->{re})) { + die "'re' parameter to ",__PACKAGE__,"->TIEHASH must be a qr//" + } + + return bless $hash, $classname; +} + +sub FETCH { + return regname($_[1],$_[0]->{re},$_[0]->{all}); +} + +sub STORE { + require Carp; + Carp::croak("STORE forbidden: Hashes tied to ",__PACKAGE__," are read/only."); +} + +sub FIRSTKEY { + regnames_iterinit($_[0]->{re}); + return $_[0]->NEXTKEY; +} + +sub NEXTKEY { + return regnames_iternext($_[0]->{re},$_[0]->{all}); +} + +sub EXISTS { + return defined regname( $_[1], $_[0]->{re},$_[0]->{all}); +} + +sub DELETE { + require Carp; + Carp::croak("DELETE forbidden: Hashes tied to ",__PACKAGE__," are read/only"); +} + +sub CLEAR { + require Carp; + Carp::croak("CLEAR forbidden: Hashes tied to ",__PACKAGE__," are read/only"); +} + +sub SCALAR { + return scalar regnames($_[0]->{re},$_[0]->{all}); +} + +1; + +__END__ + +=head1 NAME + +re::Tie::Hash::NamedCapture - Perl module to support named regex capture buffers + +=head1 SYNOPSIS + + tie my %hash,"re::Tie::Hash::NamedCapture"; + # %hash now behaves like %- + + tie my %hash,"re::Tie::Hash::NamedCapture",re => $qr, all=> 1, + # %hash now access buffers from regex in $qr like %+ + +=head1 DESCRIPTION + +Implements the behaviour required for C<%+> and C<%-> but can be used +independently. + +When the C<re> parameter is provided, and the value is the result of +a C<qr//> expression then the hash is bound to that particular regexp +and will return the results of its last successful match. If the +parameter is omitted then the hash behaves just as C<$1> does by +referencing the last successful match. + +When the C<all> parameter is provided then the result of a fetch +is an array ref containing the contents of each buffer whose name +was the same as the key used for the access. If the buffer wasn't +involved in the match then an undef will be stored. When the all +parameter is omitted or not a true value then the return will be +a the content of the left most defined buffer with the given name. +If there is no buffer with the desired name defined then C<undef> +is returned. + + +For instance: + + my $qr = qr/(?<foo>bar)/; + if ( 'bar' =~ /$qr/ ) { + tie my %hash,"re::Tie::Hash::NamedCapture",re => $qr, all => 1; + if ('bar'=~/bar/) { + # last successful match is now different + print $hash{foo}; # prints foo + } + } + +=head1 SEE ALSO + +L<re>, L<perlmodlib/Pragmatic Modules>. + +=cut diff --git a/ext/re/re.pm b/ext/re/re.pm index ce01214315..4a64af3964 100644 --- a/ext/re/re.pm +++ b/ext/re/re.pm @@ -4,9 +4,11 @@ package re; use strict; use warnings; -our $VERSION = "0.07"; +our $VERSION = "0.08"; our @ISA = qw(Exporter); -our @EXPORT_OK = qw(is_regexp regexp_pattern regmust); +our @EXPORT_OK = qw(is_regexp regexp_pattern regmust + regname regnames + regnames_count regnames_iterinit regnames_iternext); our %EXPORT_OK = map { $_ => 1 } @EXPORT_OK; # *** WARNING *** WARNING *** WARNING *** WARNING *** WARNING *** @@ -464,6 +466,46 @@ floating string. This will be what the optimiser of the Perl that you are using thinks is the longest. If you believe that the result is wrong please report it via the L<perlbug> utility. +=item regname($name,$qr,$all) + +Returns the contents of a named buffer. If $qr is missing, or is not the +result of a qr// then returns the result of the last successful match. If +$all is true then returns an array ref containing one entry per buffer, +otherwise returns the first defined buffer. + +=item regnames($qr,$all) + +Returns a list of all of the named buffers defined in a pattern. If +$all is true then it returns all names defined, if not returns only +names which were involved in the last successful match. If $qr is omitted +or is not the result of a qr// then returns the details for the last +successful match. + +=item regnames_iterinit($qr) + +Initializes the internal hash iterator associated to a regexps named capture +buffers. If $qr is omitted resets the iterator associated with the regexp used +in the last successful match. + +=item regnames_iternext($qr,$all) + +Gets the next key from the hash associated with a regexp. If $qr +is omitted resets the iterator associated with the regexp used in the +last successful match. If $all is true returns the keys of all of the +distinct named buffers in the pattern, if not returns only those names +used in the last successful match. + +=item regnames_count($qr) + +Returns the number of distinct names defined in the regexp $qr. If +$qr is omitted or not a regexp returns the count of names in the +last successful match. + +B<Note:> that this result is always the actual number of distinct +named buffers defined, it may not actually match that which is +returned by C<regnames()> and related routines when those routines +have not been called with the $all parameter set.. + =back =head1 SEE ALSO diff --git a/ext/re/re.xs b/ext/re/re.xs index d1d27023e8..aa601cf67d 100644 --- a/ext/re/re.xs +++ b/ext/re/re.xs @@ -41,6 +41,25 @@ const struct regexp_engine my_reg_engine = { #endif }; +regexp * +get_re_arg( pTHX_ SV *sv, U32 flags, MAGIC **mgp) { + MAGIC *mg; + if (sv) { + if (SvMAGICAL(sv)) + mg_get(sv); + if (SvROK(sv) && + (sv = (SV*)SvRV(sv)) && /* assign deliberate */ + SvTYPE(sv) == SVt_PVMG && + (mg = mg_find(sv, PERL_MAGIC_qr))) /* assign deliberate */ + { + if (mgp) *mgp = mg; + return (regexp *)mg->mg_obj; + } + } + if (mgp) *mgp = NULL; + return ((flags && PL_curpm) ? PM_GETRE(PL_curpm) : NULL); +} + MODULE = re PACKAGE = re void @@ -55,16 +74,9 @@ void is_regexp(sv) SV * sv PROTOTYPE: $ -PREINIT: - MAGIC *mg; PPCODE: { - if (SvMAGICAL(sv)) - mg_get(sv); - if (SvROK(sv) && - (sv = (SV*)SvRV(sv)) && /* assign deliberate */ - SvTYPE(sv) == SVt_PVMG && - (mg = mg_find(sv, PERL_MAGIC_qr))) /* assign deliberate */ + if ( get_re_arg( aTHX_ sv, 0, NULL ) ) { XSRETURN_YES; } else { @@ -79,6 +91,7 @@ regexp_pattern(sv) PROTOTYPE: $ PREINIT: MAGIC *mg; + regexp *re; PPCODE: { /* @@ -92,17 +105,10 @@ PPCODE: on the object. */ - if (SvMAGICAL(sv)) - mg_get(sv); - if (SvROK(sv) && - (sv = (SV*)SvRV(sv)) && /* assign deliberate */ - SvTYPE(sv) == SVt_PVMG && - (mg = mg_find(sv, PERL_MAGIC_qr))) /* assign deliberate */ + if ( re = get_re_arg( aTHX_ sv, 0, &mg) ) /* assign deliberate */ { - /* Housten, we have a regex! */ SV *pattern; - regexp *re = (regexp *)mg->mg_obj; STRLEN patlen = 0; STRLEN left = 0; char reflags[6]; @@ -173,19 +179,13 @@ regmust(sv) SV * sv PROTOTYPE: $ PREINIT: - MAGIC *mg; + regexp *re; PPCODE: { - if (SvMAGICAL(sv)) - mg_get(sv); - if (SvROK(sv) && - (sv = (SV*)SvRV(sv)) && /* assign deliberate */ - SvTYPE(sv) == SVt_PVMG && - (mg = mg_find(sv, PERL_MAGIC_qr))) /* assign deliberate */ + if ( re = get_re_arg( aTHX_ sv, 0, 0) ) /* assign deliberate */ { SV *an = &PL_sv_no; SV *fl = &PL_sv_no; - regexp *re = (regexp *)mg->mg_obj; if (re->anchored_substr) { an = newSVsv(re->anchored_substr); } else if (re->anchored_utf8) { @@ -202,3 +202,151 @@ PPCODE: } XSRETURN_UNDEF; } + +void +regname(sv, qr = NULL, all = NULL) + SV * sv + SV * qr + SV * all +PROTOTYPE: ;$$$ +PREINIT: + regexp *re = NULL; + SV *bufs = NULL; +PPCODE: +{ + re = get_re_arg( aTHX_ qr, 1, NULL); + if (SvPOK(sv) && re && re->paren_names) { + bufs = Perl_reg_named_buff_get(aTHX_ sv, re ,all && SvTRUE(all)); + if (bufs) { + if (all && SvTRUE(all)) + XPUSHs(newRV(bufs)); + else + XPUSHs(SvREFCNT_inc(bufs)); + XSRETURN(1); + } + } + XSRETURN_UNDEF; +} + +void +regnames(sv = NULL, all = NULL) + SV *sv + SV *all +PROTOTYPE: ;$$ +PREINIT: + regexp *re = NULL; + IV count = 0; +PPCODE: +{ + re = get_re_arg( aTHX_ sv, 1, NULL ); + if (re && re->paren_names) { + HV *hv= re->paren_names; + (void)hv_iterinit(hv); + while (1) { + HE *temphe = hv_iternext_flags(hv,0); + if (temphe) { + IV i; + IV parno = 0; + SV* sv_dat = HeVAL(temphe); + I32 *nums = (I32*)SvPVX(sv_dat); + for ( i = 0; i < SvIVX(sv_dat); i++ ) { + if ((I32)(re->lastcloseparen) >= nums[i] && + re->startp[nums[i]] != -1 && + re->endp[nums[i]] != -1) + { + parno = nums[i]; + break; + } + } + if (parno || (all && SvTRUE(all))) { + STRLEN len; + char *pv = HePV(temphe, len); + if ( GIMME_V == G_ARRAY ) + XPUSHs(newSVpvn(pv,len)); + count++; + } + } else { + break; + } + } + } + if ( GIMME_V == G_ARRAY ) + XSRETURN(count); + else + XSRETURN_UNDEF; +} + +void +regnames_iterinit(sv = NULL) + SV * sv +PROTOTYPE: ;$ +PREINIT: + regexp *re = NULL; +PPCODE: +{ + re = get_re_arg( aTHX_ sv, 1, NULL ); + if (re && re->paren_names) { + (void)hv_iterinit(re->paren_names); + XPUSHs(newSViv(HvTOTALKEYS(re->paren_names))); + } else { + XSRETURN_UNDEF; + } +} + +void +regnames_iternext(sv = NULL, all = NULL) + SV *sv + SV *all +PROTOTYPE: ;$$ +PREINIT: + regexp *re; +PPCODE: +{ + re = get_re_arg( aTHX_ sv, 1, NULL ); + if (re && re->paren_names) { + HV *hv= re->paren_names; + while (1) { + HE *temphe = hv_iternext_flags(hv,0); + if (temphe) { + IV i; + IV parno = 0; + SV* sv_dat = HeVAL(temphe); + I32 *nums = (I32*)SvPVX(sv_dat); + for ( i = 0; i < SvIVX(sv_dat); i++ ) { + if ((I32)(re->lastcloseparen) >= nums[i] && + re->startp[nums[i]] != -1 && + re->endp[nums[i]] != -1) + { + parno = nums[i]; + break; + } + } + if (parno || (all && SvTRUE(all))) { + STRLEN len; + char *pv = HePV(temphe, len); + XPUSHs(newSVpvn(pv,len)); + XSRETURN(1); + } + } else { + break; + } + } + } + XSRETURN_UNDEF; +} + +void +regnames_count(sv = NULL) + SV * sv +PROTOTYPE: ;$ +PREINIT: + regexp *re = NULL; +PPCODE: +{ + re = get_re_arg( aTHX_ sv, 1, NULL ); + if (re && re->paren_names) { + XPUSHs(newSViv(HvTOTALKEYS(re->paren_names))); + } else { + XSRETURN_UNDEF; + } +} diff --git a/ext/re/t/re_funcs.t b/ext/re/t/re_funcs.t index f84e2b09a6..736829cbc4 100644 --- a/ext/re/t/re_funcs.t +++ b/ext/re/t/re_funcs.t @@ -13,7 +13,9 @@ BEGIN { use strict; use Test::More; # test count at bottom of file -use re qw(is_regexp regexp_pattern regmust); +use re qw(is_regexp regexp_pattern regmust + regname regnames regnames_count + regnames_iterinit regnames_iternext); my $qr=qr/foo/i; ok(is_regexp($qr),'is_regexp($qr)'); @@ -37,6 +39,48 @@ ok(!regexp_pattern(''),'!regexp_pattern("")'); is($floating,undef,"Regmust anchored - ref"); } + +if ('1234'=~/(?:(?<A>\d)|(?<C>!))(?<B>\d)(?<A>\d)(?<B>\d)/){ + my $qr = qr/(?<foo>foo)(?<bar>bar)/; + my @names = sort +regnames($qr); + is("@names","","regnames"); + @names = sort +regnames($qr,1); + is("@names","bar foo","regnames - all"); + @names = sort +regnames(); + is("@names","A B","regnames"); + @names = sort +regnames(undef,1); + is("@names","A B C","regnames"); + is(join("", @{regname("A",undef,1)}),"13"); + is(join("", @{regname("B",undef,1)}),"24"); + { + if ('foobar'=~/$qr/) { + regnames_iterinit(); + my @res; + while (defined(my $key=regnames_iternext)) { + push @res,$key; + } + @res=sort @res; + is("@res","bar foo"); + is(regnames_count(),2); + } else { + ok(0); ok(0); + } + } + is(regnames_count(),3); + is(regnames_count($qr),2); +} +{ + use warnings; + require re::Tie::Hash::NamedCapture; + my $qr = qr/(?<foo>foo)/; + if ( 'foo' =~ /$qr/ ) { + tie my %hash,"re::Tie::Hash::NamedCapture",re => $qr; + if ('bar'=~/bar/) { + # last successful match is now different + is($hash{foo},'foo'); # prints foo + } + } +} # New tests above this line, don't forget to update the test count below! -use Test::More tests => 12; +use Test::More tests => 23; # No tests here! diff --git a/global.sym b/global.sym index d22185753b..3bc39282de 100644 --- a/global.sym +++ b/global.sym @@ -396,6 +396,8 @@ Perl_re_intuit_start Perl_re_intuit_string Perl_regexec_flags Perl_regnext +Perl_reg_named_buff_get +Perl_reg_numbered_buff_get Perl_repeatcpy Perl_rninstr Perl_rsignal @@ -664,28 +664,44 @@ Perl_gv_autoload4(pTHX_ HV *stash, const char *name, STRLEN len, I32 method) return gv; } -/* The "gv" parameter should be the glob known to Perl code as *! - * The scalar must already have been magicalized. + +/* require_tie_mod() internal routine for requiring a module + * that implements the logic of automatical ties like %! and %- + * + * The "gv" parameter should be the glob. + * "varpv" holds the name of the var, used for error messages + * "namesv" holds the module name + * "methpv" holds the method name to test for to check that things + * are working reasonably close to as expected + * "flags" if flag & 1 then save the scalar before loading. + * For the protection of $! to work (it is set by this routine) + * the sv slot must already be magicalized. */ -STATIC void -S_require_errno(pTHX_ GV *gv) +STATIC HV* +S_require_tie_mod(pTHX_ GV *gv, const char *varpv, SV* namesv, const char *methpv,const U32 flags) { dVAR; - HV* stash = gv_stashpvs("Errno", FALSE); - - if (!stash || !(gv_fetchmethod(stash, "TIEHASH"))) { + HV* stash = gv_stashsv(namesv, FALSE); + + if (!stash || !(gv_fetchmethod(stash, methpv))) { + SV *module = newSVsv(namesv); dSP; PUTBACK; ENTER; - save_scalar(gv); /* keep the value of $! */ - Perl_load_module(aTHX_ PERL_LOADMOD_NOIMPORT, - newSVpvs("Errno"), NULL); + if ( flags & 1 ) + save_scalar(gv); + Perl_load_module(aTHX_ PERL_LOADMOD_NOIMPORT, module, NULL); LEAVE; SPAGAIN; - stash = gv_stashpvs("Errno", FALSE); - if (!stash || !(gv_fetchmethod(stash, "TIEHASH"))) - Perl_croak(aTHX_ "Can't use %%! because Errno.pm is not available"); + stash = gv_stashsv(namesv, FALSE); + if (!stash) + Perl_croak( aTHX_ "panic: Can't use %%%s because %"SVf" is not available", + varpv, module); + else if (!gv_fetchmethod(stash, methpv)) + Perl_croak( aTHX_ "panic: Can't use %%%s because %"SVf" does not support method %s", + varpv, module, methpv); } + return stash; } /* @@ -976,8 +992,14 @@ Perl_gv_fetchpvn_flags(pTHX_ const char *nambeg, STRLEN full_len, I32 flags, if (add) { GvMULTI_on(gv); gv_init_sv(gv, sv_type); - if (*name=='!' && sv_type == SVt_PVHV && len==1) - require_errno(gv); + if (sv_type == SVt_PVHV && len == 1 ) { + if (*name == '!') + require_tie_mod(gv, "!", newSVpvs("Errno"), "TIEHASH", 1); + else + if (*name == '-' || *name == '+') + require_tie_mod(gv, name, newSVpvs("re::Tie::Hash::NamedCapture"), "FETCH", 0); + + } } return gv; } else if (no_init) { @@ -1156,25 +1178,45 @@ Perl_gv_fetchpvn_flags(pTHX_ const char *nambeg, STRLEN full_len, I32 flags, goto magicalize; case '!': - - /* If %! has been used, automatically load Errno.pm. - The require will itself set errno, so in order to - preserve its value we have to set up the magic - now (rather than going to magicalize) - */ + GvMULTI_on(gv); + /* If %! has been used, automatically load Errno.pm. */ sv_magic(GvSVn(gv), (SV*)gv, PERL_MAGIC_sv, name, len); + /* magicalization must be done before require_tie_mod is called */ if (sv_type == SVt_PVHV) - require_errno(gv); + require_tie_mod(gv, "!", newSVpvs("Errno"), "TIEHASH", 1); break; case '-': - { - AV* const av = GvAVn(gv); - sv_magic((SV*)av, NULL, PERL_MAGIC_regdata, NULL, 0); - SvREADONLY_on(av); - goto magicalize; + case '+': + GvMULTI_on(gv); /* no used once warnings here */ + { + bool plus = (*name == '+'); + SV *stashname = newSVpvs("re::Tie::Hash::NamedCapture"); + AV* const av = GvAVn(gv); + HV *const hv = GvHVn(gv); + HV *const hv_tie = newHV(); + SV *tie = newRV_noinc((SV*)hv_tie); + + sv_bless(tie, gv_stashsv(stashname,1)); + hv_magic(hv, (GV*)tie, PERL_MAGIC_tied); + sv_magic((SV*)av, (plus ? (SV*)av : NULL), PERL_MAGIC_regdata, NULL, 0); + sv_magic(GvSVn(gv), (SV*)gv, PERL_MAGIC_sv, name, len); + + if (plus) + SvREADONLY_on(GvSVn(gv)); + else + Perl_hv_store(aTHX_ hv_tie, STR_WITH_LEN("all"), newSViv(1), 0); + + SvREADONLY_on(hv); + SvREADONLY_on(tie); + SvREADONLY_on(av); + + if (sv_type == SVt_PVHV) + require_tie_mod(gv, name, stashname, "FETCH", 0); + + break; } case '*': case '#': @@ -1192,18 +1234,6 @@ Perl_gv_fetchpvn_flags(pTHX_ const char *nambeg, STRLEN full_len, I32 flags, hv_magic(hv, NULL, PERL_MAGIC_hints); } goto magicalize; - - case '+': - GvMULTI_on(gv); - { - AV* const av = GvAVn(gv); - HV* const hv = GvHVn(gv); - sv_magic((SV*)av, (SV*)av, PERL_MAGIC_regdata, NULL, 0); - SvREADONLY_on(av); - hv_magic(hv, NULL, PERL_MAGIC_regdata_names); - SvREADONLY_on(hv); - /* FALL THROUGH */ - } case '\023': /* $^S */ case '1': case '2': @@ -450,10 +450,7 @@ S_hv_fetch_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, xhv = (XPVHV*)SvANY(hv); if (SvMAGICAL(hv)) { if (SvRMAGICAL(hv) && !(action & (HV_FETCH_ISSTORE|HV_FETCH_ISEXISTS))) { - MAGIC *regdata = NULL; - if (( regdata = mg_find((SV*)hv, PERL_MAGIC_regdata_names)) || - mg_find((SV*)hv, PERL_MAGIC_tied) || - SvGMAGICAL((SV*)hv)) + if ( mg_find((SV*)hv, PERL_MAGIC_tied) || SvGMAGICAL((SV*)hv)) { /* XXX should be able to skimp on the HE/HEK here when HV_FETCH_JUST_SV is true. */ @@ -465,14 +462,8 @@ S_hv_fetch_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, } else { keysv = newSVsv(keysv); } - if (regdata) { - sv = Perl_reg_named_buff_sv(aTHX_ keysv); - if (!sv) - sv = sv_newmortal(); - } else { - sv = sv_newmortal(); - mg_copy((SV*)hv, sv, (char *)keysv, HEf_SVKEY); - } + sv = sv_newmortal(); + mg_copy((SV*)hv, sv, (char *)keysv, HEf_SVKEY); /* grab a fake HE/HEK pair from the pool or make a new one */ entry = PL_hv_fetch_ent_mh; @@ -1931,17 +1922,7 @@ Perl_hv_iterinit(pTHX_ HV *hv) } else { hv_auxinit(hv); } - if ( SvRMAGICAL(hv) ) { - MAGIC *mg = mg_find((SV*)hv, PERL_MAGIC_regdata_names); - if ( mg ) { - if (PL_curpm) { - const REGEXP * const rx = PM_GETRE(PL_curpm); - if (rx && rx->paren_names) { - (void)hv_iterinit(rx->paren_names); - } - } - } - } + /* used to be xhv->xhv_fill before 5.004_65 */ return HvTOTALKEYS(hv); } @@ -2109,83 +2090,7 @@ Perl_hv_iternext_flags(pTHX_ HV *hv, I32 flags) oldentry = entry = iter->xhv_eiter; /* HvEITER(hv) */ if (SvMAGICAL(hv) && SvRMAGICAL(hv)) { - if ( ( mg = mg_find((SV*)hv, PERL_MAGIC_regdata_names) ) ) { - SV * key; - SV *val = NULL; - REGEXP * rx; - if (!PL_curpm) - return NULL; - rx = PM_GETRE(PL_curpm); - if (rx && rx->paren_names) { - hv = rx->paren_names; - } else { - return NULL; - } - - key = sv_newmortal(); - if (entry) { - sv_setsv(key, HeSVKEY_force(entry)); - SvREFCNT_dec(HeSVKEY(entry)); /* get rid of previous key */ - } - else { - char *k; - HEK *hek; - - /* one HE per MAGICAL hash */ - iter->xhv_eiter = entry = new_HE(); /* HvEITER(hv) = new_HE() */ - Zero(entry, 1, HE); - Newxz(k, HEK_BASESIZE + sizeof(SV*), char); - hek = (HEK*)k; - HeKEY_hek(entry) = hek; - HeKLEN(entry) = HEf_SVKEY; - } - { - while (!val) { - HE *temphe = hv_iternext_flags(hv,flags); - if (temphe) { - IV i; - IV parno = 0; - SV* sv_dat = HeVAL(temphe); - I32 *nums = (I32*)SvPVX(sv_dat); - for ( i = 0; i < SvIVX(sv_dat); i++ ) { - if ((I32)(rx->lastcloseparen) >= nums[i] && - rx->startp[nums[i]] != -1 && - rx->endp[nums[i]] != -1) - { - parno = nums[i]; - break; - } - } - if (parno) { - GV *gv_paren; - STRLEN len; - SV *sv = sv_newmortal(); - const char* pvkey = HePV(temphe, len); - - Perl_sv_setpvf(aTHX_ sv, "%"IVdf,(IV)parno); - gv_paren = Perl_gv_fetchsv(aTHX_ sv, GV_ADD, SVt_PVGV); - Perl_sv_setpvn(aTHX_ key, pvkey, len); - val = GvSVn(gv_paren); - } - } else { - break; - } - } - } - if (val && SvOK(key)) { - /* force key to stay around until next time */ - HeSVKEY_set(entry, SvREFCNT_inc_simple_NN(key)); - HeVAL(entry) = SvREFCNT_inc_simple_NN(val); - return entry; /* beware, hent_val is not set */ - } - if (HeVAL(entry)) - SvREFCNT_dec(HeVAL(entry)); - Safefree(HeKEY_hek(entry)); - del_HE(entry); - iter->xhv_eiter = NULL; /* HvEITER(hv) = NULL */ - return NULL; - } - else if ( ( mg = mg_find((SV*)hv, PERL_MAGIC_tied) ) ) { + if ( ( mg = mg_find((SV*)hv, PERL_MAGIC_tied) ) ) { SV * const key = sv_newmortal(); if (entry) { sv_setsv(key, HeSVKEY_force(entry)); @@ -672,7 +672,6 @@ Perl_magic_get(pTHX_ SV *sv, MAGIC *mg) dVAR; register I32 paren; register char *s = NULL; - register I32 i; register REGEXP *rx; const char * const remaining = mg->mg_ptr + 1; const char nextchar = *remaining; @@ -851,90 +850,46 @@ Perl_magic_get(pTHX_ SV *sv, MAGIC *mg) case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case '&': if (PL_curpm && (rx = PM_GETRE(PL_curpm))) { - I32 s1, t1; - /* * Pre-threads, this was paren = atoi(GvENAME((GV*)mg->mg_obj)); * XXX Does the new way break anything? */ paren = atoi(mg->mg_ptr); /* $& is in [0] */ - getparen: - if (paren <= (I32)rx->nparens && - (s1 = rx->startp[paren]) != -1 && - (t1 = rx->endp[paren]) != -1) - { - i = t1 - s1; - s = rx->subbeg + s1; - assert(rx->subbeg); - assert(rx->sublen >= s1); - - getrx: - if (i >= 0) { - const int oldtainted = PL_tainted; - TAINT_NOT; - sv_setpvn(sv, s, i); - PL_tainted = oldtainted; - if ( (rx->extflags & RXf_CANY_SEEN) - ? (RX_MATCH_UTF8(rx) - && (!i || is_utf8_string((U8*)s, i))) - : (RX_MATCH_UTF8(rx)) ) - { - SvUTF8_on(sv); - } - else - SvUTF8_off(sv); - if (PL_tainting) { - if (RX_MATCH_TAINTED(rx)) { - MAGIC* const mg = SvMAGIC(sv); - MAGIC* mgt; - PL_tainted = 1; - SvMAGIC_set(sv, mg->mg_moremagic); - SvTAINT(sv); - if ((mgt = SvMAGIC(sv))) { - mg->mg_moremagic = mgt; - SvMAGIC_set(sv, mg); - } - } else - SvTAINTED_off(sv); - } - break; - } - } + reg_numbered_buff_get( paren, rx, sv, 0); + break; } sv_setsv(sv,&PL_sv_undef); break; case '+': if (PL_curpm && (rx = PM_GETRE(PL_curpm))) { - paren = rx->lastparen; - if (paren) - goto getparen; + if (rx->lastparen) { + reg_numbered_buff_get( rx->lastparen, rx, sv, 0); + break; + } } sv_setsv(sv,&PL_sv_undef); break; case '\016': /* ^N */ if (PL_curpm && (rx = PM_GETRE(PL_curpm))) { - paren = rx->lastcloseparen; - if (paren) - goto getparen; + if (rx->lastcloseparen) { + reg_numbered_buff_get( rx->lastcloseparen, rx, sv, 0); + break; + } + } sv_setsv(sv,&PL_sv_undef); break; case '`': if (PL_curpm && (rx = PM_GETRE(PL_curpm))) { - if ((s = rx->subbeg) && rx->startp[0] != -1) { - i = rx->startp[0]; - goto getrx; - } + reg_numbered_buff_get( -2, rx, sv, 0); + break; } sv_setsv(sv,&PL_sv_undef); break; case '\'': if (PL_curpm && (rx = PM_GETRE(PL_curpm))) { - if (rx->subbeg && rx->endp[0] != -1) { - s = rx->subbeg + rx->endp[0]; - i = rx->sublen - rx->endp[0]; - goto getrx; - } + reg_numbered_buff_get( -1, rx, sv, 0); + break; } sv_setsv(sv,&PL_sv_undef); break; @@ -3654,8 +3654,6 @@ Gid_t getegid (void); #define PERL_MAGIC_overload_elem 'a' /* %OVERLOAD hash element */ #define PERL_MAGIC_overload_table 'c' /* Holds overload table (AMT) on stash */ #define PERL_MAGIC_bm 'B' /* Boyer-Moore (fast string search) */ -#define PERL_MAGIC_regdata_names '+' /* Regex named capture buffer hash - (%+ support) */ #define PERL_MAGIC_regdata 'D' /* Regex match position data (@+ and @- vars) */ #define PERL_MAGIC_regdatum 'd' /* Regex match position data element */ @@ -5711,10 +5709,11 @@ extern void moncontrol(int); #define PERL_PV_ESCAPE_ALL 0x1000 #define PERL_PV_ESCAPE_NOBACKSLASH 0x2000 #define PERL_PV_ESCAPE_NOCLEAR 0x4000 +#define PERL_PV_ESCAPE_RE 0x8000 /* used by pv_display in dump.c*/ #define PERL_PV_PRETTY_DUMP PERL_PV_PRETTY_ELIPSES|PERL_PV_PRETTY_QUOTE -#define PERL_PV_PRETTY_REGPROP PERL_PV_PRETTY_ELIPSES|PERL_PV_PRETTY_LTGT +#define PERL_PV_PRETTY_REGPROP PERL_PV_PRETTY_ELIPSES|PERL_PV_PRETTY_LTGT|PERL_PV_ESCAPE_RE /* diff --git a/pod/perlapi.pod b/pod/perlapi.pod index 992afe8ab8..9acb5f9a0c 100644 --- a/pod/perlapi.pod +++ b/pod/perlapi.pod @@ -824,6 +824,10 @@ sequence. Thus the output will either be a single char, an octal escape sequence, a special escape like C<\n> or a 3 or more digit hex value. +If PERL_PV_ESCAPE_RE is set then the escape char used will be a '%' and +not a '\\'. This is because regexes very often contain backslashed +sequences, whereas '%' is not a particularly common character in patterns. + Returns a pointer to the escaped text as held by dsv. NOTE: the perl_ form of this function is deprecated. diff --git a/pod/perlintern.pod b/pod/perlintern.pod index 5ee68ad2a6..83bdda31f2 100644 --- a/pod/perlintern.pod +++ b/pod/perlintern.pod @@ -622,7 +622,7 @@ The SVs in the names AV have their PV being the name of the variable. xlow+1..xhigh inclusive in the NV union is a range of cop_seq numbers for which the name is valid. For typed lexicals name SV is SVt_PVMG and SvSTASH points at the type. For C<our> lexicals, the type is also SVt_PVMG, with the -OURSTASH slot pointing at the stash of the associated global (so that +SvOURSTASH slot pointing at the stash of the associated global (so that duplicate C<our> declarations in the same package can be detected). SvUVX is sometimes hijacked to store the generation number during compilation. @@ -714,7 +714,7 @@ offset. If C<typestash> is valid, the name is for a typed lexical; set the name's stash to that value. If C<ourstash> is valid, it's an our lexical, set the name's -OURSTASH to that value +SvOURSTASH to that value If fake, it means we're cloning an existing entry diff --git a/pod/perlvar.pod b/pod/perlvar.pod index 8a486b2b74..a211c378ae 100644 --- a/pod/perlvar.pod +++ b/pod/perlvar.pod @@ -324,6 +324,15 @@ C<$+{foo}> is equivalent to C<$1> after the following match: 'foo'=~/(?<foo>foo)/; +The underlying behaviour of %+ is provided by the L<re::Tie::Hash::NamedCapture> +module. + +B<Note:> As C<%-> and C<%+> are tied views into a common internal hash +associated with the last successful regular expression. Therefore mixing +iterative access to them via C<each> may have unpredictable results. +Likewise, if the last successful match changes then the results may be +surprising. + =item HANDLE->input_line_number(EXPR) =item $INPUT_LINE_NUMBER @@ -579,6 +588,40 @@ After a match against some variable $var: =back +=item %- +X<%-> + +Similar to %+, this variable allows access to the named capture +buffers that were defined in the last successful match. It returns +a reference to an array containing one value per buffer of a given +name in the pattern. + + if ('1234'=~/(?<A>1)(?<B>2)(?<A>3)(?<B>4)/) { + foreach my $name (sort keys(%-)) { + my $ary = $-{$name}; + foreach my $idx (0..$#$ary) { + print "\$-{$name}[$idx] : ", + (defined($ary->[$idx]) ? "'$ary->[$idx]'" : "undef"), + "\n"; + } + } + } + +would print out: + + $-{A}[0] : '1' + $-{A}[1] : '3' + $-{B}[0] : '2' + $-{B}[1] : '4' + +The behaviour of %- is implemented via the L<re::Tie::Hash::NamedCapture> module. + +Note that C<%-> and C<%+> are tied views into a common internal hash +associated with the last successful regular expression. Therefore mixing +iterative access to them via C<each> may have unpredictable results. +Likewise, if the last successful match changes then the results may be +surprising. + =item HANDLE->format_name(EXPR) =item $FORMAT_NAME @@ -1873,9 +1873,12 @@ PERL_CALLCONV regnode* Perl_regnext(pTHX_ regnode* p) __attribute__warn_unused_result__ __attribute__nonnull__(pTHX_1); -PERL_CALLCONV SV* Perl_reg_named_buff_sv(pTHX_ SV* namesv) +PERL_CALLCONV SV* Perl_reg_named_buff_get(pTHX_ SV* namesv, const REGEXP * const from_re, U32 flags) __attribute__nonnull__(pTHX_1); +PERL_CALLCONV SV* Perl_reg_numbered_buff_get(pTHX_ I32 paren, const REGEXP * const rx, SV* usesv, U32 flags) + __attribute__nonnull__(pTHX_2); + PERL_CALLCONV void Perl_regprop(pTHX_ const regexp *prog, SV* sv, const regnode* o) __attribute__nonnull__(pTHX_2) __attribute__nonnull__(pTHX_3); @@ -2953,8 +2956,11 @@ STATIC I32 S_do_trans_complex_utf8(pTHX_ SV * const sv) STATIC void S_gv_init_sv(pTHX_ GV *gv, I32 sv_type) __attribute__nonnull__(pTHX_1); -STATIC void S_require_errno(pTHX_ GV *gv) - __attribute__nonnull__(pTHX_1); +STATIC HV* S_require_tie_mod(pTHX_ GV *gv, const char *varpv, SV* namesv, const char *methpv, const U32 flags) + __attribute__nonnull__(pTHX_1) + __attribute__nonnull__(pTHX_2) + __attribute__nonnull__(pTHX_3) + __attribute__nonnull__(pTHX_4); #endif @@ -4624,11 +4624,15 @@ reStudy: #ifndef PERL_IN_XSUB_RE SV* -Perl_reg_named_buff_sv(pTHX_ SV* namesv) +Perl_reg_named_buff_get(pTHX_ SV* namesv, const REGEXP * const from_re, U32 flags) { - I32 parno = 0; /* no match */ - if (PL_curpm) { - const REGEXP * const rx = PM_GETRE(PL_curpm); + AV *retarray = NULL; + SV *ret; + if (flags & 1) + retarray=newAV(); + + if (from_re || PL_curpm) { + const REGEXP * const rx = from_re ? from_re : PM_GETRE(PL_curpm); if (rx && rx->paren_names) { HE *he_str = hv_fetch_ent( rx->paren_names, namesv, 0, 0 ); if (he_str) { @@ -4639,22 +4643,97 @@ Perl_reg_named_buff_sv(pTHX_ SV* namesv) if ((I32)(rx->lastparen) >= nums[i] && rx->endp[nums[i]] != -1) { - parno = nums[i]; - break; + ret = reg_numbered_buff_get(nums[i],rx,NULL,0); + if (!retarray) + return ret; + } else { + ret = newSVsv(&PL_sv_undef); + } + if (retarray) { + SvREFCNT_inc(ret); + av_push(retarray, ret); } } + if (retarray) + return (SV*)retarray; } } } - if ( !parno ) { - return 0; + return NULL; +} + +SV* +Perl_reg_numbered_buff_get(pTHX_ I32 paren, const REGEXP * const rx, SV* usesv, U32 flags) +{ + char *s = NULL; + I32 i; + I32 s1, t1; + SV *sv = usesv ? usesv : newSVpvs(""); + + if (paren == -2 && (s = rx->subbeg) && rx->startp[0] != -1) { + /* $` */ + i = rx->startp[0]; + } + else + if (paren == -1 && rx->subbeg && rx->endp[0] != -1) { + /* $' */ + s = rx->subbeg + rx->endp[0]; + i = rx->sublen - rx->endp[0]; + } + else + if ( 0 <= paren && paren <= (I32)rx->nparens && + (s1 = rx->startp[paren]) != -1 && + (t1 = rx->endp[paren]) != -1) + { + /* $& $1 ... */ + i = t1 - s1; + s = rx->subbeg + s1; + } + + if (s) { + assert(rx->subbeg); + assert(rx->sublen >= (s - rx->subbeg) + i ); + + if (i >= 0) { + const int oldtainted = PL_tainted; + TAINT_NOT; + sv_setpvn(sv, s, i); + PL_tainted = oldtainted; + if ( (rx->extflags & RXf_CANY_SEEN) + ? (RX_MATCH_UTF8(rx) + && (!i || is_utf8_string((U8*)s, i))) + : (RX_MATCH_UTF8(rx)) ) + { + SvUTF8_on(sv); + } + else + SvUTF8_off(sv); + if (PL_tainting) { + if (RX_MATCH_TAINTED(rx)) { + if (SvTYPE(sv) >= SVt_PVMG) { + MAGIC* const mg = SvMAGIC(sv); + MAGIC* mgt; + PL_tainted = 1; + SvMAGIC_set(sv, mg->mg_moremagic); + SvTAINT(sv); + if ((mgt = SvMAGIC(sv))) { + mg->mg_moremagic = mgt; + SvMAGIC_set(sv, mg); + } + } else { + PL_tainted = 1; + SvTAINT(sv); + } + } else + SvTAINTED_off(sv); + } + } else { + sv_setsv(sv,&PL_sv_undef); + } } else { - GV *gv_paren; - SV *sv= sv_newmortal(); - Perl_sv_setpvf(aTHX_ sv, "%"IVdf,(IV)parno); - gv_paren= Perl_gv_fetchsv(aTHX_ sv, GV_ADD, SVt_PVGV); - return GvSVn(gv_paren); + sv_setsv(sv,&PL_sv_undef); } + return sv; } #endif @@ -4515,9 +4515,6 @@ Perl_sv_magic(pTHX_ register SV *sv, SV *obj, int how, const char *name, I32 nam case PERL_MAGIC_regdata: vtable = &PL_vtbl_regdata; break; - case PERL_MAGIC_regdata_names: - vtable = &PL_vtbl_regdata_names; - break; case PERL_MAGIC_regdatum: vtable = &PL_vtbl_regdatum; break; diff --git a/t/op/pat.t b/t/op/pat.t index 84dc2e85f7..24aa38a6df 100755 --- a/t/op/pat.t +++ b/t/op/pat.t @@ -3745,7 +3745,24 @@ sub iseq($$;$) { '; ok(!$@,'lvalue $+{...} should not throw an exception'); } - +{ + my $s='foo bar baz'; + my @res; + if ('1234'=~/(?<A>1)(?<B>2)(?<A>3)(?<B>4)/) { + foreach my $name (sort keys(%-)) { + my $ary = $-{$name}; + foreach my $idx (0..$#$ary) { + push @res,"$name:$idx:$ary->[$idx]"; + } + } + } + my @expect=qw(A:0:1 A:1:3 B:0:2 B:1:4); + iseq("@res","@expect","Check %-"); + eval' + print for $-{this_key_doesnt_exist}; + '; + ok(!$@,'lvalue $-{...} should not throw an exception'); +} # stress test CURLYX/WHILEM. # # This test includes varying levels of nesting, and according to @@ -4240,7 +4257,7 @@ ok($@=~/\QSequence \k... not terminated in regex;\E/); iseq(0+$::test,$::TestCount,"Got the right number of tests!"); # Don't forget to update this! BEGIN { - $::TestCount = 1606; + $::TestCount = 1608; print "1..$::TestCount\n"; } |