summaryrefslogtreecommitdiff
path: root/ext
diff options
context:
space:
mode:
Diffstat (limited to 'ext')
-rw-r--r--ext/re/lib/re/Tie/Hash/NamedCapture.pm111
-rw-r--r--ext/re/re.pm46
-rw-r--r--ext/re/re.xs196
-rw-r--r--ext/re/t/re_funcs.t48
4 files changed, 373 insertions, 28 deletions
diff --git a/ext/re/lib/re/Tie/Hash/NamedCapture.pm b/ext/re/lib/re/Tie/Hash/NamedCapture.pm
new file mode 100644
index 0000000000..a76c6ab7aa
--- /dev/null
+++ b/ext/re/lib/re/Tie/Hash/NamedCapture.pm
@@ -0,0 +1,111 @@
+package re::Tie::Hash::NamedCapture;
+use strict;
+use warnings;
+our $VERSION = "0.01";
+use re qw(is_regexp
+ regname
+ regnames
+ regnames_count
+ regnames_iterinit
+ regnames_iternext);
+
+sub TIEHASH {
+ my $classname = shift;
+ my $hash = {@_};
+
+ if ($hash->{re} && !is_regexp($hash->{re})) {
+ die "'re' parameter to ",__PACKAGE__,"->TIEHASH must be a qr//"
+ }
+
+ return bless $hash, $classname;
+}
+
+sub FETCH {
+ return regname($_[1],$_[0]->{re},$_[0]->{all});
+}
+
+sub STORE {
+ require Carp;
+ Carp::croak("STORE forbidden: Hashes tied to ",__PACKAGE__," are read/only.");
+}
+
+sub FIRSTKEY {
+ regnames_iterinit($_[0]->{re});
+ return $_[0]->NEXTKEY;
+}
+
+sub NEXTKEY {
+ return regnames_iternext($_[0]->{re},$_[0]->{all});
+}
+
+sub EXISTS {
+ return defined regname( $_[1], $_[0]->{re},$_[0]->{all});
+}
+
+sub DELETE {
+ require Carp;
+ Carp::croak("DELETE forbidden: Hashes tied to ",__PACKAGE__," are read/only");
+}
+
+sub CLEAR {
+ require Carp;
+ Carp::croak("CLEAR forbidden: Hashes tied to ",__PACKAGE__," are read/only");
+}
+
+sub SCALAR {
+ return scalar regnames($_[0]->{re},$_[0]->{all});
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+re::Tie::Hash::NamedCapture - Perl module to support named regex capture buffers
+
+=head1 SYNOPSIS
+
+ tie my %hash,"re::Tie::Hash::NamedCapture";
+ # %hash now behaves like %-
+
+ tie my %hash,"re::Tie::Hash::NamedCapture",re => $qr, all=> 1,
+ # %hash now access buffers from regex in $qr like %+
+
+=head1 DESCRIPTION
+
+Implements the behaviour required for C<%+> and C<%-> but can be used
+independently.
+
+When the C<re> parameter is provided, and the value is the result of
+a C<qr//> expression then the hash is bound to that particular regexp
+and will return the results of its last successful match. If the
+parameter is omitted then the hash behaves just as C<$1> does by
+referencing the last successful match.
+
+When the C<all> parameter is provided then the result of a fetch
+is an array ref containing the contents of each buffer whose name
+was the same as the key used for the access. If the buffer wasn't
+involved in the match then an undef will be stored. When the all
+parameter is omitted or not a true value then the return will be
+a the content of the left most defined buffer with the given name.
+If there is no buffer with the desired name defined then C<undef>
+is returned.
+
+
+For instance:
+
+ my $qr = qr/(?<foo>bar)/;
+ if ( 'bar' =~ /$qr/ ) {
+ tie my %hash,"re::Tie::Hash::NamedCapture",re => $qr, all => 1;
+ if ('bar'=~/bar/) {
+ # last successful match is now different
+ print $hash{foo}; # prints foo
+ }
+ }
+
+=head1 SEE ALSO
+
+L<re>, L<perlmodlib/Pragmatic Modules>.
+
+=cut
diff --git a/ext/re/re.pm b/ext/re/re.pm
index ce01214315..4a64af3964 100644
--- a/ext/re/re.pm
+++ b/ext/re/re.pm
@@ -4,9 +4,11 @@ package re;
use strict;
use warnings;
-our $VERSION = "0.07";
+our $VERSION = "0.08";
our @ISA = qw(Exporter);
-our @EXPORT_OK = qw(is_regexp regexp_pattern regmust);
+our @EXPORT_OK = qw(is_regexp regexp_pattern regmust
+ regname regnames
+ regnames_count regnames_iterinit regnames_iternext);
our %EXPORT_OK = map { $_ => 1 } @EXPORT_OK;
# *** WARNING *** WARNING *** WARNING *** WARNING *** WARNING ***
@@ -464,6 +466,46 @@ floating string. This will be what the optimiser of the Perl that you
are using thinks is the longest. If you believe that the result is wrong
please report it via the L<perlbug> utility.
+=item regname($name,$qr,$all)
+
+Returns the contents of a named buffer. If $qr is missing, or is not the
+result of a qr// then returns the result of the last successful match. If
+$all is true then returns an array ref containing one entry per buffer,
+otherwise returns the first defined buffer.
+
+=item regnames($qr,$all)
+
+Returns a list of all of the named buffers defined in a pattern. If
+$all is true then it returns all names defined, if not returns only
+names which were involved in the last successful match. If $qr is omitted
+or is not the result of a qr// then returns the details for the last
+successful match.
+
+=item regnames_iterinit($qr)
+
+Initializes the internal hash iterator associated to a regexps named capture
+buffers. If $qr is omitted resets the iterator associated with the regexp used
+in the last successful match.
+
+=item regnames_iternext($qr,$all)
+
+Gets the next key from the hash associated with a regexp. If $qr
+is omitted resets the iterator associated with the regexp used in the
+last successful match. If $all is true returns the keys of all of the
+distinct named buffers in the pattern, if not returns only those names
+used in the last successful match.
+
+=item regnames_count($qr)
+
+Returns the number of distinct names defined in the regexp $qr. If
+$qr is omitted or not a regexp returns the count of names in the
+last successful match.
+
+B<Note:> that this result is always the actual number of distinct
+named buffers defined, it may not actually match that which is
+returned by C<regnames()> and related routines when those routines
+have not been called with the $all parameter set..
+
=back
=head1 SEE ALSO
diff --git a/ext/re/re.xs b/ext/re/re.xs
index d1d27023e8..aa601cf67d 100644
--- a/ext/re/re.xs
+++ b/ext/re/re.xs
@@ -41,6 +41,25 @@ const struct regexp_engine my_reg_engine = {
#endif
};
+regexp *
+get_re_arg( pTHX_ SV *sv, U32 flags, MAGIC **mgp) {
+ MAGIC *mg;
+ if (sv) {
+ if (SvMAGICAL(sv))
+ mg_get(sv);
+ if (SvROK(sv) &&
+ (sv = (SV*)SvRV(sv)) && /* assign deliberate */
+ SvTYPE(sv) == SVt_PVMG &&
+ (mg = mg_find(sv, PERL_MAGIC_qr))) /* assign deliberate */
+ {
+ if (mgp) *mgp = mg;
+ return (regexp *)mg->mg_obj;
+ }
+ }
+ if (mgp) *mgp = NULL;
+ return ((flags && PL_curpm) ? PM_GETRE(PL_curpm) : NULL);
+}
+
MODULE = re PACKAGE = re
void
@@ -55,16 +74,9 @@ void
is_regexp(sv)
SV * sv
PROTOTYPE: $
-PREINIT:
- MAGIC *mg;
PPCODE:
{
- if (SvMAGICAL(sv))
- mg_get(sv);
- if (SvROK(sv) &&
- (sv = (SV*)SvRV(sv)) && /* assign deliberate */
- SvTYPE(sv) == SVt_PVMG &&
- (mg = mg_find(sv, PERL_MAGIC_qr))) /* assign deliberate */
+ if ( get_re_arg( aTHX_ sv, 0, NULL ) )
{
XSRETURN_YES;
} else {
@@ -79,6 +91,7 @@ regexp_pattern(sv)
PROTOTYPE: $
PREINIT:
MAGIC *mg;
+ regexp *re;
PPCODE:
{
/*
@@ -92,17 +105,10 @@ PPCODE:
on the object.
*/
- if (SvMAGICAL(sv))
- mg_get(sv);
- if (SvROK(sv) &&
- (sv = (SV*)SvRV(sv)) && /* assign deliberate */
- SvTYPE(sv) == SVt_PVMG &&
- (mg = mg_find(sv, PERL_MAGIC_qr))) /* assign deliberate */
+ if ( re = get_re_arg( aTHX_ sv, 0, &mg) ) /* assign deliberate */
{
-
/* Housten, we have a regex! */
SV *pattern;
- regexp *re = (regexp *)mg->mg_obj;
STRLEN patlen = 0;
STRLEN left = 0;
char reflags[6];
@@ -173,19 +179,13 @@ regmust(sv)
SV * sv
PROTOTYPE: $
PREINIT:
- MAGIC *mg;
+ regexp *re;
PPCODE:
{
- if (SvMAGICAL(sv))
- mg_get(sv);
- if (SvROK(sv) &&
- (sv = (SV*)SvRV(sv)) && /* assign deliberate */
- SvTYPE(sv) == SVt_PVMG &&
- (mg = mg_find(sv, PERL_MAGIC_qr))) /* assign deliberate */
+ if ( re = get_re_arg( aTHX_ sv, 0, 0) ) /* assign deliberate */
{
SV *an = &PL_sv_no;
SV *fl = &PL_sv_no;
- regexp *re = (regexp *)mg->mg_obj;
if (re->anchored_substr) {
an = newSVsv(re->anchored_substr);
} else if (re->anchored_utf8) {
@@ -202,3 +202,151 @@ PPCODE:
}
XSRETURN_UNDEF;
}
+
+void
+regname(sv, qr = NULL, all = NULL)
+ SV * sv
+ SV * qr
+ SV * all
+PROTOTYPE: ;$$$
+PREINIT:
+ regexp *re = NULL;
+ SV *bufs = NULL;
+PPCODE:
+{
+ re = get_re_arg( aTHX_ qr, 1, NULL);
+ if (SvPOK(sv) && re && re->paren_names) {
+ bufs = Perl_reg_named_buff_get(aTHX_ sv, re ,all && SvTRUE(all));
+ if (bufs) {
+ if (all && SvTRUE(all))
+ XPUSHs(newRV(bufs));
+ else
+ XPUSHs(SvREFCNT_inc(bufs));
+ XSRETURN(1);
+ }
+ }
+ XSRETURN_UNDEF;
+}
+
+void
+regnames(sv = NULL, all = NULL)
+ SV *sv
+ SV *all
+PROTOTYPE: ;$$
+PREINIT:
+ regexp *re = NULL;
+ IV count = 0;
+PPCODE:
+{
+ re = get_re_arg( aTHX_ sv, 1, NULL );
+ if (re && re->paren_names) {
+ HV *hv= re->paren_names;
+ (void)hv_iterinit(hv);
+ while (1) {
+ HE *temphe = hv_iternext_flags(hv,0);
+ if (temphe) {
+ IV i;
+ IV parno = 0;
+ SV* sv_dat = HeVAL(temphe);
+ I32 *nums = (I32*)SvPVX(sv_dat);
+ for ( i = 0; i < SvIVX(sv_dat); i++ ) {
+ if ((I32)(re->lastcloseparen) >= nums[i] &&
+ re->startp[nums[i]] != -1 &&
+ re->endp[nums[i]] != -1)
+ {
+ parno = nums[i];
+ break;
+ }
+ }
+ if (parno || (all && SvTRUE(all))) {
+ STRLEN len;
+ char *pv = HePV(temphe, len);
+ if ( GIMME_V == G_ARRAY )
+ XPUSHs(newSVpvn(pv,len));
+ count++;
+ }
+ } else {
+ break;
+ }
+ }
+ }
+ if ( GIMME_V == G_ARRAY )
+ XSRETURN(count);
+ else
+ XSRETURN_UNDEF;
+}
+
+void
+regnames_iterinit(sv = NULL)
+ SV * sv
+PROTOTYPE: ;$
+PREINIT:
+ regexp *re = NULL;
+PPCODE:
+{
+ re = get_re_arg( aTHX_ sv, 1, NULL );
+ if (re && re->paren_names) {
+ (void)hv_iterinit(re->paren_names);
+ XPUSHs(newSViv(HvTOTALKEYS(re->paren_names)));
+ } else {
+ XSRETURN_UNDEF;
+ }
+}
+
+void
+regnames_iternext(sv = NULL, all = NULL)
+ SV *sv
+ SV *all
+PROTOTYPE: ;$$
+PREINIT:
+ regexp *re;
+PPCODE:
+{
+ re = get_re_arg( aTHX_ sv, 1, NULL );
+ if (re && re->paren_names) {
+ HV *hv= re->paren_names;
+ while (1) {
+ HE *temphe = hv_iternext_flags(hv,0);
+ if (temphe) {
+ IV i;
+ IV parno = 0;
+ SV* sv_dat = HeVAL(temphe);
+ I32 *nums = (I32*)SvPVX(sv_dat);
+ for ( i = 0; i < SvIVX(sv_dat); i++ ) {
+ if ((I32)(re->lastcloseparen) >= nums[i] &&
+ re->startp[nums[i]] != -1 &&
+ re->endp[nums[i]] != -1)
+ {
+ parno = nums[i];
+ break;
+ }
+ }
+ if (parno || (all && SvTRUE(all))) {
+ STRLEN len;
+ char *pv = HePV(temphe, len);
+ XPUSHs(newSVpvn(pv,len));
+ XSRETURN(1);
+ }
+ } else {
+ break;
+ }
+ }
+ }
+ XSRETURN_UNDEF;
+}
+
+void
+regnames_count(sv = NULL)
+ SV * sv
+PROTOTYPE: ;$
+PREINIT:
+ regexp *re = NULL;
+PPCODE:
+{
+ re = get_re_arg( aTHX_ sv, 1, NULL );
+ if (re && re->paren_names) {
+ XPUSHs(newSViv(HvTOTALKEYS(re->paren_names)));
+ } else {
+ XSRETURN_UNDEF;
+ }
+}
diff --git a/ext/re/t/re_funcs.t b/ext/re/t/re_funcs.t
index f84e2b09a6..736829cbc4 100644
--- a/ext/re/t/re_funcs.t
+++ b/ext/re/t/re_funcs.t
@@ -13,7 +13,9 @@ BEGIN {
use strict;
use Test::More; # test count at bottom of file
-use re qw(is_regexp regexp_pattern regmust);
+use re qw(is_regexp regexp_pattern regmust
+ regname regnames regnames_count
+ regnames_iterinit regnames_iternext);
my $qr=qr/foo/i;
ok(is_regexp($qr),'is_regexp($qr)');
@@ -37,6 +39,48 @@ ok(!regexp_pattern(''),'!regexp_pattern("")');
is($floating,undef,"Regmust anchored - ref");
}
+
+if ('1234'=~/(?:(?<A>\d)|(?<C>!))(?<B>\d)(?<A>\d)(?<B>\d)/){
+ my $qr = qr/(?<foo>foo)(?<bar>bar)/;
+ my @names = sort +regnames($qr);
+ is("@names","","regnames");
+ @names = sort +regnames($qr,1);
+ is("@names","bar foo","regnames - all");
+ @names = sort +regnames();
+ is("@names","A B","regnames");
+ @names = sort +regnames(undef,1);
+ is("@names","A B C","regnames");
+ is(join("", @{regname("A",undef,1)}),"13");
+ is(join("", @{regname("B",undef,1)}),"24");
+ {
+ if ('foobar'=~/$qr/) {
+ regnames_iterinit();
+ my @res;
+ while (defined(my $key=regnames_iternext)) {
+ push @res,$key;
+ }
+ @res=sort @res;
+ is("@res","bar foo");
+ is(regnames_count(),2);
+ } else {
+ ok(0); ok(0);
+ }
+ }
+ is(regnames_count(),3);
+ is(regnames_count($qr),2);
+}
+{
+ use warnings;
+ require re::Tie::Hash::NamedCapture;
+ my $qr = qr/(?<foo>foo)/;
+ if ( 'foo' =~ /$qr/ ) {
+ tie my %hash,"re::Tie::Hash::NamedCapture",re => $qr;
+ if ('bar'=~/bar/) {
+ # last successful match is now different
+ is($hash{foo},'foo'); # prints foo
+ }
+ }
+}
# New tests above this line, don't forget to update the test count below!
-use Test::More tests => 12;
+use Test::More tests => 23;
# No tests here!