summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAbigail <abigail@abigail.be>2007-06-30 01:38:07 +0200
committerYves Orton <demerphq@gmail.com>2007-06-30 15:37:41 +0000
commitf7819f85d6af0dcf9114284a4fe5ef21855e4e5a (patch)
tree0f20f81b6e703acd015e53b4400a73956f2cf7a4
parentc4a7531db1b7667c9d43fd3494f5bbf4901ff149 (diff)
downloadperl-f7819f85d6af0dcf9114284a4fe5ef21855e4e5a.tar.gz
/p vs (?p)
Date: Fri, 29 Jun 2007 23:38:07 +0200 Message-ID: <20070629213807.GA14454@abigail.nl> Subject: [PATCH pod/perlre.pod] Keeping up with the changes. From: Abigail <abigail@abigail.be> Date: Sat, 30 Jun 2007 01:24:36 +0200 Message-ID: <20070629232436.GA15326@abigail.nl> Plus tweaks, and debug enahancements. p4raw-id: //depot/perl@31506
-rw-r--r--embed.fnc2
-rw-r--r--embed.h4
-rw-r--r--ext/re/re.pm1
-rw-r--r--globvar.sym1
-rw-r--r--pod/perlre.pod10
-rw-r--r--proto.h4
-rw-r--r--regcomp.c49
-rw-r--r--regcomp.h4
-rw-r--r--regcomp.pl31
-rw-r--r--regexp.h2
-rw-r--r--regnodes.h42
-rw-r--r--t/op/reg_pmod.t22
-rw-r--r--win32/Makefile2
13 files changed, 146 insertions, 28 deletions
diff --git a/embed.fnc b/embed.fnc
index fbd6ec735a..14f5292e78 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -672,6 +672,7 @@ Ap |void |push_scope
Amb |OP* |ref |NULLOK OP* o|I32 type
p |OP* |refkids |NULLOK OP* o|I32 type
Ap |void |regdump |NN const regexp* r
+Ap |void |regdump |NN const regexp* r
Ap |SV* |regclass_swash |NULLOK const regexp *prog|NN const struct regnode *n|bool doinit|NULLOK SV **listsvp|NULLOK SV **altsvp
Ap |I32 |pregexec |NN REGEXP * const prog|NN char* stringarg \
|NN char* strend|NN char* strbeg|I32 minend \
@@ -1403,6 +1404,7 @@ Es |I32 |make_trie |NN struct RExC_state_t* state|NN regnode *startbranch \
Es |void |make_trie_failtable |NN struct RExC_state_t* state \
|NN regnode *source|NN regnode *node|U32 depth
# ifdef DEBUGGING
+Es |void |regdump_extflags|NULLOK const char *lead| const U32 flags
Es |const regnode*|dumpuntil|NN const regexp *r|NN const regnode *start \
|NN const regnode *node \
|NULLOK const regnode *last \
diff --git a/embed.h b/embed.h
index bfa2cd1739..9c273fc4f5 100644
--- a/embed.h
+++ b/embed.h
@@ -686,6 +686,7 @@
#define refkids Perl_refkids
#endif
#define regdump Perl_regdump
+#define regdump Perl_regdump
#define regclass_swash Perl_regclass_swash
#define pregexec Perl_pregexec
#define pregfree Perl_pregfree
@@ -1404,6 +1405,7 @@
#endif
# ifdef DEBUGGING
#if defined(PERL_CORE) || defined(PERL_EXT)
+#define regdump_extflags S_regdump_extflags
#define dumpuntil S_dumpuntil
#define put_byte S_put_byte
#define dump_trie S_dump_trie
@@ -2973,6 +2975,7 @@
#define refkids(a,b) Perl_refkids(aTHX_ a,b)
#endif
#define regdump(a) Perl_regdump(aTHX_ a)
+#define regdump(a) Perl_regdump(aTHX_ a)
#define regclass_swash(a,b,c,d,e) Perl_regclass_swash(aTHX_ a,b,c,d,e)
#define pregexec(a,b,c,d,e,f,g) Perl_pregexec(aTHX_ a,b,c,d,e,f,g)
#define pregfree(a) Perl_pregfree(aTHX_ a)
@@ -3691,6 +3694,7 @@
#endif
# ifdef DEBUGGING
#if defined(PERL_CORE) || defined(PERL_EXT)
+#define regdump_extflags(a,b) S_regdump_extflags(aTHX_ a,b)
#define dumpuntil(a,b,c,d,e,f,g,h) S_dumpuntil(aTHX_ a,b,c,d,e,f,g,h)
#define put_byte(a,b) S_put_byte(aTHX_ a,b)
#define dump_trie(a,b,c,d) S_dump_trie(aTHX_ a,b,c,d)
diff --git a/ext/re/re.pm b/ext/re/re.pm
index 61e373ef18..0cf5376e86 100644
--- a/ext/re/re.pm
+++ b/ext/re/re.pm
@@ -53,6 +53,7 @@ my %flags = (
OPTIMISE => 0x000002,
TRIEC => 0x000004,
DUMP => 0x000008,
+ FLAGS => 0x000010,
EXECUTE => 0x00FF00,
INTUIT => 0x000100,
diff --git a/globvar.sym b/globvar.sym
index bb5f58f208..d98b4d306f 100644
--- a/globvar.sym
+++ b/globvar.sym
@@ -29,6 +29,7 @@ opargs
ppaddr
regkind
reg_name
+reg_extflags_name
sig_name
sig_num
simple
diff --git a/pod/perlre.pod b/pod/perlre.pod
index 0f9ded3d0c..0bfd09ceae 100644
--- a/pod/perlre.pod
+++ b/pod/perlre.pod
@@ -237,7 +237,7 @@ You'll need to write something like C<m/\Quser\E\@\Qhost/>.
In addition, Perl defines the following:
X<\w> X<\W> X<\s> X<\S> X<\d> X<\D> X<\X> X<\p> X<\P> X<\C>
-X<\g> X<\k> X<\N> X<\K> X<\v> X<\V>
+X<\g> X<\k> X<\N> X<\K> X<\v> X<\V> X<\h> X<\H>
X<word> X<whitespace> X<character class> X<backreference>
\w Match a "word" character (alphanumeric plus "_")
@@ -670,7 +670,7 @@ whitespace formatting, a simple C<#> will suffice. Note that Perl closes
the comment as soon as it sees a C<)>, so there is no way to put a literal
C<)> in the comment.
-=item C<(?kimsx-imsx)>
+=item C<(?pimsx-imsx)>
X<(?)>
One or more embedded pattern-match modifiers, to be turned on (or
@@ -1346,7 +1346,7 @@ argument, then C<$REGERROR> and C<$REGMARK> are not touched at all.
=over 4
=item C<(*PRUNE)> C<(*PRUNE:NAME)>
-X<(*PRUNE)> X<(*PRUNE:NAME)> X<\v>
+X<(*PRUNE)> X<(*PRUNE:NAME)>
This zero-width pattern prunes the backtracking tree at the current point
when backtracked into on failure. Consider the pattern C<A (*PRUNE) B>,
@@ -1356,8 +1356,6 @@ continues in B, which may also backtrack as necessary; however, should B
not match, then no further backtracking will take place, and the pattern
will fail outright at the current starting position.
-As a shortcut, C<\v> is exactly equivalent to C<(*PRUNE)>.
-
The following example counts all the possible matching strings in a
pattern (without actually matching any of them).
@@ -1409,8 +1407,6 @@ of this pattern. This effectively means that the regex engine "skips" forward
to this position on failure and tries to match again, (assuming that
there is sufficient room to match).
-As a shortcut C<\V> is exactly equivalent to C<(*SKIP)>.
-
The name of the C<(*SKIP:NAME)> pattern has special significance. If a
C<(*MARK:NAME)> was encountered while matching, then it is that position
which is used as the "skip point". If no C<(*MARK)> of that name was
diff --git a/proto.h b/proto.h
index aa659507e6..13bd3c24ff 100644
--- a/proto.h
+++ b/proto.h
@@ -1843,6 +1843,9 @@ PERL_CALLCONV OP* Perl_refkids(pTHX_ OP* o, I32 type);
PERL_CALLCONV void Perl_regdump(pTHX_ const regexp* r)
__attribute__nonnull__(pTHX_1);
+PERL_CALLCONV void Perl_regdump(pTHX_ const regexp* r)
+ __attribute__nonnull__(pTHX_1);
+
PERL_CALLCONV SV* Perl_regclass_swash(pTHX_ const regexp *prog, const struct regnode *n, bool doinit, SV **listsvp, SV **altsvp)
__attribute__nonnull__(pTHX_2);
@@ -3791,6 +3794,7 @@ STATIC void S_make_trie_failtable(pTHX_ struct RExC_state_t* state, regnode *sou
__attribute__nonnull__(pTHX_3);
# ifdef DEBUGGING
+STATIC void S_regdump_extflags(pTHX_ const char *lead, const U32 flags);
STATIC const regnode* S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node, const regnode *last, const regnode *plast, SV* sv, I32 indent, U32 depth)
__attribute__nonnull__(pTHX_1)
__attribute__nonnull__(pTHX_2)
diff --git a/regcomp.c b/regcomp.c
index 0f87282744..71cf68aca5 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -4243,21 +4243,21 @@ redo_first_pass:
r->prelen = plen;
r->extflags = pm_flags;
{
- bool has_k = ((r->extflags & RXf_PMf_KEEPCOPY) == RXf_PMf_KEEPCOPY);
+ bool has_p = ((r->extflags & RXf_PMf_KEEPCOPY) == RXf_PMf_KEEPCOPY);
bool has_minus = ((r->extflags & RXf_PMf_STD_PMMOD) != RXf_PMf_STD_PMMOD);
bool has_runon = ((RExC_seen & REG_SEEN_RUN_ON_COMMENT)==REG_SEEN_RUN_ON_COMMENT);
U16 reganch = (U16)((r->extflags & RXf_PMf_STD_PMMOD) >> 12);
const char *fptr = STD_PAT_MODS; /*"msix"*/
char *p;
- r->wraplen = r->prelen + has_minus + has_k + has_runon
+ r->wraplen = r->prelen + has_minus + has_p + has_runon
+ (sizeof(STD_PAT_MODS) - 1)
+ (sizeof("(?:)") - 1);
Newx(r->wrapped, r->wraplen + 1, char );
p = r->wrapped;
*p++='('; *p++='?';
- if (has_k)
- *p++ = KEEPCOPY_PAT_MOD; /*'k'*/
+ if (has_p)
+ *p++ = KEEPCOPY_PAT_MOD; /*'p'*/
{
char *r = p + (sizeof(STD_PAT_MODS) - 1) + has_minus - 1;
char *colon = r + 1;
@@ -4362,7 +4362,7 @@ reStudy:
#endif
/* Dig out information for optimizations. */
- r->extflags = pm_flags; /* Again? */
+ r->extflags = RExC_flags; /* was pm_op */
/*dmq: removed as part of de-PMOP: pm->op_pmflags = RExC_flags; */
if (UTF)
@@ -5291,7 +5291,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
register regnode *ender = NULL;
register I32 parno = 0;
I32 flags;
- const I32 oregflags = RExC_flags;
+ U32 oregflags = RExC_flags;
bool have_branch = 0;
bool is_open = 0;
I32 freeze_paren = 0;
@@ -5890,8 +5890,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
and must be globally applied -- japhy */
switch (*RExC_parse) {
CASE_STD_PMMOD_FLAGS_PARSE_SET(flagsp);
- case 'o':
- case 'g':
+ case ONCE_PAT_MOD: /* 'o' */
+ case GLOBAL_PAT_MOD: /* 'g' */
if (SIZE_ONLY && ckWARN(WARN_REGEXP)) {
const I32 wflagbit = *RExC_parse == 'o' ? WASTED_O : WASTED_G;
if (! (wastedflags & wflagbit) ) {
@@ -5908,7 +5908,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
}
break;
- case 'c':
+ case CONTINUE_PAT_MOD: /* 'c' */
if (SIZE_ONLY && ckWARN(WARN_REGEXP)) {
if (! (wastedflags & WASTED_C) ) {
wastedflags |= WASTED_GC;
@@ -5921,10 +5921,10 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
}
}
break;
- case 'k':
+ case KEEPCOPY_PAT_MOD: /* 'p' */
if (flagsp == &negflags) {
if (SIZE_ONLY && ckWARN(WARN_REGEXP))
- vWARN(RExC_parse + 1,"Useless use of (?-k)");
+ vWARN(RExC_parse + 1,"Useless use of (?-p)");
} else {
*flagsp |= RXf_PMf_KEEPCOPY;
}
@@ -5944,6 +5944,10 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
case ')':
RExC_flags |= posflags;
RExC_flags &= ~negflags;
+ if (paren != ':') {
+ oregflags |= posflags;
+ oregflags &= ~negflags;
+ }
nextchar(pRExC_state);
if (paren != ':') {
*flagp = TRYAGAIN;
@@ -8633,6 +8637,27 @@ S_regcurly(register const char *s)
/*
- regdump - dump a regexp onto Perl_debug_log in vaguely comprehensible form
*/
+#ifdef DEBUGGING
+void
+S_regdump_extflags(pTHX_ const char *lead, const U32 flags) {
+ int bit;
+ int set=0;
+ for (bit=0; bit<32; bit++) {
+ if (flags & (1<<bit)) {
+ if (!set++ && lead)
+ PerlIO_printf(Perl_debug_log, "%s",lead);
+ PerlIO_printf(Perl_debug_log, "%s ",PL_reg_extflags_name[bit]);
+ }
+ }
+ if (lead) {
+ if (set)
+ PerlIO_printf(Perl_debug_log, "\n");
+ else
+ PerlIO_printf(Perl_debug_log, "%s[none-set]\n",lead);
+ }
+}
+#endif
+
void
Perl_regdump(pTHX_ const regexp *r)
{
@@ -8641,6 +8666,7 @@ Perl_regdump(pTHX_ const regexp *r)
SV * const sv = sv_newmortal();
SV *dsv= sv_newmortal();
RXi_GET_DECL(r,ri);
+ GET_RE_DEBUG_FLAGS_DECL;
(void)dumpuntil(r, ri->program, ri->program + 1, NULL, NULL, sv, 0, 0);
@@ -8714,6 +8740,7 @@ Perl_regdump(pTHX_ const regexp *r)
if (r->extflags & RXf_EVAL_SEEN)
PerlIO_printf(Perl_debug_log, "with eval ");
PerlIO_printf(Perl_debug_log, "\n");
+ DEBUG_FLAGS_r(regdump_extflags("r->extflags: ",r->extflags));
#else
PERL_UNUSED_CONTEXT;
PERL_UNUSED_ARG(r);
diff --git a/regcomp.h b/regcomp.h
index 8dbeaf1833..8f14a200c1 100644
--- a/regcomp.h
+++ b/regcomp.h
@@ -691,6 +691,7 @@ re.pm, especially to the documentation.
#define RE_DEBUG_COMPILE_OPTIMISE 0x000002
#define RE_DEBUG_COMPILE_TRIE 0x000004
#define RE_DEBUG_COMPILE_DUMP 0x000008
+#define RE_DEBUG_COMPILE_FLAGS 0x000010
/* Execute */
#define RE_DEBUG_EXECUTE_MASK 0x00FF00
@@ -723,7 +724,8 @@ re.pm, especially to the documentation.
if (re_debug_flags & RE_DEBUG_COMPILE_DUMP) x )
#define DEBUG_TRIE_COMPILE_r(x) DEBUG_r( \
if (re_debug_flags & RE_DEBUG_COMPILE_TRIE) x )
-
+#define DEBUG_FLAGS_r(x) DEBUG_r( \
+ if (re_debug_flags & RE_DEBUG_COMPILE_FLAGS) x )
/* Execute */
#define DEBUG_EXECUTE_r(x) DEBUG_r( \
if (re_debug_flags & RE_DEBUG_EXECUTE_MASK) x )
diff --git a/regcomp.pl b/regcomp.pl
index 3ba699b0b6..17472cc811 100644
--- a/regcomp.pl
+++ b/regcomp.pl
@@ -187,9 +187,38 @@ print OUT <<EOP;
};
#endif /* DOINIT */
-/* ex: set ro: */
+/* PL_reg_extflags_name[] - Opcode/state names in string form, for debugging */
+
+#ifndef DOINIT
+EXTCONST char * PL_reg_extflags_name[];
+#else
+EXTCONST char * const PL_reg_extflags_name[] = {
EOP
+open my $fh,"<","regexp.h" or die "Can't read regexp.h: $!";
+my %rxfv;
+my $val;
+while (<$fh>) {
+ if (/#define\s+(RXf_\w+)\s+(0x[A-F\d]+)/i) {
+ $rxfv{$1}= eval $2;
+ $val|=$rxfv{$1};
+ }
+}
+my %vrxf=reverse %rxfv;
+printf OUT "\t/* Bits in extflags defined: %032b */\n",$val;
+for (0..31) {
+ my $n=$vrxf{2**$_}||"UNUSED_BIT_$_";
+ $n=~s/^RXf_(PMf_)?//;
+ printf OUT qq(\t%-20s/* 0x%08x */\n),
+ qq("$n",),2**$_;
+}
+
+print OUT <<EOP;
+};
+#endif /* DOINIT */
+
+/* ex: set ro: */
+EOP
close OUT or die "close $tmp_h: $!";
safer_rename $tmp_h, 'regnodes.h';
diff --git a/regexp.h b/regexp.h
index bb3a64077c..27f17e71c9 100644
--- a/regexp.h
+++ b/regexp.h
@@ -247,7 +247,7 @@ and check for NULL.
#define RXf_PMf_SINGLELINE 0x00002000 /* /s */
#define RXf_PMf_FOLD 0x00004000 /* /i */
#define RXf_PMf_EXTENDED 0x00008000 /* /x */
-#define RXf_PMf_KEEPCOPY 0x00010000 /* /k */
+#define RXf_PMf_KEEPCOPY 0x00010000 /* /p */
/* these flags are transfered from the PMOP->op_pmflags member during compilation */
#define RXf_PMf_STD_PMMOD (RXf_PMf_MULTILINE|RXf_PMf_SINGLELINE|RXf_PMf_FOLD|RXf_PMf_EXTENDED)
#define RXf_PMf_COMPILETIME (RXf_PMf_MULTILINE|RXf_PMf_SINGLELINE|RXf_PMf_LOCALE|RXf_PMf_FOLD|RXf_PMf_EXTENDED|RXf_PMf_KEEPCOPY)
diff --git a/regnodes.h b/regnodes.h
index 4e0f44d5ca..0a19006d55 100644
--- a/regnodes.h
+++ b/regnodes.h
@@ -619,4 +619,46 @@ EXTCONST char * const PL_reg_name[] = {
};
#endif /* DOINIT */
+/* PL_reg_extflags_name[] - Opcode/state names in string form, for debugging */
+
+#ifndef DOINIT
+EXTCONST char * PL_reg_extflags_name[];
+#else
+EXTCONST char * const PL_reg_extflags_name[] = {
+ /* Bits in extflags defined: 10111111111111111111111100111111 */
+ "ANCH_BOL", /* 0x00000001 */
+ "ANCH_MBOL", /* 0x00000002 */
+ "ANCH_SBOL", /* 0x00000004 */
+ "ANCH_GPOS", /* 0x00000008 */
+ "GPOS_SEEN", /* 0x00000010 */
+ "GPOS_FLOAT", /* 0x00000020 */
+ "UNUSED_BIT_6", /* 0x00000040 */
+ "UNUSED_BIT_7", /* 0x00000080 */
+ "SKIPWHITE", /* 0x00000100 */
+ "START_ONLY", /* 0x00000200 */
+ "WHITE", /* 0x00000400 */
+ "LOCALE", /* 0x00000800 */
+ "MULTILINE", /* 0x00001000 */
+ "SINGLELINE", /* 0x00002000 */
+ "FOLD", /* 0x00004000 */
+ "EXTENDED", /* 0x00008000 */
+ "KEEPCOPY", /* 0x00010000 */
+ "LOOKBEHIND_SEEN", /* 0x00020000 */
+ "EVAL_SEEN", /* 0x00040000 */
+ "CANY_SEEN", /* 0x00080000 */
+ "NOSCAN", /* 0x00100000 */
+ "CHECK_ALL", /* 0x00200000 */
+ "UTF8", /* 0x00400000 */
+ "MATCH_UTF8", /* 0x00800000 */
+ "USE_INTUIT_NOML", /* 0x01000000 */
+ "USE_INTUIT_ML", /* 0x02000000 */
+ "INTUIT_TAIL", /* 0x04000000 */
+ "SPLIT", /* 0x08000000 */
+ "COPY_DONE", /* 0x10000000 */
+ "TAINTED_SEEN", /* 0x20000000 */
+ "UNUSED_BIT_30", /* 0x40000000 */
+ "TAINTED", /* 0x80000000 */
+};
+#endif /* DOINIT */
+
/* ex: set ro: */
diff --git a/t/op/reg_pmod.t b/t/op/reg_pmod.t
index e20b859bef..301aeefc6d 100644
--- a/t/op/reg_pmod.t
+++ b/t/op/reg_pmod.t
@@ -10,10 +10,11 @@ use strict;
use warnings;
our @tests = (
- # /p Pattern PRE MATCH POST
- [ 'p', "456", "123-", "456", "-789"],
- [ '', "(456)", "123-", "456", "-789"],
- [ '', "456", undef, undef, undef ],
+ # /p Pattern PRE MATCH POST
+ [ '/p', "456", "123-", "456", "-789"],
+ [ '(?p)', "456", "123-", "456", "-789"],
+ [ '', "(456)", "123-", "456", "-789"],
+ [ '', "456", undef, undef, undef ],
);
plan tests => 4 * @tests + 2;
@@ -25,8 +26,17 @@ sub _u($$) { "$_[0] is ".(defined $_[1] ? "'$_[1]'" : "undef") }
$_ = '123-456-789';
foreach my $test (@tests) {
my ($p, $pat,$l,$m,$r) = @$test;
- my $test_name = "/$pat/$p";
- my $ok = ok($p ? /$pat/p : /$pat/, $test_name);
+ my $test_name = $p eq '/p' ? "/$pat/p"
+ : $p eq '(?p)' ? "/(?p)$pat/"
+ : "/$pat/";
+
+ #
+ # Cannot use if/else due to the scope invalidating ${^MATCH} and friends.
+ #
+ my $ok = ok $p eq '/p' ? /$pat/p
+ : $p eq '(?p)' ? /(?p)$pat/
+ : /$pat/
+ => $test_name;
SKIP: {
skip "/$pat/$p failed to match", 3
unless $ok;
diff --git a/win32/Makefile b/win32/Makefile
index 195ca49631..001c7f380a 100644
--- a/win32/Makefile
+++ b/win32/Makefile
@@ -818,7 +818,7 @@ all : .\config.h $(GLOBEXE) $(MINIMOD) $(CONFIGPM) $(UNIDATAFILES) $(PERLEXE) \
$(X2P) MakePPPort Extensions $(PERLSTATIC)
@echo Everything is up to date. '$(MAKE_BARE) test' to run test suite.
-..\regnodes.h : ..\regcomp.sym
+..\regnodes.h : ..\regcomp.sym ..\regcomp.pl ..\regexp.h
cd ..
regcomp.pl
cd win32