diff options
author | Karl Williamson <khw@khw-desktop.(none)> | 2010-07-19 22:26:43 -0600 |
---|---|---|
committer | Rafael Garcia-Suarez <rgs@consttype.org> | 2010-07-29 12:10:18 +0200 |
commit | 1850c8f94216e3e6bf08ca1f3121b4a91d01d1bf (patch) | |
tree | 3ca0eb84849e8b79f73a6ba5a5ad48c657386295 | |
parent | e32a881648677e322dff2b672a89f19c7d31b263 (diff) | |
download | perl-1850c8f94216e3e6bf08ca1f3121b4a91d01d1bf.tar.gz |
Refactor common parts of op.h, regexp.h into new .h
op.h and regexp.h share common elements in their data structures, which
until now have had to be kept in sync manually. This patch makes that
easier by putting the common parts into a new header #included by both.
To do this, it seemed easiest to change the symbol definitions to use
left shifts to generate the flag bits. But this meant that regcomp.pl
and ext/B/defsubs_h.PL had to be taught to recognize those forms of
expressions, which was done in separate commits.
-rw-r--r-- | MANIFEST | 1 | ||||
-rw-r--r-- | ext/B/defsubs_h.PL | 2 | ||||
-rw-r--r-- | op.h | 48 | ||||
-rw-r--r-- | op_reg_common.h | 27 | ||||
-rw-r--r-- | regcomp.pl | 47 | ||||
-rw-r--r-- | regexp.h | 60 |
6 files changed, 108 insertions, 77 deletions
@@ -3790,6 +3790,7 @@ opcode.h Automatically generated opcode header opcode.pl Opcode header generator op.h Opcode syntax tree header opnames.h Automatically generated opcode header +op_reg_common.h Common parts of op.h, regexp.h header os2/Changes Changelog for OS/2 port os2/diff.configure Patches to Configure os2/dlfcn.h Addon for dl_open diff --git a/ext/B/defsubs_h.PL b/ext/B/defsubs_h.PL index d8e1439f7d..b6d8aaaabe 100644 --- a/ext/B/defsubs_h.PL +++ b/ext/B/defsubs_h.PL @@ -76,7 +76,7 @@ if ($] < 5.011) { # giving the prefix to limit the names of symbols to define that come # from that file. If none, all symbols will be defined whose values # match the pattern below. -foreach my $tuple (['op.h'],['cop.h'],['regexp.h','RXf_']) +foreach my $tuple (['op_reg_common.h','(?:(?:RXf_)?PMf_)'],['op.h'],['cop.h'],['regexp.h','RXf_']) { my $file = $tuple->[0]; my $pfx = $tuple->[1] || ''; @@ -36,6 +36,7 @@ * the operation is privatized by a check routine, * which may or may not check number of children). */ +#include "op_reg_common.h" #define OPCODE U16 @@ -359,38 +360,39 @@ struct pmop { #define PM_SETRE(o,r) ((o)->op_pmregexp = (r)) #endif - -#define PMf_RETAINT 0x00000040 /* taint $1 etc. if target tainted */ +/* taint $1 etc. if target tainted */ +#define PMf_RETAINT (1<<(_RXf_PMf_SHIFT+1)) /* match successfully only once per reset, with related flag RXf_USED in * re->extflags holding state. This is used only for ?? matches, and only on * OP_MATCH and OP_QR */ -#define PMf_ONCE 0x00000080 -#define PMf_UNUSED 0x00000100 /* free for use */ -#define PMf_MAYBE_CONST 0x00000200 /* replacement contains variables */ +#define PMf_ONCE (1<<(_RXf_PMf_SHIFT+2)) + +/* replacement contains variables */ +#define PMf_MAYBE_CONST (1<<(_RXf_PMf_SHIFT+3)) + +/* PMf_ONCE has matched successfully. Not used under threading. 
*/ +#define PMf_USED (1<<(_RXf_PMf_SHIFT+4)) + +/* subst replacement is constant */ +#define PMf_CONST (1<<(_RXf_PMf_SHIFT+5)) -/* PMf_ONCE has matched successfully. Not used under threading. */ -#define PMf_USED 0x00000400 +/* keep 1st runtime pattern forever */ +#define PMf_KEEP (1<<(_RXf_PMf_SHIFT+6)) +#define PMf_GLOBAL (1<<(_RXf_PMf_SHIFT+7)) /* pattern had a g modifier */ -#define PMf_CONST 0x00000800 /* subst replacement is constant */ -#define PMf_KEEP 0x00001000 /* keep 1st runtime pattern forever */ -#define PMf_GLOBAL 0x00002000 /* pattern had a g modifier */ -#define PMf_CONTINUE 0x00004000 /* don't reset pos() if //g fails */ -#define PMf_EVAL 0x00008000 /* evaluating replacement as expr */ +/* don't reset pos() if //g fails */ +#define PMf_CONTINUE (1<<(_RXf_PMf_SHIFT+8)) + +/* evaluating replacement as expr */ +#define PMf_EVAL (1<<(_RXf_PMf_SHIFT+9)) /* Return substituted string instead of modifying it. */ -#define PMf_NONDESTRUCT 0x00010000 +#define PMf_NONDESTRUCT (1<<(_RXf_PMf_SHIFT+10)) -/* The following flags have exact equivalents in regcomp.h with the prefix RXf_ - * which are stored in the regexp->extflags member. If you change them here, - * you have to change them there, and vice versa. - */ -#define PMf_MULTILINE 0x00000001 /* assume multiple lines */ -#define PMf_SINGLELINE 0x00000002 /* assume single line */ -#define PMf_FOLD 0x00000004 /* case insensitivity */ -#define PMf_EXTENDED 0x00000008 /* chuck embedded whitespace */ -#define PMf_KEEPCOPY 0x00000010 /* copy the string when matching */ -#define PMf_LOCALE 0x00000020 /* use locale for character types */ +#if _RXf_PMf_SHIFT+10 > 31 +# error Too many RXf_PMf bits used. 
See above and regnodes.h for any spare in middle +#endif /* mask of bits that need to be transfered to re->extflags */ #define PMf_COMPILETIME (PMf_MULTILINE|PMf_SINGLELINE|PMf_LOCALE|PMf_FOLD|PMf_EXTENDED|PMf_KEEPCOPY) diff --git a/op_reg_common.h b/op_reg_common.h new file mode 100644 index 0000000000..b0fd273b3d --- /dev/null +++ b/op_reg_common.h @@ -0,0 +1,27 @@ +/* op_reg_common.h + * + * Definitions common to by op.h and regexp.h + * + * Copyright (C) 2010 by Larry Wall and others + * + * You may distribute under the terms of either the GNU General Public + * License or the Artistic License, as specified in the README file. + * + */ + +/* These defines are used in both op.h and regexp.h The definitions use the + * shift form so that ext/B/defsubs_h.PL will pick them up */ +#define RXf_PMf_MULTILINE (1 << 0) /* /m */ +#define PMf_MULTILINE (1 << 0) /* /m */ +#define RXf_PMf_SINGLELINE (1 << 1) /* /s */ +#define PMf_SINGLELINE (1 << 1) /* /s */ +#define RXf_PMf_FOLD (1 << 2) /* /i */ +#define PMf_FOLD (1 << 2) /* /i */ +#define RXf_PMf_EXTENDED (1 << 3) /* /x */ +#define PMf_EXTENDED (1 << 3) /* /x */ +#define RXf_PMf_KEEPCOPY (1 << 4) /* /p */ +#define PMf_KEEPCOPY (1 << 4) /* /p */ +#define RXf_PMf_LOCALE (1 << 5) +#define PMf_LOCALE (1 << 5) + +#define _RXf_PMf_SHIFT 5 /* Begins with '_' so won't be exported by B */ diff --git a/regcomp.pl b/regcomp.pl index aa0f0fec45..d85482c710 100644 --- a/regcomp.pl +++ b/regcomp.pl @@ -256,36 +256,39 @@ EXTCONST char * PL_reg_extflags_name[]; EXTCONST char * const PL_reg_extflags_name[] = { EOP -open my $fh,"<","regexp.h" or die "Can't read regexp.h: $!"; my %rxfv; my %definitions; # Remember what the symbol definitions are my $val = 0; my %reverse; -while (<$fh>) { - - # optional leading '_'. Return symbol in $1, and strip it from - # rest of line - if (s/ \#define \s+ ( _? RXf_ \w+ ) \s+ //xi) { - chomp; - my $define = $1; - s: / \s* \* .*? 
\* \s* / : :x; # Replace comments by a blank - - # Replace any prior defined symbols by their values - foreach my $key (keys %definitions) { - s/\b$key\b/$definitions{$key}/g; - } - my $newval = eval $_; # Get numeric definition +foreach my $file ("op_reg_common.h", "regexp.h") { + open my $fh,"<", $file or die "Can't read $file: $!"; + while (<$fh>) { + + # optional leading '_'. Return symbol in $1, and strip it from + # rest of line + if (s/ \#define \s+ ( _? RXf_ \w+ ) \s+ //xi) { + chomp; + my $define = $1; + s: / \s* \* .*? \* \s* / : :x; # Replace comments by a blank + + # Replace any prior defined symbols by their values + foreach my $key (keys %definitions) { + s/\b$key\b/$definitions{$key}/g; + } + my $newval = eval $_; # Get numeric definition - $definitions{$define} = $newval; + $definitions{$define} = $newval; - if($val & $newval) { - die sprintf "Both $define and $reverse{$newval} use %08X", $newval; + next unless $_ =~ /<</; # Bit defines use left shift + if($val & $newval) { + die sprintf "Both $define and $reverse{$newval} use %08X", $newval; + } + $val|=$newval; + $rxfv{$define}= $newval; + $reverse{$newval} = $define; } - $val|=$newval; - $rxfv{$define}= $newval; - $reverse{$newval} = $define; } -} +} my %vrxf=reverse %rxfv; printf $out "\t/* Bits in extflags defined: %032b */\n",$val; for (0..31) { @@ -8,6 +8,8 @@ * */ +#include "op_reg_common.h" + /* * Definitions etc. for regexp(3) routines. * @@ -229,18 +231,11 @@ and check for NULL. /* 0x3F of extflags is used by (RXf_)PMf_COMPILETIME * If you change these you need to change the equivalent flags in op.h, and -/* vice versa. These need to be ordered so that the msix are contiguous + * vice versa. 
These need to be ordered so that the msix are contiguous * starting at bit 0, followed by the p; bit 0 is because of the shift below * being 0; see STD_PAT_MODS and INT_PAT_MODS below for the contiguity cause */ /* the flags above are transfered from the PMOP->op_pmflags member during * compilation */ -#define RXf_PMf_MULTILINE 0x00000001 /* /m */ -#define RXf_PMf_SINGLELINE 0x00000002 /* /s */ -#define RXf_PMf_FOLD 0x00000004 /* /i */ -#define RXf_PMf_EXTENDED 0x00000008 /* /x */ -#define RXf_PMf_KEEPCOPY 0x00000010 /* /p */ -#define RXf_PMf_LOCALE 0x00000020 /* use locale */ -/* these flags are transfered from the PMOP->op_pmflags member during compilation */ #define RXf_PMf_STD_PMMOD_SHIFT 0 #define RXf_PMf_STD_PMMOD (RXf_PMf_MULTILINE|RXf_PMf_SINGLELINE|RXf_PMf_FOLD|RXf_PMf_EXTENDED) #define RXf_PMf_COMPILETIME (RXf_PMf_MULTILINE|RXf_PMf_SINGLELINE|RXf_PMf_LOCALE|RXf_PMf_FOLD|RXf_PMf_EXTENDED|RXf_PMf_KEEPCOPY) @@ -297,53 +292,56 @@ and check for NULL. */ /* Anchor and GPOS related stuff */ -#define RXf_ANCH_BOL 0x00000100 -#define RXf_ANCH_MBOL 0x00000200 -#define RXf_ANCH_SBOL 0x00000400 -#define RXf_ANCH_GPOS 0x00000800 -#define RXf_GPOS_SEEN 0x00001000 -#define RXf_GPOS_FLOAT 0x00002000 +#define RXf_ANCH_BOL (1<<(_RXf_PMf_SHIFT+3)) +#define RXf_ANCH_MBOL (1<<(_RXf_PMf_SHIFT+4)) +#define RXf_ANCH_SBOL (1<<(_RXf_PMf_SHIFT+5)) +#define RXf_ANCH_GPOS (1<<(_RXf_PMf_SHIFT+6)) +#define RXf_GPOS_SEEN (1<<(_RXf_PMf_SHIFT+7)) +#define RXf_GPOS_FLOAT (1<<(_RXf_PMf_SHIFT+8)) /* two bits here */ #define RXf_ANCH (RXf_ANCH_BOL|RXf_ANCH_MBOL|RXf_ANCH_GPOS|RXf_ANCH_SBOL) #define RXf_GPOS_CHECK (RXf_GPOS_SEEN|RXf_ANCH_GPOS) #define RXf_ANCH_SINGLE (RXf_ANCH_SBOL|RXf_ANCH_GPOS) /* What we have seen */ -#define RXf_LOOKBEHIND_SEEN 0x00004000 -#define RXf_EVAL_SEEN 0x00008000 -#define RXf_CANY_SEEN 0x00010000 +#define RXf_LOOKBEHIND_SEEN (1<<(_RXf_PMf_SHIFT+9)) +#define RXf_EVAL_SEEN (1<<(_RXf_PMf_SHIFT+10)) +#define RXf_CANY_SEEN (1<<(_RXf_PMf_SHIFT+11)) /* Special */ -#define 
RXf_NOSCAN 0x00020000 -#define RXf_CHECK_ALL 0x00040000 +#define RXf_NOSCAN (1<<(_RXf_PMf_SHIFT+12)) +#define RXf_CHECK_ALL (1<<(_RXf_PMf_SHIFT+13)) /* UTF8 related */ -#define RXf_MATCH_UTF8 0x00100000 +#define RXf_MATCH_UTF8 (1<<(_RXf_PMf_SHIFT+15)) /* Intuit related */ -#define RXf_USE_INTUIT_NOML 0x00200000 -#define RXf_USE_INTUIT_ML 0x00400000 -#define RXf_INTUIT_TAIL 0x00800000 +#define RXf_USE_INTUIT_NOML (1<<(_RXf_PMf_SHIFT+16)) +#define RXf_USE_INTUIT_ML (1<<(_RXf_PMf_SHIFT+17)) +#define RXf_INTUIT_TAIL (1<<(_RXf_PMf_SHIFT+18)) /* Set in Perl_pmruntime if op_flags & OPf_SPECIAL, i.e. split. Will be used by regex engines to check whether they should set RXf_SKIPWHITE */ -#define RXf_SPLIT 0x01000000 +#define RXf_SPLIT (1<<(_RXf_PMf_SHIFT+19)) #define RXf_USE_INTUIT (RXf_USE_INTUIT_NOML|RXf_USE_INTUIT_ML) /* Copy and tainted info */ -#define RXf_COPY_DONE 0x02000000 -#define RXf_TAINTED_SEEN 0x04000000 -#define RXf_TAINTED 0x08000000 /* this pattern is tainted */ +#define RXf_COPY_DONE (1<<(_RXf_PMf_SHIFT+20)) +#define RXf_TAINTED_SEEN (1<<(_RXf_PMf_SHIFT+21)) +#define RXf_TAINTED (1<<(_RXf_PMf_SHIFT+22)) /* this pattern is tainted */ /* Flags indicating special patterns */ -#define RXf_START_ONLY 0x10000000 /* Pattern is /^/ */ -#define RXf_SKIPWHITE 0x20000000 /* Pattern is for a split / / */ -#define RXf_WHITE 0x40000000 /* Pattern is /\s+/ */ -#define RXf_NULL 0x80000000 /* Pattern is // */ +#define RXf_START_ONLY (1<<(_RXf_PMf_SHIFT+23)) /* Pattern is /^/ */ +#define RXf_SKIPWHITE (1<<(_RXf_PMf_SHIFT+24)) /* Pattern is for a split / / */ +#define RXf_WHITE (1<<(_RXf_PMf_SHIFT+25)) /* Pattern is /\s+/ */ +#define RXf_NULL (1<<(_RXf_PMf_SHIFT+26)) /* Pattern is // */ +#if _RXf_PMf_SHIFT+23 > 31 +# error Too many RXf_PMf bits used. See regnodes.h for any spare in middle +#endif /* * NOTE: if you modify any RXf flags you should run regen.pl or regcomp.pl |