diff options
author | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2013-09-06 17:47:32 +0000 |
---|---|---|
committer | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2013-09-06 17:47:32 +0000 |
commit | 4d51fcedd5d47aa4314221fd50ba6e5985842b72 (patch) | |
tree | 2881426e5e03c718fc5eac6ea7efec463c9b2b2d /pcre_internal.h | |
parent | 6737b0da8eb044245ad60bb89559c5e542178e12 (diff) | |
download | pcre-4d51fcedd5d47aa4314221fd50ba6e5985842b72.tar.gz |
Make back references to duplicated named subpatterns more like Perl.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1361 2f5784b3-3f2a-0410-8824-cb99058d5e15
Diffstat (limited to 'pcre_internal.h')
-rw-r--r-- | pcre_internal.h | 98 |
1 files changed, 52 insertions, 46 deletions
diff --git a/pcre_internal.h b/pcre_internal.h index ec9fca6..fa3af98 100644 --- a/pcre_internal.h +++ b/pcre_internal.h @@ -2055,79 +2055,81 @@ enum { class. This does both positive and negative. */ OP_REF, /* 109 Match a back reference, casefully */ OP_REFI, /* 110 Match a back reference, caselessly */ - OP_RECURSE, /* 111 Match a numbered subpattern (possibly recursive) */ - OP_CALLOUT, /* 112 Call out to external function if provided */ + OP_DNREF, /* 111 Match a duplicate name backref, casefully */ + OP_DNREFI, /* 112 Match a duplicate name backref, caselessly */ + OP_RECURSE, /* 113 Match a numbered subpattern (possibly recursive) */ + OP_CALLOUT, /* 114 Call out to external function if provided */ - OP_ALT, /* 113 Start of alternation */ - OP_KET, /* 114 End of group that doesn't have an unbounded repeat */ - OP_KETRMAX, /* 115 These two must remain together and in this */ - OP_KETRMIN, /* 116 order. They are for groups the repeat for ever. */ - OP_KETRPOS, /* 117 Possessive unlimited repeat. */ + OP_ALT, /* 115 Start of alternation */ + OP_KET, /* 116 End of group that doesn't have an unbounded repeat */ + OP_KETRMAX, /* 117 These two must remain together and in this */ + OP_KETRMIN, /* 118 order. They are for groups the repeat for ever. */ + OP_KETRPOS, /* 119 Possessive unlimited repeat. */ /* The assertions must come before BRA, CBRA, ONCE, and COND, and the four asserts must remain in order. 
*/ - OP_REVERSE, /* 118 Move pointer back - used in lookbehind assertions */ - OP_ASSERT, /* 119 Positive lookahead */ - OP_ASSERT_NOT, /* 120 Negative lookahead */ - OP_ASSERTBACK, /* 121 Positive lookbehind */ - OP_ASSERTBACK_NOT, /* 122 Negative lookbehind */ + OP_REVERSE, /* 120 Move pointer back - used in lookbehind assertions */ + OP_ASSERT, /* 121 Positive lookahead */ + OP_ASSERT_NOT, /* 122 Negative lookahead */ + OP_ASSERTBACK, /* 123 Positive lookbehind */ + OP_ASSERTBACK_NOT, /* 124 Negative lookbehind */ /* ONCE, ONCE_NC, BRA, BRAPOS, CBRA, CBRAPOS, and COND must come immediately after the assertions, with ONCE first, as there's a test for >= ONCE for a subpattern that isn't an assertion. The POS versions must immediately follow the non-POS versions in each case. */ - OP_ONCE, /* 123 Atomic group, contains captures */ - OP_ONCE_NC, /* 124 Atomic group containing no captures */ - OP_BRA, /* 125 Start of non-capturing bracket */ - OP_BRAPOS, /* 126 Ditto, with unlimited, possessive repeat */ - OP_CBRA, /* 127 Start of capturing bracket */ - OP_CBRAPOS, /* 128 Ditto, with unlimited, possessive repeat */ - OP_COND, /* 129 Conditional group */ + OP_ONCE, /* 125 Atomic group, contains captures */ + OP_ONCE_NC, /* 126 Atomic group containing no captures */ + OP_BRA, /* 127 Start of non-capturing bracket */ + OP_BRAPOS, /* 128 Ditto, with unlimited, possessive repeat */ + OP_CBRA, /* 129 Start of capturing bracket */ + OP_CBRAPOS, /* 130 Ditto, with unlimited, possessive repeat */ + OP_COND, /* 131 Conditional group */ /* These five must follow the previous five, in the same order. There's a check for >= SBRA to distinguish the two sets. 
*/ - OP_SBRA, /* 130 Start of non-capturing bracket, check empty */ - OP_SBRAPOS, /* 131 Ditto, with unlimited, possessive repeat */ - OP_SCBRA, /* 132 Start of capturing bracket, check empty */ - OP_SCBRAPOS, /* 133 Ditto, with unlimited, possessive repeat */ - OP_SCOND, /* 134 Conditional group, check empty */ + OP_SBRA, /* 132 Start of non-capturing bracket, check empty */ + OP_SBRAPOS, /* 133 Ditto, with unlimited, possessive repeat */ + OP_SCBRA, /* 134 Start of capturing bracket, check empty */ + OP_SCBRAPOS, /* 135 Ditto, with unlimited, possessive repeat */ + OP_SCOND, /* 136 Conditional group, check empty */ /* The next two pairs must (respectively) be kept together. */ - OP_CREF, /* 135 Used to hold a capture number as condition */ - OP_NCREF, /* 136 Same, but generated by a name reference*/ - OP_RREF, /* 137 Used to hold a recursion number as condition */ - OP_NRREF, /* 138 Same, but generated by a name reference*/ - OP_DEF, /* 139 The DEFINE condition */ + OP_CREF, /* 137 Used to hold a capture number as condition */ + OP_NCREF, /* 138 Same, but generated by a name reference*/ + OP_RREF, /* 139 Used to hold a recursion number as condition */ + OP_NRREF, /* 140 Same, but generated by a name reference*/ + OP_DEF, /* 141 The DEFINE condition */ - OP_BRAZERO, /* 140 These two must remain together and in this */ - OP_BRAMINZERO, /* 141 order. */ - OP_BRAPOSZERO, /* 142 */ + OP_BRAZERO, /* 142 These two must remain together and in this */ + OP_BRAMINZERO, /* 143 order. 
*/ + OP_BRAPOSZERO, /* 144 */ /* These are backtracking control verbs */ - OP_MARK, /* 143 always has an argument */ - OP_PRUNE, /* 144 */ - OP_PRUNE_ARG, /* 145 same, but with argument */ - OP_SKIP, /* 146 */ - OP_SKIP_ARG, /* 147 same, but with argument */ - OP_THEN, /* 148 */ - OP_THEN_ARG, /* 149 same, but with argument */ - OP_COMMIT, /* 150 */ + OP_MARK, /* 145 always has an argument */ + OP_PRUNE, /* 146 */ + OP_PRUNE_ARG, /* 147 same, but with argument */ + OP_SKIP, /* 148 */ + OP_SKIP_ARG, /* 149 same, but with argument */ + OP_THEN, /* 150 */ + OP_THEN_ARG, /* 151 same, but with argument */ + OP_COMMIT, /* 152 */ /* These are forced failure and success verbs */ - OP_FAIL, /* 151 */ - OP_ACCEPT, /* 152 */ - OP_ASSERT_ACCEPT, /* 153 Used inside assertions */ - OP_CLOSE, /* 154 Used before OP_ACCEPT to close open captures */ + OP_FAIL, /* 153 */ + OP_ACCEPT, /* 154 */ + OP_ASSERT_ACCEPT, /* 155 Used inside assertions */ + OP_CLOSE, /* 156 Used before OP_ACCEPT to close open captures */ /* This is used to skip a subpattern with a {0} quantifier */ - OP_SKIPZERO, /* 155 */ + OP_SKIPZERO, /* 157 */ /* This is not an opcode, but is used to check that tables indexed by opcode are the correct length, in order to catch updating errors - there have been @@ -2167,7 +2169,7 @@ some cases doesn't actually use these names at all). */ "*", "*?", "+", "+?", "?", "??", "{", "{", "{", \ "*+","++", "?+", "{", \ "*", "*?", "+", "+?", "?", "??", "{", "{", \ - "class", "nclass", "xclass", "Ref", "Refi", \ + "class", "nclass", "xclass", "Ref", "Refi", "DnRef", "DnRefi", \ "Recurse", "Callout", \ "Alt", "Ket", "KetRmax", "KetRmin", "KetRpos", \ "Reverse", "Assert", "Assert not", "AssertB", "AssertB not", \ @@ -2237,6 +2239,8 @@ in UTF-8 mode. The code that uses this table must know about such things. 
*/ 0, /* XCLASS - variable length */ \ 1+IMM2_SIZE, /* REF */ \ 1+IMM2_SIZE, /* REFI */ \ + 1+2*IMM2_SIZE, /* DNREF */ \ + 1+2*IMM2_SIZE, /* DNREFI */ \ 1+LINK_SIZE, /* RECURSE */ \ 2+2*LINK_SIZE, /* CALLOUT */ \ 1+LINK_SIZE, /* Alt */ \ @@ -2441,6 +2445,7 @@ typedef struct compile_data { int max_lookbehind; /* Maximum lookbehind (characters) */ int top_backref; /* Maximum back reference */ unsigned int backref_map; /* Bitmap of low back refs */ + unsigned int namedrefcount; /* Number of backreferences by name */ int assert_depth; /* Depth of nested assertions */ pcre_uint32 external_options; /* External (initial) options */ pcre_uint32 external_flags; /* External flag bits to be set */ @@ -2448,6 +2453,7 @@ typedef struct compile_data { BOOL had_accept; /* (*ACCEPT) encountered */ BOOL had_pruneorskip; /* (*PRUNE) or (*SKIP) encountered */ BOOL check_lookbehind; /* Lookbehinds need later checking */ + BOOL dupnames; /* Duplicate names exist */ int nltype; /* Newline type */ int nllen; /* Newline string length */ pcre_uchar nl[4]; /* Newline string when fixed length */ |