diff options
author | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2013-10-01 16:54:40 +0000 |
---|---|---|
committer | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2013-10-01 16:54:40 +0000 |
commit | 5f42224005b7d9a503903e3342ec7ada75590b07 (patch) | |
tree | cd216c1c4ce213cc37bb9440077dc878abc54580 /pcre_internal.h | |
parent | f312a9a8397f6f52dc3ef3db4e3589dec11b3f73 (diff) | |
download | pcre-5f42224005b7d9a503903e3342ec7ada75590b07.tar.gz |
Refactored auto-possessification code.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1363 2f5784b3-3f2a-0410-8824-cb99058d5e15
Diffstat (limited to 'pcre_internal.h')
-rw-r--r-- | pcre_internal.h | 75 |
1 files changed, 50 insertions, 25 deletions
diff --git a/pcre_internal.h b/pcre_internal.h index fa3af98..4c8f501 100644 --- a/pcre_internal.h +++ b/pcre_internal.h @@ -1174,7 +1174,8 @@ time, run time, or study time, respectively. */ #define PUBLIC_COMPILE_OPTIONS \ (PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \ PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8| \ - PCRE_NO_AUTO_CAPTURE|PCRE_NO_UTF8_CHECK|PCRE_AUTO_CALLOUT|PCRE_FIRSTLINE| \ + PCRE_NO_AUTO_CAPTURE|PCRE_NO_AUTO_POSSESSIFY| \ + PCRE_NO_UTF8_CHECK|PCRE_AUTO_CALLOUT|PCRE_FIRSTLINE| \ PCRE_DUPNAMES|PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE| \ PCRE_JAVASCRIPT_COMPAT|PCRE_UCP|PCRE_NO_START_OPTIMIZE|PCRE_NEVER_UTF) @@ -1852,6 +1853,7 @@ only. */ #define PT_WORD 8 /* Word - L plus N plus underscore */ #define PT_CLIST 9 /* Pseudo-property: match character list */ #define PT_UCNC 10 /* Universal Character nameable character */ +#define PT_TABSIZE 11 /* Size of square table for autopossessify tests */ /* Flag bits and data types for the extended class (OP_XCLASS) for classes that contain characters with values greater than 255. */ @@ -1891,12 +1893,30 @@ enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s, ESC_E, ESC_Q, ESC_g, ESC_k, ESC_DU, ESC_du, ESC_SU, ESC_su, ESC_WU, ESC_wu }; -/* Opcode table: Starting from 1 (i.e. after OP_END), the values up to -OP_EOD must correspond in order to the list of escapes immediately above. +/* Opcode table */ -*** NOTE NOTE NOTE *** Whenever this list is updated, the two macro definitions -that follow must also be updated to match. There are also tables called -"coptable" and "poptable" in pcre_dfa_exec.c that must be updated. */ + +/****** NOTE NOTE NOTE ****** + +Starting from 1 (i.e. after OP_END), the values up to OP_EOD must correspond in +order to the list of escapes immediately above. Furthermore, values up to +OP_DOLLM must not be changed without adjusting the table called autoposstab in +pcre_compile.c + +Whenever this list is updated, the two macro definitions that follow must also +be updated to match. There are also tables called "coptable" and "poptable" in +pcre_dfa_exec.c that must be updated. + +****** NOTE NOTE NOTE ******/ + + +/* The values between FIRST_AUTOTAB_OP and LAST_AUTOTAB_RIGHT_OP, inclusive, +are used in a table for deciding whether a repeated character type can be +auto-possessified. */ + +#define FIRST_AUTOTAB_OP OP_NOT_DIGIT +#define LAST_AUTOTAB_LEFT_OP OP_EXTUNI +#define LAST_AUTOTAB_RIGHT_OP OP_DOLLM enum { OP_END, /* 0 End of pattern */ @@ -1928,11 +1948,16 @@ enum { OP_EXTUNI, /* 22 \X (extended Unicode sequence */ OP_EODN, /* 23 End of data or \n at end of data (\Z) */ OP_EOD, /* 24 End of data (\z) */ - - OP_CIRC, /* 25 Start of line - not multiline */ - OP_CIRCM, /* 26 Start of line - multiline */ - OP_DOLL, /* 27 End of line - not multiline */ - OP_DOLLM, /* 28 End of line - multiline */ + + /* Line end assertions */ + + OP_DOLL, /* 25 End of line - not multiline */ + OP_DOLLM, /* 26 End of line - multiline */ + OP_CIRC, /* 27 Start of line - not multiline */ + OP_CIRCM, /* 28 Start of line - multiline */ + + /* Single characters; caseful must precede the caseless ones */ + OP_CHAR, /* 29 Match one character, casefully */ OP_CHARI, /* 30 Match one character, caselessly */ OP_NOT, /* 31 Match one character, not the given one, casefully */ @@ -1941,7 +1966,7 @@ enum { /* The following sets of 13 opcodes must always be kept in step because the offset from the first one is used to generate the others. */ - /**** Single characters, caseful, must precede the caseless ones ****/ + /* Repeated characters; caseful must precede the caseless ones */ OP_STAR, /* 33 The maximizing and minimizing versions of */ OP_MINSTAR, /* 34 these six opcodes must come in pairs, with */ @@ -1959,7 +1984,7 @@ enum { OP_POSQUERY, /* 44 Posesssified query, caseful */ OP_POSUPTO, /* 45 Possessified upto, caseful */ - /**** Single characters, caseless, must follow the caseful ones */ + /* Repeated characters; caseless must follow the caseful ones */ OP_STARI, /* 46 */ OP_MINSTARI, /* 47 */ @@ -1977,8 +2002,8 @@ enum { OP_POSQUERYI, /* 57 Posesssified query, caseless */ OP_POSUPTOI, /* 58 Possessified upto, caseless */ - /**** The negated ones must follow the non-negated ones, and match them ****/ - /**** Negated single character, caseful; must precede the caseless ones ****/ + /* The negated ones must follow the non-negated ones, and match them */ + /* Negated repeated character, caseful; must precede the caseless ones */ OP_NOTSTAR, /* 59 The maximizing and minimizing versions of */ OP_NOTMINSTAR, /* 60 these six opcodes must come in pairs, with */ @@ -1996,7 +2021,7 @@ enum { OP_NOTPOSQUERY, /* 70 */ OP_NOTPOSUPTO, /* 71 */ - /**** Negated single character, caseless; must follow the caseful ones ****/ + /* Negated repeated character, caseless; must follow the caseful ones */ OP_NOTSTARI, /* 72 */ OP_NOTMINSTARI, /* 73 */ @@ -2014,7 +2039,7 @@ enum { OP_NOTPOSQUERYI, /* 83 */ OP_NOTPOSUPTOI, /* 84 */ - /**** Character types ****/ + /* Character types */ OP_TYPESTAR, /* 85 The maximizing and minimizing versions of */ OP_TYPEMINSTAR, /* 86 these six opcodes must come in pairs, with */ @@ -2055,8 +2080,8 @@ enum { class. This does both positive and negative. */ OP_REF, /* 109 Match a back reference, casefully */ OP_REFI, /* 110 Match a back reference, caselessly */ - OP_DNREF, /* 111 Match a duplicate name backref, casefully */ - OP_DNREFI, /* 112 Match a duplicate name backref, caselessly */ + OP_DNREF, /* 111 Match a duplicate name backref, casefully */ + OP_DNREFI, /* 112 Match a duplicate name backref, caselessly */ OP_RECURSE, /* 113 Match a numbered subpattern (possibly recursive) */ OP_CALLOUT, /* 114 Call out to external function if provided */ @@ -2153,7 +2178,7 @@ some cases doesn't actually use these names at all). */ "\\S", "\\s", "\\W", "\\w", "Any", "AllAny", "Anybyte", \ "notprop", "prop", "\\R", "\\H", "\\h", "\\V", "\\v", \ "extuni", "\\Z", "\\z", \ - "^", "^", "$", "$", "char", "chari", "not", "noti", \ + "$", "$", "^", "^", "char", "chari", "not", "noti", \ "*", "*?", "+", "+?", "?", "??", \ "{", "{", "{", \ "*+","++", "?+", "{", \ @@ -2203,7 +2228,7 @@ in UTF-8 mode. The code that uses this table must know about such things. */ 3, 3, /* \P, \p */ \ 1, 1, 1, 1, 1, /* \R, \H, \h, \V, \v */ \ 1, /* \X */ \ - 1, 1, 1, 1, 1, 1, /* \Z, \z, ^, ^M, $, $M */ \ + 1, 1, 1, 1, 1, 1, /* \Z, \z, $, $M ^, ^M */ \ 2, /* Char - the minimum length */ \ 2, /* Chari - the minimum length */ \ 2, /* not */ \ @@ -2411,14 +2436,14 @@ typedef struct open_capitem { pcre_uint16 flag; /* Set TRUE if recursive back ref */ } open_capitem; -/* Structure for building a list of named groups during the first pass of +/* Structure for building a list of named groups during the first pass of compiling. */ typedef struct named_group { const pcre_uchar *name; /* Points to the name in the pattern */ int length; /* Length of the name */ pcre_uint32 number; /* Group number */ -} named_group; +} named_group; /* Structure for passing "static" information around between the functions doing the compiling, so that they are thread-safe. */ @@ -2438,14 +2463,14 @@ typedef struct compile_data { pcre_uchar *name_table; /* The name/number table */ int names_found; /* Number of entries so far */ int name_entry_size; /* Size of each entry */ - int named_group_list_size; /* Number of entries in the list */ + int named_group_list_size; /* Number of entries in the list */ int workspace_size; /* Size of workspace */ unsigned int bracount; /* Count of capturing parens as we compile */ int final_bracount; /* Saved value after first pass */ int max_lookbehind; /* Maximum lookbehind (characters) */ int top_backref; /* Maximum back reference */ unsigned int backref_map; /* Bitmap of low back refs */ - unsigned int namedrefcount; /* Number of backreferences by name */ + unsigned int namedrefcount; /* Number of backreferences by name */ int assert_depth; /* Depth of nested assertions */ pcre_uint32 external_options; /* External (initial) options */ pcre_uint32 external_flags; /* External flag bits to be set */ |