summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>2015-07-16 16:11:52 +0000
committerph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069>2015-07-16 16:11:52 +0000
commitaacccd6509cf66a728c40bc92a8f9328703f71ad (patch)
tree371bfbf9e00b77b10f6a61512bbe5f5e4f32772d
parentb4489180576884d270f6c2ffea11a6bbcfc7a6aa (diff)
downloadpcre2-aacccd6509cf66a728c40bc92a8f9328703f71ad.tar.gz
Move pcre2_find_bracket() into its own module to avoid circular dependency.
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@309 6239d852-aaf2-0410-a92c-79f79f948069
-rw-r--r--CMakeLists.txt4
-rw-r--r--ChangeLog4
-rw-r--r--Makefile.am1
-rw-r--r--NON-AUTOTOOLS-BUILD3
-rwxr-xr-xPrepareRelease1
-rw-r--r--README3
-rw-r--r--src/pcre2_compile.c169
-rw-r--r--src/pcre2_find_bracket.c218
8 files changed, 231 insertions, 172 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index b76a953..94460d9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -67,7 +67,8 @@
# 2013-10-08 PH got rid of the "source" command, which is a bash-ism (use ".")
# 2013-11-05 PH added support for PARENS_NEST_LIMIT
# 2014-08-29 PH converted the file for PCRE2 (which has no C++).
-# 2015-04024 PH added support for PCRE2_DEBUG
+# 2015-04-24 PH added support for PCRE2_DEBUG
+# 2015-07-16 PH updated for new pcre2_find_bracket source module
PROJECT(PCRE2 C)
@@ -390,6 +391,7 @@ SET(PCRE2_SOURCES
src/pcre2_context.c
src/pcre2_dfa_match.c
src/pcre2_error.c
+ src/pcre2_find_bracket.c
src/pcre2_jit_compile.c
src/pcre2_maketables.c
src/pcre2_match.c
diff --git a/ChangeLog b/ChangeLog
index 8b3a4f3..f6d4414 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -25,6 +25,10 @@ during the scan for named groups, and hence the external # was incorrectly
treated as a comment and the invalid (?' at the end of the pattern was not
diagnosed. This caused a buffer overflow during the real compile.
+7. Moved the pcre2_find_bracket() function from src/pcre2_compile.c into its
+own source module to avoid a circular dependency between src/pcre2_compile.c
+and src/pcre2_study.c
+
Version 10.20 30-June-2015
--------------------------
diff --git a/Makefile.am b/Makefile.am
index 56f93db..2da886b 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -319,6 +319,7 @@ COMMON_SOURCES = \
src/pcre2_context.c \
src/pcre2_dfa_match.c \
src/pcre2_error.c \
+ src/pcre2_find_bracket.c \
src/pcre2_internal.h \
src/pcre2_intmodedep.h \
src/pcre2_jit_compile.c \
diff --git a/NON-AUTOTOOLS-BUILD b/NON-AUTOTOOLS-BUILD
index d8d9d2b..bdbff78 100644
--- a/NON-AUTOTOOLS-BUILD
+++ b/NON-AUTOTOOLS-BUILD
@@ -97,6 +97,7 @@ can skip ahead to the CMake section.
pcre2_context.c
pcre2_dfa_match.c
pcre2_error.c
+ pcre2_find_bracket.c
pcre2_jit_compile.c
pcre2_maketables.c
pcre2_match.c
@@ -388,4 +389,4 @@ and executable, is in EBCDIC and native z/OS file formats and this is the
recommended download site.
=============================
-Last Updated: 15 June 2015
+Last Updated: 16 July 2015
diff --git a/PrepareRelease b/PrepareRelease
index f6b138f..9323903 100755
--- a/PrepareRelease
+++ b/PrepareRelease
@@ -204,6 +204,7 @@ files="\
src/pcre2_context.c \
src/pcre2_dfa_match.c \
src/pcre2_error.c \
+ src/pcre2_find_bracket.c \
src/pcre2_internal.h \
src/pcre2_intmodedep.h \
src/pcre2_jit_compile.c \
diff --git a/README b/README
index 7367924..955d45a 100644
--- a/README
+++ b/README
@@ -724,6 +724,7 @@ The distribution should contain the files listed below.
src/pcre2_context.c )
src/pcre2_dfa_match.c )
src/pcre2_error.c )
+ src/pcre2_find_bracket.c )
src/pcre2_jit_compile.c )
src/pcre2_jit_match.c ) sources for the functions in the library,
src/pcre2_jit_misc.c ) and some internal functions that they use
@@ -832,4 +833,4 @@ The distribution should contain the files listed below.
Philip Hazel
Email local part: ph10
Email domain: cam.ac.uk
-Last updated: 24 April 2015
+Last updated: 16 July 2015
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
index cc09ebd..84c1e65 100644
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@@ -2334,175 +2334,6 @@ return p;
/*************************************************
-* Scan compiled regex for specific bracket *
-*************************************************/
-
-/* This function scans through a compiled pattern until it finds a
-capturing bracket with the given number, or, if the number is negative, an
-instance of OP_REVERSE for a lookbehind. The function is global in the C sense
-so that it can be called from pcre2_study() when finding the minimum matching
-length.
-
-Arguments:
- code points to start of expression
- utf TRUE in UTF mode
- number the required bracket number or negative to find a lookbehind
-
-Returns: pointer to the opcode for the bracket, or NULL if not found
-*/
-
-PCRE2_SPTR
-PRIV(find_bracket)(PCRE2_SPTR code, BOOL utf, int number)
-{
-for (;;)
- {
- register PCRE2_UCHAR c = *code;
-
- if (c == OP_END) return NULL;
-
- /* XCLASS is used for classes that cannot be represented just by a bit map.
- This includes negated single high-valued characters. CALLOUT_STR is used for
- callouts with string arguments. In both cases the length in the table is
- zero; the actual length is stored in the compiled code. */
-
- if (c == OP_XCLASS) code += GET(code, 1);
- else if (c == OP_CALLOUT_STR) code += GET(code, 1 + 2*LINK_SIZE);
-
- /* Handle recursion */
-
- else if (c == OP_REVERSE)
- {
- if (number < 0) return (PCRE2_UCHAR *)code;
- code += PRIV(OP_lengths)[c];
- }
-
- /* Handle capturing bracket */
-
- else if (c == OP_CBRA || c == OP_SCBRA ||
- c == OP_CBRAPOS || c == OP_SCBRAPOS)
- {
- int n = (int)GET2(code, 1+LINK_SIZE);
- if (n == number) return (PCRE2_UCHAR *)code;
- code += PRIV(OP_lengths)[c];
- }
-
- /* Otherwise, we can get the item's length from the table, except that for
- repeated character types, we have to test for \p and \P, which have an extra
- two bytes of parameters, and for MARK/PRUNE/SKIP/THEN with an argument, we
- must add in its length. */
-
- else
- {
- switch(c)
- {
- case OP_TYPESTAR:
- case OP_TYPEMINSTAR:
- case OP_TYPEPLUS:
- case OP_TYPEMINPLUS:
- case OP_TYPEQUERY:
- case OP_TYPEMINQUERY:
- case OP_TYPEPOSSTAR:
- case OP_TYPEPOSPLUS:
- case OP_TYPEPOSQUERY:
- if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
- break;
-
- case OP_TYPEUPTO:
- case OP_TYPEMINUPTO:
- case OP_TYPEEXACT:
- case OP_TYPEPOSUPTO:
- if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
- code += 2;
- break;
-
- case OP_MARK:
- case OP_PRUNE_ARG:
- case OP_SKIP_ARG:
- case OP_THEN_ARG:
- code += code[1];
- break;
- }
-
- /* Add in the fixed length from the table */
-
- code += PRIV(OP_lengths)[c];
-
- /* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be
- followed by a multi-byte character. The length in the table is a minimum, so
- we have to arrange to skip the extra bytes. */
-
-#ifdef MAYBE_UTF_MULTI
- if (utf) switch(c)
- {
- case OP_CHAR:
- case OP_CHARI:
- case OP_NOT:
- case OP_NOTI:
- case OP_EXACT:
- case OP_EXACTI:
- case OP_NOTEXACT:
- case OP_NOTEXACTI:
- case OP_UPTO:
- case OP_UPTOI:
- case OP_NOTUPTO:
- case OP_NOTUPTOI:
- case OP_MINUPTO:
- case OP_MINUPTOI:
- case OP_NOTMINUPTO:
- case OP_NOTMINUPTOI:
- case OP_POSUPTO:
- case OP_POSUPTOI:
- case OP_NOTPOSUPTO:
- case OP_NOTPOSUPTOI:
- case OP_STAR:
- case OP_STARI:
- case OP_NOTSTAR:
- case OP_NOTSTARI:
- case OP_MINSTAR:
- case OP_MINSTARI:
- case OP_NOTMINSTAR:
- case OP_NOTMINSTARI:
- case OP_POSSTAR:
- case OP_POSSTARI:
- case OP_NOTPOSSTAR:
- case OP_NOTPOSSTARI:
- case OP_PLUS:
- case OP_PLUSI:
- case OP_NOTPLUS:
- case OP_NOTPLUSI:
- case OP_MINPLUS:
- case OP_MINPLUSI:
- case OP_NOTMINPLUS:
- case OP_NOTMINPLUSI:
- case OP_POSPLUS:
- case OP_POSPLUSI:
- case OP_NOTPOSPLUS:
- case OP_NOTPOSPLUSI:
- case OP_QUERY:
- case OP_QUERYI:
- case OP_NOTQUERY:
- case OP_NOTQUERYI:
- case OP_MINQUERY:
- case OP_MINQUERYI:
- case OP_NOTMINQUERY:
- case OP_NOTMINQUERYI:
- case OP_POSQUERY:
- case OP_POSQUERYI:
- case OP_NOTPOSQUERY:
- case OP_NOTPOSQUERYI:
- if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
- break;
- }
-#else
- (void)(utf); /* Keep compiler happy by referencing function argument */
-#endif /* MAYBE_UTF_MULTI */
- }
- }
-}
-
-
-
-/*************************************************
* Scan compiled regex for recursion reference *
*************************************************/
diff --git a/src/pcre2_find_bracket.c b/src/pcre2_find_bracket.c
new file mode 100644
index 0000000..3cdc419
--- /dev/null
+++ b/src/pcre2_find_bracket.c
@@ -0,0 +1,218 @@
+/*************************************************
+* Perl-Compatible Regular Expressions *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+ Written by Philip Hazel
+ Original API code Copyright (c) 1997-2012 University of Cambridge
+ New API code Copyright (c) 2015 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of the University of Cambridge nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+
+/* This module contains a single function that scans through a compiled pattern
+until it finds a capturing bracket with the given number, or, if the number is
+negative, an instance of OP_REVERSE for a lookbehind. The function is called
+from pcre2_compile.c and also from pcre2_study.c when finding the minimum
+matching length. */
+
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "pcre2_internal.h"
+
+
+/*************************************************
+* Scan compiled regex for specific bracket *
+*************************************************/
+
+/*
+Arguments:
+ code points to start of expression
+ utf TRUE in UTF mode
+ number the required bracket number or negative to find a lookbehind
+
+Returns: pointer to the opcode for the bracket, or NULL if not found
+*/
+
+PCRE2_SPTR
+PRIV(find_bracket)(PCRE2_SPTR code, BOOL utf, int number)
+{
+for (;;)
+ {
+ register PCRE2_UCHAR c = *code;
+
+ if (c == OP_END) return NULL;
+
+ /* XCLASS is used for classes that cannot be represented just by a bit map.
+ This includes negated single high-valued characters. CALLOUT_STR is used for
+ callouts with string arguments. In both cases the length in the table is
+ zero; the actual length is stored in the compiled code. */
+
+ if (c == OP_XCLASS) code += GET(code, 1);
+ else if (c == OP_CALLOUT_STR) code += GET(code, 1 + 2*LINK_SIZE);
+
+ /* Handle recursion */
+
+ else if (c == OP_REVERSE)
+ {
+ if (number < 0) return (PCRE2_UCHAR *)code;
+ code += PRIV(OP_lengths)[c];
+ }
+
+ /* Handle capturing bracket */
+
+ else if (c == OP_CBRA || c == OP_SCBRA ||
+ c == OP_CBRAPOS || c == OP_SCBRAPOS)
+ {
+ int n = (int)GET2(code, 1+LINK_SIZE);
+ if (n == number) return (PCRE2_UCHAR *)code;
+ code += PRIV(OP_lengths)[c];
+ }
+
+ /* Otherwise, we can get the item's length from the table, except that for
+ repeated character types, we have to test for \p and \P, which have an extra
+ two bytes of parameters, and for MARK/PRUNE/SKIP/THEN with an argument, we
+ must add in its length. */
+
+ else
+ {
+ switch(c)
+ {
+ case OP_TYPESTAR:
+ case OP_TYPEMINSTAR:
+ case OP_TYPEPLUS:
+ case OP_TYPEMINPLUS:
+ case OP_TYPEQUERY:
+ case OP_TYPEMINQUERY:
+ case OP_TYPEPOSSTAR:
+ case OP_TYPEPOSPLUS:
+ case OP_TYPEPOSQUERY:
+ if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
+ break;
+
+ case OP_TYPEUPTO:
+ case OP_TYPEMINUPTO:
+ case OP_TYPEEXACT:
+ case OP_TYPEPOSUPTO:
+ if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
+ code += 2;
+ break;
+
+ case OP_MARK:
+ case OP_PRUNE_ARG:
+ case OP_SKIP_ARG:
+ case OP_THEN_ARG:
+ code += code[1];
+ break;
+ }
+
+ /* Add in the fixed length from the table */
+
+ code += PRIV(OP_lengths)[c];
+
+ /* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be
+ followed by a multi-byte character. The length in the table is a minimum, so
+ we have to arrange to skip the extra bytes. */
+
+#ifdef MAYBE_UTF_MULTI
+ if (utf) switch(c)
+ {
+ case OP_CHAR:
+ case OP_CHARI:
+ case OP_NOT:
+ case OP_NOTI:
+ case OP_EXACT:
+ case OP_EXACTI:
+ case OP_NOTEXACT:
+ case OP_NOTEXACTI:
+ case OP_UPTO:
+ case OP_UPTOI:
+ case OP_NOTUPTO:
+ case OP_NOTUPTOI:
+ case OP_MINUPTO:
+ case OP_MINUPTOI:
+ case OP_NOTMINUPTO:
+ case OP_NOTMINUPTOI:
+ case OP_POSUPTO:
+ case OP_POSUPTOI:
+ case OP_NOTPOSUPTO:
+ case OP_NOTPOSUPTOI:
+ case OP_STAR:
+ case OP_STARI:
+ case OP_NOTSTAR:
+ case OP_NOTSTARI:
+ case OP_MINSTAR:
+ case OP_MINSTARI:
+ case OP_NOTMINSTAR:
+ case OP_NOTMINSTARI:
+ case OP_POSSTAR:
+ case OP_POSSTARI:
+ case OP_NOTPOSSTAR:
+ case OP_NOTPOSSTARI:
+ case OP_PLUS:
+ case OP_PLUSI:
+ case OP_NOTPLUS:
+ case OP_NOTPLUSI:
+ case OP_MINPLUS:
+ case OP_MINPLUSI:
+ case OP_NOTMINPLUS:
+ case OP_NOTMINPLUSI:
+ case OP_POSPLUS:
+ case OP_POSPLUSI:
+ case OP_NOTPOSPLUS:
+ case OP_NOTPOSPLUSI:
+ case OP_QUERY:
+ case OP_QUERYI:
+ case OP_NOTQUERY:
+ case OP_NOTQUERYI:
+ case OP_MINQUERY:
+ case OP_MINQUERYI:
+ case OP_NOTMINQUERY:
+ case OP_NOTMINQUERYI:
+ case OP_POSQUERY:
+ case OP_POSQUERYI:
+ case OP_NOTPOSQUERY:
+ case OP_NOTPOSQUERYI:
+ if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
+ break;
+ }
+#else
+ (void)(utf); /* Keep compiler happy by referencing function argument */
+#endif /* MAYBE_UTF_MULTI */
+ }
+ }
+}
+
+/* End of pcre2_find_bracket.c */