summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2013-11-05 18:05:29 +0000
committer ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2013-11-05 18:05:29 +0000
commit 946ecf2a3210621f86d8baf69440397bb5e4394b (patch)
tree 4e7c74576e0009bc264e684cbbb404dffed1aecd
parent 0e5112832b6f2c6b0f98a90bf2ef9531cd2e87a2 (diff)
download pcre-946ecf2a3210621f86d8baf69440397bb5e4394b.tar.gz
Implement compile-time nested parentheses limit, specified at build time.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1389 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- CMakeLists.txt 5
-rw-r--r-- ChangeLog 5
-rw-r--r-- README 18
-rw-r--r-- config-cmake.h.in 1
-rw-r--r-- configure.ac 12
-rw-r--r-- doc/pcre_config.3 3
-rw-r--r-- doc/pcreapi.3 11
-rw-r--r-- doc/pcrelimits.3 9
-rw-r--r-- pcre.h.in 1
-rw-r--r-- pcre_compile.c 22
-rw-r--r-- pcre_config.c 4
-rw-r--r-- pcre_internal.h 4
-rw-r--r-- pcreposix.c 3
-rw-r--r-- pcretest.c 2
14 files changed, 83 insertions, 17 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index c558784..ccd48e7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -64,6 +64,7 @@
# 2013-07-01 PH realized that the "support" for GCOV was a total nonsense and
# so it has been removed.
# 2013-10-08 PH got rid of the "source" command, which is a bash-ism (use ".")
+# 2013-11-05 PH added support for PARENS_NEST_LIMIT
PROJECT(PCRE C CXX)
@@ -132,6 +133,9 @@ SET(PCRE_EBCDIC_NL25 OFF CACHE BOOL
SET(PCRE_LINK_SIZE "2" CACHE STRING
"Internal link size (2, 3 or 4 allowed). See LINK_SIZE in config.h.in for details.")
+SET(PCRE_PARENS_NEST_LIMIT "250" CACHE STRING
+ "Default nested parentheses limit. See PARENS_NEST_LIMIT in config.h.in for details.")
+
SET(PCRE_MATCH_LIMIT "10000000" CACHE STRING
"Default limit on internal looping. See MATCH_LIMIT in config.h.in for details.")
@@ -911,6 +915,7 @@ IF(PCRE_SHOW_REPORT)
MESSAGE(STATUS " No stack recursion .............. : ${PCRE_NO_RECURSE}")
MESSAGE(STATUS " POSIX mem threshold ............. : ${PCRE_POSIX_MALLOC_THRESHOLD}")
MESSAGE(STATUS " Internal link size .............. : ${PCRE_LINK_SIZE}")
+ MESSAGE(STATUS " Parentheses nest limit .......... : ${PCRE_PARENS_NEST_LIMIT}")
MESSAGE(STATUS " Match limit ..................... : ${PCRE_MATCH_LIMIT}")
MESSAGE(STATUS " Match limit recursion ........... : ${PCRE_MATCH_LIMIT_RECURSION}")
MESSAGE(STATUS " Build shared libs ............... : ${BUILD_SHARED_LIBS}")
diff --git a/ChangeLog b/ChangeLog
index cf072a7..6d61196 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -154,6 +154,11 @@ Version 8.34 xx-xxxx-201x
32. Added the "forbid" facility to pcretest so that putting tests into the
wrong test files can sometimes be quickly detected.
+
+33. There is now a limit (default 250) on the depth of nesting of parentheses.
+ This limit is imposed to control the amount of system stack used at compile
+ time. It can be changed at build time by --with-parens-nest-limit=xxx or
+ the equivalent in CMake.
Version 8.33 28-May-2013
diff --git a/README b/README
index c5f120c..11312e0 100644
--- a/README
+++ b/README
@@ -268,10 +268,18 @@ library. They are also documented in the pcrebuild man page.
--with-posix-malloc-threshold=20
on the "configure" command.
-
-. PCRE has a counter that can be set to limit the amount of resources it uses.
- If the limit is exceeded during a match, the match fails. The default is ten
- million. You can change the default by setting, for example,
+
+. PCRE has a counter that limits the depth of nesting of parentheses in a
+ pattern. This limits the amount of system stack that a pattern uses when it
+ is compiled. The default is 250, but you can change it by setting, for
+ example,
+
+ --with-parens-nest-limit=500
+
+. PCRE has a counter that can be set to limit the amount of resources it uses
+ when matching a pattern. If the limit is exceeded during a match, the match
+ fails. The default is ten million. You can change the default by setting, for
+ example,
--with-match-limit=500000
@@ -979,4 +987,4 @@ pcre_xxx, one with the name pcre16_xx, and a third with the name pcre32_xxx.
Philip Hazel
Email local part: ph10
Email domain: cam.ac.uk
-Last updated: 02 July 2013
+Last updated: 05 November 2013
diff --git a/config-cmake.h.in b/config-cmake.h.in
index ef6ed28..77d9011 100644
--- a/config-cmake.h.in
+++ b/config-cmake.h.in
@@ -46,6 +46,7 @@
#define NEWLINE @NEWLINE@
#define POSIX_MALLOC_THRESHOLD @PCRE_POSIX_MALLOC_THRESHOLD@
#define LINK_SIZE @PCRE_LINK_SIZE@
+#define PARENS_NEST_LIMIT @PCRE_PARENS_NEST_LIMIT@
#define MATCH_LIMIT @PCRE_MATCH_LIMIT@
#define MATCH_LIMIT_RECURSION @PCRE_MATCH_LIMIT_RECURSION@
#define PCREGREP_BUFSIZE @PCREGREP_BUFSIZE@
diff --git a/configure.ac b/configure.ac
index 1d7343a..ecb9415 100644
--- a/configure.ac
+++ b/configure.ac
@@ -274,6 +274,12 @@ AC_ARG_WITH(link-size,
AS_HELP_STRING([--with-link-size=N],
[internal link size (2, 3, or 4 allowed; default=2)]),
, with_link_size=2)
+
+# Handle --with-parens-nest-limit=N
+AC_ARG_WITH(parens-nest-limit,
+ AS_HELP_STRING([--with-parens-nest-limit=N],
+ [nested parentheses limit (default=250)]),
+ , with_parens_nest_limit=250)
# Handle --with-match-limit=N
AC_ARG_WITH(match-limit,
@@ -783,6 +789,11 @@ AC_DEFINE_UNQUOTED([POSIX_MALLOC_THRESHOLD], [$with_posix_malloc_threshold], [
small, the wrapper function uses space on the stack, because this is
faster than using malloc() for each call. The threshold above which
the stack is no longer used is defined by POSIX_MALLOC_THRESHOLD.])
+
+AC_DEFINE_UNQUOTED([PARENS_NEST_LIMIT], [$with_parens_nest_limit], [
+ The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
+ parentheses (of any kind) in a pattern. This limits the amount of system
+ stack that is used while compiling a pattern.])
AC_DEFINE_UNQUOTED([MATCH_LIMIT], [$with_match_limit], [
The value of MATCH_LIMIT determines the default number of times the
@@ -1071,6 +1082,7 @@ $PACKAGE-$VERSION configuration summary:
Use stack recursion ............. : ${enable_stack_for_recursion}
POSIX mem threshold ............. : ${with_posix_malloc_threshold}
Internal link size .............. : ${with_link_size}
+ Nested parentheses limit ........ : ${with_parens_nest_limit}
Match limit ..................... : ${with_match_limit}
Match limit recursion ........... : ${with_match_limit_recursion}
Build shared libs ............... : ${enable_shared}
diff --git a/doc/pcre_config.3 b/doc/pcre_config.3
index 5a6e6be..8900416 100644
--- a/doc/pcre_config.3
+++ b/doc/pcre_config.3
@@ -1,4 +1,4 @@
-.TH PCRE_CONFIG 3 "24 June 2012" "PCRE 8.30"
+.TH PCRE_CONFIG 3 "05 November 2013" "PCRE 8.34"
.SH NAME
PCRE - Perl-compatible regular expressions
.SH SYNOPSIS
@@ -33,6 +33,7 @@ point to an unsigned long integer. The available codes are:
target architecture for the JIT compiler,
or NULL if there is no JIT support
PCRE_CONFIG_LINK_SIZE Internal link size: 2, 3, or 4
+ PCRE_CONFIG_PARENS_LIMIT Parentheses nesting limit
PCRE_CONFIG_MATCH_LIMIT Internal resource limit
PCRE_CONFIG_MATCH_LIMIT_RECURSION
Internal recursion depth limit
diff --git a/doc/pcreapi.3 b/doc/pcreapi.3
index 7138d1d..ed71b77 100644
--- a/doc/pcreapi.3
+++ b/doc/pcreapi.3
@@ -1,4 +1,4 @@
-.TH PCREAPI 3 "08 October 2013" "PCRE 8.34"
+.TH PCREAPI 3 "05 November 2013" "PCRE 8.34"
.SH NAME
PCRE - Perl-compatible regular expressions
.sp
@@ -460,6 +460,13 @@ the
.\"
documentation.
.sp
+ PCRE_CONFIG_PARENS_LIMIT
+.sp
+The output is a long integer that gives the maximum depth of nesting of
+parentheses (of any kind) in a pattern. This limit is imposed to cap the amount
+of system stack used when a pattern is compiled. It is specified when PCRE is
+built; the default is 250.
+.sp
PCRE_CONFIG_MATCH_LIMIT
.sp
The output is a long integer that gives the default limit for the number of
@@ -2870,6 +2877,6 @@ Cambridge CB2 3QH, England.
.rs
.sp
.nf
-Last updated: 08 October 2013
+Last updated: 05 November 2013
Copyright (c) 1997-2013 University of Cambridge.
.fi
diff --git a/doc/pcrelimits.3 b/doc/pcrelimits.3
index 1541a27..423d6a2 100644
--- a/doc/pcrelimits.3
+++ b/doc/pcrelimits.3
@@ -1,4 +1,4 @@
-.TH PCRELIMITS 3 "15 August 2013" "PCRE 8.34"
+.TH PCRELIMITS 3 "05 November 2013" "PCRE 8.34"
.SH NAME
PCRE - Perl-compatible regular expressions
.SH "SIZE AND OTHER LIMITATIONS"
@@ -24,7 +24,10 @@ However, the speed of execution is slower.
All values in repeating quantifiers must be less than 65536.
.P
There is no limit to the number of parenthesized subpatterns, but there can be
-no more than 65535 capturing subpatterns.
+no more than 65535 capturing subpatterns. There is, however, a limit to the
+depth of nesting of parenthesized subpatterns of all kinds. This is imposed in
+order to limit the amount of system stack used at compile time. The limit can
+be specified when PCRE is built; the default is 250.
.P
There is a limit to the number of forward references to subsequent subpatterns
of around 200,000. Repeated forward references with fixed upper limits, for
@@ -63,6 +66,6 @@ Cambridge CB2 3QH, England.
.rs
.sp
.nf
-Last updated: 15 August 2013
+Last updated: 05 November 2013
Copyright (c) 1997-2013 University of Cambridge.
.fi
diff --git a/pcre.h.in b/pcre.h.in
index 55e4827..a96d3e1 100644
--- a/pcre.h.in
+++ b/pcre.h.in
@@ -298,6 +298,7 @@ compatible. */
#define PCRE_CONFIG_UTF16 10
#define PCRE_CONFIG_JITTARGET 11
#define PCRE_CONFIG_UTF32 12
+#define PCRE_CONFIG_PARENS_LIMIT 13
/* Request types for pcre_study(). Do not re-arrange, in order to remain
compatible. */
diff --git a/pcre_compile.c b/pcre_compile.c
index 56c4747..4e93386 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -531,6 +531,7 @@ static const char error_texts[] =
/* 80 */
"non-octal character in \\o{} (closing brace missing?)\0"
"missing opening brace after \\o\0"
+ "parentheses are too deeply nested\0"
;
/* Table to identify digits and hex digits. This is used when compiling
@@ -7290,10 +7291,19 @@ for (;; ptr++)
skipbytes = IMM2_SIZE;
}
- /* Process nested bracketed regex. Assertions used not to be repeatable,
- but this was changed for Perl compatibility, so all kinds can now be
- repeated. We copy code into a non-register variable (tempcode) in order to
- be able to pass its address because some compilers complain otherwise. */
+ /* Process nested bracketed regex. First check for parentheses nested too
+ deeply. */
+
+ if ((cd->parens_depth += 1) > PARENS_NEST_LIMIT)
+ {
+ *errorcodeptr = ERR82;
+ goto FAILED;
+ }
+
+ /* Assertions used not to be repeatable, but this was changed for Perl
+ compatibility, so all kinds can now be repeated. We copy code into a
+ non-register variable (tempcode) in order to be able to pass its address
+ because some compilers complain otherwise. */
previous = code; /* For handling repetition */
*code = bravalue;
@@ -7323,6 +7333,8 @@ for (;; ptr++)
&length_prevgroup /* Pre-compile phase */
))
goto FAILED;
+
+ cd->parens_depth -= 1;
/* If this was an atomic group and there are no capturing groups within it,
generate OP_ONCE_NC instead of OP_ONCE. */
@@ -8898,6 +8910,7 @@ cd->named_group_list_size = NAMED_GROUP_LIST_SIZE;
cd->start_pattern = (const pcre_uchar *)pattern;
cd->end_pattern = (const pcre_uchar *)(pattern + STRLEN_UC((const pcre_uchar *)pattern));
cd->req_varyopt = 0;
+cd->parens_depth = 0;
cd->assert_depth = 0;
cd->max_lookbehind = 0;
cd->external_options = options;
@@ -8983,6 +8996,7 @@ field; this time it's used for remembering forward references to subpatterns.
*/
cd->final_bracount = cd->bracount; /* Save for checking forward references */
+cd->parens_depth = 0;
cd->assert_depth = 0;
cd->bracount = 0;
cd->max_lookbehind = 0;
diff --git a/pcre_config.c b/pcre_config.c
index 3d5689f..1cbdd9c 100644
--- a/pcre_config.c
+++ b/pcre_config.c
@@ -161,6 +161,10 @@ switch (what)
*((int *)where) = POSIX_MALLOC_THRESHOLD;
break;
+ case PCRE_CONFIG_PARENS_LIMIT:
+ *((unsigned long int *)where) = PARENS_NEST_LIMIT;
+ break;
+
case PCRE_CONFIG_MATCH_LIMIT:
*((unsigned long int *)where) = MATCH_LIMIT;
break;
diff --git a/pcre_internal.h b/pcre_internal.h
index 597842c..9ded34d 100644
--- a/pcre_internal.h
+++ b/pcre_internal.h
@@ -2335,9 +2335,10 @@ enum { ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9,
ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,
ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69,
ERR70, ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79,
- ERR80, ERR81, ERRCOUNT };
+ ERR80, ERR81, ERR82, ERRCOUNT };
/* JIT compiling modes. The function list is indexed by them. */
+
enum { JIT_COMPILE, JIT_PARTIAL_SOFT_COMPILE, JIT_PARTIAL_HARD_COMPILE,
JIT_NUMBER_OF_COMPILE_MODES };
@@ -2490,6 +2491,7 @@ typedef struct compile_data {
int top_backref; /* Maximum back reference */
unsigned int backref_map; /* Bitmap of low back refs */
unsigned int namedrefcount; /* Number of backreferences by name */
+ int parens_depth; /* Depth of nested parentheses */
int assert_depth; /* Depth of nested assertions */
pcre_uint32 external_options; /* External (initial) options */
pcre_uint32 external_flags; /* External flag bits to be set */
diff --git a/pcreposix.c b/pcreposix.c
index 28e199f..2f27c01 100644
--- a/pcreposix.c
+++ b/pcreposix.c
@@ -167,7 +167,8 @@ static const int eint[] = {
REG_BADPAT, /* non-hex character in \\x{} (closing brace missing?) */
/* 80 */
REG_BADPAT, /* non-octal character in \o{} (closing brace missing?) */
- REG_BADPAT /* missing opening brace after \o */
+ REG_BADPAT, /* missing opening brace after \o */
+ REG_BADPAT /* parentheses too deeply nested */
};
/* Table of texts corresponding to POSIX error codes */
diff --git a/pcretest.c b/pcretest.c
index dd34fed..a6d0055 100644
--- a/pcretest.c
+++ b/pcretest.c
@@ -3303,6 +3303,8 @@ are set, either both UTFs are supported or both are not supported. */
printf(" Internal link size = %d\n", rc);
(void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
printf(" POSIX malloc threshold = %d\n", rc);
+ (void)PCRE_CONFIG(PCRE_CONFIG_PARENS_LIMIT, &lrc);
+ printf(" Parentheses nest limit = %ld\n", lrc);
(void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
printf(" Default match limit = %ld\n", lrc);
(void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);