summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2010-01-06 10:26:55 +0000
committer ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2010-01-06 10:26:55 +0000
commit cfc45deebc76f4ec7b490293f43fff28e3ddf056 (patch)
tree b1f4ce292dc4466e00a5e288ce27566fb7cffc23
parent d93627b2961938679366bd402b51ac39c180f417 (diff)
download pcre-cfc45deebc76f4ec7b490293f43fff28e3ddf056.tar.gz
Tidying updates for 8.01-RC1 release.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@487 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- configure.ac 6
-rw-r--r-- doc/html/pcredemo.html 20
-rw-r--r-- doc/html/pcrestack.html 61
-rw-r--r-- doc/pcre.txt 133
-rw-r--r-- doc/pcredemo.3 20
-rw-r--r-- doc/pcrestack.3 2
-rw-r--r-- pcre_compile.c 6
-rw-r--r-- pcre_dfa_exec.c 2
-rw-r--r-- pcre_internal.h 10
-rw-r--r-- pcre_study.c 2
-rw-r--r-- pcredemo.c 2
11 files changed, 155 insertions, 109 deletions
diff --git a/configure.ac b/configure.ac
index 83d7093..a9b1af1 100644
--- a/configure.ac
+++ b/configure.ac
@@ -88,10 +88,10 @@ if test "$PCRE_MINOR" = "08" -o "$PCRE_MINOR" = "09"
then
echo "***"
echo "*** Minor version number $PCRE_MINOR must not be used. ***"
- echo "*** Use only 01 to 07 or 10 onwards, to avoid octal issues. ***"
- echo "***"
+ echo "*** Use only 01 to 07 or 10 onwards, to avoid octal issues. ***"
+ echo "***"
exit 1
-fi
+fi
AC_SUBST(PCRE_MAJOR)
AC_SUBST(PCRE_MINOR)
diff --git a/doc/html/pcredemo.html b/doc/html/pcredemo.html
index 3978560..6277b80 100644
--- a/doc/html/pcredemo.html
+++ b/doc/html/pcredemo.html
@@ -24,14 +24,24 @@ of calling the PCRE regular expression library from a C program. See the
pcresample documentation for a short discussion ("man pcresample" if you have
the PCRE man pages installed).
-In Unix-like environments, compile this program thuswise:
+In Unix-like environments, if PCRE is installed in your standard system
+libraries, you should be able to compile this program using this command:
- gcc -Wall pcredemo.c -I/usr/local/include -L/usr/local/lib \
- -R/usr/local/lib -lpcre
+gcc -Wall pcredemo.c -lpcre -o pcredemo
+
+If PCRE is not installed in a standard place, it is likely to be installed with
+support for the pkg-config mechanism. If you have pkg-config, you can compile
+this program using this command:
+
+gcc -Wall pcredemo.c `pkg-config --cflags --libs libpcre` -o pcredemo
+
+If you do not have pkg-config, you may have to use this:
+
+gcc -Wall pcredemo.c -I/usr/local/include -L/usr/local/lib \
+ -R/usr/local/lib -lpcre -o pcredemo
Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and
-library files for PCRE are installed on your system. You don't need -I and -L
-if PCRE is installed in the standard system libraries. Only some operating
+library files for PCRE are installed on your system. Only some operating
systems (e.g. Solaris) use the -R option.
Building under Windows:
diff --git a/doc/html/pcrestack.html b/doc/html/pcrestack.html
index 6048828..4423d22 100644
--- a/doc/html/pcrestack.html
+++ b/doc/html/pcrestack.html
@@ -31,19 +31,22 @@ current call (a "tail recursion"), the function is just restarted instead.
</P>
<P>
The <b>pcre_dfa_exec()</b> function operates in an entirely different way, and
-hardly uses recursion at all. The limit on its complexity is the amount of
-workspace it is given. The comments that follow do NOT apply to
-<b>pcre_dfa_exec()</b>; they are relevant only for <b>pcre_exec()</b>.
+uses recursion only when there is a regular expression recursion or subroutine
+call in the pattern. This includes the processing of assertion and "once-only"
+subpatterns, which are handled like subroutine calls. Normally, these are never
+very deep, and the limit on the complexity of <b>pcre_dfa_exec()</b> is
+controlled by the amount of workspace it is given. However, it is possible to
+write patterns with runaway infinite recursions; such patterns will cause
+<b>pcre_dfa_exec()</b> to run out of stack. At present, there is no protection
+against this.
</P>
<P>
-You can set limits on the number of times that <b>match()</b> is called, both in
-total and recursively. If the limit is exceeded, an error occurs. For details,
-see the
-<a href="pcreapi.html#extradata">section on extra data for <b>pcre_exec()</b></a>
-in the
-<a href="pcreapi.html"><b>pcreapi</b></a>
-documentation.
+The comments that follow do NOT apply to <b>pcre_dfa_exec()</b>; they are
+relevant only for <b>pcre_exec()</b>.
</P>
+<br><b>
+Reducing <b>pcre_exec()</b>'s stack usage
+</b><br>
<P>
Each time that <b>match()</b> is actually called recursively, it uses memory
from the process stack. For certain kinds of pattern and data, very large
@@ -78,13 +81,13 @@ subject strings is to write repeated parenthesized subpatterns to match more
than one character whenever possible.
</P>
<br><b>
-Compiling PCRE to use heap instead of stack
+Compiling PCRE to use heap instead of stack for <b>pcre_exec()</b>
</b><br>
<P>
In environments where stack memory is constrained, you might want to compile
-PCRE to use heap memory instead of stack for remembering back-up points. This
-makes it run a lot more slowly, however. Details of how to do this are given in
-the
+PCRE to use heap memory instead of stack for remembering back-up points when
+<b>pcre_exec()</b> is running. This makes it run a lot more slowly, however.
+Details of how to do this are given in the
<a href="pcrebuild.html"><b>pcrebuild</b></a>
documentation. When built in this way, instead of using the stack, PCRE obtains
and frees memory by calling the functions that are pointed to by the
@@ -95,16 +98,19 @@ same, and are always freed in reverse order, it may be possible to implement
customized memory handlers that are more efficient than the standard functions.
</P>
<br><b>
-Limiting PCRE's stack usage
+Limiting <b>pcre_exec()</b>'s stack usage
</b><br>
<P>
-PCRE has an internal counter that can be used to limit the depth of recursion,
-and thus cause <b>pcre_exec()</b> to give an error code before it runs out of
-stack. By default, the limit is very large, and unlikely ever to operate. It
-can be changed when PCRE is built, and it can also be set when
+You can set limits on the number of times that <b>match()</b> is called, both in
+total and recursively. If a limit is exceeded, <b>pcre_exec()</b> returns an
+error code. Setting suitable limits should prevent it from running out of
+stack. The default values of the limits are very large, and unlikely ever to
+operate. They can be changed when PCRE is built, and they can also be set when
<b>pcre_exec()</b> is called. For details of these interfaces, see the
<a href="pcrebuild.html"><b>pcrebuild</b></a>
-and
+documentation and the
+<a href="pcreapi.html#extradata">section on extra data for <b>pcre_exec()</b></a>
+in the
<a href="pcreapi.html"><b>pcreapi</b></a>
documentation.
</P>
@@ -112,8 +118,15 @@ documentation.
As a very rough rule of thumb, you should reckon on about 500 bytes per
recursion. Thus, if you want to limit your stack usage to 8Mb, you
should set the limit at 16000 recursions. A 64Mb stack, on the other hand, can
-support around 128000 recursions. The <b>pcretest</b> test program has a command
-line option (<b>-S</b>) that can be used to increase the size of its stack.
+support around 128000 recursions.
+</P>
+<P>
+In Unix-like environments, the <b>pcretest</b> test program has a command line
+option (<b>-S</b>) that can be used to increase the size of its stack. As long
+as the stack is large enough, another option (<b>-M</b>) can be used to find the
+smallest limits that allow a particular pattern to match a given subject
+string. This is done by calling <b>pcre_exec()</b> repeatedly with different
+limits.
</P>
<br><b>
Changing stack size in Unix-like systems
@@ -163,9 +176,9 @@ Cambridge CB2 3QH, England.
REVISION
</b><br>
<P>
-Last updated: 09 July 2008
+Last updated: 03 January 2010
<br>
-Copyright &copy; 1997-2008 University of Cambridge.
+Copyright &copy; 1997-2010 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE index page</a>.
diff --git a/doc/pcre.txt b/doc/pcre.txt
index 2ccc7bb..22d0e49 100644
--- a/doc/pcre.txt
+++ b/doc/pcre.txt
@@ -6958,87 +6958,100 @@ PCRE DISCUSSION OF STACK USAGE
restarted instead.
The pcre_dfa_exec() function operates in an entirely different way, and
- hardly uses recursion at all. The limit on its complexity is the amount
- of workspace it is given. The comments that follow do NOT apply to
- pcre_dfa_exec(); they are relevant only for pcre_exec().
-
- You can set limits on the number of times that match() is called, both
- in total and recursively. If the limit is exceeded, an error occurs.
- For details, see the section on extra data for pcre_exec() in the
- pcreapi documentation.
-
- Each time that match() is actually called recursively, it uses memory
- from the process stack. For certain kinds of pattern and data, very
- large amounts of stack may be needed, despite the recognition of "tail
- recursion". You can often reduce the amount of recursion, and there-
- fore the amount of stack used, by modifying the pattern that is being
+ uses recursion only when there is a regular expression recursion or
+ subroutine call in the pattern. This includes the processing of asser-
+ tion and "once-only" subpatterns, which are handled like subroutine
+ calls. Normally, these are never very deep, and the limit on the com-
+ plexity of pcre_dfa_exec() is controlled by the amount of workspace it
+ is given. However, it is possible to write patterns with runaway infi-
+ nite recursions; such patterns will cause pcre_dfa_exec() to run out of
+ stack. At present, there is no protection against this.
+
+ The comments that follow do NOT apply to pcre_dfa_exec(); they are rel-
+ evant only for pcre_exec().
+
+ Reducing pcre_exec()'s stack usage
+
+ Each time that match() is actually called recursively, it uses memory
+ from the process stack. For certain kinds of pattern and data, very
+ large amounts of stack may be needed, despite the recognition of "tail
+ recursion". You can often reduce the amount of recursion, and there-
+ fore the amount of stack used, by modifying the pattern that is being
matched. Consider, for example, this pattern:
([^<]|<(?!inet))+
- It matches from wherever it starts until it encounters "<inet" or the
- end of the data, and is the kind of pattern that might be used when
+ It matches from wherever it starts until it encounters "<inet" or the
+ end of the data, and is the kind of pattern that might be used when
processing an XML file. Each iteration of the outer parentheses matches
- either one character that is not "<" or a "<" that is not followed by
- "inet". However, each time a parenthesis is processed, a recursion
+ either one character that is not "<" or a "<" that is not followed by
+ "inet". However, each time a parenthesis is processed, a recursion
occurs, so this formulation uses a stack frame for each matched charac-
- ter. For a long string, a lot of stack is required. Consider now this
+ ter. For a long string, a lot of stack is required. Consider now this
rewritten pattern, which matches exactly the same strings:
([^<]++|<(?!inet))+
- This uses very much less stack, because runs of characters that do not
- contain "<" are "swallowed" in one item inside the parentheses. Recur-
- sion happens only when a "<" character that is not followed by "inet"
- is encountered (and we assume this is relatively rare). A possessive
- quantifier is used to stop any backtracking into the runs of non-"<"
+ This uses very much less stack, because runs of characters that do not
+ contain "<" are "swallowed" in one item inside the parentheses. Recur-
+ sion happens only when a "<" character that is not followed by "inet"
+ is encountered (and we assume this is relatively rare). A possessive
+ quantifier is used to stop any backtracking into the runs of non-"<"
characters, but that is not related to stack usage.
- This example shows that one way of avoiding stack problems when match-
+ This example shows that one way of avoiding stack problems when match-
ing long subject strings is to write repeated parenthesized subpatterns
to match more than one character whenever possible.
- Compiling PCRE to use heap instead of stack
-
- In environments where stack memory is constrained, you might want to
- compile PCRE to use heap memory instead of stack for remembering back-
- up points. This makes it run a lot more slowly, however. Details of how
- to do this are given in the pcrebuild documentation. When built in this
- way, instead of using the stack, PCRE obtains and frees memory by call-
- ing the functions that are pointed to by the pcre_stack_malloc and
- pcre_stack_free variables. By default, these point to malloc() and
- free(), but you can replace the pointers to cause PCRE to use your own
- functions. Since the block sizes are always the same, and are always
- freed in reverse order, it may be possible to implement customized mem-
- ory handlers that are more efficient than the standard functions.
-
- Limiting PCRE's stack usage
-
- PCRE has an internal counter that can be used to limit the depth of
- recursion, and thus cause pcre_exec() to give an error code before it
- runs out of stack. By default, the limit is very large, and unlikely
- ever to operate. It can be changed when PCRE is built, and it can also
- be set when pcre_exec() is called. For details of these interfaces, see
- the pcrebuild and pcreapi documentation.
+ Compiling PCRE to use heap instead of stack for pcre_exec()
+
+ In environments where stack memory is constrained, you might want to
+ compile PCRE to use heap memory instead of stack for remembering back-
+ up points when pcre_exec() is running. This makes it run a lot more
+ slowly, however. Details of how to do this are given in the pcrebuild
+ documentation. When built in this way, instead of using the stack, PCRE
+ obtains and frees memory by calling the functions that are pointed to
+ by the pcre_stack_malloc and pcre_stack_free variables. By default,
+ these point to malloc() and free(), but you can replace the pointers to
+ cause PCRE to use your own functions. Since the block sizes are always
+ the same, and are always freed in reverse order, it may be possible to
+ implement customized memory handlers that are more efficient than the
+ standard functions.
+
+ Limiting pcre_exec()'s stack usage
+
+ You can set limits on the number of times that match() is called, both
+ in total and recursively. If a limit is exceeded, pcre_exec() returns
+ an error code. Setting suitable limits should prevent it from running
+ out of stack. The default values of the limits are very large, and
+ unlikely ever to operate. They can be changed when PCRE is built, and
+ they can also be set when pcre_exec() is called. For details of these
+ interfaces, see the pcrebuild documentation and the section on extra
+ data for pcre_exec() in the pcreapi documentation.
As a very rough rule of thumb, you should reckon on about 500 bytes per
recursion. Thus, if you want to limit your stack usage to 8Mb, you
should set the limit at 16000 recursions. A 64Mb stack, on the other
- hand, can support around 128000 recursions. The pcretest test program
- has a command line option (-S) that can be used to increase the size of
- its stack.
+ hand, can support around 128000 recursions.
+
+ In Unix-like environments, the pcretest test program has a command line
+ option (-S) that can be used to increase the size of its stack. As long
+ as the stack is large enough, another option (-M) can be used to find
+ the smallest limits that allow a particular pattern to match a given
+ subject string. This is done by calling pcre_exec() repeatedly with
+ different limits.
Changing stack size in Unix-like systems
- In Unix-like environments, there is not often a problem with the stack
- unless very long strings are involved, though the default limit on
- stack size varies from system to system. Values from 8Mb to 64Mb are
+ In Unix-like environments, there is not often a problem with the stack
+ unless very long strings are involved, though the default limit on
+ stack size varies from system to system. Values from 8Mb to 64Mb are
common. You can find your default limit by running the command:
ulimit -s
- Unfortunately, the effect of running out of stack is often SIGSEGV,
- though sometimes a more explicit error message is given. You can nor-
+ Unfortunately, the effect of running out of stack is often SIGSEGV,
+ though sometimes a more explicit error message is given. You can nor-
mally increase the limit on stack size by code such as this:
struct rlimit rlim;
@@ -7046,15 +7059,15 @@ PCRE DISCUSSION OF STACK USAGE
rlim.rlim_cur = 100*1024*1024;
setrlimit(RLIMIT_STACK, &rlim);
- This reads the current limits (soft and hard) using getrlimit(), then
- attempts to increase the soft limit to 100Mb using setrlimit(). You
+ This reads the current limits (soft and hard) using getrlimit(), then
+ attempts to increase the soft limit to 100Mb using setrlimit(). You
must do this before calling pcre_exec().
Changing stack size in Mac OS X
Using setrlimit(), as described above, should also work on Mac OS X. It
is also possible to set a stack size when linking a program. There is a
- discussion about stack sizes in Mac OS X at this web site:
+ discussion about stack sizes in Mac OS X at this web site:
http://developer.apple.com/qa/qa2005/qa1419.html.
@@ -7067,8 +7080,8 @@ AUTHOR
REVISION
- Last updated: 09 July 2008
- Copyright (c) 1997-2008 University of Cambridge.
+ Last updated: 03 January 2010
+ Copyright (c) 1997-2010 University of Cambridge.
------------------------------------------------------------------------------
diff --git a/doc/pcredemo.3 b/doc/pcredemo.3
index 5d2926d..01decf7 100644
--- a/doc/pcredemo.3
+++ b/doc/pcredemo.3
@@ -24,14 +24,24 @@ of calling the PCRE regular expression library from a C program. See the
pcresample documentation for a short discussion ("man pcresample" if you have
the PCRE man pages installed).
-In Unix-like environments, compile this program thuswise:
+In Unix-like environments, if PCRE is installed in your standard system
+libraries, you should be able to compile this program using this command:
- gcc -Wall pcredemo.c -I/usr/local/include -L/usr/local/lib \e
- -R/usr/local/lib -lpcre
+gcc -Wall pcredemo.c -lpcre -o pcredemo
+
+If PCRE is not installed in a standard place, it is likely to be installed with
+support for the pkg-config mechanism. If you have pkg-config, you can compile
+this program using this command:
+
+gcc -Wall pcredemo.c `pkg-config --cflags --libs libpcre` -o pcredemo
+
+If you do not have pkg-config, you may have to use this:
+
+gcc -Wall pcredemo.c -I/usr/local/include -L/usr/local/lib \e
+ -R/usr/local/lib -lpcre -o pcredemo
Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and
-library files for PCRE are installed on your system. You don't need -I and -L
-if PCRE is installed in the standard system libraries. Only some operating
+library files for PCRE are installed on your system. Only some operating
systems (e.g. Solaris) use the -R option.
Building under Windows:
diff --git a/doc/pcrestack.3 b/doc/pcrestack.3
index 686fe6a..c8e50c1 100644
--- a/doc/pcrestack.3
+++ b/doc/pcrestack.3
@@ -110,7 +110,7 @@ documentation.
As a very rough rule of thumb, you should reckon on about 500 bytes per
recursion. Thus, if you want to limit your stack usage to 8Mb, you
should set the limit at 16000 recursions. A 64Mb stack, on the other hand, can
-support around 128000 recursions.
+support around 128000 recursions.
.P
In Unix-like environments, the \fBpcretest\fP test program has a command line
option (\fB-S\fP) that can be used to increase the size of its stack. As long
diff --git a/pcre_compile.c b/pcre_compile.c
index c1ad784..6d3b9de 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -1997,7 +1997,7 @@ could_be_empty(const uschar *code, const uschar *endcode, branch_chain *bcptr,
{
while (bcptr != NULL && bcptr->current_branch >= code)
{
- if (!could_be_empty_branch(bcptr->current_branch, endcode, utf8))
+ if (!could_be_empty_branch(bcptr->current_branch, endcode, utf8))
return FALSE;
bcptr = bcptr->outer;
}
@@ -4222,7 +4222,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
{
int delta = (repeat_min - 1)*length_prevgroup;
if ((INT64_OR_DOUBLE)(repeat_min - 1)*
- (INT64_OR_DOUBLE)length_prevgroup >
+ (INT64_OR_DOUBLE)length_prevgroup >
(INT64_OR_DOUBLE)INT_MAX ||
OFLOW_MAX - *lengthptr < delta)
{
@@ -4269,7 +4269,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
just adjust the length as if we had. For each repetition we must add 1
to the length for BRAZERO and for all but the last repetition we must
add 2 + 2*LINKSIZE to allow for the nesting that occurs. Do some
- paranoid checks to avoid integer overflow. The INT64_OR_DOUBLE type is
+ paranoid checks to avoid integer overflow. The INT64_OR_DOUBLE type is
a 64-bit integer type when available, otherwise double. */
if (lengthptr != NULL && repeat_max > 0)
diff --git a/pcre_dfa_exec.c b/pcre_dfa_exec.c
index c6bea00..419b4b6 100644
--- a/pcre_dfa_exec.c
+++ b/pcre_dfa_exec.c
@@ -2298,7 +2298,7 @@ for (;;)
ims, /* the current ims flags */
rlevel, /* function recursion level */
recursing); /* pass on regex recursion */
-
+
if (rc == PCRE_ERROR_DFA_UITEM) return rc;
if ((rc >= 0) == (codevalue == OP_ASSERT || codevalue == OP_ASSERTBACK))
{ ADD_ACTIVE(endasscode + LINK_SIZE + 1 - start_code, 0); }
diff --git a/pcre_internal.h b/pcre_internal.h
index 48fd431..3c13e4f 100644
--- a/pcre_internal.h
+++ b/pcre_internal.h
@@ -184,11 +184,11 @@ preprocessor time in standard C environments. */
#error Cannot determine a type for 32-bit unsigned integers
#endif
-/* When checking for integer overflow in pcre_compile(), we need to handle
-large integers. If a 64-bit integer type is available, we can use that.
-Otherwise we have to cast to double, which of course requires floating point
-arithmetic. Handle this by defining a macro for the appropriate type. If
-stdint.h is available, include it; it may define INT64_MAX. The macro int64_t
+/* When checking for integer overflow in pcre_compile(), we need to handle
+large integers. If a 64-bit integer type is available, we can use that.
+Otherwise we have to cast to double, which of course requires floating point
+arithmetic. Handle this by defining a macro for the appropriate type. If
+stdint.h is available, include it; it may define INT64_MAX. The macro int64_t
may be set by "configure". */
#if HAVE_STDINT_H
diff --git a/pcre_study.c b/pcre_study.c
index d937b95..bd00a53 100644
--- a/pcre_study.c
+++ b/pcre_study.c
@@ -444,7 +444,7 @@ Returns: nothing
*/
static void
-set_table_bit(uschar *start_bits, unsigned int c, BOOL caseless,
+set_table_bit(uschar *start_bits, unsigned int c, BOOL caseless,
compile_data *cd)
{
start_bits[c/8] |= (1 << (c&7));
diff --git a/pcredemo.c b/pcredemo.c
index 3ab6525..d565aec 100644
--- a/pcredemo.c
+++ b/pcredemo.c
@@ -7,7 +7,7 @@ of calling the PCRE regular expression library from a C program. See the
pcresample documentation for a short discussion ("man pcresample" if you have
the PCRE man pages installed).
-In Unix-like environments, if PCRE is installed in your standard system
+In Unix-like environments, if PCRE is installed in your standard system
libraries, you should be able to compile this program using this command:
gcc -Wall pcredemo.c -lpcre -o pcredemo