summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>2010-01-02 18:21:30 +0000
committerph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>2010-01-02 18:21:30 +0000
commitcdec198759de48457aac680223f5c64fad7b2b2c (patch)
tree6dfc710f0defec72cb79f28823d34c358e35ce87
parent24654bb77f039f6e0ef4abd80c48bd49fe771557 (diff)
downloadpcre-cdec198759de48457aac680223f5c64fad7b2b2c.tar.gz
Tidies to allow easier embedded compilation; avoid (double) where possible.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@475 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r--CMakeLists.txt2
-rw-r--r--ChangeLog24
-rw-r--r--configure.ac5
-rw-r--r--pcre_compile.c45
-rw-r--r--pcre_dfa_exec.c6
-rw-r--r--pcre_exec.c16
-rw-r--r--pcre_internal.h29
-rw-r--r--pcre_printint.src5
-rw-r--r--pcre_study.c11
9 files changed, 97 insertions, 46 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 344fb0f..44e119f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -35,6 +35,7 @@
# to disable the final configuration report.
# 2009-04-11 PH applied Christian Ehrlicher's patch to show compiler flags that
# are set by specifying a release type.
+# 2010-01-02 PH added test for stdint.h
PROJECT(PCRE C CXX)
@@ -55,6 +56,7 @@ INCLUDE(CheckFunctionExists)
INCLUDE(CheckTypeSize)
CHECK_INCLUDE_FILE(dirent.h HAVE_DIRENT_H)
+CHECK_INCLUDE_FILE(stdint.h HAVE_STDINT_H)
CHECK_INCLUDE_FILE(sys/stat.h HAVE_SYS_STAT_H)
CHECK_INCLUDE_FILE(sys/types.h HAVE_SYS_TYPES_H)
CHECK_INCLUDE_FILE(unistd.h HAVE_UNISTD_H)
diff --git a/ChangeLog b/ChangeLog
index bb10913..64588db 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -30,6 +30,30 @@ Version 8.01 11-Dec-09
5. The C++ GlobalReplace function was not working like Perl for the special
situation when an empty string is matched. It now does the fancy magic
stuff that is necessary.
+
+6. In pcre_internal.h, obsolete includes of setjmp.h and stdarg.h have been
+ removed. (These were left over from very, very early versions of PCRE.)
+
+7. Some cosmetic changes to the code to make life easier when compiling it
+ as part of something else:
+
+ (a) Change DEBUG to PCRE_DEBUG.
+
+ (b) In pcre_compile(), rename the member of the "branch_chain" structure
+ called "current" as "current_branch", to prevent a collision with the
+ Linux macro when compiled as a kernel module.
+
+ (c) In pcre_study(), rename the function set_bit() as set_table_bit(), to
+ prevent a collision with the Linux macro when compiled as a kernel
+ module.
+
+8. In pcre_compile() there are some checks for integer overflows that used to
+ cast potentially large values to (double). This has been changed so that
+ when building, a check for int64_t is made, and if it is found, it is used
+ instead, thus avoiding the use of floating point arithmetic. (There is no
+ other use of FP in PCRE.) If int64_t is not found, the fallback is to
+ double.
+
Version 8.00 19-Oct-09
diff --git a/configure.ac b/configure.ac
index fc96ec4..1765441 100644
--- a/configure.ac
+++ b/configure.ac
@@ -9,7 +9,7 @@ dnl empty.
m4_define(pcre_major, [8])
m4_define(pcre_minor, [01])
m4_define(pcre_prerelease, [])
-m4_define(pcre_date, [2009-12-11])
+m4_define(pcre_date, [2010-01-02])
# Libtool shared library interface versions (current:revision:age)
m4_define(libpcre_version, [0:1:0])
@@ -66,6 +66,9 @@ AC_LANG_PUSH(C++)
AC_COMPILE_IFELSE(AC_LANG_PROGRAM([],[]),, CXX=""; CXXCP=""; CXXFLAGS="")
AC_LANG_POP
+# Check for a 64-bit integer type
+AC_TYPE_INT64_T
+
AC_PROG_INSTALL
AC_LIBTOOL_WIN32_DLL
AC_PROG_LIBTOOL
diff --git a/pcre_compile.c b/pcre_compile.c
index 0605a6e..2065829 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
- Copyright (c) 1997-2009 University of Cambridge
+ Copyright (c) 1997-2010 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -53,10 +53,11 @@ supporting internal functions that are not used by other modules. */
#include "pcre_internal.h"
-/* When DEBUG is defined, we need the pcre_printint() function, which is also
-used by pcretest. DEBUG is not defined when building a production library. */
+/* When PCRE_DEBUG is defined, we need the pcre_printint() function, which is
+also used by pcretest. PCRE_DEBUG is not defined when building a production
+library. */
-#ifdef DEBUG
+#ifdef PCRE_DEBUG
#include "pcre_printint.src"
#endif
@@ -1994,9 +1995,10 @@ static BOOL
could_be_empty(const uschar *code, const uschar *endcode, branch_chain *bcptr,
BOOL utf8)
{
-while (bcptr != NULL && bcptr->current >= code)
+while (bcptr != NULL && bcptr->current_branch >= code)
{
- if (!could_be_empty_branch(bcptr->current, endcode, utf8)) return FALSE;
+ if (!could_be_empty_branch(bcptr->current_branch, endcode, utf8))
+ return FALSE;
bcptr = bcptr->outer;
}
return TRUE;
@@ -2658,7 +2660,7 @@ BOOL utf8 = FALSE;
uschar *utf8_char = NULL;
#endif
-#ifdef DEBUG
+#ifdef PCRE_DEBUG
if (lengthptr != NULL) DPRINTF((">> start branch\n"));
#endif
@@ -2717,7 +2719,7 @@ for (;; ptr++)
if (lengthptr != NULL)
{
-#ifdef DEBUG
+#ifdef PCRE_DEBUG
if (code > cd->hwm) cd->hwm = code; /* High water info */
#endif
if (code > cd->start_workspace + COMPILE_WORK_SIZE) /* Check for overrun */
@@ -4213,13 +4215,15 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
{
/* In the pre-compile phase, we don't actually do the replication. We
just adjust the length as if we had. Do some paranoid checks for
- potential integer overflow. */
+ potential integer overflow. The INT64_OR_DOUBLE type is a 64-bit
+ integer type when available, otherwise double. */
if (lengthptr != NULL)
{
int delta = (repeat_min - 1)*length_prevgroup;
- if ((double)(repeat_min - 1)*(double)length_prevgroup >
- (double)INT_MAX ||
+ if ((INT64_OR_DOUBLE)(repeat_min - 1)*
+ (INT64_OR_DOUBLE)length_prevgroup >
+ (INT64_OR_DOUBLE)INT_MAX ||
OFLOW_MAX - *lengthptr < delta)
{
*errorcodeptr = ERR20;
@@ -4265,15 +4269,16 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
just adjust the length as if we had. For each repetition we must add 1
to the length for BRAZERO and for all but the last repetition we must
add 2 + 2*LINKSIZE to allow for the nesting that occurs. Do some
- paranoid checks to avoid integer overflow. */
+ paranoid checks to avoid integer overflow. The INT64_OR_DOUBLE type is
+ a 64-bit integer type when available, otherwise double. */
if (lengthptr != NULL && repeat_max > 0)
{
int delta = repeat_max * (length_prevgroup + 1 + 2 + 2*LINK_SIZE) -
2 - 2*LINK_SIZE; /* Last one doesn't nest */
- if ((double)repeat_max *
- (double)(length_prevgroup + 1 + 2 + 2*LINK_SIZE)
- > (double)INT_MAX ||
+ if ((INT64_OR_DOUBLE)repeat_max *
+ (INT64_OR_DOUBLE)(length_prevgroup + 1 + 2 + 2*LINK_SIZE)
+ > (INT64_OR_DOUBLE)INT_MAX ||
OFLOW_MAX - *lengthptr < delta)
{
*errorcodeptr = ERR20;
@@ -5787,7 +5792,7 @@ int old_external_options = cd->external_options;
branch_chain bc;
bc.outer = bcptr;
-bc.current = code;
+bc.current_branch = code;
firstbyte = reqbyte = REQ_UNSET;
@@ -6028,7 +6033,7 @@ for (;;)
{
*code = OP_ALT;
PUT(code, 1, code - last_branch);
- bc.current = last_branch = code;
+ bc.current_branch = last_branch = code;
code += 1 + LINK_SIZE;
}
@@ -6641,7 +6646,7 @@ if debugging, leave the test till after things are printed out. */
*code++ = OP_END;
-#ifndef DEBUG
+#ifndef PCRE_DEBUG
if (code - codestart > length) errorcode = ERR23;
#endif
@@ -6765,7 +6770,7 @@ if (reqbyte >= 0 &&
/* Print out the compiled data if debugging is enabled. This is never the
case when building a production library. */
-#ifdef DEBUG
+#ifdef PCRE_DEBUG
printf("Length = %d top_bracket = %d top_backref = %d\n",
length, re->top_bracket, re->top_backref);
@@ -6803,7 +6808,7 @@ if (code - codestart > length)
if (errorcodeptr != NULL) *errorcodeptr = ERR23;
return NULL;
}
-#endif /* DEBUG */
+#endif /* PCRE_DEBUG */
return (pcre *)re;
}
diff --git a/pcre_dfa_exec.c b/pcre_dfa_exec.c
index 5c1875a..11a3fba 100644
--- a/pcre_dfa_exec.c
+++ b/pcre_dfa_exec.c
@@ -255,7 +255,7 @@ typedef struct stateblock {
#define INTS_PER_STATEBLOCK (sizeof(stateblock)/sizeof(int))
-#ifdef DEBUG
+#ifdef PCRE_DEBUG
/*************************************************
* Print character string *
*************************************************/
@@ -559,7 +559,7 @@ for (;;)
workspace[0] ^= 1; /* Remember for the restarting feature */
workspace[1] = active_count;
-#ifdef DEBUG
+#ifdef PCRE_DEBUG
printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP);
pchars((uschar *)ptr, strlen((char *)ptr), stdout);
printf("\"\n");
@@ -605,7 +605,7 @@ for (;;)
int state_offset = current_state->offset;
int count, codevalue, rrc;
-#ifdef DEBUG
+#ifdef PCRE_DEBUG
printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
if (clen == 0) printf("EOL\n");
else if (c > 32 && c < 127) printf("'%c'\n", c);
diff --git a/pcre_exec.c b/pcre_exec.c
index fd6866b..899b92a 100644
--- a/pcre_exec.c
+++ b/pcre_exec.c
@@ -89,7 +89,7 @@ static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
-#ifdef DEBUG
+#ifdef PCRE_DEBUG
/*************************************************
* Debugging function to print chars *
*************************************************/
@@ -141,7 +141,7 @@ match_ref(int offset, register USPTR eptr, int length, match_data *md,
{
USPTR p = md->start_subject + md->offset_vector[offset];
-#ifdef DEBUG
+#ifdef PCRE_DEBUG
if (eptr >= md->end_subject)
printf("matching subject <null>");
else
@@ -254,7 +254,7 @@ actuall used in this definition. */
#ifndef NO_RECURSE
#define REGISTER register
-#ifdef DEBUG
+#ifdef PCRE_DEBUG
#define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
{ \
printf("match() called in line %d\n", __LINE__); \
@@ -622,7 +622,7 @@ TAIL_RECURSE:
/* OK, now we can get on with the real code of the function. Recursive calls
are specified by the macro RMATCH and RRETURN is used to return. When
NO_RECURSE is *not* defined, these just turn into a recursive call to match()
-and a "return", respectively (possibly with some debugging if DEBUG is
+and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
defined). However, RMATCH isn't like a function call because it's quite a
complicated macro. It has to be used in one particular way. This shouldn't,
however, impact performance when true recursion is being used. */
@@ -713,7 +713,7 @@ for (;;)
number = GET2(ecode, 1+LINK_SIZE);
offset = number << 1;
-#ifdef DEBUG
+#ifdef PCRE_DEBUG
printf("start bracket %d\n", number);
printf("subject=");
pchars(eptr, 16, TRUE, md);
@@ -1039,7 +1039,7 @@ for (;;)
number = GET2(ecode, 1);
offset = number << 1;
-#ifdef DEBUG
+#ifdef PCRE_DEBUG
printf("end bracket %d at *ACCEPT", number);
printf("\n");
#endif
@@ -1468,7 +1468,7 @@ for (;;)
number = GET2(prev, 1+LINK_SIZE);
offset = number << 1;
-#ifdef DEBUG
+#ifdef PCRE_DEBUG
printf("end bracket %d", number);
printf("\n");
#endif
@@ -5635,7 +5635,7 @@ for(;;)
}
}
-#ifdef DEBUG /* Sigh. Some compilers never learn. */
+#ifdef PCRE_DEBUG /* Sigh. Some compilers never learn. */
printf(">>>> Match against: ");
pchars(start_match, end_subject - start_match, TRUE, md);
printf("\n");
diff --git a/pcre_internal.h b/pcre_internal.h
index de09614..28cbab2 100644
--- a/pcre_internal.h
+++ b/pcre_internal.h
@@ -7,7 +7,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
- Copyright (c) 1997-2009 University of Cambridge
+ Copyright (c) 1997-2010 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -45,10 +45,10 @@ functions whose names all begin with "_pcre_". */
#ifndef PCRE_INTERNAL_H
#define PCRE_INTERNAL_H
-/* Define DEBUG to get debugging output on stdout. */
+/* Define PCRE_DEBUG to get debugging output on stdout. */
#if 0
-#define DEBUG
+#define PCRE_DEBUG
#endif
/* We do not support both EBCDIC and UTF-8 at the same time. The "configure"
@@ -74,7 +74,7 @@ It turns out that the Mac Debugging.h header also defines the macro DPRINTF, so
be absolutely sure we get our version. */
#undef DPRINTF
-#ifdef DEBUG
+#ifdef PCRE_DEBUG
#define DPRINTF(p) printf p
#else
#define DPRINTF(p) /* Nothing */
@@ -86,8 +86,6 @@ setjmp and stdarg are used is when NO_RECURSE is set. */
#include <ctype.h>
#include <limits.h>
-#include <setjmp.h>
-#include <stdarg.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
@@ -186,6 +184,23 @@ preprocessor time in standard C environments. */
#error Cannot determine a type for 32-bit unsigned integers
#endif
+/* When checking for integer overflow in pcre_compile(), we need to handle
+large integers. If a 64-bit integer type is available, we can use that.
+Otherwise we have to cast to double, which of course requires floating point
+arithmetic. Handle this by defining a macro for the appropriate type. If
+stdint.h is available, include it; it may define INT64_MAX. Alternatively,
+"configure" may have defined int64_t as a macro when no such type was found. */
+
+#if HAVE_STDINT_H
+#include <stdint.h>
+#endif
+
+#if defined INT64_MAX || defined int64_t
+#define INT64_OR_DOUBLE int64_t
+#else
+#define INT64_OR_DOUBLE double
+#endif
+
/* All character handling must be done as unsigned characters. Otherwise there
are problems with top-bit-set characters and functions such as isspace().
However, we leave the interface to the outside world as char *, because that
@@ -1579,7 +1594,7 @@ branches, for testing for left recursion. */
typedef struct branch_chain {
struct branch_chain *outer;
- uschar *current;
+ uschar *current_branch;
} branch_chain;
/* Structure for items in a linked list that represents an explicit recursive
diff --git a/pcre_printint.src b/pcre_printint.src
index acfc4ca..9b2def1 100644
--- a/pcre_printint.src
+++ b/pcre_printint.src
@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
- Copyright (c) 1997-2009 University of Cambridge
+ Copyright (c) 1997-2010 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -43,7 +43,8 @@ internal form of a compiled regular expression, along with some supporting
local functions. This source file is used in two places:
(1) It is #included by pcre_compile.c when it is compiled in debugging mode
-(DEBUG defined in pcre_internal.h). It is not included in production compiles.
+(PCRE_DEBUG defined in pcre_internal.h). It is not included in production
+compiles.
(2) It is always #included by pcretest.c, which can be asked to print out a
compiled regex for debugging purposes. */
diff --git a/pcre_study.c b/pcre_study.c
index 957a1fe..d937b95 100644
--- a/pcre_study.c
+++ b/pcre_study.c
@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
- Copyright (c) 1997-2009 University of Cambridge
+ Copyright (c) 1997-2010 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -444,7 +444,8 @@ Returns: nothing
*/
static void
-set_bit(uschar *start_bits, unsigned int c, BOOL caseless, compile_data *cd)
+set_table_bit(uschar *start_bits, unsigned int c, BOOL caseless,
+ compile_data *cd)
{
start_bits[c/8] |= (1 << (c&7));
if (caseless && (cd->ctypes[c] & ctype_letter) != 0)
@@ -606,7 +607,7 @@ do
case OP_QUERY:
case OP_MINQUERY:
case OP_POSQUERY:
- set_bit(start_bits, tcode[1], caseless, cd);
+ set_table_bit(start_bits, tcode[1], caseless, cd);
tcode += 2;
#ifdef SUPPORT_UTF8
if (utf8 && tcode[-1] >= 0xc0)
@@ -619,7 +620,7 @@ do
case OP_UPTO:
case OP_MINUPTO:
case OP_POSUPTO:
- set_bit(start_bits, tcode[3], caseless, cd);
+ set_table_bit(start_bits, tcode[3], caseless, cd);
tcode += 4;
#ifdef SUPPORT_UTF8
if (utf8 && tcode[-1] >= 0xc0)
@@ -637,7 +638,7 @@ do
case OP_PLUS:
case OP_MINPLUS:
case OP_POSPLUS:
- set_bit(start_bits, tcode[1], caseless, cd);
+ set_table_bit(start_bits, tcode[1], caseless, cd);
try_next = FALSE;
break;