diff options
author | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2010-01-02 18:21:30 +0000 |
---|---|---|
committer | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2010-01-02 18:21:30 +0000 |
commit | cdec198759de48457aac680223f5c64fad7b2b2c (patch) | |
tree | 6dfc710f0defec72cb79f28823d34c358e35ce87 | |
parent | 24654bb77f039f6e0ef4abd80c48bd49fe771557 (diff) | |
download | pcre-cdec198759de48457aac680223f5c64fad7b2b2c.tar.gz |
Tidies to allow easier embedded compilation; avoid (double) where possible.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@475 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- | CMakeLists.txt | 2 | ||||
-rw-r--r-- | ChangeLog | 24 | ||||
-rw-r--r-- | configure.ac | 5 | ||||
-rw-r--r-- | pcre_compile.c | 45 | ||||
-rw-r--r-- | pcre_dfa_exec.c | 6 | ||||
-rw-r--r-- | pcre_exec.c | 16 | ||||
-rw-r--r-- | pcre_internal.h | 29 | ||||
-rw-r--r-- | pcre_printint.src | 5 | ||||
-rw-r--r-- | pcre_study.c | 11 |
9 files changed, 97 insertions, 46 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 344fb0f..44e119f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -35,6 +35,7 @@ # to disable the final configuration report. # 2009-04-11 PH applied Christian Ehrlicher's patch to show compiler flags that # are set by specifying a release type. +# 2010-01-02 PH added test for stdint.h PROJECT(PCRE C CXX) @@ -55,6 +56,7 @@ INCLUDE(CheckFunctionExists) INCLUDE(CheckTypeSize) CHECK_INCLUDE_FILE(dirent.h HAVE_DIRENT_H) +CHECK_INCLUDE_FILE(stdint.h HAVE_STDINT_H) CHECK_INCLUDE_FILE(sys/stat.h HAVE_SYS_STAT_H) CHECK_INCLUDE_FILE(sys/types.h HAVE_SYS_TYPES_H) CHECK_INCLUDE_FILE(unistd.h HAVE_UNISTD_H) @@ -30,6 +30,30 @@ Version 8.01 11-Dec-09 5. The C++ GlobalReplace function was not working like Perl for the special situation when an empty string is matched. It now does the fancy magic stuff that is necessary. + +6. In pcre_internal.h, obsolete includes to setjmp.h and stdarg.h have been + removed. (These were left over from very, very early versions of PCRE.) + +7. Some cosmetic changes to the code to make life easier when compiling it + as part of something else: + + (a) Change DEBUG to PCRE_DEBUG. + + (b) In pcre_compile(), rename the member of the "branch_chain" structure + called "current" as "current_branch", to prevent a collision with the + Linux macro when compiled as a kernel module. + + (c) In pcre_study(), rename the function set_bit() as set_table_bit(), to + prevent a collision with the Linux macro when compiled as a kernel + module. + +8. In pcre_compile() there are some checks for integer overflows that used to + cast potentially large values to (double). This has been changed so that + when building, a check for int64_t is made, and if it is found, it is used + instead, thus avoiding the use of floating point arithmetic. (There is no + other use of FP in PCRE.) If int64_t is not found, the fallback is to + double. 
+ Version 8.00 19-Oct-09 diff --git a/configure.ac b/configure.ac index fc96ec4..1765441 100644 --- a/configure.ac +++ b/configure.ac @@ -9,7 +9,7 @@ dnl empty. m4_define(pcre_major, [8]) m4_define(pcre_minor, [01]) m4_define(pcre_prerelease, []) -m4_define(pcre_date, [2009-12-11]) +m4_define(pcre_date, [2010-01-02]) # Libtool shared library interface versions (current:revision:age) m4_define(libpcre_version, [0:1:0]) @@ -66,6 +66,9 @@ AC_LANG_PUSH(C++) AC_COMPILE_IFELSE(AC_LANG_PROGRAM([],[]),, CXX=""; CXXCP=""; CXXFLAGS="") AC_LANG_POP +# Check for a 64-bit integer type +AC_TYPE_INT64_T + AC_PROG_INSTALL AC_LIBTOOL_WIN32_DLL AC_PROG_LIBTOOL diff --git a/pcre_compile.c b/pcre_compile.c index 0605a6e..2065829 100644 --- a/pcre_compile.c +++ b/pcre_compile.c @@ -6,7 +6,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel - Copyright (c) 1997-2009 University of Cambridge + Copyright (c) 1997-2010 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -53,10 +53,11 @@ supporting internal functions that are not used by other modules. */ #include "pcre_internal.h" -/* When DEBUG is defined, we need the pcre_printint() function, which is also -used by pcretest. DEBUG is not defined when building a production library. */ +/* When PCRE_DEBUG is defined, we need the pcre_printint() function, which is +also used by pcretest. PCRE_DEBUG is not defined when building a production +library. 
*/ -#ifdef DEBUG +#ifdef PCRE_DEBUG #include "pcre_printint.src" #endif @@ -1994,9 +1995,10 @@ static BOOL could_be_empty(const uschar *code, const uschar *endcode, branch_chain *bcptr, BOOL utf8) { -while (bcptr != NULL && bcptr->current >= code) +while (bcptr != NULL && bcptr->current_branch >= code) { - if (!could_be_empty_branch(bcptr->current, endcode, utf8)) return FALSE; + if (!could_be_empty_branch(bcptr->current_branch, endcode, utf8)) + return FALSE; bcptr = bcptr->outer; } return TRUE; @@ -2658,7 +2660,7 @@ BOOL utf8 = FALSE; uschar *utf8_char = NULL; #endif -#ifdef DEBUG +#ifdef PCRE_DEBUG if (lengthptr != NULL) DPRINTF((">> start branch\n")); #endif @@ -2717,7 +2719,7 @@ for (;; ptr++) if (lengthptr != NULL) { -#ifdef DEBUG +#ifdef PCRE_DEBUG if (code > cd->hwm) cd->hwm = code; /* High water info */ #endif if (code > cd->start_workspace + COMPILE_WORK_SIZE) /* Check for overrun */ @@ -4213,13 +4215,15 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */ { /* In the pre-compile phase, we don't actually do the replication. We just adjust the length as if we had. Do some paranoid checks for - potential integer overflow. */ + potential integer overflow. The INT64_OR_DOUBLE type is a 64-bit + integer type when available, otherwise double. */ if (lengthptr != NULL) { int delta = (repeat_min - 1)*length_prevgroup; - if ((double)(repeat_min - 1)*(double)length_prevgroup > - (double)INT_MAX || + if ((INT64_OR_DOUBLE)(repeat_min - 1)* + (INT64_OR_DOUBLE)length_prevgroup > + (INT64_OR_DOUBLE)INT_MAX || OFLOW_MAX - *lengthptr < delta) { *errorcodeptr = ERR20; @@ -4265,15 +4269,16 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */ just adjust the length as if we had. For each repetition we must add 1 to the length for BRAZERO and for all but the last repetition we must add 2 + 2*LINKSIZE to allow for the nesting that occurs. Do some - paranoid checks to avoid integer overflow. 
*/ + paranoid checks to avoid integer overflow. The INT64_OR_DOUBLE type is + a 64-bit integer type when available, otherwise double. */ if (lengthptr != NULL && repeat_max > 0) { int delta = repeat_max * (length_prevgroup + 1 + 2 + 2*LINK_SIZE) - 2 - 2*LINK_SIZE; /* Last one doesn't nest */ - if ((double)repeat_max * - (double)(length_prevgroup + 1 + 2 + 2*LINK_SIZE) - > (double)INT_MAX || + if ((INT64_OR_DOUBLE)repeat_max * + (INT64_OR_DOUBLE)(length_prevgroup + 1 + 2 + 2*LINK_SIZE) + > (INT64_OR_DOUBLE)INT_MAX || OFLOW_MAX - *lengthptr < delta) { *errorcodeptr = ERR20; @@ -5787,7 +5792,7 @@ int old_external_options = cd->external_options; branch_chain bc; bc.outer = bcptr; -bc.current = code; +bc.current_branch = code; firstbyte = reqbyte = REQ_UNSET; @@ -6028,7 +6033,7 @@ for (;;) { *code = OP_ALT; PUT(code, 1, code - last_branch); - bc.current = last_branch = code; + bc.current_branch = last_branch = code; code += 1 + LINK_SIZE; } @@ -6641,7 +6646,7 @@ if debugging, leave the test till after things are printed out. */ *code++ = OP_END; -#ifndef DEBUG +#ifndef PCRE_DEBUG if (code - codestart > length) errorcode = ERR23; #endif @@ -6765,7 +6770,7 @@ if (reqbyte >= 0 && /* Print out the compiled data if debugging is enabled. This is never the case when building a production library. 
*/ -#ifdef DEBUG +#ifdef PCRE_DEBUG printf("Length = %d top_bracket = %d top_backref = %d\n", length, re->top_bracket, re->top_backref); @@ -6803,7 +6808,7 @@ if (code - codestart > length) if (errorcodeptr != NULL) *errorcodeptr = ERR23; return NULL; } -#endif /* DEBUG */ +#endif /* PCRE_DEBUG */ return (pcre *)re; } diff --git a/pcre_dfa_exec.c b/pcre_dfa_exec.c index 5c1875a..11a3fba 100644 --- a/pcre_dfa_exec.c +++ b/pcre_dfa_exec.c @@ -255,7 +255,7 @@ typedef struct stateblock { #define INTS_PER_STATEBLOCK (sizeof(stateblock)/sizeof(int)) -#ifdef DEBUG +#ifdef PCRE_DEBUG /************************************************* * Print character string * *************************************************/ @@ -559,7 +559,7 @@ for (;;) workspace[0] ^= 1; /* Remember for the restarting feature */ workspace[1] = active_count; -#ifdef DEBUG +#ifdef PCRE_DEBUG printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP); pchars((uschar *)ptr, strlen((char *)ptr), stdout); printf("\"\n"); @@ -605,7 +605,7 @@ for (;;) int state_offset = current_state->offset; int count, codevalue, rrc; -#ifdef DEBUG +#ifdef PCRE_DEBUG printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset); if (clen == 0) printf("EOL\n"); else if (c > 32 && c < 127) printf("'%c'\n", c); diff --git a/pcre_exec.c b/pcre_exec.c index fd6866b..899b92a 100644 --- a/pcre_exec.c +++ b/pcre_exec.c @@ -89,7 +89,7 @@ static const char rep_max[] = { 0, 0, 0, 0, 1, 1 }; -#ifdef DEBUG +#ifdef PCRE_DEBUG /************************************************* * Debugging function to print chars * *************************************************/ @@ -141,7 +141,7 @@ match_ref(int offset, register USPTR eptr, int length, match_data *md, { USPTR p = md->start_subject + md->offset_vector[offset]; -#ifdef DEBUG +#ifdef PCRE_DEBUG if (eptr >= md->end_subject) printf("matching subject <null>"); else @@ -254,7 +254,7 @@ actually used in this definition. 
*/ #ifndef NO_RECURSE #define REGISTER register -#ifdef DEBUG +#ifdef PCRE_DEBUG #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \ { \ printf("match() called in line %d\n", __LINE__); \ @@ -622,7 +622,7 @@ TAIL_RECURSE: /* OK, now we can get on with the real code of the function. Recursive calls are specified by the macro RMATCH and RRETURN is used to return. When NO_RECURSE is *not* defined, these just turn into a recursive call to match() -and a "return", respectively (possibly with some debugging if DEBUG is +and a "return", respectively (possibly with some debugging if PCRE_DEBUG is defined). However, RMATCH isn't like a function call because it's quite a complicated macro. It has to be used in one particular way. This shouldn't, however, impact performance when true recursion is being used. */ @@ -713,7 +713,7 @@ for (;;) number = GET2(ecode, 1+LINK_SIZE); offset = number << 1; -#ifdef DEBUG +#ifdef PCRE_DEBUG printf("start bracket %d\n", number); printf("subject="); pchars(eptr, 16, TRUE, md); @@ -1039,7 +1039,7 @@ for (;;) number = GET2(ecode, 1); offset = number << 1; -#ifdef DEBUG +#ifdef PCRE_DEBUG printf("end bracket %d at *ACCEPT", number); printf("\n"); #endif @@ -1468,7 +1468,7 @@ for (;;) number = GET2(prev, 1+LINK_SIZE); offset = number << 1; -#ifdef DEBUG +#ifdef PCRE_DEBUG printf("end bracket %d", number); printf("\n"); #endif @@ -5635,7 +5635,7 @@ for(;;) } } -#ifdef DEBUG /* Sigh. Some compilers never learn. */ +#ifdef PCRE_DEBUG /* Sigh. Some compilers never learn. */ printf(">>>> Match against: "); pchars(start_match, end_subject - start_match, TRUE, md); printf("\n"); diff --git a/pcre_internal.h b/pcre_internal.h index de09614..28cbab2 100644 --- a/pcre_internal.h +++ b/pcre_internal.h @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. 
Written by Philip Hazel - Copyright (c) 1997-2009 University of Cambridge + Copyright (c) 1997-2010 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -45,10 +45,10 @@ functions whose names all begin with "_pcre_". */ #ifndef PCRE_INTERNAL_H #define PCRE_INTERNAL_H -/* Define DEBUG to get debugging output on stdout. */ +/* Define PCRE_DEBUG to get debugging output on stdout. */ #if 0 -#define DEBUG +#define PCRE_DEBUG #endif /* We do not support both EBCDIC and UTF-8 at the same time. The "configure" @@ -74,7 +74,7 @@ It turns out that the Mac Debugging.h header also defines the macro DPRINTF, so be absolutely sure we get our version. */ #undef DPRINTF -#ifdef DEBUG +#ifdef PCRE_DEBUG #define DPRINTF(p) printf p #else #define DPRINTF(p) /* Nothing */ @@ -86,8 +86,6 @@ setjmp and stdarg are used is when NO_RECURSE is set. */ #include <ctype.h> #include <limits.h> -#include <setjmp.h> -#include <stdarg.h> #include <stddef.h> #include <stdio.h> #include <stdlib.h> @@ -186,6 +184,23 @@ preprocessor time in standard C environments. */ #error Cannot determine a type for 32-bit unsigned integers #endif +/* When checking for integer overflow in pcre_compile(), we need to handle +large integers. If a 64-bit integer type is available, we can use that. +Otherwise we have to cast to double, which of course requires floating point +arithmetic. Handle this by defining a macro for the appropriate type. If +stdint.h is available, include it; it may define INT64_MAX. The macro int64_t +may be set by "configure". */ + +#if HAVE_STDINT_H +#include <stdint.h> +#endif + +#if defined INT64_MAX || defined int64_t +#define INT64_OR_DOUBLE int64_t +#else +#define INT64_OR_DOUBLE double +#endif + /* All character handling must be done as unsigned characters. Otherwise there are problems with top-bit-set characters and functions such as isspace(). 
However, we leave the interface to the outside world as char *, because that @@ -1579,7 +1594,7 @@ branches, for testing for left recursion. */ typedef struct branch_chain { struct branch_chain *outer; - uschar *current; + uschar *current_branch; } branch_chain; /* Structure for items in a linked list that represents an explicit recursive diff --git a/pcre_printint.src b/pcre_printint.src index acfc4ca..9b2def1 100644 --- a/pcre_printint.src +++ b/pcre_printint.src @@ -6,7 +6,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel - Copyright (c) 1997-2009 University of Cambridge + Copyright (c) 1997-2010 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -43,7 +43,8 @@ internal form of a compiled regular expression, along with some supporting local functions. This source file is used in two places: (1) It is #included by pcre_compile.c when it is compiled in debugging mode -(DEBUG defined in pcre_internal.h). It is not included in production compiles. +(PCRE_DEBUG defined in pcre_internal.h). It is not included in production +compiles. (2) It is always #included by pcretest.c, which can be asked to print out a compiled regex for debugging purposes. */ diff --git a/pcre_study.c b/pcre_study.c index 957a1fe..d937b95 100644 --- a/pcre_study.c +++ b/pcre_study.c @@ -6,7 +6,7 @@ and semantics are as close as possible to those of the Perl 5 language. 
Written by Philip Hazel - Copyright (c) 1997-2009 University of Cambridge + Copyright (c) 1997-2010 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -444,7 +444,8 @@ Returns: nothing */ static void -set_bit(uschar *start_bits, unsigned int c, BOOL caseless, compile_data *cd) +set_table_bit(uschar *start_bits, unsigned int c, BOOL caseless, + compile_data *cd) { start_bits[c/8] |= (1 << (c&7)); if (caseless && (cd->ctypes[c] & ctype_letter) != 0) @@ -606,7 +607,7 @@ do case OP_QUERY: case OP_MINQUERY: case OP_POSQUERY: - set_bit(start_bits, tcode[1], caseless, cd); + set_table_bit(start_bits, tcode[1], caseless, cd); tcode += 2; #ifdef SUPPORT_UTF8 if (utf8 && tcode[-1] >= 0xc0) @@ -619,7 +620,7 @@ do case OP_UPTO: case OP_MINUPTO: case OP_POSUPTO: - set_bit(start_bits, tcode[3], caseless, cd); + set_table_bit(start_bits, tcode[3], caseless, cd); tcode += 4; #ifdef SUPPORT_UTF8 if (utf8 && tcode[-1] >= 0xc0) @@ -637,7 +638,7 @@ do case OP_PLUS: case OP_MINPLUS: case OP_POSPLUS: - set_bit(start_bits, tcode[1], caseless, cd); + set_table_bit(start_bits, tcode[1], caseless, cd); try_next = FALSE; break; |