diff options
author | zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2011-12-14 11:18:01 +0000 |
---|---|---|
committer | zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2011-12-14 11:18:01 +0000 |
commit | 99b75fb8c82fcc4587c405f3c58df82a056a5b3c (patch) | |
tree | b1f0f1fa48e9832e3101c7055819a5272c917145 | |
parent | 04e5d49a66f466c4f1fb47170231fa7bc74111f4 (diff) | |
download | pcre-99b75fb8c82fcc4587c405f3c58df82a056a5b3c.tar.gz |
PUBL macro added, single char optimization is fixed, MAX_255 checks are added, pcre_jit_test now copies the default tables to help valgrind
git-svn-id: svn://vcs.exim.org/pcre/code/branches/pcre16@804 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- | Makefile.am | 1 | ||||
-rw-r--r-- | pcre.h.in | 12 | ||||
-rw-r--r-- | pcre16_globals.c | 45 | ||||
-rw-r--r-- | pcre_compile.c | 169 | ||||
-rw-r--r-- | pcre_dfa_exec.c | 8 | ||||
-rw-r--r-- | pcre_exec.c | 91 | ||||
-rw-r--r-- | pcre_get.c | 14 | ||||
-rw-r--r-- | pcre_globals.c | 20 | ||||
-rw-r--r-- | pcre_internal.h | 2 | ||||
-rw-r--r-- | pcre_jit_compile.c | 4 | ||||
-rw-r--r-- | pcre_jit_test.c | 71 | ||||
-rw-r--r-- | pcre_maketables.c | 6 | ||||
-rw-r--r-- | pcre_study.c | 4 | ||||
-rw-r--r-- | pcreposix.c | 2 |
14 files changed, 287 insertions, 162 deletions
diff --git a/Makefile.am b/Makefile.am index d67d167..9b091cb 100644 --- a/Makefile.am +++ b/Makefile.am @@ -217,6 +217,7 @@ libpcre16_la_SOURCES = \ pcre16_exec.c \ pcre16_fullinfo.c \ pcre16_get.c \ + pcre16_globals.c \ pcre16_info.c \ pcre16_jit_compile.c \ pcre16_maketables.c \ @@ -341,12 +341,24 @@ PCRE_EXP_DECL void (*pcre_free)(void *); PCRE_EXP_DECL void *(*pcre_stack_malloc)(size_t); PCRE_EXP_DECL void (*pcre_stack_free)(void *); PCRE_EXP_DECL int (*pcre_callout)(pcre_callout_block *); + +PCRE_EXP_DECL void *(*pcre16_malloc)(size_t); +PCRE_EXP_DECL void (*pcre16_free)(void *); +PCRE_EXP_DECL void *(*pcre16_stack_malloc)(size_t); +PCRE_EXP_DECL void (*pcre16_stack_free)(void *); +PCRE_EXP_DECL int (*pcre16_callout)(pcre_callout_block *); #else /* VPCOMPAT */ PCRE_EXP_DECL void *pcre_malloc(size_t); PCRE_EXP_DECL void pcre_free(void *); PCRE_EXP_DECL void *pcre_stack_malloc(size_t); PCRE_EXP_DECL void pcre_stack_free(void *); PCRE_EXP_DECL int pcre_callout(pcre_callout_block *); + +PCRE_EXP_DECL void *pcre16_malloc(size_t); +PCRE_EXP_DECL void pcre16_free(void *); +PCRE_EXP_DECL void *pcre16_stack_malloc(size_t); +PCRE_EXP_DECL void pcre16_stack_free(void *); +PCRE_EXP_DECL int pcre16_callout(pcre_callout_block *); #endif /* VPCOMPAT */ /* User defined callback which provides a stack just before the match starts. */ diff --git a/pcre16_globals.c b/pcre16_globals.c new file mode 100644 index 0000000..292525c --- /dev/null +++ b/pcre16_globals.c @@ -0,0 +1,45 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. 
+ + Written by Philip Hazel + Copyright (c) 1997-2011 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + +/* Generate code with 16 bit character support. 
*/ +#define COMPILE_PCRE16 + +#include "pcre_globals.c" + +/* End of pcre16_globals.c */ diff --git a/pcre_compile.c b/pcre_compile.c index fcc734f..cd3de55 100644 --- a/pcre_compile.c +++ b/pcre_compile.c @@ -681,13 +681,13 @@ if (cd->workspace_size >= COMPILE_WORK_SIZE_MAX || newsize - cd->workspace_size < WORK_SIZE_SAFETY_MARGIN) return ERR72; -newspace = (pcre_malloc)(newsize); +newspace = (PUBL(malloc))(newsize); if (newspace == NULL) return ERR21; memcpy(newspace, cd->start_workspace, cd->workspace_size); cd->hwm = (pcre_uchar *)newspace + (cd->hwm - cd->start_workspace); if (cd->workspace_size > COMPILE_WORK_SIZE) - (pcre_free)((void *)cd->start_workspace); + (PUBL(free))((void *)cd->start_workspace); cd->start_workspace = newspace; cd->workspace_size = newsize; return 0; @@ -2956,7 +2956,7 @@ if ((options & PCRE_EXTENDED) != 0) { for (;;) { - while ((cd->ctypes[*ptr] & ctype_space) != 0) ptr++; + while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_space) != 0) ptr++; if (*ptr == CHAR_NUMBER_SIGN) { ptr++; @@ -2998,7 +2998,7 @@ if ((options & PCRE_EXTENDED) != 0) { for (;;) { - while ((cd->ctypes[*ptr] & ctype_space) != 0) ptr++; + while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_space) != 0) ptr++; if (*ptr == CHAR_NUMBER_SIGN) { ptr++; @@ -3462,7 +3462,6 @@ for (;; ptr++) BOOL reset_bracount; int class_has_8bitchar; int class_single_char; - int class_lastchar; int newoptions; int recno; int refsign; @@ -3600,7 +3599,7 @@ for (;; ptr++) if ((options & PCRE_EXTENDED) != 0) { - if ((cd->ctypes[c] & ctype_space) != 0) continue; + if (MAX_255(*ptr) && (cd->ctypes[c] & ctype_space) != 0) continue; if (c == CHAR_NUMBER_SIGN) { ptr++; @@ -3767,7 +3766,6 @@ for (;; ptr++) class_has_8bitchar = 0; class_single_char = 0; - class_lastchar = -1; /* Initialize the 32-char bit map to all zeros. 
We build the map in a temporary bit of memory, in case the class contains only 1 character (less @@ -4417,10 +4415,61 @@ for (;; ptr++) /* Only the value of 1 matters for class_single_char. */ if (class_single_char < 2) class_single_char++; - class_lastchar = c; - /* Handle a character that cannot go in the bit map */ - + /* If class_charcount is 1, we saw precisely one character. As long as + there were no negated characters >= 128 and there was no use of \p or \P, + in other words, no use of any XCLASS features, we can optimize. + + In UTF-8 mode, we can optimize the negative case only if there were no + characters >= 128 because OP_NOT and the related opcodes like OP_NOTSTAR + operate on single-bytes characters only. This is an historical hangover. + Maybe one day we can tidy these opcodes to handle multi-byte characters. + + The optimization throws away the bit map. We turn the item into a + 1-character OP_CHAR[I] if it's positive, or OP_NOT[I] if it's negative. + Note that OP_NOT[I] does not support multibyte characters. In the positive + case, it can cause firstchar to be set. Otherwise, there can be no first + char if this item is first, whatever repeat count may follow. In the case + of reqchar, save the previous value for reinstating. */ + +#ifdef SUPPORT_UTF + if (class_single_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET + && (!utf || !negate_class || c < (MAX_VALUE_FOR_SINGLE_CHAR + 1))) +#else + if (class_single_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET) +#endif + { + ptr++; + zeroreqchar = reqchar; + + /* The OP_NOT[I] opcodes work on single characters only. */ + + if (negate_class) + { + if (firstchar == REQ_UNSET) firstchar = REQ_NONE; + zerofirstchar = firstchar; + *code++ = ((options & PCRE_CASELESS) != 0)? OP_NOTI: OP_NOT; + *code++ = c; + goto NOT_CHAR; + } + + /* For a single, positive character, get the value into mcbuffer, and + then we can handle this with the normal one-character code. 
*/ + +#ifdef SUPPORT_UTF + if (utf && c > MAX_VALUE_FOR_SINGLE_CHAR) + mclength = PRIV(ord2utf)(c, mcbuffer); + else +#endif + { + mcbuffer[0] = c; + mclength = 1; + } + goto ONE_CHAR; + } /* End of 1-char optimization */ + + /* Handle a character that cannot go in the bit map. */ + #if defined SUPPORT_UTF && !(defined COMPILE_PCRE8) if ((c > 255) || (utf && ((options & PCRE_CASELESS) != 0 && c > 127))) #elif defined SUPPORT_UTF @@ -4458,19 +4507,6 @@ for (;; ptr++) { *class_uchardata++ = XCL_SINGLE; class_uchardata += PRIV(ord2utf)(othercase, class_uchardata); - - /* In the first pass, we must accumulate the space used here for - the following reason: If this ends up as the only character in the - class, it will later be optimized down to a single character. - However, that uses less memory, and so if this happens to be at the - end of the regex, there will not be enough memory in the real - compile for this temporary storage. */ - - if (lengthptr != NULL) - { - *lengthptr += class_uchardata - class_uchardata_base; - class_uchardata = class_uchardata_base; - } } } #endif /* SUPPORT_UCP */ @@ -4508,61 +4544,9 @@ for (;; ptr++) goto FAILED; } - /* If class_charcount is 1, we saw precisely one character. As long as - there were no negated characters >= 128 and there was no use of \p or \P, - in other words, no use of any XCLASS features, we can optimize. - - In UTF-8 mode, we can optimize the negative case only if there were no - characters >= 128 because OP_NOT and the related opcodes like OP_NOTSTAR - operate on single-bytes characters only. This is an historical hangover. - Maybe one day we can tidy these opcodes to handle multi-byte characters. - - The optimization throws away the bit map. We turn the item into a - 1-character OP_CHAR[I] if it's positive, or OP_NOT[I] if it's negative. - Note that OP_NOT[I] does not support multibyte characters. In the positive - case, it can cause firstchar to be set. 
Otherwise, there can be no first - char if this item is first, whatever repeat count may follow. In the case - of reqchar, save the previous value for reinstating. */ - -#ifdef SUPPORT_UTF - if (class_single_char == 1 && (!utf || !negate_class - || class_lastchar < (MAX_VALUE_FOR_SINGLE_CHAR + 1))) -#else - if (class_single_char == 1) -#endif - { - zeroreqchar = reqchar; - - /* The OP_NOT[I] opcodes work on single characters only. */ - - if (negate_class) - { - if (firstchar == REQ_UNSET) firstchar = REQ_NONE; - zerofirstchar = firstchar; - *code++ = ((options & PCRE_CASELESS) != 0)? OP_NOTI: OP_NOT; - *code++ = class_lastchar; - break; - } - - /* For a single, positive character, get the value into mcbuffer, and - then we can handle this with the normal one-character code. */ - -#ifdef SUPPORT_UTF - if (utf && class_lastchar > MAX_VALUE_FOR_SINGLE_CHAR) - mclength = PRIV(ord2utf)(class_lastchar, mcbuffer); - else -#endif - { - mcbuffer[0] = class_lastchar; - mclength = 1; - } - goto ONE_CHAR; - } /* End of 1-char optimization */ - - /* The general case - not the one-char optimization. If this is the first - thing in the branch, there can be no first char setting, whatever the - repeat count. Any reqchar setting must remain unchanged after any kind of - repeat. */ + /* If this is the first thing in the branch, there can be no first char + setting, whatever the repeat count. Any reqchar setting must remain + unchanged after any kind of repeat. */ if (firstchar == REQ_UNSET) firstchar = REQ_NONE; zerofirstchar = firstchar; @@ -4623,6 +4607,7 @@ for (;; ptr++) memcpy(code, classbits, 32); } code += 32 / sizeof(pcre_uchar); + NOT_CHAR: break; @@ -5510,8 +5495,9 @@ for (;; ptr++) /* First deal with various "verbs" that can be introduced by '*'. 
*/ - if (*(++ptr) == CHAR_ASTERISK && - ((cd->ctypes[ptr[1]] & ctype_letter) != 0 || ptr[1] == ':')) + ptr++; + if (ptr[0] == CHAR_ASTERISK && (ptr[1] == ':' + || (MAX_255(ptr[1]) && ((cd->ctypes[ptr[1]] & ctype_letter) != 0)))) { int i, namelen; int arglen = 0; @@ -5519,7 +5505,8 @@ for (;; ptr++) const pcre_uchar *name = ptr + 1; const pcre_uchar *arg = NULL; previous = NULL; - while ((cd->ctypes[*++ptr] & ctype_letter) != 0) {}; + ptr++; + while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_letter) != 0) ptr++; namelen = (int)(ptr - name); /* It appears that Perl allows any characters whatsoever, other than @@ -5705,7 +5692,7 @@ for (;; ptr++) /* We now expect to read a name; any thing else is an error */ - if ((cd->ctypes[ptr[1]] & ctype_word) == 0) + if (!MAX_255(ptr[1]) || (cd->ctypes[ptr[1]] & ctype_word) == 0) { ptr += 1; /* To get the right offset */ *errorcodeptr = ERR28; @@ -5716,7 +5703,7 @@ for (;; ptr++) recno = 0; name = ++ptr; - while ((cd->ctypes[*ptr] & ctype_word) != 0) + while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_word) != 0) { if (recno >= 0) recno = (IS_DIGIT(*ptr))? recno * 10 + *ptr - CHAR_0 : -1; @@ -5887,7 +5874,8 @@ for (;; ptr++) break; default: /* Could be name define, else bad */ - if ((cd->ctypes[ptr[1]] & ctype_word) != 0) goto DEFINE_NAME; + if (MAX_255(ptr[1]) && (cd->ctypes[ptr[1]] & ctype_word) != 0) + goto DEFINE_NAME; ptr++; /* Correct offset for error */ *errorcodeptr = ERR24; goto FAILED; @@ -5956,7 +5944,7 @@ for (;; ptr++) CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE; name = ++ptr; - while ((cd->ctypes[*ptr] & ctype_word) != 0) ptr++; + while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_word) != 0) ptr++; namelen = (int)(ptr - name); /* In the pre-compile phase, just do a syntax check. 
*/ @@ -6086,7 +6074,7 @@ for (;; ptr++) NAMED_REF_OR_RECURSE: name = ++ptr; - while ((cd->ctypes[*ptr] & ctype_word) != 0) ptr++; + while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_word) != 0) ptr++; namelen = (int)(ptr - name); /* In the pre-compile phase, do a syntax check. We used to just set @@ -6672,6 +6660,7 @@ for (;; ptr++) BOOL isnumber = TRUE; for (p = ptr + 1; *p != 0 && *p != terminator; p++) { + if (!MAX_255(*p)) { isnumber = FALSE; break; } if ((cd->ctypes[*p] & ctype_digit) == 0) isnumber = FALSE; if ((cd->ctypes[*p] & ctype_word) == 0) break; } @@ -7788,7 +7777,7 @@ because nowadays we limit the maximum value of cd->names_found and cd->name_entry_size. */ size = sizeof(real_pcre) + (length + cd->names_found * cd->name_entry_size) * sizeof(pcre_uchar); -re = (real_pcre *)(pcre_malloc)(size); +re = (real_pcre *)(PUBL(malloc))(size); if (re == NULL) { @@ -7890,7 +7879,7 @@ if (cd->hwm > cd->start_workspace) /* If the workspace had to be expanded, free the new memory. */ if (cd->workspace_size > COMPILE_WORK_SIZE) - (pcre_free)((void *)cd->start_workspace); + (PUBL(free))((void *)cd->start_workspace); /* Give an error if there's back reference to a non-existent capturing subpattern. */ @@ -7944,7 +7933,7 @@ if (cd->check_lookbehind) if (errorcode != 0) { - (pcre_free)(re); + (PUBL(free))(re); PCRE_EARLY_ERROR_RETURN: *erroroffset = (int)(ptr - (const pcre_uchar *)pattern); PCRE_EARLY_ERROR_RETURN2: @@ -8079,7 +8068,7 @@ was compiled can be seen. 
*/ if (code - codestart > length) { - (pcre_free)(re); + (PUBL(free))(re); *errorptr = find_error_text(ERR23); *erroroffset = ptr - (pcre_uchar *)pattern; if (errorcodeptr != NULL) *errorcodeptr = ERR23; diff --git a/pcre_dfa_exec.c b/pcre_dfa_exec.c index 5df9cce..9bfe614 100644 --- a/pcre_dfa_exec.c +++ b/pcre_dfa_exec.c @@ -2550,7 +2550,7 @@ for (;;) if (code[LINK_SIZE+1] == OP_CALLOUT) { rrc = 0; - if (pcre_callout != NULL) + if (PUBL(callout) != NULL) { pcre_callout_block cb; cb.version = 1; /* Version 1 of the callout block */ @@ -2566,7 +2566,7 @@ for (;;) cb.capture_last = -1; cb.callout_data = md->callout_data; cb.mark = NULL; /* No (*MARK) support */ - if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc; /* Abandon */ + if ((rrc = (*PUBL(callout))(&cb)) < 0) return rrc; /* Abandon */ } if (rrc > 0) break; /* Fail this thread */ code += PRIV(OP_lengths)[OP_CALLOUT]; /* Skip callout data */ @@ -2883,7 +2883,7 @@ for (;;) case OP_CALLOUT: rrc = 0; - if (pcre_callout != NULL) + if (PUBL(callout) != NULL) { pcre_callout_block cb; cb.version = 1; /* Version 1 of the callout block */ @@ -2899,7 +2899,7 @@ for (;;) cb.capture_last = -1; cb.callout_data = md->callout_data; cb.mark = NULL; /* No (*MARK) support */ - if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc; /* Abandon */ + if ((rrc = (*PUBL(callout))(&cb)) < 0) return rrc; /* Abandon */ } if (rrc == 0) { ADD_ACTIVE(state_offset + PRIV(OP_lengths)[OP_CALLOUT], 0); } diff --git a/pcre_exec.c b/pcre_exec.c index 1d17e86..d699445 100644 --- a/pcre_exec.c +++ b/pcre_exec.c @@ -307,7 +307,7 @@ argument of match(), which never changes. */ #define RMATCH(ra,rb,rc,rd,re,rw)\ {\ - heapframe *newframe = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));\ + heapframe *newframe = (heapframe *)(PUBL(stack_malloc))(sizeof(heapframe));\ if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\ frame->Xwhere = rw; \ newframe->Xeptr = ra;\ @@ -328,7 +328,7 @@ argument of match(), which never changes. 
*/ {\ heapframe *oldframe = frame;\ frame = oldframe->Xprevframe;\ - (pcre_stack_free)(oldframe);\ + (PUBL(stack_free))(oldframe);\ if (frame != NULL)\ {\ rrc = ra;\ @@ -486,7 +486,7 @@ heap storage. Set up the top-level frame here; others are obtained from the heap whenever RMATCH() does a "recursion". See the macro definitions above. */ #ifdef NO_RECURSE -heapframe *frame = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe)); +heapframe *frame = (heapframe *)(PUBL(stack_malloc))(sizeof(heapframe)); if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY); frame->Xprevframe = NULL; /* Marks the top level */ @@ -1217,7 +1217,7 @@ for (;;) if (ecode[LINK_SIZE+1] == OP_CALLOUT) { - if (pcre_callout != NULL) + if (PUBL(callout) != NULL) { pcre_callout_block cb; cb.version = 2; /* Version 1 of the callout block */ @@ -1233,7 +1233,7 @@ for (;;) cb.capture_last = md->capture_last; cb.callout_data = md->callout_data; cb.mark = md->nomatch_mark; - if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH); + if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH); if (rrc < 0) RRETURN(rrc); } ecode += PRIV(OP_lengths)[OP_CALLOUT]; @@ -1627,7 +1627,7 @@ for (;;) function is able to force a failure. 
*/ case OP_CALLOUT: - if (pcre_callout != NULL) + if (PUBL(callout) != NULL) { pcre_callout_block cb; cb.version = 2; /* Version 1 of the callout block */ @@ -1643,7 +1643,7 @@ for (;;) cb.capture_last = md->capture_last; cb.callout_data = md->callout_data; cb.mark = md->nomatch_mark; - if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH); + if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH); if (rrc < 0) RRETURN(rrc); } ecode += 2 + 2*LINK_SIZE; @@ -1702,7 +1702,7 @@ for (;;) else { new_recursive.offset_save = - (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int)); + (int *)(PUBL(malloc))(new_recursive.saved_max * sizeof(int)); if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY); } memcpy(new_recursive.offset_save, md->offset_vector, @@ -1726,7 +1726,7 @@ for (;;) { DPRINTF(("Recursion matched\n")); if (new_recursive.offset_save != stacksave) - (pcre_free)(new_recursive.offset_save); + (PUBL(free))(new_recursive.offset_save); /* Set where we got to in the subject, and reset the start in case it was changed by \K. 
This *is* propagated back out of a recursion, @@ -1744,7 +1744,7 @@ for (;;) { DPRINTF(("Recursion gave error %d\n", rrc)); if (new_recursive.offset_save != stacksave) - (pcre_free)(new_recursive.offset_save); + (PUBL(free))(new_recursive.offset_save); RRETURN(rrc); } @@ -1756,7 +1756,7 @@ for (;;) DPRINTF(("Recursion didn't match\n")); md->recursive = new_recursive.prevrec; if (new_recursive.offset_save != stacksave) - (pcre_free)(new_recursive.offset_save); + (PUBL(free))(new_recursive.offset_save); RRETURN(MATCH_NOMATCH); } @@ -2141,7 +2141,8 @@ for (;;) } else #endif - prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0); + prev_is_word = MAX_255(eptr[-1]) + && ((md->ctypes[eptr[-1]] & ctype_word) != 0); } /* Get status of next character */ @@ -2164,7 +2165,8 @@ for (;;) } else #endif - cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0); + cur_is_word = MAX_255(*eptr) + && ((md->ctypes[*eptr] & ctype_word) != 0); } /* Now see if the situation is what we want */ @@ -4332,8 +4334,9 @@ for (;;) SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0) + if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH); + eptr++; /* No need to skip more bytes - we know it's a 1-byte character */ } break; @@ -4361,8 +4364,9 @@ for (;;) SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0) + if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_space) == 0) RRETURN(MATCH_NOMATCH); + eptr++; /* No need to skip more bytes - we know it's a 1-byte character */ } break; @@ -4390,8 +4394,9 @@ for (;;) SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0) + if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_word) == 0) RRETURN(MATCH_NOMATCH); + eptr++; /* No need to skip more bytes - we know it's a 1-byte character */ } break; @@ -4555,7 +4560,9 @@ for (;;) SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - if 
((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH); + if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0) + RRETURN(MATCH_NOMATCH); + eptr++; } break; @@ -4567,7 +4574,9 @@ for (;;) SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH); + if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0) + RRETURN(MATCH_NOMATCH); + eptr++; } break; @@ -4579,7 +4588,9 @@ for (;;) SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH); + if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0) + RRETURN(MATCH_NOMATCH); + eptr++; } break; @@ -4591,7 +4602,9 @@ for (;;) SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH); + if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0) + RRETURN(MATCH_NOMATCH); + eptr++; } break; @@ -4603,8 +4616,9 @@ for (;;) SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - if ((md->ctypes[*eptr++] & ctype_word) != 0) + if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0) RRETURN(MATCH_NOMATCH); + eptr++; } break; @@ -4616,8 +4630,9 @@ for (;;) SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - if ((md->ctypes[*eptr++] & ctype_word) == 0) + if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0) RRETURN(MATCH_NOMATCH); + eptr++; } break; @@ -4987,7 +5002,7 @@ for (;;) break; case OP_WHITESPACE: - if (c >= 256 || (md->ctypes[c] & ctype_space) == 0) + if (c >= 256 || (md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH); break; @@ -5098,27 +5113,27 @@ for (;;) break; case OP_NOT_DIGIT: - if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH); + if (MAX_255(c) && (md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH); break; case OP_DIGIT: - if ((md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH); + if (!MAX_255(c) || (md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH); break; case 
OP_NOT_WHITESPACE: - if ((md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH); + if (MAX_255(c) && (md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH); break; case OP_WHITESPACE: - if ((md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH); + if (!MAX_255(c) || (md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH); break; case OP_NOT_WORDCHAR: - if ((md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH); + if (MAX_255(c) && (md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH); break; case OP_WORDCHAR: - if ((md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH); + if (!MAX_255(c) || (md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH); break; default: @@ -5764,7 +5779,7 @@ for (;;) SCHECK_PARTIAL(); break; } - if ((md->ctypes[*eptr] & ctype_digit) != 0) break; + if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0) break; eptr++; } break; @@ -5777,7 +5792,7 @@ for (;;) SCHECK_PARTIAL(); break; } - if ((md->ctypes[*eptr] & ctype_digit) == 0) break; + if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0) break; eptr++; } break; @@ -5790,7 +5805,7 @@ for (;;) SCHECK_PARTIAL(); break; } - if ((md->ctypes[*eptr] & ctype_space) != 0) break; + if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0) break; eptr++; } break; @@ -5803,7 +5818,7 @@ for (;;) SCHECK_PARTIAL(); break; } - if ((md->ctypes[*eptr] & ctype_space) == 0) break; + if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0) break; eptr++; } break; @@ -5816,7 +5831,7 @@ for (;;) SCHECK_PARTIAL(); break; } - if ((md->ctypes[*eptr] & ctype_word) != 0) break; + if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0) break; eptr++; } break; @@ -5829,7 +5844,7 @@ for (;;) SCHECK_PARTIAL(); break; } - if ((md->ctypes[*eptr] & ctype_word) == 0) break; + if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0) break; eptr++; } break; @@ -6267,7 +6282,7 @@ arg_offset_max = (2*ocount)/3; if (re->top_backref > 0 && re->top_backref >= ocount/3) 
{ ocount = re->top_backref * 3 + 3; - md->offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int)); + md->offset_vector = (int *)(PUBL(malloc))(ocount * sizeof(int)); if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY; using_temporary_offsets = TRUE; DPRINTF(("Got memory to hold back references\n")); @@ -6670,7 +6685,7 @@ if (rc == MATCH_MATCH || rc == MATCH_ACCEPT) } if (md->end_offset_top > arg_offset_max) md->offset_overflow = TRUE; DPRINTF(("Freeing temporary memory\n")); - (pcre_free)(md->offset_vector); + (PUBL(free))(md->offset_vector); } /* Set the return code to the number of captured strings, or 0 if there were @@ -6722,7 +6737,7 @@ attempt has failed at all permitted starting positions. */ if (using_temporary_offsets) { DPRINTF(("Freeing temporary memory\n")); - (pcre_free)(md->offset_vector); + (PUBL(free))(md->offset_vector); } /* For anything other than nomatch or partial match, just return the code. */ @@ -376,7 +376,7 @@ pcre_uchar *p; for (i = 0; i < double_count; i += 2) size += sizeof(pcre_uchar *) + IN_UCHARS(ovector[i+1] - ovector[i] + 1); -stringlist = (pcre_uchar **)(pcre_malloc)(size); +stringlist = (pcre_uchar **)(PUBL(malloc))(size); if (stringlist == NULL) return PCRE_ERROR_NOMEMORY; #ifdef COMPILE_PCRE8 @@ -406,7 +406,8 @@ return 0; *************************************************/ /* This function exists for the benefit of people calling PCRE from non-C -programs that can call its functions, but not free() or (pcre_free)() directly. +programs that can call its functions, but not free() or (PUBL(free))() +directly. 
Argument: the result of a previous pcre_get_substring_list() Returns: nothing @@ -420,7 +421,7 @@ PCRE_EXP_DEFN void PCRE_CALL_CONVENTION pcre16_free_substring_list(PCRE_SPTR16 *pointer) #endif { -(pcre_free)((void *)pointer); +(PUBL(free))((void *)pointer); } @@ -466,7 +467,7 @@ if (stringnumber < 0 || stringnumber >= stringcount) return PCRE_ERROR_NOSUBSTRING; stringnumber *= 2; yield = ovector[stringnumber+1] - ovector[stringnumber]; -substring = (pcre_uchar *)(pcre_malloc)(IN_UCHARS(yield + 1)); +substring = (pcre_uchar *)(PUBL(malloc))(IN_UCHARS(yield + 1)); if (substring == NULL) return PCRE_ERROR_NOMEMORY; memcpy(substring, subject + ovector[stringnumber], IN_UCHARS(yield)); substring[yield] = 0; @@ -534,7 +535,8 @@ return pcre16_get_substring(subject, ovector, stringcount, n, stringptr); *************************************************/ /* This function exists for the benefit of people calling PCRE from non-C -programs that can call its functions, but not free() or (pcre_free)() directly. +programs that can call its functions, but not free() or (PUBL(free))() +directly. 
Argument: the result of a previous pcre_get_substring() Returns: nothing @@ -548,7 +550,7 @@ PCRE_EXP_DEFN void PCRE_CALL_CONVENTION pcre16_free_substring(PCRE_SPTR16 pointer) #endif { -(pcre_free)((void *)pointer); +(PUBL(free))((void *)pointer); } /* End of pcre_get.c */ diff --git a/pcre_globals.c b/pcre_globals.c index 4562e0a..01874c1 100644 --- a/pcre_globals.c +++ b/pcre_globals.c @@ -67,18 +67,18 @@ static void LocalPcreFree(void* aPtr) { free(aPtr); } -PCRE_EXP_DATA_DEFN void *(*pcre_malloc)(size_t) = LocalPcreMalloc; -PCRE_EXP_DATA_DEFN void (*pcre_free)(void *) = LocalPcreFree; -PCRE_EXP_DATA_DEFN void *(*pcre_stack_malloc)(size_t) = LocalPcreMalloc; -PCRE_EXP_DATA_DEFN void (*pcre_stack_free)(void *) = LocalPcreFree; -PCRE_EXP_DATA_DEFN int (*pcre_callout)(pcre_callout_block *) = NULL; +PCRE_EXP_DATA_DEFN void *(*PUBL(malloc))(size_t) = LocalPcreMalloc; +PCRE_EXP_DATA_DEFN void (*PUBL(free))(void *) = LocalPcreFree; +PCRE_EXP_DATA_DEFN void *(*PUBL(stack_malloc))(size_t) = LocalPcreMalloc; +PCRE_EXP_DATA_DEFN void (*PUBL(stack_free))(void *) = LocalPcreFree; +PCRE_EXP_DATA_DEFN int (*PUBL(callout))(pcre_callout_block *) = NULL; #elif !defined VPCOMPAT -PCRE_EXP_DATA_DEFN void *(*pcre_malloc)(size_t) = malloc; -PCRE_EXP_DATA_DEFN void (*pcre_free)(void *) = free; -PCRE_EXP_DATA_DEFN void *(*pcre_stack_malloc)(size_t) = malloc; -PCRE_EXP_DATA_DEFN void (*pcre_stack_free)(void *) = free; -PCRE_EXP_DATA_DEFN int (*pcre_callout)(pcre_callout_block *) = NULL; +PCRE_EXP_DATA_DEFN void *(*PUBL(malloc))(size_t) = malloc; +PCRE_EXP_DATA_DEFN void (*PUBL(free))(void *) = free; +PCRE_EXP_DATA_DEFN void *(*PUBL(stack_malloc))(size_t) = malloc; +PCRE_EXP_DATA_DEFN void (*PUBL(stack_free))(void *) = free; +PCRE_EXP_DATA_DEFN int (*PUBL(callout))(pcre_callout_block *) = NULL; #endif /* End of pcre_globals.c */ diff --git a/pcre_internal.h b/pcre_internal.h index da0a826..6453fbd 100644 --- a/pcre_internal.h +++ b/pcre_internal.h @@ -2172,9 +2172,11 @@ total length. 
*/ /* Internal function prefix */ #ifdef COMPILE_PCRE8 +#define PUBL(name) pcre_##name #define PRIV(name) _pcre_##name #else #ifdef COMPILE_PCRE16 +#define PUBL(name) pcre16_##name #define PRIV(name) _pcre16_##name #else #error Unsupported compiling mode diff --git a/pcre_jit_compile.c b/pcre_jit_compile.c index a1b66c6..bcdbd5a 100644 --- a/pcre_jit_compile.c +++ b/pcre_jit_compile.c @@ -52,8 +52,8 @@ POSSIBILITY OF SUCH DAMAGE. we just include it. This way we don't need to touch the build system files. */ -#define SLJIT_MALLOC(size) (pcre_malloc)(size) -#define SLJIT_FREE(ptr) (pcre_free)(ptr) +#define SLJIT_MALLOC(size) (PUBL(malloc))(size) +#define SLJIT_FREE(ptr) (PUBL(free))(ptr) #define SLJIT_CONFIG_AUTO 1 #define SLJIT_CONFIG_STATIC 1 #define SLJIT_VERBOSE 0 diff --git a/pcre_jit_test.c b/pcre_jit_test.c index a1fd47b..8ba9509 100644 --- a/pcre_jit_test.c +++ b/pcre_jit_test.c @@ -633,6 +633,11 @@ static struct regression_test_case regression_test_cases[] = { { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80][\xed\xb0\x80]{2,}", "\xed\xa0\x80\xed\xb0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80\xed\xb0\x80" }, { MA, 0 | F_FORCECONV, "[^\xed\xb0\x80]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" }, { MA, 0 | F_NO8 | F_FORCECONV, "[^\\x{dc00}]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" }, + { CMA, 0 | F_FORCECONV, ".\\B.", "\xed\xa0\x80\xed\xb0\x80" }, + { CMA, 0 | F_FORCECONV, "\\D+(?:\\d+|.)\\S+(?:\\s+|.)\\W+(?:\\w+|.)\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80" }, + { CMA, 0 | F_FORCECONV, "\\d*\\s*\\w*\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80" }, + { CMA, 0 | F_FORCECONV | F_NOMATCH, "\\d*?\\D*?\\s*?\\S*?\\w*?\\W*?##", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80#" }, + { CMA | PCRE_EXTENDED, 0 | F_FORCECONV, "\xed\xa0\x80 \xed\xb0\x80 !", "\xed\xa0\x80\xed\xb0\x80!" }, /* Deep recursion. 
*/ { MUA, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " }, @@ -649,6 +654,59 @@ static struct regression_test_case regression_test_cases[] = { { 0, 0, NULL, NULL } }; +static const unsigned char *tables(int release) +{ + /* The purpose of this function to allow valgrind + for reporting invalid reads and writes. */ + static unsigned char *tables_copy; + pcre *regex; + const char *errorptr; + int erroroffset; + const unsigned char *default_tables; +#ifdef SUPPORT_PCRE8 + char null_str[1] = { 0 }; +#else + PCRE_SCHAR16 null_str[1] = { 0 }; +#endif + + if (release) { + if (tables_copy) + free(tables_copy); + tables_copy = NULL; + return NULL; + } + + if (tables_copy) + return tables_copy; + + default_tables = NULL; +#ifdef SUPPORT_PCRE8 + regex = pcre_compile(null_str, 0, &errorptr, &erroroffset, NULL); + if (regex) { + pcre_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables); + pcre_free(regex); + } +#else + regex = pcre16_compile(null_str, 0, &errorptr, &erroroffset, NULL); + if (regex) { + pcre16_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables); + pcre16_free(regex); + } +#endif + /* Shouldn't ever happen. */ + if (!default_tables) + return NULL; + + /* This value cannot get from pcre_fullinfo. Since this is a test program, + we can live with it at the moment. 
*/ + tables_copy = (unsigned char *)malloc(1088); + if (!tables_copy) + return NULL; + + memcpy(tables_copy, default_tables, 1088); + return tables_copy; +} + static pcre_jit_stack* callback(void *arg) { return (pcre_jit_stack *)arg; @@ -802,7 +860,7 @@ static int regression_tests(void) if (!(current->start_offset & F_NO8)) re8 = pcre_compile(current->pattern, current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags8), - &error, &err_offs, NULL); + &error, &err_offs, tables(0)); extra8 = NULL; if (re8) { @@ -832,7 +890,7 @@ static int regression_tests(void) if (!(current->start_offset & F_NO16)) re16 = pcre16_compile(regtest_buf, current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags16), - &error, &err_offs, NULL); + &error, &err_offs, tables(0)); extra16 = NULL; if (re16) { @@ -840,13 +898,13 @@ static int regression_tests(void) extra16 = pcre16_study(re16, PCRE_STUDY_JIT_COMPILE, &error); if (!extra16) { printf("\n16 bit: Cannot study pattern: %s\n", current->pattern); - pcre_free(re16); + pcre16_free(re16); re16 = NULL; } if (!(extra16->flags & PCRE_EXTRA_EXECUTABLE_JIT)) { printf("\n16 bit: JIT compiler does not support: %s\n", current->pattern); - pcre_free_study(extra16); - pcre_free(re16); + pcre16_free_study(extra16); + pcre16_free(re16); re16 = NULL; } } else if (utf16 && ucp16 && !(current->start_offset & F_NO16)) @@ -1011,7 +1069,7 @@ static int regression_tests(void) #ifdef SUPPORT_PCRE16 if (re16) { pcre16_free_study(extra16); - pcre_free(re16); + pcre16_free(re16); } #endif @@ -1020,6 +1078,7 @@ static int regression_tests(void) fflush(stdout); current++; } + tables(1); if (total == successful) { printf("\nAll JIT regression tests are successfully passed.\n"); diff --git a/pcre_maketables.c b/pcre_maketables.c index 9f8ce31..1ae6408 100644 --- a/pcre_maketables.c +++ b/pcre_maketables.c @@ -59,8 +59,8 @@ compilation of dftables.c, in which case the macro 
DFTABLES is defined. */ /* This function builds a set of character tables for use by PCRE and returns a pointer to them. They are build using the ctype functions, and consequently their contents will depend upon the current locale setting. When compiled as -part of the library, the store is obtained via pcre_malloc(), but when compiled -inside dftables, use malloc(). +part of the library, the store is obtained via PUBL(malloc)(), but when +compiled inside dftables, use malloc(). Arguments: none Returns: pointer to the contiguous block of data @@ -78,7 +78,7 @@ unsigned char *yield, *p; int i; #ifndef DFTABLES -yield = (unsigned char*)(pcre_malloc)(tables_length); +yield = (unsigned char*)(PUBL(malloc))(tables_length); #else yield = (unsigned char*)malloc(tables_length); #endif diff --git a/pcre_study.c b/pcre_study.c index e04eea7..4914c62 100644 --- a/pcre_study.c +++ b/pcre_study.c @@ -1409,7 +1409,7 @@ if (bits_set || min > 0 #endif ) { - extra = (pcre_extra *)(pcre_malloc) + extra = (pcre_extra *)(PUBL(malloc)) (sizeof(pcre_extra) + sizeof(pcre_study_data)); if (extra == NULL) { @@ -1501,7 +1501,7 @@ if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 && extra->executable_jit != NULL) PRIV(jit_free)(extra->executable_jit); #endif -pcre_free(extra); +PUBL(free)(extra); } /* End of pcre_study.c */ diff --git a/pcreposix.c b/pcreposix.c index 8e82ba6..d931c63 100644 --- a/pcreposix.c +++ b/pcreposix.c @@ -228,7 +228,7 @@ return length + addlength; PCREPOSIX_EXP_DEFN void PCRE_CALL_CONVENTION regfree(regex_t *preg) { -(pcre_free)(preg->re_pcre); +(PUBL(free))(preg->re_pcre); } |