diff options
author | zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2011-12-19 11:04:45 +0000 |
---|---|---|
committer | zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2011-12-19 11:04:45 +0000 |
commit | 1183e193897ab3e03c2cebec978a1a053fcb179d (patch) | |
tree | 7e2f24e08a6c8552114823bd85f33d3c9ae76e58 | |
parent | a8cda0f054f0037a0a961b3e78ce0c5a00ebf63b (diff) | |
download | pcre-1183e193897ab3e03c2cebec978a1a053fcb179d.tar.gz |
fixing existing and adding new byte-order related functions
git-svn-id: svn://vcs.exim.org/pcre/code/branches/pcre16@809 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- | CMakeLists.txt | 4 | ||||
-rw-r--r-- | Makefile.am | 11 | ||||
-rw-r--r-- | NON-UNIX-USE | 2 | ||||
-rwxr-xr-x | PrepareRelease | 2 | ||||
-rw-r--r-- | README | 2 | ||||
-rw-r--r-- | libpcre.a.dev | 2 | ||||
-rw-r--r-- | makevp_c.txt | 2 | ||||
-rw-r--r-- | makevp_l.txt | 2 | ||||
-rw-r--r-- | pcre.h.in | 9 | ||||
-rw-r--r-- | pcre16_byte_order.c (renamed from pcre16_try_flipped.c) | 4 | ||||
-rw-r--r-- | pcre16_utf16_utils.c | 22 | ||||
-rw-r--r-- | pcre_byte_order.c | 286 | ||||
-rw-r--r-- | pcre_dfa_exec.c | 15 | ||||
-rw-r--r-- | pcre_exec.c | 16 | ||||
-rw-r--r-- | pcre_fullinfo.c | 14 | ||||
-rw-r--r-- | pcre_info.c | 14 | ||||
-rw-r--r-- | pcre_internal.h | 10 | ||||
-rw-r--r-- | pcre_jit_compile.c | 1 | ||||
-rw-r--r-- | pcre_try_flipped.c | 139 | ||||
-rw-r--r-- | pcretest.c | 66 | ||||
-rw-r--r-- | sljit/sljitExecAllocator.c | 3 |
21 files changed, 409 insertions, 217 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 784657d..956c09d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -333,7 +333,8 @@ ENDIF(PCRE_REBUILD_CHARTABLES) SET(PCRE_HEADERS ${PROJECT_BINARY_DIR}/pcre.h) SET(PCRE_SOURCES - ${PROJECT_BINARY_DIR}/pcre_chartables.c + ${PROJECT_BINARY_DIR}/pcre_byte_order.c + pcre_chartables.c pcre_compile.c pcre_config.c pcre_dfa_exec.c @@ -349,7 +350,6 @@ SET(PCRE_SOURCES pcre_refcount.c pcre_study.c pcre_tables.c - pcre_try_flipped.c pcre_ucd.c pcre_valid_utf8.c pcre_version.c diff --git a/Makefile.am b/Makefile.am index cc7d6e6..100825d 100644 --- a/Makefile.am +++ b/Makefile.am @@ -176,6 +176,7 @@ endif # WITH_REBUILD_CHARTABLES if WITH_PCRE8 lib_LTLIBRARIES += libpcre.la libpcre_la_SOURCES = \ + pcre_byte_order.c \ pcre_compile.c \ pcre_config.c \ pcre_dfa_exec.c \ @@ -193,7 +194,6 @@ libpcre_la_SOURCES = \ pcre_string_utils.c \ pcre_study.c \ pcre_tables.c \ - pcre_try_flipped.c \ pcre_ucd.c \ pcre_valid_utf8.c \ pcre_version.c \ @@ -210,6 +210,7 @@ endif # WITH_PCRE8 if WITH_PCRE16 lib_LTLIBRARIES += libpcre16.la libpcre16_la_SOURCES = \ + pcre16_byte_order.c \ pcre16_chartables.c \ pcre16_compile.c \ pcre16_config.c \ @@ -227,7 +228,6 @@ libpcre16_la_SOURCES = \ pcre16_string_utils.c \ pcre16_study.c \ pcre16_tables.c \ - pcre16_try_flipped.c \ pcre16_ucd.c \ pcre16_utf16_utils.c \ pcre16_valid_utf16.c \ @@ -423,13 +423,12 @@ test: check ; # A PCRE user submitted the following addition, saying that it "will allow # anyone using the 'mingw32' compiler to simply type 'make pcre.dll' and get a # nice DLL for Windows use". (It is used by the pcre.dll target.) 
-DLL_OBJS= pcre_compile.o pcre_config.o \ +DLL_OBJS= pcre_byte_order.o pcre_compile.o pcre_config.o \ pcre_dfa_exec.o pcre_exec.o pcre_fullinfo.o pcre_get.o \ pcre_globals.o pcre_info.o pcre_jit_compile.o pcre_maketables.o \ pcre_newline.o pcre_ord2utf8.o pcre_refcount.o \ - pcre_study.o pcre_tables.o pcre_try_flipped.o \ - pcre_ucd.o pcre_valid_utf8.o pcre_version.o \ - pcre_chartables.o \ + pcre_study.o pcre_tables.o pcre_ucd.o \ + pcre_valid_utf8.o pcre_version.o pcre_chartables.o \ pcre_xclass.o # A PCRE user submitted the following addition, saying that it "will allow diff --git a/NON-UNIX-USE b/NON-UNIX-USE index 69443e7..a2657e6 100644 --- a/NON-UNIX-USE +++ b/NON-UNIX-USE @@ -97,6 +97,7 @@ hand": option if you have set up config.h with your configuration, or else use other -D settings to change the configuration as required. + pcre_byte_order.c pcre_chartables.c pcre_compile.c pcre_config.c @@ -112,7 +113,6 @@ hand": pcre_refcount.c pcre_study.c pcre_tables.c - pcre_try_flipped.c pcre_ucd.c pcre_valid_utf8.c pcre_version.c diff --git a/PrepareRelease b/PrepareRelease index 7123be6..643318e 100755 --- a/PrepareRelease +++ b/PrepareRelease @@ -194,6 +194,7 @@ files="\ pcreposix.h \ pcre.h.in \ pcre_internal.h + pcre_byte_order.c \ pcre_compile.c \ pcre_config.c \ pcre_dfa_exec.c \ @@ -210,7 +211,6 @@ files="\ pcre_refcount.c \ pcre_study.c \ pcre_tables.c \ - pcre_try_flipped.c \ pcre_ucp_searchfuncs.c \ pcre_valid_utf8.c \ pcre_version.c \ @@ -713,6 +713,7 @@ The distribution should contain the following files: specified, by copying to pcre_chartables.c pcreposix.c ) + pcre_byte_order.c ) pcre_compile.c ) pcre_config.c ) pcre_dfa_exec.c ) @@ -728,7 +729,6 @@ The distribution should contain the following files: pcre_refcount.c ) pcre_study.c ) pcre_tables.c ) - pcre_try_flipped.c ) pcre_ucd.c ) pcre_valid_utf8.c ) pcre_version.c ) diff --git a/libpcre.a.dev b/libpcre.a.dev index eafe9ac..33a8f0c 100644 --- a/libpcre.a.dev +++ b/libpcre.a.dev @@ -190,7 +190,7 
@@ OverrideBuildCmd=0 BuildCmd= [Unit17] -FileName=pcre_try_flipped.c +FileName=pcre_byte_order.c CompileCpp=0 Folder=libpcre.a Compile=1 diff --git a/makevp_c.txt b/makevp_c.txt index 931b8ab..a7cf8a0 100644 --- a/makevp_c.txt +++ b/makevp_c.txt @@ -1,3 +1,4 @@ +pcre_byte_order.c
pcre_chartables.c
pcre_compile.c
pcre_config.c
@@ -13,7 +14,6 @@ pcre_ord2utf8.c pcre_refcount.c
pcre_study.c
pcre_tables.c
-pcre_try_flipped.c
pcre_ucd.c
pcre_valid_utf8.c
pcre_version.c
diff --git a/makevp_l.txt b/makevp_l.txt index 6de1cb4..5d3c70c 100644 --- a/makevp_l.txt +++ b/makevp_l.txt @@ -1,3 +1,4 @@ ++pcre_byte_order.obj &
+pcre_chartables.obj &
+pcre_compile.obj &
+pcre_config.obj &
@@ -13,7 +14,6 @@ +pcre_refcount.obj &
+pcre_study.obj &
+pcre_tables.obj &
-+pcre_try_flipped.obj &
+pcre_ucd.obj &
+pcre_valid_utf8.obj &
+pcre_version.obj &
@@ -175,6 +175,7 @@ compiling). */ #define PCRE_ERROR_RECURSELOOP (-26) #define PCRE_ERROR_JIT_STACKLIMIT (-27) #define PCRE_ERROR_BADMODE (-28) +#define PCRE_ERROR_BADENDIANNESS (-29) /* Specific error codes for UTF-8 validity checks */ @@ -432,9 +433,13 @@ PCRE_EXP_DECL void pcre16_free_study(pcre_extra *); PCRE_EXP_DECL const char *pcre_version(void); PCRE_EXP_DECL const char *pcre16_version(void); -/* Utility functions. */ +/* Utility functions for byte order swaps. */ +PCRE_EXP_DECL int pcre_pattern_to_host_byte_order(pcre *, pcre_extra *, + const unsigned char *); +PCRE_EXP_DECL int pcre16_pattern_to_host_byte_order(pcre *, pcre_extra *, + const unsigned char *); PCRE_EXP_DECL int pcre16_utf16_to_host_byte_order(PCRE_SCHAR16 *, - PCRE_SPTR16, int, int); + PCRE_SPTR16, int, int *, int); /* JIT compiler related functions. */ diff --git a/pcre16_try_flipped.c b/pcre16_byte_order.c index b646f28..622c0ab 100644 --- a/pcre16_try_flipped.c +++ b/pcre16_byte_order.c @@ -40,6 +40,6 @@ POSSIBILITY OF SUCH DAMAGE. /* Generate code with 16 bit character support. */ #define COMPILE_PCRE16 -#include "pcre_try_flipped.c" +#include "pcre_byte_order.c" -/* End of pcre16_try_flipped.c */ +/* End of pcre16_byte_order.c */ diff --git a/pcre16_utf16_utils.c b/pcre16_utf16_utils.c index b927458..358376f 100644 --- a/pcre16_utf16_utils.c +++ b/pcre16_utf16_utils.c @@ -67,6 +67,13 @@ Arguments: input any UTF-16 string length the number of characters in the input string can be less than zero for zero terminated strings + host_byte_order + A non-zero value means the input is in host byte + order, which can be dynamically changed by BOMs later. + Initially it contains the starting byte order and returns + with the last byte order so it can be used for stream + processing. It can be NULL, which sets the host byte + order mode by default.
keep_boms for a non-zero value, the BOM (0xfeff) characters are copied as well @@ -75,12 +82,13 @@ Returns: the number of characters placed into the output buffer, */ int -pcre16_utf16_to_host_byte_order(PCRE_SCHAR16 *output, PCRE_SPTR16 input, int length, int keep_boms) +pcre16_utf16_to_host_byte_order(PCRE_SCHAR16 *output, PCRE_SPTR16 input, + int length, int *host_byte_order, int keep_boms) { #ifdef SUPPORT_UTF -/* This function converts any UTF-16 string to host byte order and optionally removes -any Byte Order Marks (BOMS). Returns with the remaining length. */ -BOOL same_bo = TRUE; +/* This function converts any UTF-16 string to host byte order and optionally +removes any Byte Order Marks (BOMS). Returns with the remaining length. */ +int host_bo = host_byte_order != NULL ? *host_byte_order : 1; pcre_uchar *optr = (pcre_uchar *)output; const pcre_uchar *iptr = (const pcre_uchar *)input; const pcre_uchar *end; @@ -98,15 +106,17 @@ while (iptr < end) { /* Detecting the byte order of the machine is unnecessary, it is enough to know that the UTF-16 string has the same byte order or not. */ - same_bo = c == 0xfeff; + host_bo = c == 0xfeff; if (keep_boms != 0) *optr++ = 0xfeff; else length--; } else - *optr++ = same_bo ? c : ((c >> 8) | (c << 8)); /* Flip bytes if needed. */ + *optr++ = host_bo ? c : ((c >> 8) | (c << 8)); /* Flip bytes if needed. */ } +if (host_byte_order != NULL) + *host_byte_order = host_bo; #else /* SUPPORT_UTF */ (void)(output); /* Keep picky compilers happy */ diff --git a/pcre_byte_order.c b/pcre_byte_order.c new file mode 100644 index 0000000..286dd95 --- /dev/null +++ b/pcre_byte_order.c @@ -0,0 +1,286 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language.
+ + Written by Philip Hazel + Copyright (c) 1997-2009 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + + +/* This module contains an internal function that tests a compiled pattern to +see if it was compiled with the opposite endianness. If so, it uses an +auxiliary local function to flip the appropriate bytes. 
*/ + + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "pcre_internal.h" + + +/************************************************* +* Swap byte functions * +*************************************************/ + +/* The following functions swap the bytes of a pcre_uint16 +and pcre_uint32 value. + +Arguments: + value any number + +Returns: the byte swapped value +*/ + +static pcre_uint32 +swap_uint32(pcre_uint32 value) +{ +return ((value & 0x000000ff) << 24) | + ((value & 0x0000ff00) << 8) | + ((value & 0x00ff0000) >> 8) | + (value >> 24); +} + +static pcre_uint16 +swap_uint16(pcre_uint16 value) +{ +return (value >> 8) | (value << 8); +} + + +/************************************************* +* Test for a byte-flipped compiled regex * +*************************************************/ + +/* This function swaps the bytes of a compiled pattern usually +loaded from the disk. It also sets the tables pointer, which +is likely an invalid pointer after reload. + +Arguments: + argument_re points to the compiled expression + extra_data points to extra data or is NULL + tables points to the character tables or NULL + +Returns: 0 if the swap is successful, negative on error +*/ + +#ifdef COMPILE_PCRE8 +PCRE_EXP_DECL int pcre_pattern_to_host_byte_order(pcre *argument_re, + pcre_extra *extra_data, const unsigned char *tables) +#else +PCRE_EXP_DECL int pcre16_pattern_to_host_byte_order(pcre *argument_re, + pcre_extra *extra_data, const unsigned char *tables) +#endif +{ +real_pcre *re = (real_pcre *)argument_re; +pcre_study_data *study; +#ifndef COMPILE_PCRE8 +pcre_uchar *ptr; +int length; +#ifdef SUPPORT_UTF +BOOL utf; +BOOL utf16_char; +#endif /* SUPPORT_UTF */ +#endif /* !COMPILE_PCRE8 */ + +if (re == NULL) return PCRE_ERROR_NULL; +if (re->magic_number == MAGIC_NUMBER) + { + if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE; + re->tables = tables; + return 0; + } + +if (re->magic_number != REVERSED_MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC; +if
((swap_uint16(re->flags) & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE; + +re->magic_number = MAGIC_NUMBER; +re->size = swap_uint32(re->size); +re->options = swap_uint32(re->options); +re->flags = swap_uint16(re->flags); +re->top_bracket = swap_uint16(re->top_bracket); +re->top_backref = swap_uint16(re->top_backref); +re->first_char = swap_uint16(re->first_char); +re->req_char = swap_uint16(re->req_char); +re->name_table_offset = swap_uint16(re->name_table_offset); +re->name_entry_size = swap_uint16(re->name_entry_size); +re->name_count = swap_uint16(re->name_count); +re->ref_count = swap_uint16(re->ref_count); + +if (extra_data != NULL && (re->flags & PCRE_EXTRA_STUDY_DATA) != 0) + { + study = (pcre_study_data *)extra_data->study_data; + study->size = swap_uint32(study->size); + study->flags = swap_uint32(study->flags); + study->minlength = swap_uint32(study->minlength); + } + +#ifndef COMPILE_PCRE8 +ptr = (pcre_uchar *)re + re->name_table_offset; +length = re->name_count * re->name_entry_size; +#ifdef SUPPORT_UTF +utf = (re->options & PCRE_UTF16) != 0; +utf16_char = FALSE; +#endif + +while(TRUE) + { + /* Swap previous characters. */ + while (length-- > 0) + { + *ptr = swap_uint16(*ptr); + ptr++; + } +#ifdef SUPPORT_UTF + if (utf16_char) + { + if (HAS_EXTRALEN(ptr[-1])) + { + /* We know that there is only one extra character in UTF-16. */ + *ptr = swap_uint16(*ptr); + ptr++; + } + } + utf16_char = FALSE; +#endif /* SUPPORT_UTF */ + + /* Get next opcode. 
*/ + length = 0; + *ptr = swap_uint16(*ptr); + switch (*ptr) + { + case OP_END: + return 0; + + case OP_CHAR: + case OP_CHARI: + case OP_NOT: + case OP_NOTI: + case OP_STAR: + case OP_MINSTAR: + case OP_PLUS: + case OP_MINPLUS: + case OP_QUERY: + case OP_MINQUERY: + case OP_UPTO: + case OP_MINUPTO: + case OP_EXACT: + case OP_POSSTAR: + case OP_POSPLUS: + case OP_POSQUERY: + case OP_POSUPTO: + case OP_STARI: + case OP_MINSTARI: + case OP_PLUSI: + case OP_MINPLUSI: + case OP_QUERYI: + case OP_MINQUERYI: + case OP_UPTOI: + case OP_MINUPTOI: + case OP_EXACTI: + case OP_POSSTARI: + case OP_POSPLUSI: + case OP_POSQUERYI: + case OP_POSUPTOI: + case OP_NOTSTAR: + case OP_NOTMINSTAR: + case OP_NOTPLUS: + case OP_NOTMINPLUS: + case OP_NOTQUERY: + case OP_NOTMINQUERY: + case OP_NOTUPTO: + case OP_NOTMINUPTO: + case OP_NOTEXACT: + case OP_NOTPOSSTAR: + case OP_NOTPOSPLUS: + case OP_NOTPOSQUERY: + case OP_NOTPOSUPTO: + case OP_NOTSTARI: + case OP_NOTMINSTARI: + case OP_NOTPLUSI: + case OP_NOTMINPLUSI: + case OP_NOTQUERYI: + case OP_NOTMINQUERYI: + case OP_NOTUPTOI: + case OP_NOTMINUPTOI: + case OP_NOTEXACTI: + case OP_NOTPOSSTARI: + case OP_NOTPOSPLUSI: + case OP_NOTPOSQUERYI: + case OP_NOTPOSUPTOI: + utf16_char = TRUE; + length = PRIV(OP_lengths)[*ptr] - 1; + break; + + case OP_CLASS: + case OP_NCLASS: + /* Skip the character bit map. */ + ptr += 32/sizeof(pcre_uchar); + length = 0; + break; + + case OP_XCLASS: + /* Reverse the size of the XCLASS instance. */ + ptr++; + *ptr = swap_uint16(*ptr); + if (LINK_SIZE > 1) + { + /* LINK_SIZE can be 1 or 2 in 16 bit mode. */ + ptr++; + *ptr = swap_uint16(*ptr); + } + ptr++; + length = (GET(ptr, -LINK_SIZE)) - (1 + LINK_SIZE + 1); + *ptr = swap_uint16(*ptr); + if ((*ptr & XCL_MAP) != 0) + { + /* Skip the character bit map. */ + ptr += 32/sizeof(pcre_uchar); + length -= 32/sizeof(pcre_uchar); + } + break; + + default: + length = PRIV(OP_lengths)[*ptr] - 1; + break; + } + ptr++; + } +/* Control should never reach here in 16 bit mode. 
*/ +#endif /* !COMPILE_PCRE8 */ + +return 0; +} + +/* End of pcre_byte_order.c */ diff --git a/pcre_dfa_exec.c b/pcre_dfa_exec.c index a7d2037..6356e46 100644 --- a/pcre_dfa_exec.c +++ b/pcre_dfa_exec.c @@ -3019,9 +3019,7 @@ BOOL utf, anchored, startline, firstline; const pcre_uchar *current_subject, *end_subject; const pcre_uint8 *lcc; -pcre_study_data internal_study; const pcre_study_data *study = NULL; -real_pcre internal_re; const pcre_uchar *req_char_ptr; const pcre_uint8 *start_bits = NULL; @@ -3065,16 +3063,13 @@ if (extra_data != NULL) } /* Check that the first field in the block is the magic number. If it is not, -test for a regex that was compiled on a host of opposite endianness. If this is -the case, flipped values are put in internal_re and internal_study if there was -study data too. */ +return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to +REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which +means that the pattern is likely compiled with different endianness. */ if (re->magic_number != MAGIC_NUMBER) - { - re = PRIV(try_flipped)(re, &internal_re, study, &internal_study); - if (re == NULL) return PCRE_ERROR_BADMAGIC; - if (study != NULL) study = &internal_study; - } + return re->magic_number == REVERSED_MAGIC_NUMBER? + PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC; if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE; /* Set some local values */ diff --git a/pcre_exec.c b/pcre_exec.c index 3631845..d0984e6 100644 --- a/pcre_exec.c +++ b/pcre_exec.c @@ -6041,10 +6041,7 @@ PCRE_PUCHAR end_subject; PCRE_PUCHAR start_partial = NULL; PCRE_PUCHAR req_char_ptr = start_match - 1; -pcre_study_data internal_study; const pcre_study_data *study; - -real_pcre internal_re; const real_pcre *external_re = (const real_pcre *)argument_re; const real_pcre *re = external_re; @@ -6151,16 +6148,13 @@ in other programs later. 
*/ if (tables == NULL) tables = PRIV(default_tables); /* Check that the first field in the block is the magic number. If it is not, -test for a regex that was compiled on a host of opposite endianness. If this is -the case, flipped values are put in internal_re and internal_study if there was -study data too. */ +return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to +REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which +means that the pattern is likely compiled with different endianness. */ if (re->magic_number != MAGIC_NUMBER) - { - re = PRIV(try_flipped)(re, &internal_re, study, &internal_study); - if (re == NULL) return PCRE_ERROR_BADMAGIC; - if (study != NULL) study = &internal_study; - } + return re->magic_number == REVERSED_MAGIC_NUMBER? + PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC; if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE; /* Set up other data */ diff --git a/pcre_fullinfo.c b/pcre_fullinfo.c index 39ee028..f0f6b21 100644 --- a/pcre_fullinfo.c +++ b/pcre_fullinfo.c @@ -75,8 +75,6 @@ pcre16_fullinfo(const pcre *argument_re, const pcre_extra *extra_data, int what, void *where) #endif { -real_pcre internal_re; -pcre_study_data internal_study; const real_pcre *re = (const real_pcre *)argument_re; const pcre_study_data *study = NULL; @@ -85,12 +83,14 @@ if (re == NULL || where == NULL) return PCRE_ERROR_NULL; if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_STUDY_DATA) != 0) study = (const pcre_study_data *)extra_data->study_data; +/* Check that the first field in the block is the magic number. If it is not, +return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to +REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which +means that the pattern is likely compiled with different endianness. 
*/ + if (re->magic_number != MAGIC_NUMBER) - { - re = PRIV(try_flipped)(re, &internal_re, study, &internal_study); - if (re == NULL) return PCRE_ERROR_BADMAGIC; - if (study != NULL) study = &internal_study; - } + return re->magic_number == REVERSED_MAGIC_NUMBER? + PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC; if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE; switch (what) diff --git a/pcre_info.c b/pcre_info.c index 52d593a..498c442 100644 --- a/pcre_info.c +++ b/pcre_info.c @@ -80,15 +80,19 @@ PCRE_EXP_DEFN int PCRE_CALL_CONVENTION pcre16_info(const pcre *argument_re, int *optptr, int *first_char) #endif { -real_pcre internal_re; const real_pcre *re = (const real_pcre *)argument_re; if (re == NULL) return PCRE_ERROR_NULL; + +/* Check that the first field in the block is the magic number. If it is not, +return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to +REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which +means that the pattern is likely compiled with different endianness. */ + if (re->magic_number != MAGIC_NUMBER) - { - re = PRIV(try_flipped)(re, &internal_re, NULL, NULL); - if (re == NULL) return PCRE_ERROR_BADMAGIC; - } + return re->magic_number == REVERSED_MAGIC_NUMBER? + PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC; if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE; + if (optptr != NULL) *optptr = (int)(re->options & PUBLIC_COMPILE_OPTIONS); if (first_char != NULL) *first_char = ((re->flags & PCRE_FIRSTSET) != 0)? re->first_char : diff --git a/pcre_internal.h b/pcre_internal.h index 6453fbd..b437df5 100644 --- a/pcre_internal.h +++ b/pcre_internal.h @@ -889,11 +889,15 @@ time, run time, or study time, respectively. */ #define PUBLIC_STUDY_OPTIONS \ PCRE_STUDY_JIT_COMPILE -/* Magic number to provide a small check against being handed junk. Also used -to detect whether a pattern was compiled on a host of different endianness. */ +/* Magic number to provide a small check against being handed junk. 
*/ #define MAGIC_NUMBER 0x50435245UL /* 'PCRE' */ +/* This variable is used to detect a loaded regular expression +in different endianness. */ + +#define REVERSED_MAGIC_NUMBER 0x45524350UL /* 'ERCP' */ + /* Negative values for the firstchar and reqchar variables */ #define REQ_UNSET (-2) @@ -2264,8 +2268,6 @@ extern const pcre_uchar *PRIV(find_bracket)(const pcre_uchar *, BOOL, int); extern BOOL PRIV(is_newline)(PCRE_PUCHAR, int, PCRE_PUCHAR, int *, BOOL); extern int PRIV(ord2utf)(pcre_uint32, pcre_uchar *); -extern real_pcre *PRIV(try_flipped)(const real_pcre *, real_pcre *, - const pcre_study_data *, pcre_study_data *); extern int PRIV(valid_utf)(PCRE_PUCHAR, int, int *); extern BOOL PRIV(was_newline)(PCRE_PUCHAR, int, PCRE_PUCHAR, int *, BOOL); diff --git a/pcre_jit_compile.c b/pcre_jit_compile.c index bcdbd5a..2543fef 100644 --- a/pcre_jit_compile.c +++ b/pcre_jit_compile.c @@ -510,7 +510,6 @@ switch(*cc) case OP_CHARI: case OP_NOT: case OP_NOTI: - case OP_STAR: case OP_MINSTAR: case OP_PLUS: diff --git a/pcre_try_flipped.c b/pcre_try_flipped.c deleted file mode 100644 index d09a10f..0000000 --- a/pcre_try_flipped.c +++ /dev/null @@ -1,139 +0,0 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - -/* PCRE is a library of functions to support regular expressions whose syntax -and semantics are as close as possible to those of the Perl 5 language. - - Written by Philip Hazel - Copyright (c) 1997-2009 University of Cambridge - ------------------------------------------------------------------------------ -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. 
- - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of the University of Cambridge nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. ------------------------------------------------------------------------------ -*/ - - -/* This module contains an internal function that tests a compiled pattern to -see if it was compiled with the opposite endianness. If so, it uses an -auxiliary local function to flip the appropriate bytes. */ - - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "pcre_internal.h" - - -/************************************************* -* Flip bytes in an integer * -*************************************************/ - -/* This function is called when the magic number in a regex doesn't match, in -order to flip its bytes to see if we are dealing with a pattern that was -compiled on a host of different endianness. If so, this function is used to -flip other byte values. 
- -Arguments: - value the number to flip - n the number of bytes to flip (assumed to be 2 or 4) - -Returns: the flipped value -*/ - -static unsigned long int -byteflip(unsigned long int value, int n) -{ -if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8); -return ((value & 0x000000ff) << 24) | - ((value & 0x0000ff00) << 8) | - ((value & 0x00ff0000) >> 8) | - ((value & 0xff000000) >> 24); -} - - - -/************************************************* -* Test for a byte-flipped compiled regex * -*************************************************/ - -/* This function is called from pcre_exec(), pcre_dfa_exec(), and also from -pcre_fullinfo(). Its job is to test whether the regex is byte-flipped - that -is, it was compiled on a system of opposite endianness. The function is called -only when the native MAGIC_NUMBER test fails. If the regex is indeed flipped, -we flip all the relevant values into a different data block, and return it. - -Arguments: - re points to the regex - study points to study data, or NULL - internal_re points to a new regex block - internal_study points to a new study block - -Returns: the new block if is is indeed a byte-flipped regex - NULL if it is not -*/ - -real_pcre * -PRIV(try_flipped)(const real_pcre *re, real_pcre *internal_re, - const pcre_study_data *study, pcre_study_data *internal_study) -{ -if (byteflip(re->magic_number, sizeof(re->magic_number)) != MAGIC_NUMBER) - return NULL; - -*internal_re = *re; /* To copy other fields */ -internal_re->size = byteflip(re->size, sizeof(re->size)); -internal_re->options = byteflip(re->options, sizeof(re->options)); -internal_re->flags = (pcre_uint16)byteflip(re->flags, sizeof(re->flags)); -internal_re->top_bracket = - (pcre_uint16)byteflip(re->top_bracket, sizeof(re->top_bracket)); -internal_re->top_backref = - (pcre_uint16)byteflip(re->top_backref, sizeof(re->top_backref)); -internal_re->first_char = - (pcre_uint16)byteflip(re->first_char, sizeof(re->first_char)); 
-internal_re->req_char = - (pcre_uint16)byteflip(re->req_char, sizeof(re->req_char)); -internal_re->name_table_offset = - (pcre_uint16)byteflip(re->name_table_offset, sizeof(re->name_table_offset)); -internal_re->name_entry_size = - (pcre_uint16)byteflip(re->name_entry_size, sizeof(re->name_entry_size)); -internal_re->name_count = - (pcre_uint16)byteflip(re->name_count, sizeof(re->name_count)); - -if (study != NULL) - { - *internal_study = *study; /* To copy other fields */ - internal_study->size = byteflip(study->size, sizeof(study->size)); - internal_study->flags = byteflip(study->flags, sizeof(study->flags)); - internal_study->minlength = byteflip(study->minlength, - sizeof(study->minlength)); - } - -return internal_re; -} - -/* End of pcre_tryflipped.c */ @@ -197,7 +197,11 @@ use these in the definitions of generic macros. */ #define PCRE_STUDY8(extra, re, options, error) \ extra = pcre_study(re, options, error) -#endif + +#define PCRE_FREE_STUDY8(extra) \ + pcre_free_study(extra) + +#endif /* SUPPORT_PCRE8 */ #ifdef SUPPORT_PCRE16 @@ -217,7 +221,11 @@ use these in the definitions of generic macros. */ #define PCRE_STUDY16(extra, re, options, error) \ extra = pcre16_study(re, options, error) -#endif + +#define PCRE_FREE_STUDY16(extra) \ + pcre16_free_study(extra) + +#endif /* SUPPORT_PCRE16 */ /* ----- Both modes are supported; a runtime test is needed ----- */ @@ -257,23 +265,31 @@ use these in the definitions of generic macros. 
*/ else \ PCRE_STUDY8(extra, re, options, error) +#define PCRE_FREE_STUDY(extra) \ + if (use_pcre16) \ + PCRE_FREE_STUDY16(extra); \ + else \ + PCRE_FREE_STUDY8(extra) + /* ----- Only 8-bit mode is supported ----- */ #elif defined SUPPORT_PCRE8 -#define PCHARS PCHARS8 -#define PCHARSV PCHARSV8 -#define PCRE_COMPILE PCRE_COMPILE8 -#define PCRE_EXEC PCRE_EXEC8 -#define PCRE_STUDY PCRE_STUDY8 +#define PCHARS PCHARS8 +#define PCHARSV PCHARSV8 +#define PCRE_COMPILE PCRE_COMPILE8 +#define PCRE_EXEC PCRE_EXEC8 +#define PCRE_STUDY PCRE_STUDY8 +#define PCRE_FREE_STUDY PCRE_FREE_STUDY8 /* ----- Only 16-bit mode is supported ----- */ #else -#define PCHARS PCHARS16 -#define PCHARSV PCHARSV16 -#define PCRE_COMPILE PCRE_COMPILE16 -#define PCRE_EXEC PCRE_EXEC16 -#define PCRE_STUDY PCRE_STUDY16 +#define PCHARS PCHARS16 +#define PCHARSV PCHARSV16 +#define PCRE_COMPILE PCRE_COMPILE16 +#define PCRE_EXEC PCRE_EXEC16 +#define PCRE_STUDY PCRE_STUDY16 +#define PCRE_FREE_STUDY PCRE_FREE_STUDY16 #endif /* ----- End of mode-specific function call macros ----- */ @@ -1861,7 +1877,10 @@ while (!done) { FAIL_READ: fprintf(outfile, "Failed to read data from %s\n", p); - if (extra != NULL) pcre_free_study(extra); + if (extra != NULL) + { + PCRE_FREE_STUDY(extra); + } if (re != NULL) new_free(re); fclose(f); continue; @@ -2192,7 +2211,10 @@ while (!done) PCRE_STUDY(extra, re, study_options | force_study_options, &error); } time_taken = clock() - start_time; - if (extra != NULL) pcre_free_study(extra); + if (extra != NULL) + { + PCRE_FREE_STUDY(extra); + } fprintf(outfile, " Study time %.4f milliseconds\n", (((double)time_taken * 1000.0) / (double)timeit) / (double)CLOCKS_PER_SEC); @@ -2270,10 +2292,16 @@ while (!done) if (do_debug) { fprintf(outfile, "------------------------------------------------------------------\n"); +#if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16 if (use_pcre16) pcre16_printint(re, outfile, debug_lengths); else pcre_printint(re, outfile, debug_lengths); +#elif defined 
SUPPORT_PCRE8 + pcre_printint(re, outfile, debug_lengths); +#else + pcre16_printint(re, outfile, debug_lengths); +#endif } /* We already have the options in get_options (see above) */ @@ -2558,7 +2586,10 @@ while (!done) } new_free(re); - if (extra != NULL) pcre_free_study(extra); + if (extra != NULL) + { + PCRE_FREE_STUDY(extra); + } if (locale_set) { new_free((void *)tables); @@ -3424,7 +3455,10 @@ while (!done) #endif if (re != NULL) new_free(re); - if (extra != NULL) pcre_free_study(extra); + if (extra != NULL) + { + PCRE_FREE_STUDY(extra); + } if (locale_set) { new_free((void *)tables); diff --git a/sljit/sljitExecAllocator.c b/sljit/sljitExecAllocator.c index cdea346..f613d02 100644 --- a/sljit/sljitExecAllocator.c +++ b/sljit/sljitExecAllocator.c @@ -263,8 +263,11 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void* ptr) header->prev_size = free_block->size; } + /* The whole chunk is free. */ if (SLJIT_UNLIKELY(!free_block->header.prev_size && header->size == 1)) { + /* If this block is freed, we still have (allocated_size / 2) free space. */ if (total_size - free_block->size > (allocated_size * 3 / 2)) { + total_size -= free_block->size; sljit_remove_free_block(free_block); free_chunk(free_block, free_block->size + sizeof(struct block_header)); } |