diff options
author | zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2011-11-26 12:48:56 +0000 |
---|---|---|
committer | zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2011-11-26 12:48:56 +0000 |
commit | af0099785014bcb1968b7665a8a6f85cd299bb8f (patch) | |
tree | e46ed87c8614436a91016636e6ce2fe47a294a2a | |
parent | 950a27a0fd444ac862e956d0438d2d196519bd1a (diff) | |
download | pcre-af0099785014bcb1968b7665a8a6f85cd299bb8f.tar.gz |
Make simple patterns work in PCRE16
git-svn-id: svn://vcs.exim.org/pcre/code/branches/pcre16@767 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- | pcre.h.in | 11 | ||||
-rw-r--r-- | pcre_compile.c | 45 | ||||
-rw-r--r-- | pcre_exec.c | 16 | ||||
-rw-r--r-- | pcre_internal.h | 27 | ||||
-rw-r--r-- | pcre_jit_compile.c | 193 | ||||
-rw-r--r-- | pcre_printint.src | 23 | ||||
-rw-r--r-- | pcre_study.c | 12 | ||||
-rw-r--r-- | sljit/sljitConfigInternal.h | 4 | ||||
-rw-r--r-- | sljit/sljitExecAllocator.c | 4 |
9 files changed, 240 insertions, 95 deletions
@@ -361,6 +361,8 @@ PCRE_EXP_DECL int pcre_dfa_exec(const pcre *, const pcre_extra *, const char *, int, int, int, int *, int , int *, int); PCRE_EXP_DECL int pcre_exec(const pcre *, const pcre_extra *, PCRE_SPTR, int, int, int, int *, int); +PCRE_EXP_DECL int pcre16_exec(const pcre *, const pcre_extra *, PCRE_SPTR16, + int, int, int, int *, int); PCRE_EXP_DECL void pcre_free_substring(const char *); PCRE_EXP_DECL void pcre_free_substring_list(const char **); PCRE_EXP_DECL int pcre_fullinfo(const pcre *, const pcre_extra *, int, @@ -380,14 +382,21 @@ PCRE_EXP_DECL int pcre_refcount(pcre *, int); PCRE_EXP_DECL int pcre16_utf16_to_host_byte_order(PCRE_SCHAR16 *, PCRE_SPTR16, int, int); PCRE_EXP_DECL pcre_extra *pcre_study(const pcre *, int, const char **); +PCRE_EXP_DECL pcre_extra *pcre16_study(const pcre *, int, const char **); PCRE_EXP_DECL void pcre_free_study(pcre_extra *); +PCRE_EXP_DECL void pcre16_free_study(pcre_extra *); PCRE_EXP_DECL const char *pcre_version(void); /* JIT compiler related functions. */ PCRE_EXP_DECL pcre_jit_stack *pcre_jit_stack_alloc(int, int); +PCRE_EXP_DECL pcre_jit_stack *pcre16_jit_stack_alloc(int, int); PCRE_EXP_DECL void pcre_jit_stack_free(pcre_jit_stack *); -PCRE_EXP_DECL void pcre_assign_jit_stack(pcre_extra *, pcre_jit_callback, void *); +PCRE_EXP_DECL void pcre16_jit_stack_free(pcre_jit_stack *); +PCRE_EXP_DECL void pcre_assign_jit_stack(pcre_extra *, + pcre_jit_callback, void *); +PCRE_EXP_DECL void pcre16_assign_jit_stack(pcre_extra *, + pcre_jit_callback, void *); #ifdef __cplusplus } /* extern "C" */ diff --git a/pcre_compile.c b/pcre_compile.c index 38cd815..b0e6367 100644 --- a/pcre_compile.c +++ b/pcre_compile.c @@ -3419,7 +3419,7 @@ for (;; ptr++) { if (previous > orig_code) { - memmove(orig_code, previous, code - previous); + memmove(orig_code, previous, IN_UCHARS(code - previous)); code -= previous - orig_code; previous = orig_code; } @@ -4484,7 +4484,7 @@ for (;; ptr++) if (*previous == OP_RECURSE) { - memmove(previous + 1 + LINK_SIZE, previous, 1 + LINK_SIZE); + memmove(previous + 1 + LINK_SIZE, previous, IN_UCHARS(1 + LINK_SIZE)); *previous = OP_ONCE; PUT(previous, 1, 2 + 2*LINK_SIZE); previous[2 + 2*LINK_SIZE] = OP_KET; @@ -4862,7 +4862,7 @@ for (;; ptr++) { *code = OP_END; adjust_recurse(previous, 1, utf8, cd, save_hwm); - memmove(previous+1, previous, len); + memmove(previous + 1, previous, IN_UCHARS(len)); code++; if (repeat_max == 0) { @@ -4886,7 +4886,7 @@ for (;; ptr++) int offset; *code = OP_END; adjust_recurse(previous, 2 + LINK_SIZE, utf8, cd, save_hwm); - memmove(previous + 2 + LINK_SIZE, previous, len); + memmove(previous + 2 + LINK_SIZE, previous, IN_UCHARS(len)); code += 2 + LINK_SIZE; *previous++ = OP_BRAZERO + repeat_type; *previous++ = OP_BRA; @@ -4941,7 +4941,7 @@ for (;; ptr++) { pcre_uchar *hc; pcre_uchar *this_hwm = cd->hwm; - memcpy(code, previous, len); + memcpy(code, previous, IN_UCHARS(len)); for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE) { PUT(cd->hwm, 0, GET(hc, 0) + len); @@ -5008,7 +5008,7 @@ for (;; ptr++) PUTINC(code, 0, offset); } - memcpy(code, previous, len); + memcpy(code, previous, IN_UCHARS(len)); for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE) { PUT(cd->hwm, 0, GET(hc, 0) + len + ((i != 0)? 2+LINK_SIZE : 1)); @@ -5111,7 +5111,7 @@ for (;; ptr++) int nlen = (int)(code - bracode); *code = OP_END; adjust_recurse(bracode, 1 + LINK_SIZE, utf8, cd, save_hwm); - memmove(bracode + 1+LINK_SIZE, bracode, nlen); + memmove(bracode + 1 + LINK_SIZE, bracode, IN_UCHARS(nlen)); code += 1 + LINK_SIZE; nlen += 1 + LINK_SIZE; *bracode = OP_BRAPOS; @@ -5226,7 +5226,7 @@ for (;; ptr++) default: *code = OP_END; adjust_recurse(tempcode, 1 + LINK_SIZE, utf8, cd, save_hwm); - memmove(tempcode + 1+LINK_SIZE, tempcode, len); + memmove(tempcode + 1 + LINK_SIZE, tempcode, IN_UCHARS(len)); code += 1 + LINK_SIZE; len += 1 + LINK_SIZE; tempcode[0] = OP_ONCE; @@ -5343,7 +5343,7 @@ for (;; ptr++) *code = verbs[i].op_arg; if (*code++ == OP_THEN_ARG) cd->external_flags |= PCRE_HASTHEN; *code++ = arglen; - memcpy(code, arg, arglen); + memcpy(code, arg, IN_UCHARS(arglen)); code += arglen; *code++ = 0; } @@ -5779,7 +5779,7 @@ for (;; ptr++) if (crc < 0) { memmove(slot + cd->name_entry_size, slot, - (cd->names_found - i) * cd->name_entry_size); + IN_UCHARS((cd->names_found - i) * cd->name_entry_size)); break; } @@ -5810,8 +5810,8 @@ for (;; ptr++) } PUT2(slot, 0, cd->bracount + 1); - memcpy(slot + 2, name, namelen); - slot[2+namelen] = 0; + memcpy(slot + 2, name, IN_UCHARS(namelen)); + slot[2 + namelen] = 0; } } @@ -6877,7 +6877,7 @@ for (;;) if (cd->open_caps->flag) { memmove(start_bracket + 1 + LINK_SIZE, start_bracket, - code - start_bracket); + IN_UCHARS(code - start_bracket)); *start_bracket = OP_ONCE; code += 1 + LINK_SIZE; PUT(start_bracket, 1, (int)(code - start_bracket)); @@ -7247,7 +7247,7 @@ Returns: pointer to compiled data block, or NULL on error, with errorptr and erroroffset set */ -#ifndef COMPILE_PCRE16 +#ifdef COMPILE_PCRE8 PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION pcre_compile(const char *pattern, int options, const char **errorptr, int *erroroffset, const unsigned char *tables) @@ -7257,7 +7257,7 @@ pcre16_compile(PCRE_SPTR16 pattern, int options, const char **errorptr, int *erroroffset, const unsigned char *tables) #endif { -#ifndef COMPILE_PCRE16 +#ifdef COMPILE_PCRE8 return pcre_compile2(pattern, options, NULL, errorptr, erroroffset, tables); #else return pcre16_compile2(pattern, options, NULL, errorptr, erroroffset, tables); @@ -7265,7 +7265,7 @@ return pcre16_compile2(pattern, options, NULL, errorptr, erroroffset, tables); } -#ifndef COMPILE_PCRE16 +#ifdef COMPILE_PCRE8 PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION pcre_compile2(const char *pattern, int options, int *errorcodeptr, const char **errorptr, int *erroroffset, const unsigned char *tables) @@ -7469,7 +7469,10 @@ cd->backref_map = 0; /* Reflect pattern for debugging output */ DPRINTF(("------------------------------------------------------------------\n")); -DPRINTF(("%s\n", pattern)); +#ifdef PCRE_DEBUG +print_puchar(stdout, (PCRE_PUCHAR)pattern); +#endif +DPRINTF(("\n")); /* Pretend to compile the pattern while actually just accumulating the length of memory required. This behaviour is triggered by passing a non-NULL final @@ -7486,7 +7489,7 @@ cd->start_workspace = cworkspace; cd->start_code = cworkspace; cd->hwm = cworkspace; cd->start_pattern = (const pcre_uchar *)pattern; -cd->end_pattern = (const pcre_uchar *)(pattern + STRLEN_UC(pattern)); +cd->end_pattern = (const pcre_uchar *)(pattern + STRLEN_UC((const pcre_uchar *)pattern)); cd->req_varyopt = 0; cd->external_options = options; cd->external_flags = 0; @@ -7506,7 +7509,7 @@ code = cworkspace; if (errorcode != 0) goto PCRE_EARLY_ERROR_RETURN; DPRINTF(("end pre-compile: length=%d workspace=%d\n", length, - cd->hwm - cworkspace)); + (int)(cd->hwm - cworkspace))); if (length > MAX_PATTERN_SIZE) { @@ -7519,7 +7522,7 @@ externally provided function. Integer overflow should no longer be possible because nowadays we limit the maximum value of cd->names_found and cd->name_entry_size. */ -size = length + sizeof(real_pcre) + cd->names_found * (cd->name_entry_size + 3); +size = sizeof(real_pcre) + (length + cd->names_found * (cd->name_entry_size + 3)) * sizeof(pcre_uchar); re = (real_pcre *)(pcre_malloc)(size); if (re == NULL) @@ -7541,7 +7544,7 @@ re->flags = cd->external_flags; re->dummy1 = 0; re->first_byte = 0; re->req_byte = 0; -re->name_table_offset = sizeof(real_pcre); +re->name_table_offset = sizeof(real_pcre) / sizeof(pcre_uchar); re->name_entry_size = cd->name_entry_size; re->name_count = cd->names_found; re->ref_count = 0; diff --git a/pcre_exec.c b/pcre_exec.c index 4d69630..4aa4a0a 100644 --- a/pcre_exec.c +++ b/pcre_exec.c @@ -1232,7 +1232,7 @@ for (;;) cb.capture_top = offset_top/2; cb.capture_last = md->capture_last; cb.callout_data = md->callout_data; - cb.mark = markptr; + cb.mark = (unsigned char *)markptr; if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH); if (rrc < 0) RRETURN(rrc); } @@ -1643,7 +1643,7 @@ for (;;) cb.capture_top = offset_top/2; cb.capture_last = md->capture_last; cb.callout_data = md->callout_data; - cb.mark = markptr; + cb.mark = (unsigned char *)markptr; if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH); if (rrc < 0) RRETURN(rrc); } @@ -5926,10 +5926,17 @@ Returns: > 0 => success; value is the number of elements filled in < -1 => some kind of unexpected problem */ +#ifdef COMPILE_PCRE8 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION pcre_exec(const pcre *argument_re, const pcre_extra *extra_data, PCRE_SPTR subject, int length, int start_offset, int options, int *offsets, int offsetcount) +#else +PCRE_EXP_DEFN int PCRE_CALL_CONVENTION +pcre16_exec(const pcre *argument_re, const pcre_extra *extra_data, + PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets, + int offsetcount) +#endif { int rc, ocount, arg_offset_max; int first_byte = -1; @@ -6015,8 +6022,9 @@ if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_TABLES) == 0 && (options & ~(PCRE_NO_UTF8_CHECK | PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART)) == 0) - return PRIV(jit_exec)(re, extra_data->executable_jit, subject, length, - start_offset, options, ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) + return PRIV(jit_exec)(re, extra_data->executable_jit, + (const pcre_uchar *)subject, length, start_offset, options, + ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : extra_data->match_limit, offsets, offsetcount); #endif diff --git a/pcre_internal.h b/pcre_internal.h index 2ca7698..ec7a9ff 100644 --- a/pcre_internal.h +++ b/pcre_internal.h @@ -220,16 +220,27 @@ for the current character representation (either 8 or 16 bit) to save lots of typing. I tried "uchar", but it causes problems on Digital Unix, where it is defined in sys/types, so use "uschar" instead. */ -#ifndef COMPILE_PCRE16 +#ifdef COMPILE_PCRE8 + typedef unsigned char pcre_uchar; +#define IN_UCHARS(x) (x) + #else + +#ifdef COMPILE_PCRE16 #if USHRT_MAX != 65535 /* This is a warning message. Change PCRE_SCHAR16 to a 16 bit data type in pcre.h(.in) and disable (comment out) this message. */ #error Warning: PCRE_SCHAR16 is not a 16 bit data type. #endif typedef pcre_uint16 pcre_uchar; -#endif +#define IN_UCHARS(x) ((x) << 1) + +#else +#error Unsupported compiling mode +#endif /* COMPILE_PCRE16 */ + +#endif /* COMPILE_PCRE8 */ /* This is an unsigned int value that no character can ever have. UTF-8 characters only go up to 0x7fffffff (though Unicode doesn't go beyond @@ -288,9 +299,6 @@ must begin with PCRE_. */ #define PCRE_PUCHAR CUSTOM_SUBJECT_PTR #else #define PCRE_PUCHAR const pcre_uchar * - -/* PCRE_SPTR is defined in pcre.h. */ -#define USPTR const uschar * #endif /* Include the public PCRE header and the definitions of UCP character property @@ -424,6 +432,9 @@ is automated on Unix systems via the "configure" command. */ #elif LINK_SIZE == 3 || LINK_SIZE == 4 +#undef LINK_SIZE +#define LINK_SIZE 2 + #define PUT(a,n,d) \ (a[n] = (d) >> 16), \ (a[(n)+1] = (d) & 65536) @@ -2033,7 +2044,7 @@ sense, but are not part of the PCRE public API. */ strncmp((char *)(str1), (char *)(str2), (num)) #define STRNCMP_UC_C8(str1, str2, num) \ strncmp((char *)(str1), (str2), (num)) -#define STRLEN_UC(str) strlen(str) +#define STRLEN_UC(str) strlen((const char *)str) #else @@ -2076,8 +2087,8 @@ extern BOOL PRIV(xclass)(int, const pcre_uchar *); #ifdef SUPPORT_JIT extern void PRIV(jit_compile)(const real_pcre *, pcre_extra *); -extern int PRIV(jit_exec)(const real_pcre *, void *, PCRE_SPTR, - int, int, int, int, int *, int); +extern int PRIV(jit_exec)(const real_pcre *, void *, + const pcre_uchar *, int, int, int, int, int *, int); extern void PRIV(jit_free)(void *); #endif diff --git a/pcre_jit_compile.c b/pcre_jit_compile.c index 17e70c4..c1d1140 100644 --- a/pcre_jit_compile.c +++ b/pcre_jit_compile.c @@ -62,7 +62,7 @@ system files. */ #include "sljit/sljitLir.c" #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED -#error "Unsupported architecture" +#error Unsupported architecture #endif /* Allocate memory on the stack. Fast, but limited size. */ @@ -148,9 +148,9 @@ Thus we can restore the locals to a particular point in the stack. typedef struct jit_arguments { /* Pointers first. */ struct sljit_stack *stack; - PCRE_SPTR str; - PCRE_SPTR begin; - PCRE_SPTR end; + const pcre_uchar *str; + const pcre_uchar *begin; + const pcre_uchar *end; int *offsets; pcre_uchar *ptr; /* Everything else after. */ @@ -316,18 +316,30 @@ typedef struct compare_context { int length; int sourcereg; #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED - int byteptr; + int ucharptr; union { - int asint; - short asshort; + sljit_i asint; + sljit_h asshort; +#ifdef COMPILE_PCRE8 sljit_ub asbyte; - sljit_ub asbytes[4]; + sljit_ub asuchars[4]; +#else +#ifdef COMPILE_PCRE16 + sljit_uh asuchars[2]; +#endif +#endif } c; union { - int asint; - short asshort; + sljit_i asint; + sljit_h asshort; +#ifdef COMPILE_PCRE8 sljit_ub asbyte; - sljit_ub asbytes[4]; + sljit_ub asuchars[4]; +#else +#ifdef COMPILE_PCRE16 + sljit_uh asuchars[2]; +#endif +#endif } oc; #endif } compare_context; @@ -375,6 +387,16 @@ the start pointers when the end of the capturing group has not yet reached. */ #define OVECTOR_PRIV(i) (common->cbraptr + (i) * sizeof(sljit_w)) #define PRIV_DATA(cc) (common->localptrs[(cc) - common->start]) +#ifdef COMPILE_PCRE8 +#define MOV_UCHAR SLJIT_MOV_UB +#else +#ifdef COMPILE_PCRE16 +#define MOV_UCHAR SLJIT_MOV_UH +#else +#error Unsupported compiling mode +#endif +#endif + /* Shortcuts. */ #define DEFINE_COMPILER \ struct sljit_compiler *compiler = common->compiler @@ -1173,7 +1195,7 @@ struct sljit_label *loop; int i; /* At this point we can freely use all temporary registers. */ /* TMP1 returns with begin - 1. */ -OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_GENERAL_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, 1); +OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_GENERAL_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1)); if (length < 8) { for (i = 0; i < length; i++) @@ -1211,6 +1233,9 @@ loop = LABEL(); OP2(SLJIT_SUB, SLJIT_GENERAL_REG2, 0, SLJIT_MEM1(SLJIT_GENERAL_REG1), 0, SLJIT_TEMPORARY_REG1, 0); OP2(SLJIT_ADD, SLJIT_GENERAL_REG1, 0, SLJIT_GENERAL_REG1, 0, SLJIT_IMM, sizeof(sljit_w)); /* Copy the integer value to the output buffer */ +#ifdef COMPILE_PCRE16 +OP2(SLJIT_LSHR, SLJIT_GENERAL_REG2, 0, SLJIT_GENERAL_REG2, 0, SLJIT_IMM, 1); +#endif OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG3), sizeof(int), SLJIT_GENERAL_REG2, 0); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1); JUMPTO(SLJIT_C_NOT_ZERO, loop); @@ -1347,7 +1372,7 @@ DEFINE_COMPILER; struct sljit_jump *jump; #endif -OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); +OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); #ifdef SUPPORT_UTF8 if (common->utf8) { @@ -1356,7 +1381,7 @@ if (common->utf8) JUMPHERE(jump); } #endif -OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); } static void peek_char(compiler_common *common) @@ -1614,7 +1639,7 @@ struct sljit_jump *singlebyte; #endif jump_list *newline = NULL; BOOL newlinecheck = FALSE; -BOOL readbyte = FALSE; +BOOL readuchar = FALSE; if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF || common->newline > 255)) @@ -1629,13 +1654,13 @@ if (firstline) if (common->nltype == NLTYPE_FIXED && common->newline > 255) { mainloop = LABEL(); - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -1); - OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 0); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1)); + OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop); CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop); - OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_PTR, 0, SLJIT_IMM, 1); + OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); } else { @@ -1659,9 +1684,9 @@ start = JUMP(SLJIT_JUMP); if (newlinecheck) { newlinelabel = LABEL(); - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff); COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); @@ -1672,17 +1697,17 @@ mainloop = LABEL(); /* Increasing the STR_PTR here requires one less jump in the most common case. */ #ifdef SUPPORT_UTF8 -if (common->utf8) readbyte = TRUE; +if (common->utf8) readuchar = TRUE; #endif -if (newlinecheck) readbyte = TRUE; +if (newlinecheck) readuchar = TRUE; -if (readbyte) - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); +if (readuchar) + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); if (newlinecheck) CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel); -OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); #ifdef SUPPORT_UTF8 if (common->utf8) { @@ -1743,7 +1768,7 @@ else } } -OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); #ifdef SUPPORT_UTF8 if (common->utf8) { @@ -2257,7 +2282,7 @@ static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, { DEFINE_COMPILER; unsigned int othercasebit = 0; -pcre_uint8 *othercasebyte = NULL; +pcre_uchar *othercasechar = NULL; #ifdef SUPPORT_UTF8 int utf8length; #endif @@ -2267,12 +2292,23 @@ if (caseless && char_has_othercase(common, cc)) othercasebit = char_get_othercase_bit(common, cc); SLJIT_ASSERT(othercasebit); /* Extracting bit difference info. */ - othercasebyte = cc + (othercasebit >> 8); +#ifdef COMPILE_PCRE8 + othercasechar = cc + (othercasebit >> 8); othercasebit &= 0xff; +#else +#ifdef COMPILE_PCRE16 + othercasechar = cc + (othercasebit >> 9); + if ((othercasebit & 0x100) != 0) + othercasebit = (othercasebit & 0xff) << 8; + else + othercasebit &= 0xff; +#endif +#endif } if (context->sourcereg == -1) { +#ifdef COMPILE_PCRE8 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED if (context->length >= 4) OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); @@ -2281,6 +2317,16 @@ if (context->sourcereg == -1) else #endif OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); +#else +#ifdef COMPILE_PCRE16 +#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED + if (context->length >= 4) + OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); + else +#endif + OP1(SLJIT_MOV_SH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); +#endif +#endif /* COMPILE_PCRE8 */ context->sourcereg = TMP2; } @@ -2293,67 +2339,83 @@ do { #endif - context->length--; + context->length -= IN_UCHARS(1); #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED /* Unaligned read is supported. */ - if (othercasebit != 0 && othercasebyte == cc) + if (othercasebit != 0 && othercasechar == cc) { - context->c.asbytes[context->byteptr] = *cc | othercasebit; - context->oc.asbytes[context->byteptr] = othercasebit; + context->c.asuchars[context->ucharptr] = *cc | othercasebit; + context->oc.asuchars[context->ucharptr] = othercasebit; } else { - context->c.asbytes[context->byteptr] = *cc; - context->oc.asbytes[context->byteptr] = 0; + context->c.asuchars[context->ucharptr] = *cc; + context->oc.asuchars[context->ucharptr] = 0; } - context->byteptr++; + context->ucharptr++; - if (context->byteptr >= 4 || context->length == 0 || (context->byteptr == 2 && context->length == 1)) +#ifdef COMPILE_PCRE8 + if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1)) +#else + if (context->ucharptr >= 2 || context->length == 0) +#endif { if (context->length >= 4) OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); +#ifdef COMPILE_PCRE8 else if (context->length >= 2) OP1(SLJIT_MOV_SH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); else if (context->length >= 1) OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); +#else + else if (context->length >= 2) + OP1(SLJIT_MOV_SH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); +#endif context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1; - switch(context->byteptr) + switch(context->ucharptr) { - case 4: + case 4 / sizeof(pcre_uchar): if (context->oc.asint != 0) OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint); add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint)); break; - case 2: + case 2 / sizeof(pcre_uchar): if (context->oc.asshort != 0) OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asshort); add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asshort | context->oc.asshort)); break; +#ifdef COMPILE_PCRE8 case 1: if (context->oc.asbyte != 0) OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte); add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte)); break; +#endif default: SLJIT_ASSERT_STOP(); break; } - context->byteptr = 0; + context->ucharptr = 0; } #else /* Unaligned read is unsupported. */ +#ifdef COMPILE_PCRE8 if (context->length > 0) OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); +#else + if (context->length > 0) + OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); +#endif context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1; - if (othercasebit != 0 && othercasebyte == cc) + if (othercasebit != 0 && othercasechar == cc) { OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit); add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit)); @@ -3019,7 +3081,7 @@ switch(type) case OP_CHAR: case OP_CHARI: - length = 1; + length = IN_UCHARS(1); #ifdef SUPPORT_UTF8 if (common->utf8 && *cc >= 0xc0) length += PRIV(utf8_table4)[*cc & 0x3f]; #endif @@ -3031,11 +3093,11 @@ switch(type) context.length = length; context.sourcereg = -1; #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED - context.byteptr = 0; + context.ucharptr = 0; #endif return byte_sequence_compare(common, type == OP_CHARI, cc, &context, fallbacks); } - add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + check_input_end(common, fallbacks); read_char(common); #ifdef SUPPORT_UTF8 if (common->utf8) @@ -3215,7 +3277,7 @@ do size = 0; cc += 1 + size; - context.length += size; + context.length += IN_UCHARS(size); } while (size > 0 && context.length <= 128); @@ -3228,9 +3290,10 @@ if (context.length > 0) context.sourcereg = -1; #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED - context.byteptr = 0; + context.ucharptr = 0; #endif do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, fallbacks); while (context.length > 0); +sljit_emit_op0(compiler, SLJIT_NOP); return cc; } @@ -6284,7 +6347,7 @@ if ((re->options & PCRE_ANCHORED) == 0) { if (study != NULL && study->minlength > 1) { - OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, study->minlength); + OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(study->minlength)); CMPTO(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_END, 0, mainloop); } else @@ -6294,7 +6357,7 @@ if ((re->options & PCRE_ANCHORED) == 0) { if (study != NULL && study->minlength > 1) { - OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, study->minlength); + OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(study->minlength)); OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, STR_END, 0); COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_GREATER); OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END); @@ -6468,7 +6531,7 @@ return convert_executable_func.call_executable_func(arguments); int PRIV(jit_exec)(const real_pcre *re, void *executable_func, - PCRE_SPTR subject, int length, int start_offset, int options, + const pcre_uchar *subject, int length, int start_offset, int options, int match_limit, int *offsets, int offsetcount) { executable_function *function = (executable_function*)executable_func; @@ -6530,8 +6593,13 @@ sljit_free_code(function->executable_func); SLJIT_FREE(function); } +#ifdef COMPILE_PCRE8 PCRE_EXP_DECL pcre_jit_stack * pcre_jit_stack_alloc(int startsize, int maxsize) +#else +PCRE_EXP_DECL pcre_jit_stack * +pcre16_jit_stack_alloc(int startsize, int maxsize) +#endif { if (startsize < 1 || maxsize < 1) return NULL; @@ -6542,14 +6610,24 @@ maxsize = (maxsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1); return (pcre_jit_stack*)sljit_allocate_stack(startsize, maxsize); } +#ifdef COMPILE_PCRE8 PCRE_EXP_DECL void pcre_jit_stack_free(pcre_jit_stack *stack) +#else +PCRE_EXP_DECL void +pcre16_jit_stack_free(pcre_jit_stack *stack) +#endif { sljit_free_stack((struct sljit_stack*)stack); } +#ifdef COMPILE_PCRE8 PCRE_EXP_DECL void pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata) +#else +PCRE_EXP_DECL void +pcre16_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata) +#endif { executable_function *function; if (extra != NULL && @@ -6567,22 +6645,37 @@ if (extra != NULL && /* These are dummy functions to avoid linking errors when JIT support is not being compiled. */ +#ifdef COMPILE_PCRE8 PCRE_EXP_DECL pcre_jit_stack * pcre_jit_stack_alloc(int startsize, int maxsize) +#else +PCRE_EXP_DECL pcre_jit_stack * +pcre16_jit_stack_alloc(int startsize, int maxsize) +#endif { (void)startsize; (void)maxsize; return NULL; } +#ifdef COMPILE_PCRE8 PCRE_EXP_DECL void pcre_jit_stack_free(pcre_jit_stack *stack) +#else +PCRE_EXP_DECL void +pcre16_jit_stack_free(pcre_jit_stack *stack) +#endif { (void)stack; } +#ifdef COMPILE_PCRE8 PCRE_EXP_DECL void pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata) +#else +PCRE_EXP_DECL void +pcre16_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata) +#endif { (void)extra; (void)callback; diff --git a/pcre_printint.src b/pcre_printint.src index faa5e06..41e3555 100644 --- a/pcre_printint.src +++ b/pcre_printint.src @@ -77,7 +77,7 @@ print_char(FILE *f, pcre_uchar *ptr, BOOL utf8) int c = *ptr; #ifndef SUPPORT_UTF8 -utf8 = utf8; /* Avoid compiler warning */ +(void)utf8; /* Avoid compiler warning */ if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c); return 0; @@ -116,7 +116,19 @@ else #endif } +/************************************************* +* Print uchar string (regardless of utf8) * +*************************************************/ +static void +print_puchar(FILE *f, PCRE_PUCHAR ptr) +{ +while (*ptr != '\0') + { + register int c = *ptr++; + if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c); + } +} /************************************************* * Find Unicode property name * @@ -583,7 +595,9 @@ for(;;) case OP_MARK: case OP_PRUNE_ARG: case OP_SKIP_ARG: - fprintf(f, " %s %s", OP_names[*code], code + 2); + case OP_THEN_ARG: + fprintf(f, " %s ", OP_names[*code]); + print_puchar(f, code + 2); extra += code[1]; break; @@ -591,11 +605,6 @@ for(;;) fprintf(f, " %s", OP_names[*code]); break; - case OP_THEN_ARG: - fprintf(f, " %s %s", OP_names[*code], code + 2); - extra += code[1]; - break; - case OP_CIRCM: case OP_DOLLM: flag = "/m"; diff --git a/pcre_study.c b/pcre_study.c index 92cf32b..07e548a 100644 --- a/pcre_study.c +++ b/pcre_study.c @@ -599,6 +599,7 @@ set_type_bits(pcre_uint8 *start_bits, int cbit_type, int table_limit, { register int c; for (c = 0; c < table_limit; c++) start_bits[c] |= cd->cbits[c+cbit_type]; +#ifdef SUPPORT_UTF8 if (table_limit == 32) return; for (c = 128; c < 256; c++) { @@ -609,6 +610,7 @@ for (c = 128; c < 256; c++) SET_BIT(buff[0]); } } +#endif } @@ -1219,8 +1221,13 @@ Returns: pointer to a pcre_extra block, with study_data filled in and the NULL on error or if no optimization possible */ +#ifdef COMPILE_PCRE8 PCRE_EXP_DEFN pcre_extra * PCRE_CALL_CONVENTION pcre_study(const pcre *external_re, int options, const char **errorptr) +#else +PCRE_EXP_DEFN pcre_extra * PCRE_CALL_CONVENTION +pcre16_study(const pcre *external_re, int options, const char **errorptr) +#endif { int min; BOOL bits_set = FALSE; @@ -1369,8 +1376,13 @@ Argument: a pointer to the pcre_extra block Returns: nothing */ +#ifdef COMPILE_PCRE8 PCRE_EXP_DEFN void pcre_free_study(pcre_extra *extra) +#else +PCRE_EXP_DEFN void +pcre16_free_study(pcre_extra *extra) +#endif { #ifdef SUPPORT_JIT if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 && diff --git a/sljit/sljitConfigInternal.h b/sljit/sljitConfigInternal.h index b0750d3..3f771d8 100644 --- a/sljit/sljitConfigInternal.h +++ b/sljit/sljitConfigInternal.h @@ -354,8 +354,8 @@ typedef long int sljit_w; #endif /* !SLJIT_UNALIGNED */ #if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) -void* sljit_malloc_exec(sljit_uw size); -void sljit_free_exec(void* ptr); +static void* sljit_malloc_exec(sljit_uw size); +static void sljit_free_exec(void* ptr); #define SLJIT_MALLOC_EXEC(size) sljit_malloc_exec(size) #define SLJIT_FREE_EXEC(ptr) sljit_free_exec(ptr) #endif diff --git a/sljit/sljitExecAllocator.c b/sljit/sljitExecAllocator.c index f3567b2..bfe8eb1 100644 --- a/sljit/sljitExecAllocator.c +++ b/sljit/sljitExecAllocator.c @@ -163,7 +163,7 @@ static SLJIT_INLINE void sljit_remove_free_block(struct free_block *free_block) } } -void* sljit_malloc_exec(sljit_uw size) +static void* sljit_malloc_exec(sljit_uw size) { struct block_header *header; struct block_header *next_header; @@ -231,7 +231,7 @@ void* sljit_malloc_exec(sljit_uw size) return MEM_START(header); } -void sljit_free_exec(void* ptr) +static void sljit_free_exec(void* ptr) { struct block_header *header; struct free_block* free_block; |