summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2011-11-26 12:48:56 +0000
committer zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2011-11-26 12:48:56 +0000
commit af0099785014bcb1968b7665a8a6f85cd299bb8f (patch)
tree e46ed87c8614436a91016636e6ce2fe47a294a2a
parent 950a27a0fd444ac862e956d0438d2d196519bd1a (diff)
download pcre-af0099785014bcb1968b7665a8a6f85cd299bb8f.tar.gz
Make simple patterns work in PCRE16
git-svn-id: svn://vcs.exim.org/pcre/code/branches/pcre16@767 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- pcre.h.in 11
-rw-r--r-- pcre_compile.c 45
-rw-r--r-- pcre_exec.c 16
-rw-r--r-- pcre_internal.h 27
-rw-r--r-- pcre_jit_compile.c 193
-rw-r--r-- pcre_printint.src 23
-rw-r--r-- pcre_study.c 12
-rw-r--r-- sljit/sljitConfigInternal.h 4
-rw-r--r-- sljit/sljitExecAllocator.c 4
9 files changed, 240 insertions, 95 deletions
diff --git a/pcre.h.in b/pcre.h.in
index 8b56e25..7b2bca5 100644
--- a/pcre.h.in
+++ b/pcre.h.in
@@ -361,6 +361,8 @@ PCRE_EXP_DECL int pcre_dfa_exec(const pcre *, const pcre_extra *,
const char *, int, int, int, int *, int , int *, int);
PCRE_EXP_DECL int pcre_exec(const pcre *, const pcre_extra *, PCRE_SPTR,
int, int, int, int *, int);
+PCRE_EXP_DECL int pcre16_exec(const pcre *, const pcre_extra *, PCRE_SPTR16,
+ int, int, int, int *, int);
PCRE_EXP_DECL void pcre_free_substring(const char *);
PCRE_EXP_DECL void pcre_free_substring_list(const char **);
PCRE_EXP_DECL int pcre_fullinfo(const pcre *, const pcre_extra *, int,
@@ -380,14 +382,21 @@ PCRE_EXP_DECL int pcre_refcount(pcre *, int);
PCRE_EXP_DECL int pcre16_utf16_to_host_byte_order(PCRE_SCHAR16 *,
PCRE_SPTR16, int, int);
PCRE_EXP_DECL pcre_extra *pcre_study(const pcre *, int, const char **);
+PCRE_EXP_DECL pcre_extra *pcre16_study(const pcre *, int, const char **);
PCRE_EXP_DECL void pcre_free_study(pcre_extra *);
+PCRE_EXP_DECL void pcre16_free_study(pcre_extra *);
PCRE_EXP_DECL const char *pcre_version(void);
/* JIT compiler related functions. */
PCRE_EXP_DECL pcre_jit_stack *pcre_jit_stack_alloc(int, int);
+PCRE_EXP_DECL pcre_jit_stack *pcre16_jit_stack_alloc(int, int);
PCRE_EXP_DECL void pcre_jit_stack_free(pcre_jit_stack *);
-PCRE_EXP_DECL void pcre_assign_jit_stack(pcre_extra *, pcre_jit_callback, void *);
+PCRE_EXP_DECL void pcre16_jit_stack_free(pcre_jit_stack *);
+PCRE_EXP_DECL void pcre_assign_jit_stack(pcre_extra *,
+ pcre_jit_callback, void *);
+PCRE_EXP_DECL void pcre16_assign_jit_stack(pcre_extra *,
+ pcre_jit_callback, void *);
#ifdef __cplusplus
} /* extern "C" */
diff --git a/pcre_compile.c b/pcre_compile.c
index 38cd815..b0e6367 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -3419,7 +3419,7 @@ for (;; ptr++)
{
if (previous > orig_code)
{
- memmove(orig_code, previous, code - previous);
+ memmove(orig_code, previous, IN_UCHARS(code - previous));
code -= previous - orig_code;
previous = orig_code;
}
@@ -4484,7 +4484,7 @@ for (;; ptr++)
if (*previous == OP_RECURSE)
{
- memmove(previous + 1 + LINK_SIZE, previous, 1 + LINK_SIZE);
+ memmove(previous + 1 + LINK_SIZE, previous, IN_UCHARS(1 + LINK_SIZE));
*previous = OP_ONCE;
PUT(previous, 1, 2 + 2*LINK_SIZE);
previous[2 + 2*LINK_SIZE] = OP_KET;
@@ -4862,7 +4862,7 @@ for (;; ptr++)
{
*code = OP_END;
adjust_recurse(previous, 1, utf8, cd, save_hwm);
- memmove(previous+1, previous, len);
+ memmove(previous + 1, previous, IN_UCHARS(len));
code++;
if (repeat_max == 0)
{
@@ -4886,7 +4886,7 @@ for (;; ptr++)
int offset;
*code = OP_END;
adjust_recurse(previous, 2 + LINK_SIZE, utf8, cd, save_hwm);
- memmove(previous + 2 + LINK_SIZE, previous, len);
+ memmove(previous + 2 + LINK_SIZE, previous, IN_UCHARS(len));
code += 2 + LINK_SIZE;
*previous++ = OP_BRAZERO + repeat_type;
*previous++ = OP_BRA;
@@ -4941,7 +4941,7 @@ for (;; ptr++)
{
pcre_uchar *hc;
pcre_uchar *this_hwm = cd->hwm;
- memcpy(code, previous, len);
+ memcpy(code, previous, IN_UCHARS(len));
for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE)
{
PUT(cd->hwm, 0, GET(hc, 0) + len);
@@ -5008,7 +5008,7 @@ for (;; ptr++)
PUTINC(code, 0, offset);
}
- memcpy(code, previous, len);
+ memcpy(code, previous, IN_UCHARS(len));
for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE)
{
PUT(cd->hwm, 0, GET(hc, 0) + len + ((i != 0)? 2+LINK_SIZE : 1));
@@ -5111,7 +5111,7 @@ for (;; ptr++)
int nlen = (int)(code - bracode);
*code = OP_END;
adjust_recurse(bracode, 1 + LINK_SIZE, utf8, cd, save_hwm);
- memmove(bracode + 1+LINK_SIZE, bracode, nlen);
+ memmove(bracode + 1 + LINK_SIZE, bracode, IN_UCHARS(nlen));
code += 1 + LINK_SIZE;
nlen += 1 + LINK_SIZE;
*bracode = OP_BRAPOS;
@@ -5226,7 +5226,7 @@ for (;; ptr++)
default:
*code = OP_END;
adjust_recurse(tempcode, 1 + LINK_SIZE, utf8, cd, save_hwm);
- memmove(tempcode + 1+LINK_SIZE, tempcode, len);
+ memmove(tempcode + 1 + LINK_SIZE, tempcode, IN_UCHARS(len));
code += 1 + LINK_SIZE;
len += 1 + LINK_SIZE;
tempcode[0] = OP_ONCE;
@@ -5343,7 +5343,7 @@ for (;; ptr++)
*code = verbs[i].op_arg;
if (*code++ == OP_THEN_ARG) cd->external_flags |= PCRE_HASTHEN;
*code++ = arglen;
- memcpy(code, arg, arglen);
+ memcpy(code, arg, IN_UCHARS(arglen));
code += arglen;
*code++ = 0;
}
@@ -5779,7 +5779,7 @@ for (;; ptr++)
if (crc < 0)
{
memmove(slot + cd->name_entry_size, slot,
- (cd->names_found - i) * cd->name_entry_size);
+ IN_UCHARS((cd->names_found - i) * cd->name_entry_size));
break;
}
@@ -5810,8 +5810,8 @@ for (;; ptr++)
}
PUT2(slot, 0, cd->bracount + 1);
- memcpy(slot + 2, name, namelen);
- slot[2+namelen] = 0;
+ memcpy(slot + 2, name, IN_UCHARS(namelen));
+ slot[2 + namelen] = 0;
}
}
@@ -6877,7 +6877,7 @@ for (;;)
if (cd->open_caps->flag)
{
memmove(start_bracket + 1 + LINK_SIZE, start_bracket,
- code - start_bracket);
+ IN_UCHARS(code - start_bracket));
*start_bracket = OP_ONCE;
code += 1 + LINK_SIZE;
PUT(start_bracket, 1, (int)(code - start_bracket));
@@ -7247,7 +7247,7 @@ Returns: pointer to compiled data block, or NULL on error,
with errorptr and erroroffset set
*/
-#ifndef COMPILE_PCRE16
+#ifdef COMPILE_PCRE8
PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION
pcre_compile(const char *pattern, int options, const char **errorptr,
int *erroroffset, const unsigned char *tables)
@@ -7257,7 +7257,7 @@ pcre16_compile(PCRE_SPTR16 pattern, int options, const char **errorptr,
int *erroroffset, const unsigned char *tables)
#endif
{
-#ifndef COMPILE_PCRE16
+#ifdef COMPILE_PCRE8
return pcre_compile2(pattern, options, NULL, errorptr, erroroffset, tables);
#else
return pcre16_compile2(pattern, options, NULL, errorptr, erroroffset, tables);
@@ -7265,7 +7265,7 @@ return pcre16_compile2(pattern, options, NULL, errorptr, erroroffset, tables);
}
-#ifndef COMPILE_PCRE16
+#ifdef COMPILE_PCRE8
PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION
pcre_compile2(const char *pattern, int options, int *errorcodeptr,
const char **errorptr, int *erroroffset, const unsigned char *tables)
@@ -7469,7 +7469,10 @@ cd->backref_map = 0;
/* Reflect pattern for debugging output */
DPRINTF(("------------------------------------------------------------------\n"));
-DPRINTF(("%s\n", pattern));
+#ifdef PCRE_DEBUG
+print_puchar(stdout, (PCRE_PUCHAR)pattern);
+#endif
+DPRINTF(("\n"));
/* Pretend to compile the pattern while actually just accumulating the length
of memory required. This behaviour is triggered by passing a non-NULL final
@@ -7486,7 +7489,7 @@ cd->start_workspace = cworkspace;
cd->start_code = cworkspace;
cd->hwm = cworkspace;
cd->start_pattern = (const pcre_uchar *)pattern;
-cd->end_pattern = (const pcre_uchar *)(pattern + STRLEN_UC(pattern));
+cd->end_pattern = (const pcre_uchar *)(pattern + STRLEN_UC((const pcre_uchar *)pattern));
cd->req_varyopt = 0;
cd->external_options = options;
cd->external_flags = 0;
@@ -7506,7 +7509,7 @@ code = cworkspace;
if (errorcode != 0) goto PCRE_EARLY_ERROR_RETURN;
DPRINTF(("end pre-compile: length=%d workspace=%d\n", length,
- cd->hwm - cworkspace));
+ (int)(cd->hwm - cworkspace)));
if (length > MAX_PATTERN_SIZE)
{
@@ -7519,7 +7522,7 @@ externally provided function. Integer overflow should no longer be possible
because nowadays we limit the maximum value of cd->names_found and
cd->name_entry_size. */
-size = length + sizeof(real_pcre) + cd->names_found * (cd->name_entry_size + 3);
+size = sizeof(real_pcre) + (length + cd->names_found * (cd->name_entry_size + 3)) * sizeof(pcre_uchar);
re = (real_pcre *)(pcre_malloc)(size);
if (re == NULL)
@@ -7541,7 +7544,7 @@ re->flags = cd->external_flags;
re->dummy1 = 0;
re->first_byte = 0;
re->req_byte = 0;
-re->name_table_offset = sizeof(real_pcre);
+re->name_table_offset = sizeof(real_pcre) / sizeof(pcre_uchar);
re->name_entry_size = cd->name_entry_size;
re->name_count = cd->names_found;
re->ref_count = 0;
diff --git a/pcre_exec.c b/pcre_exec.c
index 4d69630..4aa4a0a 100644
--- a/pcre_exec.c
+++ b/pcre_exec.c
@@ -1232,7 +1232,7 @@ for (;;)
cb.capture_top = offset_top/2;
cb.capture_last = md->capture_last;
cb.callout_data = md->callout_data;
- cb.mark = markptr;
+ cb.mark = (unsigned char *)markptr;
if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
if (rrc < 0) RRETURN(rrc);
}
@@ -1643,7 +1643,7 @@ for (;;)
cb.capture_top = offset_top/2;
cb.capture_last = md->capture_last;
cb.callout_data = md->callout_data;
- cb.mark = markptr;
+ cb.mark = (unsigned char *)markptr;
if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
if (rrc < 0) RRETURN(rrc);
}
@@ -5926,10 +5926,17 @@ Returns: > 0 => success; value is the number of elements filled in
< -1 => some kind of unexpected problem
*/
+#ifdef COMPILE_PCRE8
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
int offsetcount)
+#else
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
+pcre16_exec(const pcre *argument_re, const pcre_extra *extra_data,
+ PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
+ int offsetcount)
+#endif
{
int rc, ocount, arg_offset_max;
int first_byte = -1;
@@ -6015,8 +6022,9 @@ if (extra_data != NULL
&& (extra_data->flags & PCRE_EXTRA_TABLES) == 0
&& (options & ~(PCRE_NO_UTF8_CHECK | PCRE_NOTBOL | PCRE_NOTEOL |
PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART)) == 0)
- return PRIV(jit_exec)(re, extra_data->executable_jit, subject, length,
- start_offset, options, ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0)
+ return PRIV(jit_exec)(re, extra_data->executable_jit,
+ (const pcre_uchar *)subject, length, start_offset, options,
+ ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0)
? MATCH_LIMIT : extra_data->match_limit, offsets, offsetcount);
#endif
diff --git a/pcre_internal.h b/pcre_internal.h
index 2ca7698..ec7a9ff 100644
--- a/pcre_internal.h
+++ b/pcre_internal.h
@@ -220,16 +220,27 @@ for the current character representation (either 8 or 16 bit) to save lots
of typing. I tried "uchar", but it causes problems on Digital Unix, where
it is defined in sys/types, so use "uschar" instead. */
-#ifndef COMPILE_PCRE16
+#ifdef COMPILE_PCRE8
+
typedef unsigned char pcre_uchar;
+#define IN_UCHARS(x) (x)
+
#else
+
+#ifdef COMPILE_PCRE16
#if USHRT_MAX != 65535
/* This is a warning message. Change PCRE_SCHAR16 to a 16 bit data type in
pcre.h(.in) and disable (comment out) this message. */
#error Warning: PCRE_SCHAR16 is not a 16 bit data type.
#endif
typedef pcre_uint16 pcre_uchar;
-#endif
+#define IN_UCHARS(x) ((x) << 1)
+
+#else
+#error Unsupported compiling mode
+#endif /* COMPILE_PCRE16 */
+
+#endif /* COMPILE_PCRE8 */
/* This is an unsigned int value that no character can ever have. UTF-8
characters only go up to 0x7fffffff (though Unicode doesn't go beyond
@@ -288,9 +299,6 @@ must begin with PCRE_. */
#define PCRE_PUCHAR CUSTOM_SUBJECT_PTR
#else
#define PCRE_PUCHAR const pcre_uchar *
-
-/* PCRE_SPTR is defined in pcre.h. */
-#define USPTR const uschar *
#endif
/* Include the public PCRE header and the definitions of UCP character property
@@ -424,6 +432,9 @@ is automated on Unix systems via the "configure" command. */
#elif LINK_SIZE == 3 || LINK_SIZE == 4
+#undef LINK_SIZE
+#define LINK_SIZE 2
+
#define PUT(a,n,d) \
(a[n] = (d) >> 16), \
(a[(n)+1] = (d) & 65536)
@@ -2033,7 +2044,7 @@ sense, but are not part of the PCRE public API. */
strncmp((char *)(str1), (char *)(str2), (num))
#define STRNCMP_UC_C8(str1, str2, num) \
strncmp((char *)(str1), (str2), (num))
-#define STRLEN_UC(str) strlen(str)
+#define STRLEN_UC(str) strlen((const char *)str)
#else
@@ -2076,8 +2087,8 @@ extern BOOL PRIV(xclass)(int, const pcre_uchar *);
#ifdef SUPPORT_JIT
extern void PRIV(jit_compile)(const real_pcre *, pcre_extra *);
-extern int PRIV(jit_exec)(const real_pcre *, void *, PCRE_SPTR,
- int, int, int, int, int *, int);
+extern int PRIV(jit_exec)(const real_pcre *, void *,
+ const pcre_uchar *, int, int, int, int, int *, int);
extern void PRIV(jit_free)(void *);
#endif
diff --git a/pcre_jit_compile.c b/pcre_jit_compile.c
index 17e70c4..c1d1140 100644
--- a/pcre_jit_compile.c
+++ b/pcre_jit_compile.c
@@ -62,7 +62,7 @@ system files. */
#include "sljit/sljitLir.c"
#if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
-#error "Unsupported architecture"
+#error Unsupported architecture
#endif
/* Allocate memory on the stack. Fast, but limited size. */
@@ -148,9 +148,9 @@ Thus we can restore the locals to a particular point in the stack.
typedef struct jit_arguments {
/* Pointers first. */
struct sljit_stack *stack;
- PCRE_SPTR str;
- PCRE_SPTR begin;
- PCRE_SPTR end;
+ const pcre_uchar *str;
+ const pcre_uchar *begin;
+ const pcre_uchar *end;
int *offsets;
pcre_uchar *ptr;
/* Everything else after. */
@@ -316,18 +316,30 @@ typedef struct compare_context {
int length;
int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
- int byteptr;
+ int ucharptr;
union {
- int asint;
- short asshort;
+ sljit_i asint;
+ sljit_h asshort;
+#ifdef COMPILE_PCRE8
sljit_ub asbyte;
- sljit_ub asbytes[4];
+ sljit_ub asuchars[4];
+#else
+#ifdef COMPILE_PCRE16
+ sljit_uh asuchars[2];
+#endif
+#endif
} c;
union {
- int asint;
- short asshort;
+ sljit_i asint;
+ sljit_h asshort;
+#ifdef COMPILE_PCRE8
sljit_ub asbyte;
- sljit_ub asbytes[4];
+ sljit_ub asuchars[4];
+#else
+#ifdef COMPILE_PCRE16
+ sljit_uh asuchars[2];
+#endif
+#endif
} oc;
#endif
} compare_context;
@@ -375,6 +387,16 @@ the start pointers when the end of the capturing group has not yet reached. */
#define OVECTOR_PRIV(i) (common->cbraptr + (i) * sizeof(sljit_w))
#define PRIV_DATA(cc) (common->localptrs[(cc) - common->start])
+#ifdef COMPILE_PCRE8
+#define MOV_UCHAR SLJIT_MOV_UB
+#else
+#ifdef COMPILE_PCRE16
+#define MOV_UCHAR SLJIT_MOV_UH
+#else
+#error Unsupported compiling mode
+#endif
+#endif
+
/* Shortcuts. */
#define DEFINE_COMPILER \
struct sljit_compiler *compiler = common->compiler
@@ -1173,7 +1195,7 @@ struct sljit_label *loop;
int i;
/* At this point we can freely use all temporary registers. */
/* TMP1 returns with begin - 1. */
-OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_GENERAL_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, 1);
+OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_GENERAL_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
if (length < 8)
{
for (i = 0; i < length; i++)
@@ -1211,6 +1233,9 @@ loop = LABEL();
OP2(SLJIT_SUB, SLJIT_GENERAL_REG2, 0, SLJIT_MEM1(SLJIT_GENERAL_REG1), 0, SLJIT_TEMPORARY_REG1, 0);
OP2(SLJIT_ADD, SLJIT_GENERAL_REG1, 0, SLJIT_GENERAL_REG1, 0, SLJIT_IMM, sizeof(sljit_w));
/* Copy the integer value to the output buffer */
+#ifdef COMPILE_PCRE16
+OP2(SLJIT_LSHR, SLJIT_GENERAL_REG2, 0, SLJIT_GENERAL_REG2, 0, SLJIT_IMM, 1);
+#endif
OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG3), sizeof(int), SLJIT_GENERAL_REG2, 0);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
JUMPTO(SLJIT_C_NOT_ZERO, loop);
@@ -1347,7 +1372,7 @@ DEFINE_COMPILER;
struct sljit_jump *jump;
#endif
-OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
+OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
#ifdef SUPPORT_UTF8
if (common->utf8)
{
@@ -1356,7 +1381,7 @@ if (common->utf8)
JUMPHERE(jump);
}
#endif
-OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
}
static void peek_char(compiler_common *common)
@@ -1614,7 +1639,7 @@ struct sljit_jump *singlebyte;
#endif
jump_list *newline = NULL;
BOOL newlinecheck = FALSE;
-BOOL readbyte = FALSE;
+BOOL readuchar = FALSE;
if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
@@ -1629,13 +1654,13 @@ if (firstline)
if (common->nltype == NLTYPE_FIXED && common->newline > 255)
{
mainloop = LABEL();
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
+ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
- OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -1);
- OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
+ OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
+ OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
- OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_PTR, 0, SLJIT_IMM, 1);
+ OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
}
else
{
@@ -1659,9 +1684,9 @@ start = JUMP(SLJIT_JUMP);
if (newlinecheck)
{
newlinelabel = LABEL();
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
+ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
- OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
+ OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
@@ -1672,17 +1697,17 @@ mainloop = LABEL();
/* Increasing the STR_PTR here requires one less jump in the most common case. */
#ifdef SUPPORT_UTF8
-if (common->utf8) readbyte = TRUE;
+if (common->utf8) readuchar = TRUE;
#endif
-if (newlinecheck) readbyte = TRUE;
+if (newlinecheck) readuchar = TRUE;
-if (readbyte)
- OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
+if (readuchar)
+ OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
if (newlinecheck)
CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
-OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#ifdef SUPPORT_UTF8
if (common->utf8)
{
@@ -1743,7 +1768,7 @@ else
}
}
-OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#ifdef SUPPORT_UTF8
if (common->utf8)
{
@@ -2257,7 +2282,7 @@ static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless,
{
DEFINE_COMPILER;
unsigned int othercasebit = 0;
-pcre_uint8 *othercasebyte = NULL;
+pcre_uchar *othercasechar = NULL;
#ifdef SUPPORT_UTF8
int utf8length;
#endif
@@ -2267,12 +2292,23 @@ if (caseless && char_has_othercase(common, cc))
othercasebit = char_get_othercase_bit(common, cc);
SLJIT_ASSERT(othercasebit);
/* Extracting bit difference info. */
- othercasebyte = cc + (othercasebit >> 8);
+#ifdef COMPILE_PCRE8
+ othercasechar = cc + (othercasebit >> 8);
othercasebit &= 0xff;
+#else
+#ifdef COMPILE_PCRE16
+ othercasechar = cc + (othercasebit >> 9);
+ if ((othercasebit & 0x100) != 0)
+ othercasebit = (othercasebit & 0xff) << 8;
+ else
+ othercasebit &= 0xff;
+#endif
+#endif
}
if (context->sourcereg == -1)
{
+#ifdef COMPILE_PCRE8
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
if (context->length >= 4)
OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
@@ -2281,6 +2317,16 @@ if (context->sourcereg == -1)
else
#endif
OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
+#else
+#ifdef COMPILE_PCRE16
+#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
+ if (context->length >= 4)
+ OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
+ else
+#endif
+ OP1(SLJIT_MOV_SH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
+#endif
+#endif /* COMPILE_PCRE8 */
context->sourcereg = TMP2;
}
@@ -2293,67 +2339,83 @@ do
{
#endif
- context->length--;
+ context->length -= IN_UCHARS(1);
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
/* Unaligned read is supported. */
- if (othercasebit != 0 && othercasebyte == cc)
+ if (othercasebit != 0 && othercasechar == cc)
{
- context->c.asbytes[context->byteptr] = *cc | othercasebit;
- context->oc.asbytes[context->byteptr] = othercasebit;
+ context->c.asuchars[context->ucharptr] = *cc | othercasebit;
+ context->oc.asuchars[context->ucharptr] = othercasebit;
}
else
{
- context->c.asbytes[context->byteptr] = *cc;
- context->oc.asbytes[context->byteptr] = 0;
+ context->c.asuchars[context->ucharptr] = *cc;
+ context->oc.asuchars[context->ucharptr] = 0;
}
- context->byteptr++;
+ context->ucharptr++;
- if (context->byteptr >= 4 || context->length == 0 || (context->byteptr == 2 && context->length == 1))
+#ifdef COMPILE_PCRE8
+ if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
+#else
+ if (context->ucharptr >= 2 || context->length == 0)
+#endif
{
if (context->length >= 4)
OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
+#ifdef COMPILE_PCRE8
else if (context->length >= 2)
OP1(SLJIT_MOV_SH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
else if (context->length >= 1)
OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
+#else
+ else if (context->length >= 2)
+ OP1(SLJIT_MOV_SH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
+#endif
context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
- switch(context->byteptr)
+ switch(context->ucharptr)
{
- case 4:
+ case 4 / sizeof(pcre_uchar):
if (context->oc.asint != 0)
OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
break;
- case 2:
+ case 2 / sizeof(pcre_uchar):
if (context->oc.asshort != 0)
OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asshort);
add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asshort | context->oc.asshort));
break;
+#ifdef COMPILE_PCRE8
case 1:
if (context->oc.asbyte != 0)
OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
break;
+#endif
default:
SLJIT_ASSERT_STOP();
break;
}
- context->byteptr = 0;
+ context->ucharptr = 0;
}
#else
/* Unaligned read is unsupported. */
+#ifdef COMPILE_PCRE8
if (context->length > 0)
OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
+#else
+ if (context->length > 0)
+ OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
+#endif
context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
- if (othercasebit != 0 && othercasebyte == cc)
+ if (othercasebit != 0 && othercasechar == cc)
{
OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
@@ -3019,7 +3081,7 @@ switch(type)
case OP_CHAR:
case OP_CHARI:
- length = 1;
+ length = IN_UCHARS(1);
#ifdef SUPPORT_UTF8
if (common->utf8 && *cc >= 0xc0) length += PRIV(utf8_table4)[*cc & 0x3f];
#endif
@@ -3031,11 +3093,11 @@ switch(type)
context.length = length;
context.sourcereg = -1;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
- context.byteptr = 0;
+ context.ucharptr = 0;
#endif
return byte_sequence_compare(common, type == OP_CHARI, cc, &context, fallbacks);
}
- add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
+ check_input_end(common, fallbacks);
read_char(common);
#ifdef SUPPORT_UTF8
if (common->utf8)
@@ -3215,7 +3277,7 @@ do
size = 0;
cc += 1 + size;
- context.length += size;
+ context.length += IN_UCHARS(size);
}
while (size > 0 && context.length <= 128);
@@ -3228,9 +3290,10 @@ if (context.length > 0)
context.sourcereg = -1;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
- context.byteptr = 0;
+ context.ucharptr = 0;
#endif
do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, fallbacks); while (context.length > 0);
+sljit_emit_op0(compiler, SLJIT_NOP);
return cc;
}
@@ -6284,7 +6347,7 @@ if ((re->options & PCRE_ANCHORED) == 0)
{
if (study != NULL && study->minlength > 1)
{
- OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, study->minlength);
+ OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(study->minlength));
CMPTO(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_END, 0, mainloop);
}
else
@@ -6294,7 +6357,7 @@ if ((re->options & PCRE_ANCHORED) == 0)
{
if (study != NULL && study->minlength > 1)
{
- OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, study->minlength);
+ OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(study->minlength));
OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, STR_END, 0);
COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_GREATER);
OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END);
@@ -6468,7 +6531,7 @@ return convert_executable_func.call_executable_func(arguments);
int
PRIV(jit_exec)(const real_pcre *re, void *executable_func,
- PCRE_SPTR subject, int length, int start_offset, int options,
+ const pcre_uchar *subject, int length, int start_offset, int options,
int match_limit, int *offsets, int offsetcount)
{
executable_function *function = (executable_function*)executable_func;
@@ -6530,8 +6593,13 @@ sljit_free_code(function->executable_func);
SLJIT_FREE(function);
}
+#ifdef COMPILE_PCRE8
PCRE_EXP_DECL pcre_jit_stack *
pcre_jit_stack_alloc(int startsize, int maxsize)
+#else
+PCRE_EXP_DECL pcre_jit_stack *
+pcre16_jit_stack_alloc(int startsize, int maxsize)
+#endif
{
if (startsize < 1 || maxsize < 1)
return NULL;
@@ -6542,14 +6610,24 @@ maxsize = (maxsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
return (pcre_jit_stack*)sljit_allocate_stack(startsize, maxsize);
}
+#ifdef COMPILE_PCRE8
PCRE_EXP_DECL void
pcre_jit_stack_free(pcre_jit_stack *stack)
+#else
+PCRE_EXP_DECL void
+pcre16_jit_stack_free(pcre_jit_stack *stack)
+#endif
{
sljit_free_stack((struct sljit_stack*)stack);
}
+#ifdef COMPILE_PCRE8
PCRE_EXP_DECL void
pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
+#else
+PCRE_EXP_DECL void
+pcre16_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
+#endif
{
executable_function *function;
if (extra != NULL &&
@@ -6567,22 +6645,37 @@ if (extra != NULL &&
/* These are dummy functions to avoid linking errors when JIT support is not
being compiled. */
+#ifdef COMPILE_PCRE8
PCRE_EXP_DECL pcre_jit_stack *
pcre_jit_stack_alloc(int startsize, int maxsize)
+#else
+PCRE_EXP_DECL pcre_jit_stack *
+pcre16_jit_stack_alloc(int startsize, int maxsize)
+#endif
{
(void)startsize;
(void)maxsize;
return NULL;
}
+#ifdef COMPILE_PCRE8
PCRE_EXP_DECL void
pcre_jit_stack_free(pcre_jit_stack *stack)
+#else
+PCRE_EXP_DECL void
+pcre16_jit_stack_free(pcre_jit_stack *stack)
+#endif
{
(void)stack;
}
+#ifdef COMPILE_PCRE8
PCRE_EXP_DECL void
pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
+#else
+PCRE_EXP_DECL void
+pcre16_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
+#endif
{
(void)extra;
(void)callback;
diff --git a/pcre_printint.src b/pcre_printint.src
index faa5e06..41e3555 100644
--- a/pcre_printint.src
+++ b/pcre_printint.src
@@ -77,7 +77,7 @@ print_char(FILE *f, pcre_uchar *ptr, BOOL utf8)
int c = *ptr;
#ifndef SUPPORT_UTF8
-utf8 = utf8; /* Avoid compiler warning */
+(void)utf8; /* Avoid compiler warning */
if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
return 0;
@@ -116,7 +116,19 @@ else
#endif
}
+/*************************************************
+* Print uchar string (regardless of utf8) *
+*************************************************/
+/* Writes the zero-terminated string at ptr to the stream f, one element at a
+time and without any UTF decoding. An element for which PRINTABLE() holds is
+written with "%c"; every other element is written as a \x{...} hexadecimal
+escape. PRINTABLE is defined outside this hunk.
+
+Arguments:
+  f    the stream to write to
+  ptr  the string to print; reading stops at the first zero element
+
+Returns: nothing
+*/
+static void
+print_puchar(FILE *f, PCRE_PUCHAR ptr)
+{
+while (*ptr != '\0')
+ {
+ register int c = *ptr++; /* fetch the current element, then advance */
+ if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c);
+ }
+}
/*************************************************
* Find Unicode property name *
@@ -583,7 +595,9 @@ for(;;)
case OP_MARK:
case OP_PRUNE_ARG:
case OP_SKIP_ARG:
- fprintf(f, " %s %s", OP_names[*code], code + 2);
+ case OP_THEN_ARG:
+ fprintf(f, " %s ", OP_names[*code]);
+ print_puchar(f, code + 2);
extra += code[1];
break;
@@ -591,11 +605,6 @@ for(;;)
fprintf(f, " %s", OP_names[*code]);
break;
- case OP_THEN_ARG:
- fprintf(f, " %s %s", OP_names[*code], code + 2);
- extra += code[1];
- break;
-
case OP_CIRCM:
case OP_DOLLM:
flag = "/m";
diff --git a/pcre_study.c b/pcre_study.c
index 92cf32b..07e548a 100644
--- a/pcre_study.c
+++ b/pcre_study.c
@@ -599,6 +599,7 @@ set_type_bits(pcre_uint8 *start_bits, int cbit_type, int table_limit,
{
register int c;
for (c = 0; c < table_limit; c++) start_bits[c] |= cd->cbits[c+cbit_type];
+#ifdef SUPPORT_UTF8
if (table_limit == 32) return;
for (c = 128; c < 256; c++)
{
@@ -609,6 +610,7 @@ for (c = 128; c < 256; c++)
SET_BIT(buff[0]);
}
}
+#endif
}
@@ -1219,8 +1221,13 @@ Returns: pointer to a pcre_extra block, with study_data filled in and the
NULL on error or if no optimization possible
*/
+#ifdef COMPILE_PCRE8
PCRE_EXP_DEFN pcre_extra * PCRE_CALL_CONVENTION
pcre_study(const pcre *external_re, int options, const char **errorptr)
+#else
+PCRE_EXP_DEFN pcre_extra * PCRE_CALL_CONVENTION
+pcre16_study(const pcre *external_re, int options, const char **errorptr)
+#endif
{
int min;
BOOL bits_set = FALSE;
@@ -1369,8 +1376,13 @@ Argument: a pointer to the pcre_extra block
Returns: nothing
*/
+#ifdef COMPILE_PCRE8
PCRE_EXP_DEFN void
pcre_free_study(pcre_extra *extra)
+#else
+PCRE_EXP_DEFN void
+pcre16_free_study(pcre_extra *extra)
+#endif
{
#ifdef SUPPORT_JIT
if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&
diff --git a/sljit/sljitConfigInternal.h b/sljit/sljitConfigInternal.h
index b0750d3..3f771d8 100644
--- a/sljit/sljitConfigInternal.h
+++ b/sljit/sljitConfigInternal.h
@@ -354,8 +354,8 @@ typedef long int sljit_w;
#endif /* !SLJIT_UNALIGNED */
#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR)
-void* sljit_malloc_exec(sljit_uw size);
-void sljit_free_exec(void* ptr);
+static void* sljit_malloc_exec(sljit_uw size);
+static void sljit_free_exec(void* ptr);
#define SLJIT_MALLOC_EXEC(size) sljit_malloc_exec(size)
#define SLJIT_FREE_EXEC(ptr) sljit_free_exec(ptr)
#endif
diff --git a/sljit/sljitExecAllocator.c b/sljit/sljitExecAllocator.c
index f3567b2..bfe8eb1 100644
--- a/sljit/sljitExecAllocator.c
+++ b/sljit/sljitExecAllocator.c
@@ -163,7 +163,7 @@ static SLJIT_INLINE void sljit_remove_free_block(struct free_block *free_block)
}
}
-void* sljit_malloc_exec(sljit_uw size)
+static void* sljit_malloc_exec(sljit_uw size)
{
struct block_header *header;
struct block_header *next_header;
@@ -231,7 +231,7 @@ void* sljit_malloc_exec(sljit_uw size)
return MEM_START(header);
}
-void sljit_free_exec(void* ptr)
+static void sljit_free_exec(void* ptr)
{
struct block_header *header;
struct free_block* free_block;