diff options
author | ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> | 2019-05-24 17:15:48 +0000 |
---|---|---|
committer | ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> | 2019-05-24 17:15:48 +0000 |
commit | be0b8eba4f57f4572a6744cc534081fc7249386d (patch) | |
tree | cf4eb5c8740d93c022457fd7628f3fb3f152e29b /src/pcre2_jit_compile.c | |
parent | d45c1c6b2ee61449c2e574f4e2ef598846bdf851 (diff) | |
download | pcre2-be0b8eba4f57f4572a6744cc534081fc7249386d.tar.gz |
Implement support for invalid UTF in the pcre2_match() interpreter.
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1094 6239d852-aaf2-0410-a92c-79f79f948069
Diffstat (limited to 'src/pcre2_jit_compile.c')
-rw-r--r-- | src/pcre2_jit_compile.c | 85 |
1 files changed, 59 insertions, 26 deletions
diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c index 3d5c489..10e8a4a 100644 --- a/src/pcre2_jit_compile.c +++ b/src/pcre2_jit_compile.c @@ -6,8 +6,9 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel + This module by Zoltan Herczeg Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2018 University of Cambridge + New API code Copyright (c) 2016-2019 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -7846,8 +7847,6 @@ if (needstype || needsscript) if (needsscript) { // PH hacking -//fprintf(stderr, "~~B\n"); - OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2); OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3); OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); @@ -7901,7 +7900,6 @@ if (needstype || needsscript) if (!needschar) { // PH hacking -//fprintf(stderr, "~~C\n"); OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2); OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3); OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); @@ -7916,7 +7914,6 @@ if (needstype || needsscript) else { // PH hacking -//fprintf(stderr, "~~D\n"); OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2); OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3); @@ -8594,8 +8591,8 @@ uint32_t c; /* Patch by PH */ /* GETCHARINC(c, cc); */ - c = *cc++; + #if PCRE2_CODE_UNIT_WIDTH == 32 if (c >= 0x110000) return NULL; @@ -9257,8 +9254,6 @@ if (common->utf && *cc == OP_REFI) CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop); // PH hacking -//fprintf(stderr, "~~E\n"); - OP1(SLJIT_MOV, TMP3, 0, TMP1, 0); add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL)); @@ -14156,49 +14151,87 @@ Returns: 0: success or (*NOJIT) was used PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION pcre2_jit_compile(pcre2_code *code, uint32_t options) { -#ifndef SUPPORT_JIT - -(void)code; -(void)options; -return PCRE2_ERROR_JIT_BADOPTION; - -#else /* SUPPORT_JIT */ - pcre2_real_code *re = (pcre2_real_code *)code; executable_functions *functions; -uint32_t excluded_options; -int result; if (code == NULL) return PCRE2_ERROR_NULL; if ((options & ~PUBLIC_JIT_COMPILE_OPTIONS) != 0) return PCRE2_ERROR_JIT_BADOPTION; + +functions = (executable_functions *)re->executable_jit; -if ((re->flags & PCRE2_NOJIT) != 0) return 0; +/* Support for invalid UTF was first introduced in JIT, with the option +PCRE2_JIT_INVALID_UTF. Later, support was added to the interpreter, and the +compile-time option PCRE2_MATCH_INVALID_UTF was created. This is now the +preferred feature, with the earlier option deprecated. However, for backward +compatibility, if the earlier option is set, it forces the new option so that +if JIT matching falls back to the interpreter, there is still support for +invalid UTF. However, if this function has already been successfully called +without PCRE2_JIT_INVALID_UTF and without PCRE2_MATCH_INVALID_UTF (meaning that +non-invalid-supporting JIT code was compiled), give an error. + +If in the future support for PCRE2_JIT_INVALID_UTF is withdrawn, the following +actions are needed: + + 1. Remove the definition from pcre2.h.in and from the list in + PUBLIC_JIT_COMPILE_OPTIONS above. + + 2. Replace PCRE2_JIT_INVALID_UTF with a local flag in this module. + + 3. Replace PCRE2_JIT_INVALID_UTF in pcre2_jit_test.c. + + 4. Delete the following short block of code. The setting of "re" and + "functions" can be moved into the JIT-only block below, but if that is + done, (void)re and (void)functions will be needed in the non-JIT case, to + avoid compiler warnings. +*/ -functions = (executable_functions *)re->executable_jit; +if ((options & PCRE2_JIT_INVALID_UTF) != 0) + { + if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) == 0) + { + if (functions != NULL) return PCRE2_ERROR_JIT_BADOPTION; + re->overall_options |= PCRE2_MATCH_INVALID_UTF; + } + } + +/* The above tests are run with and without JIT support. This means that +PCRE2_JIT_INVALID_UTF propagates back into the regex options (ensuring +interpreter support) even in the absence of JIT. But now, if there is no JIT +support, give an error return. */ +#ifndef SUPPORT_JIT +return PCRE2_ERROR_JIT_BADOPTION; +#else /* SUPPORT_JIT */ + +/* There is JIT support. Do the necessary. */ + +if ((re->flags & PCRE2_NOJIT) != 0) return 0; +if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0) + options |= PCRE2_JIT_INVALID_UTF; + if ((options & PCRE2_JIT_COMPLETE) != 0 && (functions == NULL || functions->executable_funcs[0] == NULL)) { - excluded_options = (PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_PARTIAL_HARD); - result = jit_compile(code, options & ~excluded_options); + uint32_t excluded_options = (PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_PARTIAL_HARD); + int result = jit_compile(code, options & ~excluded_options); if (result != 0) return result; } if ((options & PCRE2_JIT_PARTIAL_SOFT) != 0 && (functions == NULL || functions->executable_funcs[1] == NULL)) { - excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_HARD); - result = jit_compile(code, options & ~excluded_options); + uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_HARD); + int result = jit_compile(code, options & ~excluded_options); if (result != 0) return result; } if ((options & PCRE2_JIT_PARTIAL_HARD) != 0 && (functions == NULL || functions->executable_funcs[2] == NULL)) { - excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT); - result = jit_compile(code, options & ~excluded_options); + uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT); + int result = jit_compile(code, options & ~excluded_options); if (result != 0) return result; } |