summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorzherczeg <zherczeg@6239d852-aaf2-0410-a92c-79f79f948069>2021-01-09 13:41:29 +0000
committerzherczeg <zherczeg@6239d852-aaf2-0410-a92c-79f79f948069>2021-01-09 13:41:29 +0000
commit7a60c5a0ee9e922750b217dd45e5731ca8800ae8 (patch)
tree8048c3708f2f678747d935969834114682f8f59e
parent997c5c6884b3683ff136f4f9e524b30cccc02867 (diff)
downloadpcre2-7a60c5a0ee9e922750b217dd45e5731ca8800ae8.tar.gz
Add fast forward char pair simd support for s390x.
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@1291 6239d852-aaf2-0410-a92c-79f79f948069
-rw-r--r--src/pcre2_jit_simd_inc.h351
1 files changed, 302 insertions, 49 deletions
diff --git a/src/pcre2_jit_simd_inc.h b/src/pcre2_jit_simd_inc.h
index b2c90a9..b19e021 100644
--- a/src/pcre2_jit_simd_inc.h
+++ b/src/pcre2_jit_simd_inc.h
@@ -39,9 +39,10 @@ POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
-#if !(defined SUPPORT_VALGRIND) && \
- ((defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \
- || (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X))
+#if !(defined SUPPORT_VALGRIND)
+
+#if ((defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \
+ || (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X))
typedef enum {
vector_compare_match1,
@@ -49,6 +50,19 @@ typedef enum {
vector_compare_match2,
} vector_compare_type;
+static SLJIT_INLINE sljit_u32 max_fast_forward_char_pair_offset(void)
+{
+#if PCRE2_CODE_UNIT_WIDTH == 8
+return 15;
+#elif PCRE2_CODE_UNIT_WIDTH == 16
+return 7;
+#elif PCRE2_CODE_UNIT_WIDTH == 32
+return 3;
+#else
+#error "Unsupported unit width"
+#endif
+}
+
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
static struct sljit_jump *jump_if_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg)
{
@@ -64,9 +78,9 @@ return CMP(SLJIT_NOT_EQUAL, reg, 0, SLJIT_IMM, 0xdc00);
}
#endif
-#endif /* SUPPORT_VALGRIND && (SLJIT_CONFIG_X86 || SLJIT_CONFIG_S390X) */
+#endif /* SLJIT_CONFIG_X86 || SLJIT_CONFIG_S390X */
-#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND)
+#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
static sljit_s32 character_to_int32(PCRE2_UCHAR chr)
{
@@ -191,6 +205,7 @@ switch (step)
static void fast_forward_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
{
DEFINE_COMPILER;
+sljit_u8 instruction[8];
struct sljit_label *start;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_label *restart;
@@ -198,7 +213,6 @@ struct sljit_label *restart;
struct sljit_jump *quit;
struct sljit_jump *partial_quit[2];
vector_compare_type compare_type = vector_compare_match1;
-sljit_u8 instruction[8];
sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1);
sljit_s32 str_ptr_reg_ind = sljit_get_register_index(STR_PTR);
sljit_s32 data_ind = 0;
@@ -355,11 +369,11 @@ if (common->utf && offset > 0)
static jump_list *fast_requested_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2)
{
DEFINE_COMPILER;
+sljit_u8 instruction[8];
struct sljit_label *start;
struct sljit_jump *quit;
jump_list *not_found = NULL;
vector_compare_type compare_type = vector_compare_match1;
-sljit_u8 instruction[8];
sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1);
sljit_s32 str_ptr_reg_ind = sljit_get_register_index(STR_PTR);
sljit_s32 data_ind = 0;
@@ -482,25 +496,13 @@ return not_found;
#ifndef _WIN64
-static SLJIT_INLINE sljit_u32 max_fast_forward_char_pair_offset(void)
-{
-#if PCRE2_CODE_UNIT_WIDTH == 8
-return 15;
-#elif PCRE2_CODE_UNIT_WIDTH == 16
-return 7;
-#elif PCRE2_CODE_UNIT_WIDTH == 32
-return 3;
-#else
-#error "Unsupported unit width"
-#endif
-}
-
#define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_SSE2))
static void fast_forward_char_pair_simd(compiler_common *common, sljit_s32 offs1,
PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b)
{
DEFINE_COMPILER;
+sljit_u8 instruction[8];
vector_compare_type compare1_type = vector_compare_match1;
vector_compare_type compare2_type = vector_compare_match1;
sljit_u32 bit1 = 0;
@@ -522,7 +524,6 @@ struct sljit_label *start;
struct sljit_label *restart;
#endif
struct sljit_jump *jump[2];
-sljit_u8 instruction[8];
int i;
SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2);
@@ -737,9 +738,6 @@ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
-if (common->match_end_ptr != 0)
- OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
-
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf)
{
@@ -766,7 +764,7 @@ if (common->match_end_ptr != 0)
#undef SSE2_COMPARE_TYPE_INDEX
-#endif /* SLJIT_CONFIG_X86 && !SUPPORT_VALGRIND */
+#endif /* SLJIT_CONFIG_X86 */
#if (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64 && (defined __ARM_NEON || defined __ARM_NEON__))
@@ -1128,9 +1126,7 @@ JUMPHERE(partial_quit);
#endif /* SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64 */
-#if (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) && !(defined SUPPORT_VALGRIND)
-
-#define JIT_HAS_FAST_FORWARD_CHAR_SIMD 1
+#if (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X)
#if PCRE2_CODE_UNIT_WIDTH == 8
#define VECTOR_ELEMENT_SIZE 0
@@ -1142,13 +1138,14 @@ JUMPHERE(partial_quit);
#error "Unsupported unit width"
#endif
-static void load_from_mem_vector(struct sljit_compiler *compiler, BOOL vlbb, sljit_s32 dst_vreg, sljit_s32 src_general_reg)
+static void load_from_mem_vector(struct sljit_compiler *compiler, BOOL vlbb, sljit_s32 dst_vreg,
+ sljit_s32 base_reg, sljit_s32 index_reg)
{
sljit_u16 instruction[3];
-instruction[0] = (sljit_u16)(0xe700 | (dst_vreg << 4));
-instruction[1] = (sljit_u16)(src_general_reg << 12);
-instruction[2] = (sljit_u16)(vlbb ? ((6 << 12) | (0x8 << 8) | 0x07) : ((0x8 << 8) | 0x06));
+instruction[0] = (sljit_u16)(0xe700 | (dst_vreg << 4) | index_reg);
+instruction[1] = (sljit_u16)(base_reg << 12);
+instruction[2] = (sljit_u16)((0x8 << 8) | (vlbb ? 0x07 : 0x06));
sljit_emit_op_custom(compiler, instruction, 6);
}
@@ -1246,9 +1243,12 @@ switch (step)
}
}
+#define JIT_HAS_FAST_FORWARD_CHAR_SIMD 1
+
static void fast_forward_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
{
DEFINE_COMPILER;
+sljit_u16 instruction[3];
struct sljit_label *start;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_label *restart;
@@ -1256,7 +1256,6 @@ struct sljit_label *restart;
struct sljit_jump *quit;
struct sljit_jump *partial_quit[2];
vector_compare_type compare_type = vector_compare_match1;
-sljit_u16 instruction[3];
sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1);
sljit_s32 str_ptr_reg_ind = sljit_get_register_index(STR_PTR);
sljit_s32 data_ind = 0;
@@ -1302,34 +1301,36 @@ if (char1 != char2)
/* VREPI */
instruction[0] = (sljit_u16)(0xe700 | (cmp2_ind << 4));
instruction[1] = (sljit_u16)(bit != 0 ? bit : char2);
+ /* instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); */
sljit_emit_op_custom(compiler, instruction, 6);
}
#else /* PCRE2_CODE_UNIT_WIDTH == 32 */
-replicate_imm_vector(compiler, 0, cmp1_ind, char1 | bit, TMP1);
-
-if (char1 != char2)
- replicate_imm_vector(compiler, 0, cmp2_ind, bit != 0 ? bit : char2, TMP1);
-
-replicate_imm_vector(compiler, 1, cmp1_ind, char1 | bit, TMP1);
+for (int i = 0; i < 2; i++)
+ {
+ replicate_imm_vector(compiler, i, cmp1_ind, char1 | bit, TMP1);
-if (char1 != char2)
- replicate_imm_vector(compiler, 1, cmp2_ind, bit != 0 ? bit : char2, TMP1);
+ if (char1 != char2)
+ replicate_imm_vector(compiler, i, cmp2_ind, bit != 0 ? bit : char2, TMP1);
+ }
#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
-/* VX */
-instruction[0] = (sljit_u16)(0xe700 | (zero_ind << 4) | zero_ind);
-instruction[1] = (sljit_u16)(zero_ind << 12);
-instruction[2] = (sljit_u16)((0xe << 8) | 0x6d);
-sljit_emit_op_custom(compiler, instruction, 6);
+if (compare_type == vector_compare_match2)
+ {
+ /* VREPI */
+ instruction[0] = (sljit_u16)(0xe700 | (zero_ind << 4));
+ instruction[1] = 0;
+ instruction[2] = (sljit_u16)((0x8 << 8) | 0x45);
+ sljit_emit_op_custom(compiler, instruction, 6);
+ }
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
restart = LABEL();
#endif
-load_from_mem_vector(compiler, TRUE, data_ind, str_ptr_reg_ind);
+load_from_mem_vector(compiler, TRUE, data_ind, str_ptr_reg_ind, 0);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, ~15);
if (compare_type != vector_compare_match2)
@@ -1377,7 +1378,7 @@ partial_quit[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
add_jump(compiler, &common->failed_match, partial_quit[1]);
-load_from_mem_vector(compiler, TRUE, data_ind, str_ptr_reg_ind);
+load_from_mem_vector(compiler, TRUE, data_ind, str_ptr_reg_ind, 0);
if (compare_type != vector_compare_match2)
{
@@ -1446,4 +1447,256 @@ if (common->utf && offset > 0)
#endif
}
-#endif /* SLJIT_CONFIG_S390X && !SUPPORT_VALGRIND */
+#define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD 1
+
+static void fast_forward_char_pair_simd(compiler_common *common, sljit_s32 offs1,
+ PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b)
+{
+DEFINE_COMPILER;
+sljit_u16 instruction[3];
+struct sljit_label *start;
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
+struct sljit_label *restart;
+#endif
+struct sljit_jump *quit;
+struct sljit_jump *jump[2];
+vector_compare_type compare1_type = vector_compare_match1;
+vector_compare_type compare2_type = vector_compare_match1;
+sljit_u32 bit1 = 0;
+sljit_u32 bit2 = 0;
+sljit_s32 diff = IN_UCHARS(offs2 - offs1);
+sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1);
+sljit_s32 tmp2_reg_ind = sljit_get_register_index(TMP2);
+sljit_s32 str_ptr_reg_ind = sljit_get_register_index(STR_PTR);
+sljit_s32 data1_ind = 0;
+sljit_s32 data2_ind = 1;
+sljit_s32 tmp1_ind = 2;
+sljit_s32 tmp2_ind = 3;
+sljit_s32 cmp1a_ind = 4;
+sljit_s32 cmp1b_ind = 5;
+sljit_s32 cmp2a_ind = 6;
+sljit_s32 cmp2b_ind = 7;
+sljit_s32 zero_ind = 8;
+int i;
+
+SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2);
+SLJIT_ASSERT(-diff <= (sljit_s32)IN_UCHARS(max_fast_forward_char_pair_offset()));
+SLJIT_ASSERT(tmp1_reg_ind != 0 && tmp2_reg_ind != 0);
+
+if (char1a != char1b)
+ {
+ bit1 = char1a ^ char1b;
+ compare1_type = vector_compare_match1i;
+
+ if (!is_powerof2(bit1))
+ {
+ bit1 = 0;
+ compare1_type = vector_compare_match2;
+ }
+ }
+
+if (char2a != char2b)
+ {
+ bit2 = char2a ^ char2b;
+ compare2_type = vector_compare_match1i;
+
+ if (!is_powerof2(bit2))
+ {
+ bit2 = 0;
+ compare2_type = vector_compare_match2;
+ }
+ }
+
+/* Initialize. */
+if (common->match_end_ptr != 0)
+ {
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
+ OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
+ OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1));
+
+ OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, STR_END, 0);
+ CMOV(SLJIT_LESS, STR_END, TMP1, 0);
+ }
+
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));
+add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
+OP2(SLJIT_AND, TMP2, 0, STR_PTR, 0, SLJIT_IMM, ~15);
+
+#if PCRE2_CODE_UNIT_WIDTH != 32
+
+OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, -diff);
+
+/* VREPI */
+instruction[0] = (sljit_u16)(0xe700 | (cmp1a_ind << 4));
+instruction[1] = (sljit_u16)(char1a | bit1);
+instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45);
+sljit_emit_op_custom(compiler, instruction, 6);
+
+if (char1a != char1b)
+ {
+ /* VREPI */
+ instruction[0] = (sljit_u16)(0xe700 | (cmp1b_ind << 4));
+ instruction[1] = (sljit_u16)(bit1 != 0 ? bit1 : char1b);
+ /* instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); */
+ sljit_emit_op_custom(compiler, instruction, 6);
+ }
+
+/* VREPI */
+instruction[0] = (sljit_u16)(0xe700 | (cmp2a_ind << 4));
+instruction[1] = (sljit_u16)(char2a | bit2);
+/* instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); */
+sljit_emit_op_custom(compiler, instruction, 6);
+
+if (char2a != char2b)
+ {
+ /* VREPI */
+ instruction[0] = (sljit_u16)(0xe700 | (cmp2b_ind << 4));
+ instruction[1] = (sljit_u16)(bit2 != 0 ? bit2 : char2b);
+ /* instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); */
+ sljit_emit_op_custom(compiler, instruction, 6);
+ }
+
+#else /* PCRE2_CODE_UNIT_WIDTH == 32 */
+
+for (int i = 0; i < 2; i++)
+ {
+ replicate_imm_vector(compiler, i, cmp1a_ind, char1a | bit1, TMP1);
+
+ if (char1a != char1b)
+ replicate_imm_vector(compiler, i, cmp1b_ind, bit1 != 0 ? bit1 : char1b, TMP1);
+
+ replicate_imm_vector(compiler, i, cmp2a_ind, char2a | bit2, TMP1);
+
+ if (char2a != char2b)
+ replicate_imm_vector(compiler, i, cmp2b_ind, bit2 != 0 ? bit2 : char2b, TMP1);
+ }
+
+OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, -diff);
+
+#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
+
+/* VREPI */
+instruction[0] = (sljit_u16)(0xe700 | (zero_ind << 4));
+instruction[1] = 0;
+instruction[2] = (sljit_u16)((0x8 << 8) | 0x45);
+sljit_emit_op_custom(compiler, instruction, 6);
+
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
+restart = LABEL();
+#endif
+
+jump[0] = CMP(SLJIT_LESS, TMP1, 0, TMP2, 0);
+load_from_mem_vector(compiler, TRUE, data2_ind, tmp1_reg_ind, 0);
+jump[1] = JUMP(SLJIT_JUMP);
+JUMPHERE(jump[0]);
+load_from_mem_vector(compiler, FALSE, data2_ind, tmp1_reg_ind, 0);
+JUMPHERE(jump[1]);
+
+load_from_mem_vector(compiler, TRUE, data1_ind, str_ptr_reg_ind, 0);
+OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 16);
+
+for (i = 0; i < 3; i++)
+ {
+ fast_forward_char_pair_sse2_compare(compiler, compare1_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp1_ind);
+ fast_forward_char_pair_sse2_compare(compiler, compare2_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp2_ind);
+ }
+
+/* VN */
+instruction[0] = (sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind);
+instruction[1] = (sljit_u16)(data2_ind << 12);
+instruction[2] = (sljit_u16)((0xe << 8) | 0x68);
+sljit_emit_op_custom(compiler, instruction, 6);
+
+/* VFENE */
+instruction[0] = (sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind);
+instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4));
+instruction[2] = (sljit_u16)((0xe << 8) | 0x81);
+sljit_emit_op_custom(compiler, instruction, 6);
+
+/* TODO: use sljit_set_current_flags */
+
+/* VLGVB */
+instruction[0] = (sljit_u16)(0xe700 | (tmp1_reg_ind << 4) | data1_ind);
+instruction[1] = 7;
+instruction[2] = (sljit_u16)((0x4 << 8) | 0x21);
+sljit_emit_op_custom(compiler, instruction, 6);
+
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
+quit = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
+
+OP2(SLJIT_SUB, STR_PTR, 0, TMP2, 0, SLJIT_IMM, 16);
+OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, diff);
+
+/* Main loop. */
+start = LABEL();
+
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
+add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
+
+load_from_mem_vector(compiler, FALSE, data1_ind, str_ptr_reg_ind, 0);
+load_from_mem_vector(compiler, FALSE, data2_ind, str_ptr_reg_ind, tmp1_reg_ind);
+
+for (i = 0; i < 3; i++)
+ {
+ fast_forward_char_pair_sse2_compare(compiler, compare1_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp1_ind);
+ fast_forward_char_pair_sse2_compare(compiler, compare2_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp2_ind);
+ }
+
+/* VN */
+instruction[0] = (sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind);
+instruction[1] = (sljit_u16)(data2_ind << 12);
+instruction[2] = (sljit_u16)((0xe << 8) | 0x68);
+sljit_emit_op_custom(compiler, instruction, 6);
+
+/* VFENE */
+instruction[0] = (sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind);
+instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4));
+instruction[2] = (sljit_u16)((0xe << 8) | 0x81);
+sljit_emit_op_custom(compiler, instruction, 6);
+
+/* TODO: use sljit_set_current_flags */
+
+/* VLGVB */
+instruction[0] = (sljit_u16)(0xe700 | (tmp2_reg_ind << 4) | data1_ind);
+instruction[1] = 7;
+instruction[2] = (sljit_u16)((0x4 << 8) | 0x21);
+sljit_emit_op_custom(compiler, instruction, 6);
+
+CMPTO(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 16, start);
+
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
+
+JUMPHERE(quit);
+
+add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
+
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
+if (common->utf)
+ {
+ SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);
+
+ OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offs1));
+
+ quit = jump_if_utf_char_start(compiler, TMP1);
+
+ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+ add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
+
+ /* TMP1 contains diff. */
+ OP2(SLJIT_AND, TMP2, 0, STR_PTR, 0, SLJIT_IMM, ~15);
+ OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, -diff);
+ JUMPTO(SLJIT_JUMP, restart);
+
+ JUMPHERE(quit);
+ }
+#endif
+
+OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));
+
+if (common->match_end_ptr != 0)
+ OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
+}
+
+#endif /* SLJIT_CONFIG_S390X */
+
+#endif /* !SUPPORT_VALGRIND */