diff options
author | zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2012-04-04 05:35:52 +0000 |
---|---|---|
committer | zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2012-04-04 05:35:52 +0000 |
commit | f8fe154ad9e81a9a85ee4bd3586a68f32e410782 (patch) | |
tree | 78a29be9072d1e8ca2773e12674548a0f2fb32bc | |
parent | 1329e2538aff26f32b3a16b6631b14c618b5dabd (diff) | |
download | pcre-f8fe154ad9e81a9a85ee4bd3586a68f32e410782.tar.gz |
JIT compiler update: fix x86-64 alignment issue
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@956 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- | sljit/sljitNativeX86_32.c | 14 | ||||
-rw-r--r-- | sljit/sljitNativeX86_64.c | 98 | ||||
-rw-r--r-- | sljit/sljitNativeX86_common.c | 10 |
3 files changed, 57 insertions, 65 deletions
diff --git a/sljit/sljitNativeX86_32.c b/sljit/sljitNativeX86_32.c index 24ec6e0..e955825 100644 --- a/sljit/sljitNativeX86_32.c +++ b/sljit/sljitNativeX86_32.c @@ -149,14 +149,14 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_enter(struct sljit_compiler *compiler, i #ifdef _WIN32 if (local_size > 1024) { FAIL_IF(emit_do_imm(compiler, 0xb8 + reg_map[SLJIT_TEMPORARY_REG1], local_size)); - FAIL_IF(sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_touch_stack))); + FAIL_IF(sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack))); } #endif compiler->local_size = local_size; - if (local_size > 0) - return emit_non_cum_binary(compiler, 0x2b, 0x29, 0x5 << 3, 0x2d, - SLJIT_LOCALS_REG, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, local_size); + SLJIT_ASSERT(local_size > 0); + return emit_non_cum_binary(compiler, 0x2b, 0x29, 0x5 << 3, 0x2d, + SLJIT_LOCALS_REG, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, local_size); return SLJIT_SUCCESS; } @@ -199,9 +199,9 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_return(struct sljit_compiler *compiler, compiler->flags_saved = 0; FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); - if (compiler->local_size > 0) - FAIL_IF(emit_cum_binary(compiler, 0x03, 0x01, 0x0 << 3, 0x05, - SLJIT_LOCALS_REG, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, compiler->local_size)); + SLJIT_ASSERT(compiler->local_size > 0); + FAIL_IF(emit_cum_binary(compiler, 0x03, 0x01, 0x0 << 3, 0x05, + SLJIT_LOCALS_REG, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, compiler->local_size)); size = 2 + (compiler->saveds <= 3 ? 
compiler->saveds : 3); #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) diff --git a/sljit/sljitNativeX86_64.c b/sljit/sljitNativeX86_64.c index 5b15ccf..480cebc 100644 --- a/sljit/sljitNativeX86_64.c +++ b/sljit/sljitNativeX86_64.c @@ -196,12 +196,11 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_enter(struct sljit_compiler *compiler, i #endif } - local_size = ((local_size + pushed_size + 16 - 1) & ~(16 - 1)) - pushed_size; -#ifdef _WIN64 - local_size += 4 * sizeof(sljit_w); + local_size = ((local_size + FIXED_LOCALS_OFFSET + pushed_size + 16 - 1) & ~(16 - 1)) - pushed_size; compiler->local_size = local_size; +#ifdef _WIN64 if (local_size > 1024) { - /* Allocate the stack for the function itself. */ + /* Allocate stack for the callback, which grows the stack. */ buf = (sljit_ub*)ensure_buf(compiler, 1 + 4); FAIL_IF(!buf); INC_SIZE(4); @@ -218,36 +217,29 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_enter(struct sljit_compiler *compiler, i local_size -= 4 * sizeof(sljit_w); } FAIL_IF(emit_load_imm64(compiler, SLJIT_TEMPORARY_REG1, local_size)); - FAIL_IF(sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_touch_stack))); + FAIL_IF(sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack))); } -#else - local_size += sizeof(sljit_w); - compiler->local_size = local_size; - if (local_size > 0) { #endif - /* In case of Win64, local_size is always > 4 * sizeof(sljit_w) */ - if (local_size <= 127) { - buf = (sljit_ub*)ensure_buf(compiler, 1 + 4); - FAIL_IF(!buf); - INC_SIZE(4); - *buf++ = REX_W; - *buf++ = 0x83; - *buf++ = 0xc0 | (5 << 3) | 4; - *buf++ = local_size; - } - else { - buf = (sljit_ub*)ensure_buf(compiler, 1 + 7); - FAIL_IF(!buf); - INC_SIZE(7); - *buf++ = REX_W; - *buf++ = 0x81; - *buf++ = 0xc0 | (5 << 3) | 4; - *(sljit_hw*)buf = local_size; - buf += sizeof(sljit_hw); - } -#ifndef _WIN64 + SLJIT_ASSERT(local_size > 0); + if (local_size <= 127) { + buf = (sljit_ub*)ensure_buf(compiler, 1 + 4); + 
FAIL_IF(!buf); + INC_SIZE(4); + *buf++ = REX_W; + *buf++ = 0x83; + *buf++ = 0xc0 | (5 << 3) | 4; + *buf++ = local_size; + } + else { + buf = (sljit_ub*)ensure_buf(compiler, 1 + 7); + FAIL_IF(!buf); + INC_SIZE(7); + *buf++ = REX_W; + *buf++ = 0x81; + *buf++ = 0xc0 | (5 << 3) | 4; + *(sljit_hw*)buf = local_size; + buf += sizeof(sljit_hw); } -#endif return SLJIT_SUCCESS; } @@ -271,12 +263,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, if (temporaries >= 5) pushed_size += sizeof(sljit_w); #endif - compiler->local_size = ((local_size + pushed_size + 16 - 1) & ~(16 - 1)) - pushed_size; -#ifdef _WIN64 - compiler->local_size += 4 * sizeof(sljit_w); -#else - compiler->local_size += sizeof(sljit_w); -#endif + compiler->local_size = ((local_size + FIXED_LOCALS_OFFSET + pushed_size + 16 - 1) & ~(16 - 1)) - pushed_size; } SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_return(struct sljit_compiler *compiler, int op, int src, sljit_w srcw) @@ -291,25 +278,24 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_return(struct sljit_compiler *compiler, compiler->flags_saved = 0; FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); - if (compiler->local_size > 0) { - if (compiler->local_size <= 127) { - buf = (sljit_ub*)ensure_buf(compiler, 1 + 4); - FAIL_IF(!buf); - INC_SIZE(4); - *buf++ = REX_W; - *buf++ = 0x83; - *buf++ = 0xc0 | (0 << 3) | 4; - *buf = compiler->local_size; - } - else { - buf = (sljit_ub*)ensure_buf(compiler, 1 + 7); - FAIL_IF(!buf); - INC_SIZE(7); - *buf++ = REX_W; - *buf++ = 0x81; - *buf++ = 0xc0 | (0 << 3) | 4; - *(sljit_hw*)buf = compiler->local_size; - } + SLJIT_ASSERT(compiler->local_size > 0); + if (compiler->local_size <= 127) { + buf = (sljit_ub*)ensure_buf(compiler, 1 + 4); + FAIL_IF(!buf); + INC_SIZE(4); + *buf++ = REX_W; + *buf++ = 0x83; + *buf++ = 0xc0 | (0 << 3) | 4; + *buf = compiler->local_size; + } + else { + buf = (sljit_ub*)ensure_buf(compiler, 1 + 7); + FAIL_IF(!buf); + INC_SIZE(7); + *buf++ = REX_W; + *buf++ = 0x81; + 
*buf++ = 0xc0 | (0 << 3) | 4; + *(sljit_hw*)buf = compiler->local_size; } size = 1 + compiler->saveds; diff --git a/sljit/sljitNativeX86_common.c b/sljit/sljitNativeX86_common.c index 5370d10..49b9089 100644 --- a/sljit/sljitNativeX86_common.c +++ b/sljit/sljitNativeX86_common.c @@ -457,11 +457,17 @@ static SLJIT_INLINE int emit_restore_flags(struct sljit_compiler *compiler, int #ifdef _WIN32 #include <malloc.h> -static void SLJIT_CALL sljit_touch_stack(sljit_w local_size) +static void SLJIT_CALL sljit_grow_stack(sljit_w local_size) { - /* Workaround for calling _chkstk. */ + /* Workaround for calling the internal _chkstk() function on Windows. + This function touches all 4k pages belongs to the requested stack space, + which size is passed in local_size. This is necessary on Windows where + the stack can only grow in 4k steps. However, this function just burn + CPU cycles if the stack is large enough, but you don't know it in advance. + I think this is a bad design even if it has some reasons. */ alloca(local_size); } + #endif #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) |