diff options
author | David Schleef <ds@ginger.bigkitten.com> | 2008-05-27 15:06:30 -0700 |
---|---|---|
committer | David Schleef <ds@ginger.bigkitten.com> | 2008-05-27 15:06:30 -0700 |
commit | 8c46abcb0bf8effe7776894a47c1952864174c71 (patch) | |
tree | e5dee7ade7cac771d4be535801dcbffefe63963d | |
parent | 486e63504ffc558be68effcfffb315d18a0cfb82 (diff) | |
download | liboil-8c46abcb0bf8effe7776894a47c1952864174c71.tar.gz |
[orc] Fixes to get SSE rules working.
-rw-r--r-- | orc/orcprogram-x86.c | 40 | ||||
-rw-r--r-- | orc/orcprogram.c | 6 | ||||
-rw-r--r-- | orc/orcrules-sse.c | 9 | ||||
-rw-r--r-- | orc/x86.c | 90 | ||||
-rw-r--r-- | orc/x86.h | 6 |
5 files changed, 140 insertions, 11 deletions
diff --git a/orc/orcprogram-x86.c b/orc/orcprogram-x86.c index cb811c3..ca70bb2 100644 --- a/orc/orcprogram-x86.c +++ b/orc/orcprogram-x86.c @@ -185,7 +185,7 @@ orc_program_x86_init (OrcProgram *program) program->used_regs[i] = 0; } - program->data_register_class = 2; + program->data_register_class = 3; } void @@ -240,6 +240,19 @@ x86_emit_load_src (OrcProgram *program, OrcVariable *var) case ORC_RULE_MMX_4: x86_emit_mov_memoffset_mmx (program, 8, 0, ptr_reg, var->alloc); break; + case ORC_RULE_SSE_1: + x86_emit_mov_memoffset_reg (program, 2, 0, ptr_reg, X86_ECX); + x86_emit_mov_reg_sse (program, X86_ECX, var->alloc); + break; + case ORC_RULE_SSE_2: + x86_emit_mov_memoffset_sse (program, 4, 0, ptr_reg, var->alloc); + break; + case ORC_RULE_SSE_4: + x86_emit_mov_memoffset_sse (program, 8, 0, ptr_reg, var->alloc); + break; + case ORC_RULE_SSE_8: + x86_emit_mov_memoffset_sse (program, 16, 0, ptr_reg, var->alloc); + break; default: printf("ERROR\n"); } @@ -274,6 +287,23 @@ x86_emit_store_dest (OrcProgram *program, OrcVariable *var) case ORC_RULE_MMX_4: x86_emit_mov_mmx_memoffset (program, 8, var->alloc, 0, ptr_reg); break; + case ORC_RULE_SSE_1: + /* FIXME we might be using ecx twice here */ + if (ptr_reg == X86_ECX) { + printf("ERROR\n"); + } + x86_emit_mov_sse_reg (program, var->alloc, X86_ECX); + x86_emit_mov_reg_memoffset (program, 2, X86_ECX, 0, ptr_reg); + break; + case ORC_RULE_SSE_2: + x86_emit_mov_sse_memoffset (program, 4, var->alloc, 0, ptr_reg); + break; + case ORC_RULE_SSE_4: + x86_emit_mov_sse_memoffset (program, 8, var->alloc, 0, ptr_reg); + break; + case ORC_RULE_SSE_8: + x86_emit_mov_sse_memoffset (program, 16, var->alloc, 0, ptr_reg); + break; default: printf("ERROR\n"); } @@ -302,7 +332,7 @@ orc_program_assemble_x86 (OrcProgram *program) (int)ORC_STRUCT_OFFSET(OrcExecutor,counter1), x86_exec_ptr); x86_emit_je (program, 1); - program->rule_set = ORC_RULE_MMX_1; + program->rule_set = ORC_RULE_SSE_1; program->n_per_loop = 1; program->loop_shift = 0; x86_emit_label (program, 0); @@ -320,9 +350,9 @@ orc_program_assemble_x86 (OrcProgram *program) (int)ORC_STRUCT_OFFSET(OrcExecutor,counter2), x86_exec_ptr); x86_emit_je (program, 3); - program->rule_set = ORC_RULE_MMX_4; - program->n_per_loop = 4; - program->loop_shift = 2; + program->rule_set = ORC_RULE_SSE_8; + program->n_per_loop = 8; + program->loop_shift = 3; x86_emit_label (program, 2); x86_emit_loop (program); x86_emit_dec_memoffset (program, 4, diff --git a/orc/orcprogram.c b/orc/orcprogram.c index b64d41a..52bff06 100644 --- a/orc/orcprogram.c +++ b/orc/orcprogram.c @@ -24,10 +24,10 @@ orc_program_new (void) #if defined(HAVE_POWERPC) p->rule_set = ORC_RULE_ALTIVEC_1; #else - p->rule_set = ORC_RULE_MMX_1; + p->rule_set = ORC_RULE_SSE_8; #endif - p->n_per_loop = 1; - p->loop_shift = 0; + p->n_per_loop = 8; + p->loop_shift = 3; return p; } diff --git a/orc/orcrules-sse.c b/orc/orcrules-sse.c index 2c56449..9f559e0 100644 --- a/orc/orcrules-sse.c +++ b/orc/orcrules-sse.c @@ -29,6 +29,7 @@ sse_emit_loadi_s16 (OrcProgram *p, int reg, int value) x86_emit_mov_imm_reg (p, 4, value, X86_ECX); printf(" movd %%ecx, %%%s\n", x86_get_regname_sse(reg)); + *p->codeptr++ = 0x66; *p->codeptr++ = 0x0f; *p->codeptr++ = 0x6e; x86_emit_modrm_reg (p, X86_ECX, reg); @@ -36,6 +37,7 @@ sse_emit_loadi_s16 (OrcProgram *p, int reg, int value) printf(" pshufw $0, %%%s, %%%s\n", x86_get_regname_sse(reg), x86_get_regname_sse(reg)); + *p->codeptr++ = 0x66; *p->codeptr++ = 0x0f; *p->codeptr++ = 0x70; x86_emit_modrm_reg (p, reg, reg); @@ -57,6 +59,7 @@ sse_rule_add_s16 (OrcProgram *p, void *user, OrcInstruction *insn) x86_get_regname_sse(p->vars[insn->args[2]].alloc), x86_get_regname_sse(p->vars[insn->args[0]].alloc)); + *p->codeptr++ = 0x66; *p->codeptr++ = 0x0f; *p->codeptr++ = 0xfd; x86_emit_modrm_reg (p, p->vars[insn->args[2]].alloc, @@ -70,6 +73,7 @@ sse_rule_sub_s16 (OrcProgram *p, void *user, OrcInstruction *insn) x86_get_regname_sse(p->vars[insn->args[2]].alloc), x86_get_regname_sse(p->vars[insn->args[0]].alloc)); + *p->codeptr++ = 0x66; *p->codeptr++ = 0x0f; *p->codeptr++ = 0xf9; x86_emit_modrm_reg (p, p->vars[insn->args[2]].alloc, @@ -83,6 +87,7 @@ sse_rule_mul_s16 (OrcProgram *p, void *user, OrcInstruction *insn) x86_get_regname_sse(p->vars[insn->args[2]].alloc), x86_get_regname_sse(p->vars[insn->args[0]].alloc)); + *p->codeptr++ = 0x66; *p->codeptr++ = 0x0f; *p->codeptr++ = 0xd5; x86_emit_modrm_reg (p, p->vars[insn->args[2]].alloc, @@ -97,6 +102,7 @@ sse_rule_lshift_s16 (OrcProgram *p, void *user, OrcInstruction *insn) p->vars[insn->args[2]].s16, x86_get_regname_sse(p->vars[insn->args[0]].alloc)); + *p->codeptr++ = 0x66; *p->codeptr++ = 0x0f; *p->codeptr++ = 0x71; x86_emit_modrm_reg (p, p->vars[insn->args[0]].alloc, 6); @@ -107,6 +113,7 @@ sse_rule_lshift_s16 (OrcProgram *p, void *user, OrcInstruction *insn) x86_get_regname_sse(p->vars[insn->args[2]].alloc), x86_get_regname_sse(p->vars[insn->args[0]].alloc)); + *p->codeptr++ = 0x66; *p->codeptr++ = 0x0f; *p->codeptr++ = 0xf1; x86_emit_modrm_reg (p, p->vars[insn->args[0]].alloc, @@ -122,6 +129,7 @@ sse_rule_rshift_s16 (OrcProgram *p, void *user, OrcInstruction *insn) p->vars[insn->args[2]].s16, x86_get_regname_sse(p->vars[insn->args[0]].alloc)); + *p->codeptr++ = 0x66; *p->codeptr++ = 0x0f; *p->codeptr++ = 0x71; x86_emit_modrm_reg (p, p->vars[insn->args[0]].alloc, 4); @@ -132,6 +140,7 @@ sse_rule_rshift_s16 (OrcProgram *p, void *user, OrcInstruction *insn) x86_get_regname_sse(p->vars[insn->args[2]].alloc), x86_get_regname_sse(p->vars[insn->args[0]].alloc)); + *p->codeptr++ = 0x66; *p->codeptr++ = 0x0f; *p->codeptr++ = 0xe1; x86_emit_modrm_reg (p, p->vars[insn->args[0]].alloc, @@ -102,10 +102,12 @@ x86_get_regname_mmx(int i) const char * x86_get_regname_sse(int i) { - static const char *x86_regs[] = { "xmm0", "xmm1", "xmm2", "xmm3", - "xmm4", "xmm5", "xmm6", "xmm7" }; + static const char *x86_regs[] = { + "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", + "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15" + }; - if (i>=X86_XMM0 && i<X86_XMM0 + 8) return x86_regs[i - X86_XMM0]; + if (i>=X86_XMM0 && i<X86_XMM0 + 16) return x86_regs[i - X86_XMM0]; switch (i) { case 0: return "UNALLOCATED"; @@ -244,6 +246,35 @@ x86_emit_mov_memoffset_mmx (OrcProgram *program, int size, int offset, } void +x86_emit_mov_memoffset_sse (OrcProgram *program, int size, int offset, + int reg1, int reg2) +{ + if (size == 4) { + printf(" movd %d(%%%s), %%%s\n", offset, x86_get_regname_ptr(reg1), + x86_get_regname_sse(reg2)); + *program->codeptr++ = 0x66; + x86_emit_rex(program, 0, reg2, 0, reg1); + *program->codeptr++ = 0x0f; + *program->codeptr++ = 0x6e; + } else if (size == 8) { + printf(" movq %d(%%%s), %%%s\n", offset, x86_get_regname_ptr(reg1), + x86_get_regname_sse(reg2)); + *program->codeptr++ = 0x66; + x86_emit_rex(program, 0, reg2, 0, reg1); + *program->codeptr++ = 0x0f; + *program->codeptr++ = 0x6f; + } else { + printf(" movdqu %d(%%%s), %%%s\n", offset, x86_get_regname_ptr(reg1), + x86_get_regname_sse(reg2)); + x86_emit_rex(program, 0, reg2, 0, reg1); + *program->codeptr++ = 0xf3; + *program->codeptr++ = 0x0f; + *program->codeptr++ = 0x6f; + } + x86_emit_modrm_memoffset (program, reg2, offset, reg1); +} + +void x86_emit_mov_reg_memoffset (OrcProgram *program, int size, int reg1, int offset, int reg2) { @@ -285,6 +316,35 @@ x86_emit_mov_mmx_memoffset (OrcProgram *program, int size, int reg1, int offset, } void +x86_emit_mov_sse_memoffset (OrcProgram *program, int size, int reg1, int offset, + int reg2) +{ + if (size == 4) { + printf(" movd %%%s, %d(%%%s)\n", x86_get_regname_sse(reg1), offset, + x86_get_regname_ptr(reg2)); + *program->codeptr++ = 0x66; + x86_emit_rex(program, 0, reg1, 0, reg2); + *program->codeptr++ = 0x0f; + *program->codeptr++ = 0x7e; + } else if (size == 8) { + printf(" movq %%%s, %d(%%%s)\n", x86_get_regname_sse(reg1), offset, + x86_get_regname_ptr(reg2)); + *program->codeptr++ = 0x66; + x86_emit_rex(program, 0, reg1, 0, reg2); + *program->codeptr++ = 0x0f; + *program->codeptr++ = 0x7f; + } else { + printf(" movdqu %%%s, %d(%%%s)\n", x86_get_regname_sse(reg1), offset, + x86_get_regname_ptr(reg2)); + *program->codeptr++ = 0xf3; + *program->codeptr++ = 0x0f; + *program->codeptr++ = 0x7f; + } + + x86_emit_modrm_memoffset (program, reg1, offset, reg2); +} + +void x86_emit_mov_imm_reg (OrcProgram *program, int size, int value, int reg1) { if (size == 2) { @@ -349,6 +409,30 @@ void x86_emit_mov_mmx_reg (OrcProgram *program, int reg1, int reg2) x86_emit_modrm_reg (program, reg2, reg1); } +void x86_emit_mov_reg_sse (OrcProgram *program, int reg1, int reg2) +{ + /* FIXME */ + printf(" movd %%%s, %%%s\n", x86_get_regname(reg1), + x86_get_regname_sse(reg2)); + *program->codeptr++ = 0x66; + x86_emit_rex(program, 0, reg1, 0, reg2); + *program->codeptr++ = 0x0f; + *program->codeptr++ = 0x6e; + x86_emit_modrm_reg (program, reg1, reg2); +} + +void x86_emit_mov_sse_reg (OrcProgram *program, int reg1, int reg2) +{ + /* FIXME */ + printf(" movd %%%s, %%%s\n", x86_get_regname_sse(reg1), + x86_get_regname(reg2)); + *program->codeptr++ = 0x66; + x86_emit_rex(program, 0, reg2, 0, reg1); + *program->codeptr++ = 0x0f; + *program->codeptr++ = 0x7e; + x86_emit_modrm_reg (program, reg2, reg1); +} + void x86_emit_test_reg_reg (OrcProgram *program, int size, int reg1, int reg2) { @@ -12,13 +12,19 @@ void x86_emit_pop (OrcProgram *program, int size, int reg); void x86_emit_mov_memoffset_reg (OrcProgram *program, int size, int offset, int reg1, int reg2); void x86_emit_mov_memoffset_mmx (OrcProgram *program, int size, int offset, int reg1, int reg2); +void x86_emit_mov_memoffset_sse (OrcProgram *program, int size, int offset, + int reg1, int reg2); void x86_emit_mov_reg_memoffset (OrcProgram *program, int size, int reg1, int offset, int reg2); void x86_emit_mov_mmx_memoffset (OrcProgram *program, int size, int reg1, int offset, int reg2); +void x86_emit_mov_sse_memoffset (OrcProgram *program, int size, int reg1, int offset, + int reg2); void x86_emit_mov_imm_reg (OrcProgram *program, int size, int value, int reg1); void x86_emit_mov_reg_reg (OrcProgram *program, int size, int reg1, int reg2); void x86_emit_mov_reg_mmx (OrcProgram *program, int reg1, int reg2); void x86_emit_mov_mmx_reg (OrcProgram *program, int reg1, int reg2); +void x86_emit_mov_reg_sse (OrcProgram *program, int reg1, int reg2); +void x86_emit_mov_sse_reg (OrcProgram *program, int reg1, int reg2); void x86_emit_test_reg_reg (OrcProgram *program, int size, int reg1, int reg2); void x86_emit_sar_imm_reg (OrcProgram *program, int size, int value, int reg); void x86_emit_dec_memoffset (OrcProgram *program, int size, int offset, int reg); |