summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author David Schleef <ds@ginger.bigkitten.com> 2008-05-27 15:06:30 -0700
committer David Schleef <ds@ginger.bigkitten.com> 2008-05-27 15:06:30 -0700
commit 8c46abcb0bf8effe7776894a47c1952864174c71 (patch)
tree e5dee7ade7cac771d4be535801dcbffefe63963d
parent 486e63504ffc558be68effcfffb315d18a0cfb82 (diff)
download liboil-8c46abcb0bf8effe7776894a47c1952864174c71.tar.gz
[orc] Fixes to get SSE rules working.
-rw-r--r-- orc/orcprogram-x86.c | 40
-rw-r--r-- orc/orcprogram.c | 6
-rw-r--r-- orc/orcrules-sse.c | 9
-rw-r--r-- orc/x86.c | 90
-rw-r--r-- orc/x86.h | 6
5 files changed, 140 insertions, 11 deletions
diff --git a/orc/orcprogram-x86.c b/orc/orcprogram-x86.c
index cb811c3..ca70bb2 100644
--- a/orc/orcprogram-x86.c
+++ b/orc/orcprogram-x86.c
@@ -185,7 +185,7 @@ orc_program_x86_init (OrcProgram *program)
program->used_regs[i] = 0;
}
- program->data_register_class = 2;
+ program->data_register_class = 3;
}
void
@@ -240,6 +240,19 @@ x86_emit_load_src (OrcProgram *program, OrcVariable *var)
case ORC_RULE_MMX_4:
x86_emit_mov_memoffset_mmx (program, 8, 0, ptr_reg, var->alloc);
break;
+ case ORC_RULE_SSE_1:
+ x86_emit_mov_memoffset_reg (program, 2, 0, ptr_reg, X86_ECX);
+ x86_emit_mov_reg_sse (program, X86_ECX, var->alloc);
+ break;
+ case ORC_RULE_SSE_2:
+ x86_emit_mov_memoffset_sse (program, 4, 0, ptr_reg, var->alloc);
+ break;
+ case ORC_RULE_SSE_4:
+ x86_emit_mov_memoffset_sse (program, 8, 0, ptr_reg, var->alloc);
+ break;
+ case ORC_RULE_SSE_8:
+ x86_emit_mov_memoffset_sse (program, 16, 0, ptr_reg, var->alloc);
+ break;
default:
printf("ERROR\n");
}
@@ -274,6 +287,23 @@ x86_emit_store_dest (OrcProgram *program, OrcVariable *var)
case ORC_RULE_MMX_4:
x86_emit_mov_mmx_memoffset (program, 8, var->alloc, 0, ptr_reg);
break;
+ case ORC_RULE_SSE_1:
+ /* FIXME we might be using ecx twice here */
+ if (ptr_reg == X86_ECX) {
+ printf("ERROR\n");
+ }
+ x86_emit_mov_sse_reg (program, var->alloc, X86_ECX);
+ x86_emit_mov_reg_memoffset (program, 2, X86_ECX, 0, ptr_reg);
+ break;
+ case ORC_RULE_SSE_2:
+ x86_emit_mov_sse_memoffset (program, 4, var->alloc, 0, ptr_reg);
+ break;
+ case ORC_RULE_SSE_4:
+ x86_emit_mov_sse_memoffset (program, 8, var->alloc, 0, ptr_reg);
+ break;
+ case ORC_RULE_SSE_8:
+ x86_emit_mov_sse_memoffset (program, 16, var->alloc, 0, ptr_reg);
+ break;
default:
printf("ERROR\n");
}
@@ -302,7 +332,7 @@ orc_program_assemble_x86 (OrcProgram *program)
(int)ORC_STRUCT_OFFSET(OrcExecutor,counter1), x86_exec_ptr);
x86_emit_je (program, 1);
- program->rule_set = ORC_RULE_MMX_1;
+ program->rule_set = ORC_RULE_SSE_1;
program->n_per_loop = 1;
program->loop_shift = 0;
x86_emit_label (program, 0);
@@ -320,9 +350,9 @@ orc_program_assemble_x86 (OrcProgram *program)
(int)ORC_STRUCT_OFFSET(OrcExecutor,counter2), x86_exec_ptr);
x86_emit_je (program, 3);
- program->rule_set = ORC_RULE_MMX_4;
- program->n_per_loop = 4;
- program->loop_shift = 2;
+ program->rule_set = ORC_RULE_SSE_8;
+ program->n_per_loop = 8;
+ program->loop_shift = 3;
x86_emit_label (program, 2);
x86_emit_loop (program);
x86_emit_dec_memoffset (program, 4,
diff --git a/orc/orcprogram.c b/orc/orcprogram.c
index b64d41a..52bff06 100644
--- a/orc/orcprogram.c
+++ b/orc/orcprogram.c
@@ -24,10 +24,10 @@ orc_program_new (void)
#if defined(HAVE_POWERPC)
p->rule_set = ORC_RULE_ALTIVEC_1;
#else
- p->rule_set = ORC_RULE_MMX_1;
+ p->rule_set = ORC_RULE_SSE_8;
#endif
- p->n_per_loop = 1;
- p->loop_shift = 0;
+ p->n_per_loop = 8;
+ p->loop_shift = 3;
return p;
}
diff --git a/orc/orcrules-sse.c b/orc/orcrules-sse.c
index 2c56449..9f559e0 100644
--- a/orc/orcrules-sse.c
+++ b/orc/orcrules-sse.c
@@ -29,6 +29,7 @@ sse_emit_loadi_s16 (OrcProgram *p, int reg, int value)
x86_emit_mov_imm_reg (p, 4, value, X86_ECX);
printf(" movd %%ecx, %%%s\n", x86_get_regname_sse(reg));
+ *p->codeptr++ = 0x66;
*p->codeptr++ = 0x0f;
*p->codeptr++ = 0x6e;
x86_emit_modrm_reg (p, X86_ECX, reg);
@@ -36,6 +37,7 @@ sse_emit_loadi_s16 (OrcProgram *p, int reg, int value)
printf(" pshufw $0, %%%s, %%%s\n", x86_get_regname_sse(reg),
x86_get_regname_sse(reg));
+ *p->codeptr++ = 0x66;
*p->codeptr++ = 0x0f;
*p->codeptr++ = 0x70;
x86_emit_modrm_reg (p, reg, reg);
@@ -57,6 +59,7 @@ sse_rule_add_s16 (OrcProgram *p, void *user, OrcInstruction *insn)
x86_get_regname_sse(p->vars[insn->args[2]].alloc),
x86_get_regname_sse(p->vars[insn->args[0]].alloc));
+ *p->codeptr++ = 0x66;
*p->codeptr++ = 0x0f;
*p->codeptr++ = 0xfd;
x86_emit_modrm_reg (p, p->vars[insn->args[2]].alloc,
@@ -70,6 +73,7 @@ sse_rule_sub_s16 (OrcProgram *p, void *user, OrcInstruction *insn)
x86_get_regname_sse(p->vars[insn->args[2]].alloc),
x86_get_regname_sse(p->vars[insn->args[0]].alloc));
+ *p->codeptr++ = 0x66;
*p->codeptr++ = 0x0f;
*p->codeptr++ = 0xf9;
x86_emit_modrm_reg (p, p->vars[insn->args[2]].alloc,
@@ -83,6 +87,7 @@ sse_rule_mul_s16 (OrcProgram *p, void *user, OrcInstruction *insn)
x86_get_regname_sse(p->vars[insn->args[2]].alloc),
x86_get_regname_sse(p->vars[insn->args[0]].alloc));
+ *p->codeptr++ = 0x66;
*p->codeptr++ = 0x0f;
*p->codeptr++ = 0xd5;
x86_emit_modrm_reg (p, p->vars[insn->args[2]].alloc,
@@ -97,6 +102,7 @@ sse_rule_lshift_s16 (OrcProgram *p, void *user, OrcInstruction *insn)
p->vars[insn->args[2]].s16,
x86_get_regname_sse(p->vars[insn->args[0]].alloc));
+ *p->codeptr++ = 0x66;
*p->codeptr++ = 0x0f;
*p->codeptr++ = 0x71;
x86_emit_modrm_reg (p, p->vars[insn->args[0]].alloc, 6);
@@ -107,6 +113,7 @@ sse_rule_lshift_s16 (OrcProgram *p, void *user, OrcInstruction *insn)
x86_get_regname_sse(p->vars[insn->args[2]].alloc),
x86_get_regname_sse(p->vars[insn->args[0]].alloc));
+ *p->codeptr++ = 0x66;
*p->codeptr++ = 0x0f;
*p->codeptr++ = 0xf1;
x86_emit_modrm_reg (p, p->vars[insn->args[0]].alloc,
@@ -122,6 +129,7 @@ sse_rule_rshift_s16 (OrcProgram *p, void *user, OrcInstruction *insn)
p->vars[insn->args[2]].s16,
x86_get_regname_sse(p->vars[insn->args[0]].alloc));
+ *p->codeptr++ = 0x66;
*p->codeptr++ = 0x0f;
*p->codeptr++ = 0x71;
x86_emit_modrm_reg (p, p->vars[insn->args[0]].alloc, 4);
@@ -132,6 +140,7 @@ sse_rule_rshift_s16 (OrcProgram *p, void *user, OrcInstruction *insn)
x86_get_regname_sse(p->vars[insn->args[2]].alloc),
x86_get_regname_sse(p->vars[insn->args[0]].alloc));
+ *p->codeptr++ = 0x66;
*p->codeptr++ = 0x0f;
*p->codeptr++ = 0xe1;
x86_emit_modrm_reg (p, p->vars[insn->args[0]].alloc,
diff --git a/orc/x86.c b/orc/x86.c
index 7ef3878..55de4b3 100644
--- a/orc/x86.c
+++ b/orc/x86.c
@@ -102,10 +102,12 @@ x86_get_regname_mmx(int i)
const char *
x86_get_regname_sse(int i)
{
- static const char *x86_regs[] = { "xmm0", "xmm1", "xmm2", "xmm3",
- "xmm4", "xmm5", "xmm6", "xmm7" };
+ static const char *x86_regs[] = {
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
+ "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15"
+ };
- if (i>=X86_XMM0 && i<X86_XMM0 + 8) return x86_regs[i - X86_XMM0];
+ if (i>=X86_XMM0 && i<X86_XMM0 + 16) return x86_regs[i - X86_XMM0];
switch (i) {
case 0:
return "UNALLOCATED";
@@ -244,6 +246,35 @@ x86_emit_mov_memoffset_mmx (OrcProgram *program, int size, int offset,
}
void
+x86_emit_mov_memoffset_sse (OrcProgram *program, int size, int offset,
+ int reg1, int reg2)
+{
+ if (size == 4) {
+ printf(" movd %d(%%%s), %%%s\n", offset, x86_get_regname_ptr(reg1),
+ x86_get_regname_sse(reg2));
+ *program->codeptr++ = 0x66;
+ x86_emit_rex(program, 0, reg2, 0, reg1);
+ *program->codeptr++ = 0x0f;
+ *program->codeptr++ = 0x6e;
+ } else if (size == 8) {
+ printf(" movq %d(%%%s), %%%s\n", offset, x86_get_regname_ptr(reg1),
+ x86_get_regname_sse(reg2));
+ *program->codeptr++ = 0x66;
+ x86_emit_rex(program, 0, reg2, 0, reg1);
+ *program->codeptr++ = 0x0f;
+ *program->codeptr++ = 0x6f;
+ } else {
+ printf(" movdqu %d(%%%s), %%%s\n", offset, x86_get_regname_ptr(reg1),
+ x86_get_regname_sse(reg2));
+ x86_emit_rex(program, 0, reg2, 0, reg1);
+ *program->codeptr++ = 0xf3;
+ *program->codeptr++ = 0x0f;
+ *program->codeptr++ = 0x6f;
+ }
+ x86_emit_modrm_memoffset (program, reg2, offset, reg1);
+}
+
+void
x86_emit_mov_reg_memoffset (OrcProgram *program, int size, int reg1, int offset,
int reg2)
{
@@ -285,6 +316,35 @@ x86_emit_mov_mmx_memoffset (OrcProgram *program, int size, int reg1, int offset,
}
void
+x86_emit_mov_sse_memoffset (OrcProgram *program, int size, int reg1, int offset,
+ int reg2)
+{
+ if (size == 4) {
+ printf(" movd %%%s, %d(%%%s)\n", x86_get_regname_sse(reg1), offset,
+ x86_get_regname_ptr(reg2));
+ *program->codeptr++ = 0x66;
+ x86_emit_rex(program, 0, reg1, 0, reg2);
+ *program->codeptr++ = 0x0f;
+ *program->codeptr++ = 0x7e;
+ } else if (size == 8) {
+ printf(" movq %%%s, %d(%%%s)\n", x86_get_regname_sse(reg1), offset,
+ x86_get_regname_ptr(reg2));
+ *program->codeptr++ = 0x66;
+ x86_emit_rex(program, 0, reg1, 0, reg2);
+ *program->codeptr++ = 0x0f;
+ *program->codeptr++ = 0x7f;
+ } else {
+ printf(" movdqu %%%s, %d(%%%s)\n", x86_get_regname_sse(reg1), offset,
+ x86_get_regname_ptr(reg2));
+ *program->codeptr++ = 0xf3;
+ *program->codeptr++ = 0x0f;
+ *program->codeptr++ = 0x7f;
+ }
+
+ x86_emit_modrm_memoffset (program, reg1, offset, reg2);
+}
+
+void
x86_emit_mov_imm_reg (OrcProgram *program, int size, int value, int reg1)
{
if (size == 2) {
@@ -349,6 +409,30 @@ void x86_emit_mov_mmx_reg (OrcProgram *program, int reg1, int reg2)
x86_emit_modrm_reg (program, reg2, reg1);
}
+void x86_emit_mov_reg_sse (OrcProgram *program, int reg1, int reg2)
+{
+ /* FIXME */
+ printf(" movd %%%s, %%%s\n", x86_get_regname(reg1),
+ x86_get_regname_sse(reg2));
+ *program->codeptr++ = 0x66;
+ x86_emit_rex(program, 0, reg1, 0, reg2);
+ *program->codeptr++ = 0x0f;
+ *program->codeptr++ = 0x6e;
+ x86_emit_modrm_reg (program, reg1, reg2);
+}
+
+void x86_emit_mov_sse_reg (OrcProgram *program, int reg1, int reg2)
+{
+ /* FIXME */
+ printf(" movd %%%s, %%%s\n", x86_get_regname_sse(reg1),
+ x86_get_regname(reg2));
+ *program->codeptr++ = 0x66;
+ x86_emit_rex(program, 0, reg2, 0, reg1);
+ *program->codeptr++ = 0x0f;
+ *program->codeptr++ = 0x7e;
+ x86_emit_modrm_reg (program, reg2, reg1);
+}
+
void
x86_emit_test_reg_reg (OrcProgram *program, int size, int reg1, int reg2)
{
diff --git a/orc/x86.h b/orc/x86.h
index a844a32..dc47b11 100644
--- a/orc/x86.h
+++ b/orc/x86.h
@@ -12,13 +12,19 @@ void x86_emit_pop (OrcProgram *program, int size, int reg);
void x86_emit_mov_memoffset_reg (OrcProgram *program, int size, int offset, int reg1, int reg2);
void x86_emit_mov_memoffset_mmx (OrcProgram *program, int size, int offset,
int reg1, int reg2);
+void x86_emit_mov_memoffset_sse (OrcProgram *program, int size, int offset,
+ int reg1, int reg2);
void x86_emit_mov_reg_memoffset (OrcProgram *program, int size, int reg1, int offset, int reg2);
void x86_emit_mov_mmx_memoffset (OrcProgram *program, int size, int reg1, int offset,
int reg2);
+void x86_emit_mov_sse_memoffset (OrcProgram *program, int size, int reg1, int offset,
+ int reg2);
void x86_emit_mov_imm_reg (OrcProgram *program, int size, int value, int reg1);
void x86_emit_mov_reg_reg (OrcProgram *program, int size, int reg1, int reg2);
void x86_emit_mov_reg_mmx (OrcProgram *program, int reg1, int reg2);
void x86_emit_mov_mmx_reg (OrcProgram *program, int reg1, int reg2);
+void x86_emit_mov_reg_sse (OrcProgram *program, int reg1, int reg2);
+void x86_emit_mov_sse_reg (OrcProgram *program, int reg1, int reg2);
void x86_emit_test_reg_reg (OrcProgram *program, int size, int reg1, int reg2);
void x86_emit_sar_imm_reg (OrcProgram *program, int size, int value, int reg);
void x86_emit_dec_memoffset (OrcProgram *program, int size, int offset, int reg);