summary refs log tree commit diff
diff options
context:
space:
mode:
author David Schleef <ds@schleef.org> 2010-03-20 13:07:01 -0700
committer David Schleef <ds@schleef.org> 2010-06-23 22:37:38 -0700
commit bddb964194cc95f3fea60aa97536bf2bf602b9e1 (patch)
tree 8ea6e8740e06b10889d1076c1a9b9b470515d312
parent 854968c94c17a97460dd44fb9ddccd5845265d86 (diff)
download orc-bddb964194cc95f3fea60aa97536bf2bf602b9e1.tar.gz
Implement lower level composite opcodes
-rw-r--r-- orc/orcopcodes.c 80
-rw-r--r-- orc/orcprogram-c.c 94
-rw-r--r-- orc/orcprogram.h 4
3 files changed, 172 insertions, 6 deletions
diff --git a/orc/orcopcodes.c b/orc/orcopcodes.c
index 200f4c9..3be150a 100644
--- a/orc/orcopcodes.c
+++ b/orc/orcopcodes.c
@@ -693,10 +693,80 @@ convlf (OrcOpcodeExecutor *ex, void *user)
#define ORC_ARGB_G(x) (((x)>>8)&0xff)
#define ORC_ARGB_B(x) (((x)>>0)&0xff)
+/* Channel accessors for a 64-bit value holding four 16-bit lanes:
+ * alpha in bits 48..63, red in 32..47, green in 16..31, blue in 0..15.
+ * The argument must be a 64-bit type for the larger shifts to be valid. */
+#define ORC_ARGB16_A(x) (((x)>>48)&0xffff)
+#define ORC_ARGB16_R(x) (((x)>>32)&0xffff)
+#define ORC_ARGB16_G(x) (((x)>>16)&0xffff)
+#define ORC_ARGB16_B(x) (((x)>>0)&0xffff)
+
#define ORC_ARGB(a,r,g,b) \
((ORC_CLAMP((b),0,255)<<0)|(ORC_CLAMP((g),0,255)<<8)| \
(ORC_CLAMP((r),0,255)<<16)|(ORC_CLAMP((a),0,255)<<24))
+/* Pack four channels into one 64-bit value (alpha in the top 16 bits, blue
+ * in the bottom 16). Each channel is passed through ORC_CLAMP with bounds
+ * 0 and 65535 and widened to uint64_t before being shifted into place. */
+#define ORC_ARGB16(a,r,g,b) \
+ (((uint64_t)ORC_CLAMP((b),0,65535)<<0)|((uint64_t)ORC_CLAMP((g),0,65535)<<16)| \
+ ((uint64_t)ORC_CLAMP((r),0,65535)<<32)|((uint64_t)ORC_CLAMP((a),0,65535)<<48))
+
+
+/* Emulation of the "convubw4" opcode: each channel read from the source
+ * with the ORC_ARGB_* accessors is placed in its own 16-bit lane of the
+ * destination via ORC_ARGB16. */
+static void
+convubw4 (OrcOpcodeExecutor *ex, void *user)
+{
+  unsigned int pixel = ex->src_values[0];
+  unsigned int alpha = ORC_ARGB_A(pixel);
+  unsigned int red = ORC_ARGB_R(pixel);
+  unsigned int green = ORC_ARGB_G(pixel);
+  unsigned int blue = ORC_ARGB_B(pixel);
+
+  ex->dest_values[0] = ORC_ARGB16(alpha, red, green, blue);
+}
+
+/* Emulation of the "convwb4" opcode: the four 16-bit lanes of the source
+ * are repacked with ORC_ARGB, which clamps each channel to 0..255. */
+static void
+convwb4 (OrcOpcodeExecutor *ex, void *user)
+{
+  uint64_t wide = ex->src_values[0];
+  uint64_t alpha = ORC_ARGB16_A(wide);
+  uint64_t red = ORC_ARGB16_R(wide);
+  uint64_t green = ORC_ARGB16_G(wide);
+  uint64_t blue = ORC_ARGB16_B(wide);
+
+  ex->dest_values[0] = ORC_ARGB(alpha, red, green, blue);
+}
+
+/* Emulation of the "div255w4" opcode: ORC_DIVIDE_255 is applied to each of
+ * the four 16-bit lanes independently and the results are repacked. */
+static void
+div255w4 (OrcOpcodeExecutor *ex, void *user)
+{
+  uint64_t wide = ex->src_values[0];
+  uint64_t alpha = ORC_ARGB16_A(wide);
+  uint64_t red = ORC_ARGB16_R(wide);
+  uint64_t green = ORC_ARGB16_G(wide);
+  uint64_t blue = ORC_ARGB16_B(wide);
+
+  ex->dest_values[0] = ORC_ARGB16(
+      ORC_DIVIDE_255(alpha),
+      ORC_DIVIDE_255(red),
+      ORC_DIVIDE_255(green),
+      ORC_DIVIDE_255(blue));
+}
+
+/* Emulation of the "splat0w4" opcode: copy the alpha lane of a 4x16-bit
+ * source into all four 16-bit lanes of the destination. */
+static void
+splat0w4 (OrcOpcodeExecutor *ex, void *user)
+{
+  uint64_t src = ex->src_values[0];
+  /* The operand is 8 bytes wide (four 16-bit lanes), so alpha lives in
+   * bits 48..63 and must be read with the 16-bit accessor. This matches
+   * what the C backend rule for this opcode emits; an 8-bit ORC_ARGB_*
+   * accessor would pick bits out of a different lane. */
+  uint64_t alpha = ORC_ARGB16_A(src);
+
+  ex->dest_values[0] = ORC_ARGB16(alpha, alpha, alpha, alpha);
+}
+
+/* Emulation of the "mullw4" opcode: multiply the two sources lane by lane
+ * (four 16-bit lanes). ORC_ARGB16 clamps every product to 0..65535 when
+ * repacking. */
+static void
+mullw4 (OrcOpcodeExecutor *ex, void *user)
+{
+  uint64_t lhs = ex->src_values[0];
+  uint64_t rhs = ex->src_values[1];
+  uint64_t alpha = ORC_ARGB16_A(lhs) * ORC_ARGB16_A(rhs);
+  uint64_t red = ORC_ARGB16_R(lhs) * ORC_ARGB16_R(rhs);
+  uint64_t green = ORC_ARGB16_G(lhs) * ORC_ARGB16_G(rhs);
+  uint64_t blue = ORC_ARGB16_B(lhs) * ORC_ARGB16_B(rhs);
+
+  ex->dest_values[0] = ORC_ARGB16(alpha, red, green, blue);
+}
static void
compin (OrcOpcodeExecutor *ex, void *user)
@@ -746,7 +816,7 @@ compovera (OrcOpcodeExecutor *ex, void *user)
}
static void
-compadd (OrcOpcodeExecutor *ex, void *user)
+addusb4 (OrcOpcodeExecutor *ex, void *user)
{
unsigned int src1 = ex->src_values[0];
unsigned int src2 = ex->src_values[1];
@@ -959,11 +1029,17 @@ static OrcStaticOpcode opcodes[] = {
{ "convlf", convlf, NULL, ORC_STATIC_OPCODE_FLOAT_DEST, { 4 }, { 4 } },
/* pixel ops */
+ { "convubw4", convubw4, NULL, 0, { 8 }, { 4 } },
+ { "convwb4", convwb4, NULL, 0, { 4 }, { 8 } },
+ { "div255w4", div255w4, NULL, 0, { 8 }, { 8 } },
+ { "splat0w4", splat0w4, NULL, 0, { 8 }, { 8 } },
+ { "mullw4", mullw4, NULL, 0, { 8 }, { 8, 8 } },
+
{ "compin", compin, NULL, 0, { 4 }, { 4, 4 } },
{ "compina", compina, NULL, 0, { 1 }, { 1, 1 } },
{ "compover", compover, NULL, 0, { 4 }, { 4, 4 } },
{ "compovera", compovera, NULL, 0, { 1 }, { 1, 1 } },
- { "compadd", compadd, NULL, 0, { 4 }, { 4, 4 } },
+ { "addusb4", addusb4, NULL, 0, { 4 }, { 4, 4 } },
{ "compout", compout, NULL, 0, { 4 }, { 4, 4 } },
{ "compatop", compatop, NULL, 0, { 4 }, { 4, 4 } },
{ "compxor", compxor, NULL, 0, { 4 }, { 4, 4 } },
diff --git a/orc/orcprogram-c.c b/orc/orcprogram-c.c
index 6ec010e..817ef7b 100644
--- a/orc/orcprogram-c.c
+++ b/orc/orcprogram-c.c
@@ -120,6 +120,13 @@ orc_target_c_get_asm_preamble (void)
"#define ORC_ARGB(a,r,g,b) \\\n"
" ((ORC_CLAMP((b),0,255)<<0)|(ORC_CLAMP((g),0,255)<<8)| \\\n"
" (ORC_CLAMP((r),0,255)<<16)|(ORC_CLAMP((a),0,255)<<24))\n"
+ "#define ORC_ARGB16_A(x) (((x)>>48)&0xffff)\n"
+ "#define ORC_ARGB16_R(x) (((x)>>32)&0xffff)\n"
+ "#define ORC_ARGB16_G(x) (((x)>>16)&0xffff)\n"
+ "#define ORC_ARGB16_B(x) (((x)>>0)&0xffff)\n"
+ "#define ORC_ARGB16(a,r,g,b) \\\n"
+ " (((uint64_t)ORC_CLAMP((b),0,65535)<<0)|((uint64_t)ORC_CLAMP((g),0,65535)<<16)| \\\n"
+ " ((uint64_t)ORC_CLAMP((r),0,65535)<<32)|((uint64_t)ORC_CLAMP((a),0,65535)<<48))\n"
"#ifdef __GNUC_PREREQ\n"
"#if __GNUC_PREREQ(4,2)\n"
"#pragma GCC diagnostic ignored \"-Wstrict-aliasing\"\n"
@@ -716,6 +723,82 @@ c_rule_splitwb (OrcCompiler *p, void *user, OrcInstruction *insn)
}
+/* C backend rule for "convubw4": emits an ORC_ARGB16(...) assignment whose
+ * arguments are the ORC_ARGB_* channels of the source operand. */
static void
+c_rule_convubw4 (OrcCompiler *p, void *user, OrcInstruction *insn)
+{
+  char out_name[20];
+  char in_name[20];
+
+  /* Resolve the C expressions naming the destination and the source. */
+  c_get_name (out_name, p, insn->dest_args[0]);
+  c_get_name (in_name, p, insn->src_args[0]);
+
+  ORC_ASM_CODE(p," %s = ORC_ARGB16(\n", out_name);
+  ORC_ASM_CODE(p," ORC_ARGB_A(%s),\n", in_name);
+  ORC_ASM_CODE(p," ORC_ARGB_R(%s),\n", in_name);
+  ORC_ASM_CODE(p," ORC_ARGB_G(%s),\n", in_name);
+  ORC_ASM_CODE(p," ORC_ARGB_B(%s));\n", in_name);
+}
+
+/* C backend rule for "convwb4": emits an ORC_ARGB(...) assignment whose
+ * arguments are the ORC_ARGB16_* lanes of the source operand. */
+static void
+c_rule_convwb4 (OrcCompiler *p, void *user, OrcInstruction *insn)
+{
+  char out_name[20];
+  char in_name[20];
+
+  /* Resolve the C expressions naming the destination and the source. */
+  c_get_name (out_name, p, insn->dest_args[0]);
+  c_get_name (in_name, p, insn->src_args[0]);
+
+  ORC_ASM_CODE(p," %s = ORC_ARGB(\n", out_name);
+  ORC_ASM_CODE(p," ORC_ARGB16_A(%s),\n", in_name);
+  ORC_ASM_CODE(p," ORC_ARGB16_R(%s),\n", in_name);
+  ORC_ASM_CODE(p," ORC_ARGB16_G(%s),\n", in_name);
+  ORC_ASM_CODE(p," ORC_ARGB16_B(%s));\n", in_name);
+}
+
+/* C backend rule for "div255w4": emits an ORC_ARGB16(...) assignment in
+ * which every ORC_ARGB16_* lane of the source goes through ORC_DIVIDE_255. */
+static void
+c_rule_div255w4 (OrcCompiler *p, void *user, OrcInstruction *insn)
+{
+  char out_name[20];
+  char in_name[20];
+
+  /* Resolve the C expressions naming the destination and the source. */
+  c_get_name (out_name, p, insn->dest_args[0]);
+  c_get_name (in_name, p, insn->src_args[0]);
+
+  ORC_ASM_CODE(p," %s = ORC_ARGB16(\n", out_name);
+  ORC_ASM_CODE(p," ORC_DIVIDE_255(ORC_ARGB16_A(%s)),\n", in_name);
+  ORC_ASM_CODE(p," ORC_DIVIDE_255(ORC_ARGB16_R(%s)),\n", in_name);
+  ORC_ASM_CODE(p," ORC_DIVIDE_255(ORC_ARGB16_G(%s)),\n", in_name);
+  ORC_ASM_CODE(p," ORC_DIVIDE_255(ORC_ARGB16_B(%s)));\n", in_name);
+}
+
+/* C backend rule for "splat0w4": emits an ORC_ARGB16(...) assignment in
+ * which all four arguments are the ORC_ARGB16_A lane of the source. */
+static void
+c_rule_splat0w4 (OrcCompiler *p, void *user, OrcInstruction *insn)
+{
+  char out_name[20];
+  char in_name[20];
+
+  /* Resolve the C expressions naming the destination and the source. */
+  c_get_name (out_name, p, insn->dest_args[0]);
+  c_get_name (in_name, p, insn->src_args[0]);
+
+  ORC_ASM_CODE(p," %s = ORC_ARGB16(\n", out_name);
+  ORC_ASM_CODE(p," ORC_ARGB16_A(%s),\n", in_name);
+  ORC_ASM_CODE(p," ORC_ARGB16_A(%s),\n", in_name);
+  ORC_ASM_CODE(p," ORC_ARGB16_A(%s),\n", in_name);
+  ORC_ASM_CODE(p," ORC_ARGB16_A(%s));\n", in_name);
+}
+
+/* C backend rule for "mullw4": emits an ORC_ARGB16(...) assignment whose
+ * arguments are the lane-by-lane products of the two source operands. */
+static void
+c_rule_mullw4 (OrcCompiler *p, void *user, OrcInstruction *insn)
+{
+  char out_name[20];
+  char lhs_name[20];
+  char rhs_name[20];
+
+  /* Resolve the C expressions naming the destination and both sources. */
+  c_get_name (out_name, p, insn->dest_args[0]);
+  c_get_name (lhs_name, p, insn->src_args[0]);
+  c_get_name (rhs_name, p, insn->src_args[1]);
+
+  ORC_ASM_CODE(p," %s = ORC_ARGB16(\n", out_name);
+  ORC_ASM_CODE(p," ORC_ARGB16_A(%s)*ORC_ARGB16_A(%s),\n", lhs_name, rhs_name);
+  ORC_ASM_CODE(p," ORC_ARGB16_R(%s)*ORC_ARGB16_R(%s),\n", lhs_name, rhs_name);
+  ORC_ASM_CODE(p," ORC_ARGB16_G(%s)*ORC_ARGB16_G(%s),\n", lhs_name, rhs_name);
+  ORC_ASM_CODE(p," ORC_ARGB16_B(%s)*ORC_ARGB16_B(%s));\n", lhs_name, rhs_name);
+}
+
+static void
c_rule_compin (OrcCompiler *p, void *user, OrcInstruction *insn)
{
char dest[20], src1[20], src2[20];
@@ -772,7 +855,7 @@ c_rule_compovera (OrcCompiler *p, void *user, OrcInstruction *insn)
}
static void
-c_rule_compadd (OrcCompiler *p, void *user, OrcInstruction *insn)
+c_rule_addusb4 (OrcCompiler *p, void *user, OrcInstruction *insn)
{
char dest[20], src1[20], src2[20];
@@ -891,11 +974,18 @@ orc_c_init (void)
orc_rule_register (rule_set, "accsadubl", c_rule_accsadubl, NULL);
orc_rule_register (rule_set, "splitlw", c_rule_splitlw, NULL);
orc_rule_register (rule_set, "splitwb", c_rule_splitwb, NULL);
+
+ orc_rule_register (rule_set, "convubw4", c_rule_convubw4, NULL);
+ orc_rule_register (rule_set, "convwb4", c_rule_convwb4, NULL);
+ orc_rule_register (rule_set, "div255w4", c_rule_div255w4, NULL);
+ orc_rule_register (rule_set, "splat0w4", c_rule_splat0w4, NULL);
+ orc_rule_register (rule_set, "mullw4", c_rule_mullw4, NULL);
+
orc_rule_register (rule_set, "compin", c_rule_compin, NULL);
orc_rule_register (rule_set, "compina", c_rule_compina, NULL);
orc_rule_register (rule_set, "compover", c_rule_compover, NULL);
orc_rule_register (rule_set, "compovera", c_rule_compovera, NULL);
- orc_rule_register (rule_set, "compadd", c_rule_compadd, NULL);
+ orc_rule_register (rule_set, "addusb4", c_rule_addusb4, NULL);
orc_rule_register (rule_set, "compout", c_rule_compout, NULL);
orc_rule_register (rule_set, "compatop", c_rule_compatop, NULL);
orc_rule_register (rule_set, "compxor", c_rule_compxor, NULL);
diff --git a/orc/orcprogram.h b/orc/orcprogram.h
index c24948f..9e3e9ba 100644
--- a/orc/orcprogram.h
+++ b/orc/orcprogram.h
@@ -438,8 +438,8 @@ struct _OrcCompiler {
*/
struct _OrcOpcodeExecutor {
/*< private >*/
- int src_values[ORC_STATIC_OPCODE_N_SRC];
- int dest_values[ORC_STATIC_OPCODE_N_DEST];
+ /* Slots are 64 bits wide: the opcodes added in this change declare 8-byte
+  * operands and read/write them whole through these arrays. */
+ int64_t src_values[ORC_STATIC_OPCODE_N_SRC];
+ int64_t dest_values[ORC_STATIC_OPCODE_N_DEST];
};
/**