summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--backend/src/backend/gen_insn_selection.cpp117
-rw-r--r--kernels/compiler_clod_ref.bmpbin0 -> 196662 bytes
-rw-r--r--kernels/compiler_mandelbrot_alternate_ref.bmpbin0 -> 196662 bytes
-rw-r--r--kernels/compiler_mandelbrot_ref.bmpbin0 -> 196662 bytes
-rw-r--r--kernels/compiler_ribbon_ref.bmpbin0 -> 196662 bytes
-rw-r--r--utests/compiler_clod.cpp19
-rw-r--r--utests/compiler_mandelbrot.cpp20
-rw-r--r--utests/compiler_mandelbrot_alternate.cpp28
-rw-r--r--utests/compiler_ribbon.cpp20
9 files changed, 154 insertions, 50 deletions
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 71175522..c429a96c 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -22,6 +22,56 @@
* \author Benjamin Segovia <benjamin.segovia@intel.com>
*/
+/* This is the instruction selection code. First of all, this is a bunch of c++
+ * crap. Sorry if this is not that readable. Anyway, the goal here is to take
+ * GenIR code (i.e. the very regular, very RISC IR) and to produce GenISA with
+ * virtual registers (i.e. regular GenIR registers).
+ *
+ * Overall idea:
+ * =============
+ *
+ * There is a lot of papers and research about that but I tried to keep it
+ * simple. No dynamic programming, nothing like this. Just a recursive maximal
+ * munch.
+ *
+ * Basically, the code is executed per basic block from bottom to top. Patterns
+ * of GenIR instructions are defined and each instruction is matched against the
+ * best pattern i.e. the pattern that catches the largest number of
+ * instructions. Once matched, a sequence of instructions is output.
+ *
+ * Each instruction the match depends on is then marked as "root" i.e. we
+ * indicate that each of these instructions must be generated: we indeed need their
+ * destinations for the next instructions (remember that we generate the code in
+ * reverse order)
+ *
+ * Patterns:
+ * =========
+ *
+ * There is a lot of patterns and I did not implement all of them obviously. I
+ * just quickly gather the complete code to make pattern implementation kind of
+ * easy. This is pretty verbose to add a pattern but it should be not too hard
+ * to add new ones.
+ *
+ * To create and register patterns, I just abused C++ pre-main. A bunch of
+ * patterns is then created and sorted per opcode (i.e. the opcode of the root
+ * of the pattern): this creates a library of patterns that may be used in
+ * run-time.
+ *
+ * TODO:
+ * =====
+ *
+ * Sadly, I recreated here a new DAG class. This is just a bad idea since we
+ * already have the DAG per basic block with the Function graph i.e. the
+ * complete graph of uses and definitions. I think we should be able to save a
+ * lot of code here if we can simply reuse the code from UD / DU chains.
+ *
+ * Finally, cross-block instruction selection is quite possible with this simple
+ * approach. Basically, instructions from dominating blocks could be merged and
+ * matched with other instructions in the dominated block. This leads to the
+ * interesting approach which consists in traversing the dominator tree in post
+ * order
+ */
+
#include "backend/gen_insn_selection.hpp"
#include "backend/gen_context.hpp"
#include "ir/function.hpp"
@@ -1497,17 +1547,23 @@ namespace gbe
};
/*! Compare instruction pattern */
- DECL_PATTERN(CompareInstruction)
+ class CompareInstructionPattern : public SelectionPattern
{
- INLINE bool emitOne(Selection::Opaque &sel, const ir::CompareInstruction &insn) const
+ public:
+ CompareInstructionPattern(void) : SelectionPattern(1,1) {
+ for (uint32_t op = 0; op < ir::OP_INVALID; ++op)
+ if (ir::isOpcodeFrom<ir::CompareInstruction>(ir::Opcode(op)) == true)
+ this->opcodes.push_back(ir::Opcode(op));
+ }
+
+ INLINE bool emit(Selection::Opaque &sel, SelectionDAG &dag) const
{
using namespace ir;
+ const ir::CompareInstruction &insn = cast<CompareInstruction>(dag.insn);
const Opcode opcode = insn.getOpcode();
const Type type = insn.getType();
const uint32_t genCmp = getGenCompare(opcode);
const Register dst = insn.getDst(0);
- const GenRegister src0 = sel.selReg(insn.getSrc(0), type);
- const GenRegister src1 = sel.selReg(insn.getSrc(1), type);
// Limit the compare to the active lanes. Use the same compare as for f0.0
sel.push();
@@ -1520,15 +1576,30 @@ namespace gbe
sel.CMP(GEN_CONDITIONAL_LE, blockip, labelReg);
sel.pop();
+ // Look for immediate values for the right source
+ GenRegister src0, src1;
+ SelectionDAG *dag0 = dag.child[0];
+ SelectionDAG *dag1 = dag.child[1];
+
+ // Right source can always be an immediate
+ if (OCL_OPTIMIZE_IMMEDIATE && dag1 != NULL && dag1->insn.getOpcode() == OP_LOADI) {
+ const auto &childInsn = cast<LoadImmInstruction>(dag1->insn);
+ src0 = sel.selReg(insn.getSrc(0), type);
+ src1 = getRegisterFromImmediate(childInsn.getImmediate());
+ if (dag0) dag0->isRoot = 1;
+ } else {
+ src0 = sel.selReg(insn.getSrc(0), type);
+ src1 = sel.selReg(insn.getSrc(1), type);
+ this->markAllChildren(dag);
+ }
+
sel.push();
sel.curr.physicalFlag = 0;
sel.curr.flagIndex = uint16_t(dst);
- printf("%i\n",(int) dst);
sel.CMP(genCmp, src0, src1);
sel.pop();
return true;
}
- DECL_CTOR(CompareInstruction, 1, 1);
};
/*! Convert instruction pattern */
@@ -1566,17 +1637,42 @@ namespace gbe
};
/*! Select instruction pattern */
- DECL_PATTERN(SelectInstruction)
+ class SelectInstructionPattern : public SelectionPattern
{
- INLINE bool emitOne(Selection::Opaque &sel, const ir::SelectInstruction &insn) const
+ public:
+ SelectInstructionPattern(void) : SelectionPattern(1,1) {
+ for (uint32_t op = 0; op < ir::OP_INVALID; ++op)
+ if (ir::isOpcodeFrom<ir::SelectInstruction>(ir::Opcode(op)) == true)
+ this->opcodes.push_back(ir::Opcode(op));
+ }
+
+ INLINE bool emit(Selection::Opaque &sel, SelectionDAG &dag) const
{
using namespace ir;
+ const ir::SelectInstruction &insn = cast<SelectInstruction>(dag.insn);
// Get all registers for the instruction
const Type type = insn.getType();
const GenRegister dst = sel.selReg(insn.getDst(0), type);
- const GenRegister src0 = sel.selReg(insn.getSrc(SelectInstruction::src0Index), type);
- const GenRegister src1 = sel.selReg(insn.getSrc(SelectInstruction::src1Index), type);
+
+ // Look for immediate values for the right source
+ GenRegister src0, src1;
+ SelectionDAG *dag0 = dag.child[0]; // source 0 is the predicate!
+ SelectionDAG *dag1 = dag.child[1];
+ SelectionDAG *dag2 = dag.child[2];
+
+ // Right source can always be an immediate
+ if (OCL_OPTIMIZE_IMMEDIATE && dag2 != NULL && dag2->insn.getOpcode() == OP_LOADI) {
+ const auto &childInsn = cast<LoadImmInstruction>(dag2->insn);
+ src0 = sel.selReg(insn.getSrc(SelectInstruction::src0Index), type);
+ src1 = getRegisterFromImmediate(childInsn.getImmediate());
+ if (dag0) dag0->isRoot = 1;
+ if (dag1) dag1->isRoot = 1;
+ } else {
+ src0 = sel.selReg(insn.getSrc(SelectInstruction::src0Index), type);
+ src1 = sel.selReg(insn.getSrc(SelectInstruction::src1Index), type);
+ this->markAllChildren(dag);
+ }
// Since we cannot predicate the select instruction with our current mask,
// we need to perform the selection in two steps (one to select, one to
@@ -1598,7 +1694,6 @@ namespace gbe
sel.MOV(dst, tmp);
return true;
}
- DECL_CTOR(SelectInstruction, 1, 1);
};
/*! Label instruction pattern */
diff --git a/kernels/compiler_clod_ref.bmp b/kernels/compiler_clod_ref.bmp
new file mode 100644
index 00000000..71afda90
--- /dev/null
+++ b/kernels/compiler_clod_ref.bmp
Binary files differ
diff --git a/kernels/compiler_mandelbrot_alternate_ref.bmp b/kernels/compiler_mandelbrot_alternate_ref.bmp
new file mode 100644
index 00000000..011d5836
--- /dev/null
+++ b/kernels/compiler_mandelbrot_alternate_ref.bmp
Binary files differ
diff --git a/kernels/compiler_mandelbrot_ref.bmp b/kernels/compiler_mandelbrot_ref.bmp
new file mode 100644
index 00000000..494bf8b7
--- /dev/null
+++ b/kernels/compiler_mandelbrot_ref.bmp
Binary files differ
diff --git a/kernels/compiler_ribbon_ref.bmp b/kernels/compiler_ribbon_ref.bmp
new file mode 100644
index 00000000..2225f454
--- /dev/null
+++ b/kernels/compiler_ribbon_ref.bmp
Binary files differ
diff --git a/utests/compiler_clod.cpp b/utests/compiler_clod.cpp
index 360d197b..9ea570a0 100644
--- a/utests/compiler_clod.cpp
+++ b/utests/compiler_clod.cpp
@@ -20,8 +20,8 @@
#include "utest_helper.hpp"
static int *dst = NULL;
-static const int w = 1024;
-static const int h = 1024;
+static const int w = 256;
+static const int h = 256;
static void compiler_clod(void)
{
@@ -32,17 +32,20 @@ static void compiler_clod(void)
const float fy = float(h);
OCL_CREATE_KERNEL("compiler_clod");
- cl_mem cl_dst = clCreateBuffer(ctx, 0, sz, NULL, NULL);
- OCL_CALL (clSetKernelArg, kernel, 0, sizeof(cl_mem), &cl_dst);
+ OCL_CREATE_BUFFER(buf[0], 0, sz, NULL);
+ OCL_CALL (clSetKernelArg, kernel, 0, sizeof(cl_mem), &buf[0]);
OCL_CALL (clSetKernelArg, kernel, 1, sizeof(float), &fx);
OCL_CALL (clSetKernelArg, kernel, 2, sizeof(float), &fy);
OCL_CALL (clSetKernelArg, kernel, 3, sizeof(int), &w);
OCL_CALL (clEnqueueNDRangeKernel, queue, kernel, 2, NULL, global, local, 0, NULL, NULL);
- dst = (int *) clIntelMapBuffer(cl_dst, NULL);
+ OCL_MAP_BUFFER(0);
+ dst = (int*) buf_data[0];
- cl_write_bmp(dst, w, h, "clod.bmp");
- OCL_CALL (clIntelUnmapBuffer, cl_dst);
- OCL_CALL (clReleaseMemObject, cl_dst);
+ /* Save the image (for debug purpose) */
+ cl_write_bmp(dst, w, h, "compiler_clod.bmp");
+
+ /* Compare with the golden image */
+ OCL_CHECK_IMAGE(dst, w, h, "compiler_clod_ref.bmp");
}
MAKE_UTEST_FROM_FUNCTION(compiler_clod);
diff --git a/utests/compiler_mandelbrot.cpp b/utests/compiler_mandelbrot.cpp
index aa70f3de..7758dae3 100644
--- a/utests/compiler_mandelbrot.cpp
+++ b/utests/compiler_mandelbrot.cpp
@@ -20,9 +20,8 @@
#include "utest_helper.hpp"
static int *dst = NULL;
-static const size_t w = 64;
-static const size_t h = 64;
-static const size_t iter = 4;
+static const size_t w = 256;
+static const size_t h = 256;
static void compiler_mandelbrot(void)
{
@@ -32,14 +31,17 @@ static void compiler_mandelbrot(void)
OCL_CREATE_KERNEL("compiler_mandelbrot");
- cl_mem cl_dst = clCreateBuffer(ctx, 0, sz, NULL, NULL);
- OCL_CALL (clSetKernelArg, kernel, 0, sizeof(cl_mem), &cl_dst);
+ OCL_CREATE_BUFFER(buf[0], 0, sz, NULL);
+ OCL_CALL (clSetKernelArg, kernel, 0, sizeof(cl_mem), &buf[0]);
OCL_CALL (clEnqueueNDRangeKernel, queue, kernel, 2, NULL, global, local, 0, NULL, NULL);
- dst = (int *) clIntelMapBuffer(cl_dst, NULL);
+ OCL_MAP_BUFFER(0);
+ dst = (int *) buf_data[0];
- cl_write_bmp(dst, w, h, "mandelbrot.bmp");
- OCL_CALL (clIntelUnmapBuffer, cl_dst);
- OCL_CALL (clReleaseMemObject, cl_dst);
+ /* Save the image (for debug purpose) */
+ cl_write_bmp(dst, w, h, "compiler_mandelbrot.bmp");
+
+ /* Compare with the golden image */
+ OCL_CHECK_IMAGE(dst, w, h, "compiler_mandelbrot_ref.bmp");
}
MAKE_UTEST_FROM_FUNCTION(compiler_mandelbrot);
diff --git a/utests/compiler_mandelbrot_alternate.cpp b/utests/compiler_mandelbrot_alternate.cpp
index 74fc7164..2e5d59fc 100644
--- a/utests/compiler_mandelbrot_alternate.cpp
+++ b/utests/compiler_mandelbrot_alternate.cpp
@@ -20,9 +20,8 @@
#include "utest_helper.hpp"
static int *dst = NULL;
-static const size_t w = 64;
-static const size_t h = 64;
-static const size_t iter = 4;
+static const size_t w = 256;
+static const size_t h = 256;
static const float criterium = 4.f;
static void compiler_mandelbrot_alternate(void)
@@ -30,22 +29,25 @@ static void compiler_mandelbrot_alternate(void)
const size_t global[2] = {w, h};
const size_t local[2] = {16, 1};
const size_t sz = w * h * sizeof(char[4]);
- const float rcpW = 1.f / float(w);
- const float rcpH = 1.f / float(h);
+ const float rcpWidth = 1.f / float(w);
+ const float rcpHeight = 1.f / float(h);
OCL_CREATE_KERNEL("compiler_mandelbrot_alternate");
- cl_mem cl_dst = clCreateBuffer(ctx, 0, sz, NULL, NULL);
- OCL_CALL (clSetKernelArg, kernel, 0, sizeof(cl_mem), &cl_dst);
- OCL_CALL (clSetKernelArg, kernel, 1, sizeof(float), &rcpW);
- OCL_CALL (clSetKernelArg, kernel, 2, sizeof(float), &rcpH);
+ OCL_CREATE_BUFFER(buf[0], 0, sz, NULL);
+ OCL_CALL (clSetKernelArg, kernel, 0, sizeof(cl_mem), &buf[0]);
+ OCL_CALL (clSetKernelArg, kernel, 1, sizeof(float), &rcpWidth);
+ OCL_CALL (clSetKernelArg, kernel, 2, sizeof(float), &rcpHeight);
OCL_CALL (clSetKernelArg, kernel, 3, sizeof(float), &criterium);
OCL_CALL (clEnqueueNDRangeKernel, queue, kernel, 2, NULL, global, local, 0, NULL, NULL);
- dst = (int *) clIntelMapBuffer(cl_dst, NULL);
+ OCL_MAP_BUFFER(0);
+ dst = (int *) buf_data[0];
- cl_write_bmp(dst, w, h, "mandelbrot.bmp");
- OCL_CALL (clIntelUnmapBuffer, cl_dst);
- OCL_CALL (clReleaseMemObject, cl_dst);
+ /* Save the image (for debug purpose) */
+ cl_write_bmp(dst, w, h, "compiler_mandelbrot_alternate.bmp");
+
+ /* Compare with the golden image */
+ OCL_CHECK_IMAGE(dst, w, h, "compiler_mandelbrot_alternate_ref.bmp");
}
MAKE_UTEST_FROM_FUNCTION(compiler_mandelbrot_alternate);
diff --git a/utests/compiler_ribbon.cpp b/utests/compiler_ribbon.cpp
index 733b8ac9..1ef41a98 100644
--- a/utests/compiler_ribbon.cpp
+++ b/utests/compiler_ribbon.cpp
@@ -20,8 +20,8 @@
#include "utest_helper.hpp"
static int *dst = NULL;
-static const int w = 1024;
-static const int h = 1024;
+static const int w = 256;
+static const int h = 256;
static void compiler_ribbon(void)
{
@@ -32,19 +32,21 @@ static void compiler_ribbon(void)
const float fy = float(h);
OCL_CREATE_KERNEL("compiler_ribbon");
- cl_mem cl_dst = clCreateBuffer(ctx, 0, sz, NULL, NULL);
- OCL_CALL (clSetKernelArg, kernel, 0, sizeof(cl_mem), &cl_dst);
+ OCL_CREATE_BUFFER(buf[0], 0, sz, NULL);
+ OCL_CALL (clSetKernelArg, kernel, 0, sizeof(cl_mem), &buf[0]);
OCL_CALL (clSetKernelArg, kernel, 1, sizeof(float), &fx);
OCL_CALL (clSetKernelArg, kernel, 2, sizeof(float), &fy);
OCL_CALL (clSetKernelArg, kernel, 3, sizeof(int), &w);
OCL_CALL (clEnqueueNDRangeKernel, queue, kernel, 2, NULL, global, local, 0, NULL, NULL);
- dst = (int *) clIntelMapBuffer(cl_dst, NULL);
+ OCL_MAP_BUFFER(0);
+ dst = (int*) buf_data[0];
- cl_write_bmp(dst, w, h, "ribbon.bmp");
- OCL_CALL (clIntelUnmapBuffer, cl_dst);
- OCL_CALL (clReleaseMemObject, cl_dst);
+ /* Save the image (for debug purpose) */
+ cl_write_bmp(dst, w, h, "compiler_ribbon.bmp");
+
+ /* Compare with the golden image */
+ OCL_CHECK_IMAGE(dst, w, h, "compiler_ribbon_ref.bmp");
}
MAKE_UTEST_FROM_FUNCTION(compiler_ribbon);
-