summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Ruiling Song <ruiling.song@intel.com> 2014-05-30 16:22:30 +0800
committer: Zhigang Gong <zhigang.gong@intel.com> 2014-05-30 17:54:30 +0800
commit: 871389f906d6f6d33703851eee7111390c562bc5 (patch)
tree: 7c5114317478ea619a1be920d6cf72c52ee30082
parent: dc84f4e0aacf80e7b137c39fcf4858de9e191bf4 (diff)
download: beignet-871389f906d6f6d33703851eee7111390c562bc5.tar.gz
GBE: Fix bitcast between long and other type.
Because we store the low and high 32 bits of a long separately, when we do a bitcast such as int64 --> int16, the horizontal stride of the int64's low/high half should be set to 2 instead of 4.

This fixes a regression in the OpenCV test Imgproc/Threshold.Mat/40, where GetParam() = (16SC1, 0, 0, false).

Signed-off-by: Ruiling Song <ruiling.song@intel.com>
Reviewed-by: Zhigang Gong <zhigang.gong@linux.intel.com>
-rw-r--r-- backend/src/backend/gen_insn_selection.cpp 11
1 file changed, 10 insertions, 1 deletion
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index b651c195..3530d2c9 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -3090,7 +3090,7 @@ namespace gbe
wideReg = sel.selReg(insn.getDst(index/multiple), narrowType);
narrowReg = sel.selReg(insn.getSrc(i), narrowType); //retype to narrow type
}
- if(wideReg.hstride != GEN_VERTICAL_STRIDE_0) {
+ if(wideReg.hstride != GEN_HORIZONTAL_STRIDE_0) {
if(multiple == 2) {
wideReg = sel.unpacked_uw(wideReg.reg());
wideReg = GenRegister::retype(wideReg, getGenType(narrowType));
@@ -3107,6 +3107,15 @@ namespace gbe
wideReg.subphysical = 1;
}
if(isInt64) {
+ if(wideReg.hstride != GEN_HORIZONTAL_STRIDE_0) {
+ // as we store long by bottom & high part separately, we have to divide hstride by 2
+ if (wideReg.hstride == GEN_HORIZONTAL_STRIDE_2)
+ wideReg.hstride = GEN_HORIZONTAL_STRIDE_1;
+ else if (wideReg.hstride == GEN_HORIZONTAL_STRIDE_4)
+ wideReg.hstride = GEN_HORIZONTAL_STRIDE_2;
+ else
+ GBE_ASSERT(0);
+ }
// offset to next half
wideReg.subphysical = 1;
if(i >= multiple/2)