summary | refs | log | tree | commit | diff
path: root/libavcodec/sh4
diff options
context:
space:
mode:
Diffstat (limited to 'libavcodec/sh4')
-rw-r--r-- libavcodec/sh4/dsputil_align.c | 23
-rw-r--r-- libavcodec/sh4/qpel.c | 80
2 files changed, 45 insertions, 58 deletions
diff --git a/libavcodec/sh4/dsputil_align.c b/libavcodec/sh4/dsputil_align.c
index 46205602cc..a808dd1f82 100644
--- a/libavcodec/sh4/dsputil_align.c
+++ b/libavcodec/sh4/dsputil_align.c
@@ -26,15 +26,13 @@
#define LP(p) *(uint32_t*)(p)
-#define BYTE_VEC(c) ((c)*0x01010101UL)
-
#define UNPACK(ph,pl,tt0,tt1) do { \
uint32_t t0,t1; t0=tt0;t1=tt1; \
- ph = ( (t0 & ~BYTE_VEC(0x03))>>2) + ( (t1 & ~BYTE_VEC(0x03))>>2); \
- pl = (t0 & BYTE_VEC(0x03)) + (t1 & BYTE_VEC(0x03)); } while(0)
+ ph = ( (t0 & ~BYTE_VEC32(0x03))>>2) + ( (t1 & ~BYTE_VEC32(0x03))>>2); \
+ pl = (t0 & BYTE_VEC32(0x03)) + (t1 & BYTE_VEC32(0x03)); } while(0)
-#define rnd_PACK(ph,pl,nph,npl) ph + nph + (((pl + npl + BYTE_VEC(0x02))>>2) & BYTE_VEC(0x03))
-#define no_rnd_PACK(ph,pl,nph,npl) ph + nph + (((pl + npl + BYTE_VEC(0x01))>>2) & BYTE_VEC(0x03))
+#define rnd_PACK(ph,pl,nph,npl) ph + nph + (((pl + npl + BYTE_VEC32(0x02))>>2) & BYTE_VEC32(0x03))
+#define no_rnd_PACK(ph,pl,nph,npl) ph + nph + (((pl + npl + BYTE_VEC32(0x01))>>2) & BYTE_VEC32(0x03))
/* little endian */
#define MERGE1(a,b,ofs) (ofs==0)?a:( ((a)>>(8*ofs))|((b)<<(32-8*ofs)) )
@@ -46,18 +44,7 @@
#define put(d,s) d = s
-#define avg(d,s) d = rnd_avg2(s,d)
-
-static inline uint32_t rnd_avg2(uint32_t a, uint32_t b)
-{
- return (a | b) - (((a ^ b) & ~BYTE_VEC(0x01)) >> 1);
-}
-
-static inline uint32_t no_rnd_avg2(uint32_t a, uint32_t b)
-{
- return (a & b) + (((a ^ b) & ~BYTE_VEC(0x01)) >> 1);
-}
-
+#define avg(d,s) d = rnd_avg32(s,d)
#define OP_C4(ofs) \
ref-=ofs; \
diff --git a/libavcodec/sh4/qpel.c b/libavcodec/sh4/qpel.c
index 0085d5f5a1..2e6ac8ce31 100644
--- a/libavcodec/sh4/qpel.c
+++ b/libavcodec/sh4/qpel.c
@@ -9,8 +9,8 @@
/*static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
do {\
- OP(LP(dst ),no_rnd_avg2(LD32(src1 ),LD32(src2 )) ); \
- OP(LP(dst+4),no_rnd_avg2(LD32(src1+4),LD32(src2+4)) ); \
+ OP(LP(dst ),no_rnd_avg32(LD32(src1 ),LD32(src2 )) ); \
+ OP(LP(dst+4),no_rnd_avg32(LD32(src1+4),LD32(src2+4)) ); \
src1+=src_stride1; \
src2+=src_stride2; \
dst+=dst_stride; \
@@ -20,8 +20,8 @@
static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
do {\
- OP(LP(dst ),rnd_avg2(LD32(src1 ),LD32(src2 )) ); \
- OP(LP(dst+4),rnd_avg2(LD32(src1+4),LD32(src2+4)) ); \
+ OP(LP(dst ),rnd_avg32(LD32(src1 ),LD32(src2 )) ); \
+ OP(LP(dst+4),rnd_avg32(LD32(src1+4),LD32(src2+4)) ); \
src1+=src_stride1; \
src2+=src_stride2; \
dst+=dst_stride; \
@@ -31,7 +31,7 @@ static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, cons
static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
do {\
- OP(LP(dst ),rnd_avg2(LD32(src1 ),LD32(src2 )) ); \
+ OP(LP(dst ),rnd_avg32(LD32(src1 ),LD32(src2 )) ); \
src1+=src_stride1; \
src2+=src_stride2; \
dst+=dst_stride; \
@@ -41,10 +41,10 @@ static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, cons
static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
do {\
- OP(LP(dst ),no_rnd_avg2(LD32(src1 ),LD32(src2 )) ); \
- OP(LP(dst+4),no_rnd_avg2(LD32(src1+4),LD32(src2+4)) ); \
- OP(LP(dst+8),no_rnd_avg2(LD32(src1+8),LD32(src2+8)) ); \
- OP(LP(dst+12),no_rnd_avg2(LD32(src1+12),LD32(src2+12)) ); \
+ OP(LP(dst ),no_rnd_avg32(LD32(src1 ),LD32(src2 )) ); \
+ OP(LP(dst+4),no_rnd_avg32(LD32(src1+4),LD32(src2+4)) ); \
+ OP(LP(dst+8),no_rnd_avg32(LD32(src1+8),LD32(src2+8)) ); \
+ OP(LP(dst+12),no_rnd_avg32(LD32(src1+12),LD32(src2+12)) ); \
src1+=src_stride1; \
src2+=src_stride2; \
dst+=dst_stride; \
@@ -54,10 +54,10 @@ static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *sr
static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
do {\
- OP(LP(dst ),rnd_avg2(LD32(src1 ),LD32(src2 )) ); \
- OP(LP(dst+4),rnd_avg2(LD32(src1+4),LD32(src2+4)) ); \
- OP(LP(dst+8),rnd_avg2(LD32(src1+8),LD32(src2+8)) ); \
- OP(LP(dst+12),rnd_avg2(LD32(src1+12),LD32(src2+12)) ); \
+ OP(LP(dst ),rnd_avg32(LD32(src1 ),LD32(src2 )) ); \
+ OP(LP(dst+4),rnd_avg32(LD32(src1+4),LD32(src2+4)) ); \
+ OP(LP(dst+8),rnd_avg32(LD32(src1+8),LD32(src2+8)) ); \
+ OP(LP(dst+12),rnd_avg32(LD32(src1+12),LD32(src2+12)) ); \
src1+=src_stride1; \
src2+=src_stride2; \
dst+=dst_stride; \
@@ -67,7 +67,7 @@ static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, con
static inline void OPNAME ## _pixels4_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
do {\
- OP(LP(dst ),rnd_avg2(LP(src1 ),LP(src2 )) ); \
+ OP(LP(dst ),rnd_avg32(LP(src1 ),LP(src2 )) ); \
src1+=src_stride1; \
src2+=src_stride2; \
dst+=dst_stride; \
@@ -77,7 +77,7 @@ static inline void OPNAME ## _pixels4_l2_aligned(uint8_t *dst, const uint8_t *sr
static inline void OPNAME ## _pixels4_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
do {\
- OP(LP(dst ),rnd_avg2(LD32(src1 ),LP(src2 )) ); \
+ OP(LP(dst ),rnd_avg32(LD32(src1 ),LP(src2 )) ); \
src1+=src_stride1; \
src2+=src_stride2; \
dst+=dst_stride; \
@@ -87,10 +87,10 @@ static inline void OPNAME ## _pixels4_l2_aligned2(uint8_t *dst, const uint8_t *s
static inline void OPNAME ## _no_rnd_pixels16_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
do {\
- OP(LP(dst ),no_rnd_avg2(LD32(src1 ),LP(src2 )) ); \
- OP(LP(dst+4),no_rnd_avg2(LD32(src1+4),LP(src2+4)) ); \
- OP(LP(dst+8),no_rnd_avg2(LD32(src1+8),LP(src2+8)) ); \
- OP(LP(dst+12),no_rnd_avg2(LD32(src1+12),LP(src2+12)) ); \
+ OP(LP(dst ),no_rnd_avg32(LD32(src1 ),LP(src2 )) ); \
+ OP(LP(dst+4),no_rnd_avg32(LD32(src1+4),LP(src2+4)) ); \
+ OP(LP(dst+8),no_rnd_avg32(LD32(src1+8),LP(src2+8)) ); \
+ OP(LP(dst+12),no_rnd_avg32(LD32(src1+12),LP(src2+12)) ); \
src1+=src_stride1; \
src2+=src_stride2; \
dst+=dst_stride; \
@@ -100,10 +100,10 @@ static inline void OPNAME ## _no_rnd_pixels16_l2_aligned2(uint8_t *dst, const ui
static inline void OPNAME ## _pixels16_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
do {\
- OP(LP(dst ),rnd_avg2(LD32(src1 ),LP(src2 )) ); \
- OP(LP(dst+4),rnd_avg2(LD32(src1+4),LP(src2+4)) ); \
- OP(LP(dst+8),rnd_avg2(LD32(src1+8),LP(src2+8)) ); \
- OP(LP(dst+12),rnd_avg2(LD32(src1+12),LP(src2+12)) ); \
+ OP(LP(dst ),rnd_avg32(LD32(src1 ),LP(src2 )) ); \
+ OP(LP(dst+4),rnd_avg32(LD32(src1+4),LP(src2+4)) ); \
+ OP(LP(dst+8),rnd_avg32(LD32(src1+8),LP(src2+8)) ); \
+ OP(LP(dst+12),rnd_avg32(LD32(src1+12),LP(src2+12)) ); \
src1+=src_stride1; \
src2+=src_stride2; \
dst+=dst_stride; \
@@ -113,8 +113,8 @@ static inline void OPNAME ## _pixels16_l2_aligned2(uint8_t *dst, const uint8_t *
static inline void OPNAME ## _no_rnd_pixels8_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
do { /* onlye src2 aligned */\
- OP(LP(dst ),no_rnd_avg2(LD32(src1 ),LP(src2 )) ); \
- OP(LP(dst+4),no_rnd_avg2(LD32(src1+4),LP(src2+4)) ); \
+ OP(LP(dst ),no_rnd_avg32(LD32(src1 ),LP(src2 )) ); \
+ OP(LP(dst+4),no_rnd_avg32(LD32(src1+4),LP(src2+4)) ); \
src1+=src_stride1; \
src2+=src_stride2; \
dst+=dst_stride; \
@@ -124,8 +124,8 @@ static inline void OPNAME ## _no_rnd_pixels8_l2_aligned2(uint8_t *dst, const uin
static inline void OPNAME ## _pixels8_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
do {\
- OP(LP(dst ),rnd_avg2(LD32(src1 ),LP(src2 )) ); \
- OP(LP(dst+4),rnd_avg2(LD32(src1+4),LP(src2+4)) ); \
+ OP(LP(dst ),rnd_avg32(LD32(src1 ),LP(src2 )) ); \
+ OP(LP(dst+4),rnd_avg32(LD32(src1+4),LP(src2+4)) ); \
src1+=src_stride1; \
src2+=src_stride2; \
dst+=dst_stride; \
@@ -135,8 +135,8 @@ static inline void OPNAME ## _pixels8_l2_aligned2(uint8_t *dst, const uint8_t *s
static inline void OPNAME ## _no_rnd_pixels8_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
do {\
- OP(LP(dst ),no_rnd_avg2(LP(src1 ),LP(src2 )) ); \
- OP(LP(dst+4),no_rnd_avg2(LP(src1+4),LP(src2+4)) ); \
+ OP(LP(dst ),no_rnd_avg32(LP(src1 ),LP(src2 )) ); \
+ OP(LP(dst+4),no_rnd_avg32(LP(src1+4),LP(src2+4)) ); \
src1+=src_stride1; \
src2+=src_stride2; \
dst+=dst_stride; \
@@ -146,8 +146,8 @@ static inline void OPNAME ## _no_rnd_pixels8_l2_aligned(uint8_t *dst, const uint
static inline void OPNAME ## _pixels8_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
do {\
- OP(LP(dst ),rnd_avg2(LP(src1 ),LP(src2 )) ); \
- OP(LP(dst+4),rnd_avg2(LP(src1+4),LP(src2+4)) ); \
+ OP(LP(dst ),rnd_avg32(LP(src1 ),LP(src2 )) ); \
+ OP(LP(dst+4),rnd_avg32(LP(src1+4),LP(src2+4)) ); \
src1+=src_stride1; \
src2+=src_stride2; \
dst+=dst_stride; \
@@ -157,10 +157,10 @@ static inline void OPNAME ## _pixels8_l2_aligned(uint8_t *dst, const uint8_t *sr
static inline void OPNAME ## _no_rnd_pixels16_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
do {\
- OP(LP(dst ),no_rnd_avg2(LP(src1 ),LP(src2 )) ); \
- OP(LP(dst+4),no_rnd_avg2(LP(src1+4),LP(src2+4)) ); \
- OP(LP(dst+8),no_rnd_avg2(LP(src1+8),LP(src2+8)) ); \
- OP(LP(dst+12),no_rnd_avg2(LP(src1+12),LP(src2+12)) ); \
+ OP(LP(dst ),no_rnd_avg32(LP(src1 ),LP(src2 )) ); \
+ OP(LP(dst+4),no_rnd_avg32(LP(src1+4),LP(src2+4)) ); \
+ OP(LP(dst+8),no_rnd_avg32(LP(src1+8),LP(src2+8)) ); \
+ OP(LP(dst+12),no_rnd_avg32(LP(src1+12),LP(src2+12)) ); \
src1+=src_stride1; \
src2+=src_stride2; \
dst+=dst_stride; \
@@ -170,10 +170,10 @@ static inline void OPNAME ## _no_rnd_pixels16_l2_aligned(uint8_t *dst, const uin
static inline void OPNAME ## _pixels16_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
do {\
- OP(LP(dst ),rnd_avg2(LP(src1 ),LP(src2 )) ); \
- OP(LP(dst+4),rnd_avg2(LP(src1+4),LP(src2+4)) ); \
- OP(LP(dst+8),rnd_avg2(LP(src1+8),LP(src2+8)) ); \
- OP(LP(dst+12),rnd_avg2(LP(src1+12),LP(src2+12)) ); \
+ OP(LP(dst ),rnd_avg32(LP(src1 ),LP(src2 )) ); \
+ OP(LP(dst+4),rnd_avg32(LP(src1+4),LP(src2+4)) ); \
+ OP(LP(dst+8),rnd_avg32(LP(src1+8),LP(src2+8)) ); \
+ OP(LP(dst+12),rnd_avg32(LP(src1+12),LP(src2+12)) ); \
src1+=src_stride1; \
src2+=src_stride2; \
dst+=dst_stride; \
@@ -353,7 +353,7 @@ static inline void OPNAME ## _no_rnd_pixels16_l4_aligned0(uint8_t *dst, const ui
} \
\
-#define op_avg(a, b) a = rnd_avg2(a,b)
+#define op_avg(a, b) a = rnd_avg32(a,b)
#define op_put(a, b) a = b
PIXOP2(avg, op_avg)