diff options
Diffstat (limited to 'libavcodec/sh4')
-rw-r--r-- | libavcodec/sh4/dsputil_align.c | 23 | ||||
-rw-r--r-- | libavcodec/sh4/qpel.c | 80 |
2 files changed, 45 insertions, 58 deletions
diff --git a/libavcodec/sh4/dsputil_align.c b/libavcodec/sh4/dsputil_align.c index 46205602cc..a808dd1f82 100644 --- a/libavcodec/sh4/dsputil_align.c +++ b/libavcodec/sh4/dsputil_align.c @@ -26,15 +26,13 @@ #define LP(p) *(uint32_t*)(p) -#define BYTE_VEC(c) ((c)*0x01010101UL) - #define UNPACK(ph,pl,tt0,tt1) do { \ uint32_t t0,t1; t0=tt0;t1=tt1; \ - ph = ( (t0 & ~BYTE_VEC(0x03))>>2) + ( (t1 & ~BYTE_VEC(0x03))>>2); \ - pl = (t0 & BYTE_VEC(0x03)) + (t1 & BYTE_VEC(0x03)); } while(0) + ph = ( (t0 & ~BYTE_VEC32(0x03))>>2) + ( (t1 & ~BYTE_VEC32(0x03))>>2); \ + pl = (t0 & BYTE_VEC32(0x03)) + (t1 & BYTE_VEC32(0x03)); } while(0) -#define rnd_PACK(ph,pl,nph,npl) ph + nph + (((pl + npl + BYTE_VEC(0x02))>>2) & BYTE_VEC(0x03)) -#define no_rnd_PACK(ph,pl,nph,npl) ph + nph + (((pl + npl + BYTE_VEC(0x01))>>2) & BYTE_VEC(0x03)) +#define rnd_PACK(ph,pl,nph,npl) ph + nph + (((pl + npl + BYTE_VEC32(0x02))>>2) & BYTE_VEC32(0x03)) +#define no_rnd_PACK(ph,pl,nph,npl) ph + nph + (((pl + npl + BYTE_VEC32(0x01))>>2) & BYTE_VEC32(0x03)) /* little endian */ #define MERGE1(a,b,ofs) (ofs==0)?a:( ((a)>>(8*ofs))|((b)<<(32-8*ofs)) ) @@ -46,18 +44,7 @@ #define put(d,s) d = s -#define avg(d,s) d = rnd_avg2(s,d) - -static inline uint32_t rnd_avg2(uint32_t a, uint32_t b) -{ - return (a | b) - (((a ^ b) & ~BYTE_VEC(0x01)) >> 1); -} - -static inline uint32_t no_rnd_avg2(uint32_t a, uint32_t b) -{ - return (a & b) + (((a ^ b) & ~BYTE_VEC(0x01)) >> 1); -} - +#define avg(d,s) d = rnd_avg32(s,d) #define OP_C4(ofs) \ ref-=ofs; \ diff --git a/libavcodec/sh4/qpel.c b/libavcodec/sh4/qpel.c index 0085d5f5a1..2e6ac8ce31 100644 --- a/libavcodec/sh4/qpel.c +++ b/libavcodec/sh4/qpel.c @@ -9,8 +9,8 @@ /*static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ {\ do {\ - OP(LP(dst ),no_rnd_avg2(LD32(src1 ),LD32(src2 )) ); \ - OP(LP(dst+4),no_rnd_avg2(LD32(src1+4),LD32(src2+4)) ); \ + OP(LP(dst ),no_rnd_avg32(LD32(src1 ),LD32(src2 )) ); \ + OP(LP(dst+4),no_rnd_avg32(LD32(src1+4),LD32(src2+4)) ); \ src1+=src_stride1; \ src2+=src_stride2; \ dst+=dst_stride; \ @@ -20,8 +20,8 @@ static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ {\ do {\ - OP(LP(dst ),rnd_avg2(LD32(src1 ),LD32(src2 )) ); \ - OP(LP(dst+4),rnd_avg2(LD32(src1+4),LD32(src2+4)) ); \ + OP(LP(dst ),rnd_avg32(LD32(src1 ),LD32(src2 )) ); \ + OP(LP(dst+4),rnd_avg32(LD32(src1+4),LD32(src2+4)) ); \ src1+=src_stride1; \ src2+=src_stride2; \ dst+=dst_stride; \ @@ -31,7 +31,7 @@ static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, cons static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ {\ do {\ - OP(LP(dst ),rnd_avg2(LD32(src1 ),LD32(src2 )) ); \ + OP(LP(dst ),rnd_avg32(LD32(src1 ),LD32(src2 )) ); \ src1+=src_stride1; \ src2+=src_stride2; \ dst+=dst_stride; \ @@ -41,10 +41,10 @@ static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, cons static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ {\ do {\ - OP(LP(dst ),no_rnd_avg2(LD32(src1 ),LD32(src2 )) ); \ - OP(LP(dst+4),no_rnd_avg2(LD32(src1+4),LD32(src2+4)) ); \ - OP(LP(dst+8),no_rnd_avg2(LD32(src1+8),LD32(src2+8)) ); \ - OP(LP(dst+12),no_rnd_avg2(LD32(src1+12),LD32(src2+12)) ); \ + OP(LP(dst ),no_rnd_avg32(LD32(src1 ),LD32(src2 )) ); \ + OP(LP(dst+4),no_rnd_avg32(LD32(src1+4),LD32(src2+4)) ); \ + OP(LP(dst+8),no_rnd_avg32(LD32(src1+8),LD32(src2+8)) ); \ + OP(LP(dst+12),no_rnd_avg32(LD32(src1+12),LD32(src2+12)) ); \ src1+=src_stride1; \ src2+=src_stride2; \ dst+=dst_stride; \ @@ -54,10 +54,10 @@ static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *sr static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ {\ do {\ - OP(LP(dst ),rnd_avg2(LD32(src1 ),LD32(src2 )) ); \ - OP(LP(dst+4),rnd_avg2(LD32(src1+4),LD32(src2+4)) ); \ - OP(LP(dst+8),rnd_avg2(LD32(src1+8),LD32(src2+8)) ); \ - OP(LP(dst+12),rnd_avg2(LD32(src1+12),LD32(src2+12)) ); \ + OP(LP(dst ),rnd_avg32(LD32(src1 ),LD32(src2 )) ); \ + OP(LP(dst+4),rnd_avg32(LD32(src1+4),LD32(src2+4)) ); \ + OP(LP(dst+8),rnd_avg32(LD32(src1+8),LD32(src2+8)) ); \ + OP(LP(dst+12),rnd_avg32(LD32(src1+12),LD32(src2+12)) ); \ src1+=src_stride1; \ src2+=src_stride2; \ dst+=dst_stride; \ @@ -67,7 +67,7 @@ static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, con static inline void OPNAME ## _pixels4_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ {\ do {\ - OP(LP(dst ),rnd_avg2(LP(src1 ),LP(src2 )) ); \ + OP(LP(dst ),rnd_avg32(LP(src1 ),LP(src2 )) ); \ src1+=src_stride1; \ src2+=src_stride2; \ dst+=dst_stride; \ @@ -77,7 +77,7 @@ static inline void OPNAME ## _pixels4_l2_aligned(uint8_t *dst, const uint8_t *sr static inline void OPNAME ## _pixels4_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ {\ do {\ - OP(LP(dst ),rnd_avg2(LD32(src1 ),LP(src2 )) ); \ + OP(LP(dst ),rnd_avg32(LD32(src1 ),LP(src2 )) ); \ src1+=src_stride1; \ src2+=src_stride2; \ dst+=dst_stride; \ @@ -87,10 +87,10 @@ static inline void OPNAME ## _pixels4_l2_aligned2(uint8_t *dst, const uint8_t *s static inline void OPNAME ## _no_rnd_pixels16_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ {\ do {\ - OP(LP(dst ),no_rnd_avg2(LD32(src1 ),LP(src2 )) ); \ - OP(LP(dst+4),no_rnd_avg2(LD32(src1+4),LP(src2+4)) ); \ - OP(LP(dst+8),no_rnd_avg2(LD32(src1+8),LP(src2+8)) ); \ - OP(LP(dst+12),no_rnd_avg2(LD32(src1+12),LP(src2+12)) ); \ + OP(LP(dst ),no_rnd_avg32(LD32(src1 ),LP(src2 )) ); \ + OP(LP(dst+4),no_rnd_avg32(LD32(src1+4),LP(src2+4)) ); \ + OP(LP(dst+8),no_rnd_avg32(LD32(src1+8),LP(src2+8)) ); \ + OP(LP(dst+12),no_rnd_avg32(LD32(src1+12),LP(src2+12)) ); \ src1+=src_stride1; \ src2+=src_stride2; \ dst+=dst_stride; \ @@ -100,10 +100,10 @@ static inline void OPNAME ## _no_rnd_pixels16_l2_aligned2(uint8_t *dst, const ui static inline void OPNAME ## _pixels16_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ {\ do {\ - OP(LP(dst ),rnd_avg2(LD32(src1 ),LP(src2 )) ); \ - OP(LP(dst+4),rnd_avg2(LD32(src1+4),LP(src2+4)) ); \ - OP(LP(dst+8),rnd_avg2(LD32(src1+8),LP(src2+8)) ); \ - OP(LP(dst+12),rnd_avg2(LD32(src1+12),LP(src2+12)) ); \ + OP(LP(dst ),rnd_avg32(LD32(src1 ),LP(src2 )) ); \ + OP(LP(dst+4),rnd_avg32(LD32(src1+4),LP(src2+4)) ); \ + OP(LP(dst+8),rnd_avg32(LD32(src1+8),LP(src2+8)) ); \ + OP(LP(dst+12),rnd_avg32(LD32(src1+12),LP(src2+12)) ); \ src1+=src_stride1; \ src2+=src_stride2; \ dst+=dst_stride; \ @@ -113,8 +113,8 @@ static inline void OPNAME ## _pixels16_l2_aligned2(uint8_t *dst, const uint8_t * static inline void OPNAME ## _no_rnd_pixels8_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ {\ do { /* onlye src2 aligned */\ - OP(LP(dst ),no_rnd_avg2(LD32(src1 ),LP(src2 )) ); \ - OP(LP(dst+4),no_rnd_avg2(LD32(src1+4),LP(src2+4)) ); \ + OP(LP(dst ),no_rnd_avg32(LD32(src1 ),LP(src2 )) ); \ + OP(LP(dst+4),no_rnd_avg32(LD32(src1+4),LP(src2+4)) ); \ src1+=src_stride1; \ src2+=src_stride2; \ dst+=dst_stride; \ @@ -124,8 +124,8 @@ static inline void OPNAME ## _no_rnd_pixels8_l2_aligned2(uint8_t *dst, const uin static inline void OPNAME ## _pixels8_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ {\ do {\ - OP(LP(dst ),rnd_avg2(LD32(src1 ),LP(src2 )) ); \ - OP(LP(dst+4),rnd_avg2(LD32(src1+4),LP(src2+4)) ); \ + OP(LP(dst ),rnd_avg32(LD32(src1 ),LP(src2 )) ); \ + OP(LP(dst+4),rnd_avg32(LD32(src1+4),LP(src2+4)) ); \ src1+=src_stride1; \ src2+=src_stride2; \ dst+=dst_stride; \ @@ -135,8 +135,8 @@ static inline void OPNAME ## _pixels8_l2_aligned2(uint8_t *dst, const uint8_t *s static inline void OPNAME ## _no_rnd_pixels8_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ {\ do {\ - OP(LP(dst ),no_rnd_avg2(LP(src1 ),LP(src2 )) ); \ - OP(LP(dst+4),no_rnd_avg2(LP(src1+4),LP(src2+4)) ); \ + OP(LP(dst ),no_rnd_avg32(LP(src1 ),LP(src2 )) ); \ + OP(LP(dst+4),no_rnd_avg32(LP(src1+4),LP(src2+4)) ); \ src1+=src_stride1; \ src2+=src_stride2; \ dst+=dst_stride; \ @@ -146,8 +146,8 @@ static inline void OPNAME ## _no_rnd_pixels8_l2_aligned(uint8_t *dst, const uint static inline void OPNAME ## _pixels8_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ {\ do {\ - OP(LP(dst ),rnd_avg2(LP(src1 ),LP(src2 )) ); \ - OP(LP(dst+4),rnd_avg2(LP(src1+4),LP(src2+4)) ); \ + OP(LP(dst ),rnd_avg32(LP(src1 ),LP(src2 )) ); \ + OP(LP(dst+4),rnd_avg32(LP(src1+4),LP(src2+4)) ); \ src1+=src_stride1; \ src2+=src_stride2; \ dst+=dst_stride; \ @@ -157,10 +157,10 @@ static inline void OPNAME ## _pixels8_l2_aligned(uint8_t *dst, const uint8_t *sr static inline void OPNAME ## _no_rnd_pixels16_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ {\ do {\ - OP(LP(dst ),no_rnd_avg2(LP(src1 ),LP(src2 )) ); \ - OP(LP(dst+4),no_rnd_avg2(LP(src1+4),LP(src2+4)) ); \ - OP(LP(dst+8),no_rnd_avg2(LP(src1+8),LP(src2+8)) ); \ - OP(LP(dst+12),no_rnd_avg2(LP(src1+12),LP(src2+12)) ); \ + OP(LP(dst ),no_rnd_avg32(LP(src1 ),LP(src2 )) ); \ + OP(LP(dst+4),no_rnd_avg32(LP(src1+4),LP(src2+4)) ); \ + OP(LP(dst+8),no_rnd_avg32(LP(src1+8),LP(src2+8)) ); \ + OP(LP(dst+12),no_rnd_avg32(LP(src1+12),LP(src2+12)) ); \ src1+=src_stride1; \ src2+=src_stride2; \ dst+=dst_stride; \ @@ -170,10 +170,10 @@ static inline void OPNAME ## _no_rnd_pixels16_l2_aligned(uint8_t *dst, const uin static inline void OPNAME ## _pixels16_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \ {\ do {\ - OP(LP(dst ),rnd_avg2(LP(src1 ),LP(src2 )) ); \ - OP(LP(dst+4),rnd_avg2(LP(src1+4),LP(src2+4)) ); \ - OP(LP(dst+8),rnd_avg2(LP(src1+8),LP(src2+8)) ); \ - OP(LP(dst+12),rnd_avg2(LP(src1+12),LP(src2+12)) ); \ + OP(LP(dst ),rnd_avg32(LP(src1 ),LP(src2 )) ); \ + OP(LP(dst+4),rnd_avg32(LP(src1+4),LP(src2+4)) ); \ + OP(LP(dst+8),rnd_avg32(LP(src1+8),LP(src2+8)) ); \ + OP(LP(dst+12),rnd_avg32(LP(src1+12),LP(src2+12)) ); \ src1+=src_stride1; \ src2+=src_stride2; \ dst+=dst_stride; \ @@ -353,7 +353,7 @@ static inline void OPNAME ## _no_rnd_pixels16_l4_aligned0(uint8_t *dst, const ui } \ \ -#define op_avg(a, b) a = rnd_avg2(a,b) +#define op_avg(a, b) a = rnd_avg32(a,b) #define op_put(a, b) a = b PIXOP2(avg, op_avg) |