summaryrefslogtreecommitdiff
path: root/libavutil/mips
diff options
context:
space:
mode:
author Kaustubh Raste <kaustubh.raste@imgtec.com> 2017-10-24 12:41:30 +0530
committer Michael Niedermayer <michael@niedermayer.cc> 2017-10-25 21:50:37 +0200
commit 736a48901fa0061f52d3f6679546d4d6b5fdb510 (patch)
tree 3ba9586fbe518131df8f96cda79715bdd44a2f4a /libavutil/mips
parent ce0a52e9e92950be9350b09fbdc0b0cfdfb862ec (diff)
download ffmpeg-736a48901fa0061f52d3f6679546d4d6b5fdb510.tar.gz
avcodec/mips: Improve hevc bi weighted hv mc msa functions
Use immediate unsigned saturation for clip to max, saving one vector register.

Signed-off-by: Kaustubh Raste <kaustubh.raste@imgtec.com>
Reviewed-by: Manojkumar Bhosale <Manojkumar.Bhosale@imgtec.com>
Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
Diffstat (limited to 'libavutil/mips')
-rw-r--r--libavutil/mips/generic_macros_msa.h35
1 files changed, 35 insertions, 0 deletions
diff --git a/libavutil/mips/generic_macros_msa.h b/libavutil/mips/generic_macros_msa.h
index c892529f05..6a46704663 100644
--- a/libavutil/mips/generic_macros_msa.h
+++ b/libavutil/mips/generic_macros_msa.h
@@ -1088,6 +1088,25 @@
out_m; \
} )
+/* Description : Clip all signed word elements of input vector
+                 between 0 & 255
+   Arguments   : Input       - in (vector, reinterpreted as v4i32)
+                 Return Type - v4i32
+   Details     : Negative elements are raised to 0 by a signed maximum
+                 against the immediate 0. The result is then saturated
+                 as unsigned with __msa_sat_u_w (immediate 7), which
+                 caps the upper end at 255 without needing a separate
+                 vector holding the maximum.
+                 'in' is parenthesized so that an expression argument is
+                 cast as a whole, not just its first operand.
+*/
+#define CLIP_SW_0_255_MAX_SATU(in)                        \
+( {                                                       \
+    v4i32 out_m;                                          \
+                                                          \
+    out_m = __msa_maxi_s_w((v4i32) (in), 0);              \
+    out_m = (v4i32) __msa_sat_u_w((v4u32) out_m, 7);      \
+    out_m;                                                \
+} )
+/* Description : Clip two word vectors between 0 & 255, in place
+   Arguments   : Inputs/Outputs - in0, in1 (assignable word vectors)
+   Details     : Each argument is passed through CLIP_SW_0_255_MAX_SATU
+                 and the result is written back to the same argument.
+*/
+#define CLIP_SW2_0_255_MAX_SATU(in0, in1)      \
+{                                              \
+    in0 = CLIP_SW_0_255_MAX_SATU(in0);         \
+    in1 = CLIP_SW_0_255_MAX_SATU(in1);         \
+}
+/* Description : Clip four word vectors between 0 & 255, in place
+   Arguments   : Inputs/Outputs - in0, in1, in2, in3
+                                  (assignable word vectors)
+   Details     : Each argument is passed through CLIP_SW_0_255_MAX_SATU
+                 and the result is written back to the same argument,
+                 in the order in0, in1, in2, in3.
+*/
+#define CLIP_SW4_0_255_MAX_SATU(in0, in1, in2, in3)  \
+{                                                    \
+    in0 = CLIP_SW_0_255_MAX_SATU(in0);               \
+    in1 = CLIP_SW_0_255_MAX_SATU(in1);               \
+    in2 = CLIP_SW_0_255_MAX_SATU(in2);               \
+    in3 = CLIP_SW_0_255_MAX_SATU(in3);               \
+}
+
/* Description : Addition of 4 signed word elements
4 signed word elements of input vector are added together and
resulted integer sum is returned
@@ -2244,6 +2263,22 @@
out3 = in6 - in7; \
}
+/* Description : Sign extend byte elements from right half of the vector
+   Arguments   : Input       - in  (byte vector, evaluated exactly once)
+                 Output      - out (sign extended halfword vector)
+                 Return Type - signed halfword
+   Details     : A per-byte "less than 0" mask of 'in' is computed with
+                 __msa_clti_s_b and right-interleaved with 'in' itself
+                 using __msa_ilvr_b, generating 8 halfword elements
+                 that keep the sign intact.
+                 'in' is copied to a local first, so an argument with
+                 side effects is not expanded twice; both arguments are
+                 parenthesized at their point of use.
+*/
+#define UNPCK_R_SB_SH(in, out)                                      \
+{                                                                   \
+    v16i8 unpck_src_m, unpck_sign_m;                                \
+                                                                    \
+    unpck_src_m  = (v16i8) (in);                                    \
+    unpck_sign_m = __msa_clti_s_b(unpck_src_m, 0);                  \
+    (out) = (v8i16) __msa_ilvr_b(unpck_sign_m, unpck_src_m);        \
+}
+
/* Description : Sign extend halfword elements from right half of the vector
Arguments : Inputs - in (input halfword vector)
Outputs - out (sign extended word vectors)