summary | refs | log | tree | commit | diff
path: root/libswscale
diff options
context:
space:
mode:
Diffstat (limited to 'libswscale')
-rw-r--r-- libswscale/riscv/rgb2rgb.c 10
-rw-r--r-- libswscale/riscv/rgb2rgb_rvv.S 53
2 files changed, 63 insertions, 0 deletions
diff --git a/libswscale/riscv/rgb2rgb.c b/libswscale/riscv/rgb2rgb.c
index 32c1546827..37a2cd5ea1 100644
--- a/libswscale/riscv/rgb2rgb.c
+++ b/libswscale/riscv/rgb2rgb.c
@@ -33,6 +33,12 @@ void ff_shuffle_bytes_3210_rvv(const uint8_t *src, uint8_t *dst, int src_len);
void ff_interleave_bytes_rvv(const uint8_t *src1, const uint8_t *src2,
uint8_t *dst, int width, int height, int s1stride,
int s2stride, int dstride);
+void ff_uyvytoyuv422_rvv(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, /* three writable output pointers, named for the Y, U and V planes */
+ const uint8_t *src, int width, int height, /* read-only packed input and its dimensions */
+ int ystride, int uvstride, int src_stride); /* per-row pointer increments; body is in rgb2rgb_rvv.S (built only when __riscv_xlen == 64) */
+void ff_yuyvtoyuv422_rvv(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, /* three writable output pointers, named for the Y, U and V planes */
+ const uint8_t *src, int width, int height, /* read-only packed input and its dimensions */
+ int ystride, int uvstride, int src_stride); /* per-row pointer increments; body is in rgb2rgb_rvv.S (built only when __riscv_xlen == 64) */
av_cold void rgb2rgb_init_riscv(void)
{
@@ -46,6 +52,10 @@ av_cold void rgb2rgb_init_riscv(void)
shuffle_bytes_3012 = ff_shuffle_bytes_3012_rvv;
shuffle_bytes_3210 = ff_shuffle_bytes_3210_rvv;
interleaveBytes = ff_interleave_bytes_rvv;
+#if (__riscv_xlen == 64) /* same guard as in rgb2rgb_rvv.S: the two routines below only exist on RV64 */
+ uyvytoyuv422 = ff_uyvytoyuv422_rvv; /* install the vector UYVY routine */
+ yuyvtoyuv422 = ff_yuyvtoyuv422_rvv; /* install the vector YUYV routine */
+#endif /* __riscv_xlen == 64 */
}
#endif
}
diff --git a/libswscale/riscv/rgb2rgb_rvv.S b/libswscale/riscv/rgb2rgb_rvv.S
index 7f8c2efd80..5626d906eb 100644
--- a/libswscale/riscv/rgb2rgb_rvv.S
+++ b/libswscale/riscv/rgb2rgb_rvv.S
@@ -102,3 +102,56 @@ func ff_interleave_bytes_rvv, zve32x
ret
endfunc
+
+#if (__riscv_xlen == 64)
+.macro yuy2_to_i422p v_y0, v_y1, v_u, v_v // split packed 4-byte groups into three planes; the arguments name which of v8-v11 holds each component after the segment load
+ addi sp, sp, -16 // reserve 16 bytes of stack for the two callee-saved registers used below
+ sd s0, (sp) // save s0 (reused for the source row increment)
+ sd s1, 8(sp) // save s1 (reused for the luma store stride)
+ addi a4, a4, 1 // a4 = width + 1, first half of a round-up division by 2
+ lw s0, 16(sp) // s0 = 32-bit word just above the save area, i.e. the caller's first stack slot; presumably src_stride per the C prototype - confirm
+ srai a4, a4, 1 // pixel width -> chroma width, rounded up; for odd widths the luma stores below therefore cover width + 1 bytes
+ li s1, 2 // byte stride used by the two strided luma stores
+1: // row loop
+ mv t4, a4 // t4 = 4-byte groups left in this row
+ mv t3, a3 // t3 = read cursor, starts at the row base in a3
+ mv t0, a0 // t0 = write cursor for the first luma byte of each group (even offsets from a0)
+ addi t6, a0, 1 // t6 = write cursor for the second luma byte of each group (odd offsets from a0)
+ mv t1, a1 // t1 = write cursor for v_u, row base in a1
+ mv t2, a2 // t2 = write cursor for v_v, row base in a2
+ addi a5, a5, -1 // one fewer row to go; NOTE(review): a5 == 0 on entry would wrap, so at least one row is assumed
+2: // chunk loop within a row
+ vsetvli t5, t4, e8, m1, ta, ma // t5 = number of 8-bit elements processed this pass (at most t4)
+ sub t4, t4, t5 // groups still pending after this pass
+ vlseg4e8.v v8, (t3) // de-interleaving load of t5 groups: byte 0 -> v8, byte 1 -> v9, byte 2 -> v10, byte 3 -> v11
+ sh2add t3, t5, t3 // t3 += 4 * t5 (bytes consumed from the source)
+ vsse8.v \v_y0, (t0), s1 // scatter first luma bytes to every second destination byte
+ sh1add t0, t5, t0 // t0 += 2 * t5
+ vsse8.v \v_y1, (t6), s1 // scatter second luma bytes into the gaps left by the previous store
+ sh1add t6, t5, t6 // t6 += 2 * t5
+ vse8.v \v_u, (t1) // contiguous store of t5 bytes from the v_u register
+ add t1, t5, t1 // t1 += t5
+ vse8.v \v_v, (t2) // contiguous store of t5 bytes from the v_v register
+ add t2, t5, t2 // t2 += t5
+ bnez t4, 2b // repeat until the row is exhausted
+
+ add a3, a3, s0 // step the source row base by s0
+ add a0, a0, a6 // step the luma row base by a6
+ add a1, a1, a7 // step the v_u row base by a7
+ add a2, a2, a7 // step the v_v row base by the same a7
+ bnez a5, 1b // next row while the counter is non-zero
+
+ ld s1, 8(sp) // restore s1
+ ld s0, (sp) // restore s0
+ addi sp, sp, 16 // release the save area
+ ret // the macro supplies the whole function body, including the return
+.endm
+
+func ff_uyvytoyuv422_rvv, zve32x // UYVY variant of the packed-to-planar macro
+ yuy2_to_i422p v9, v11, v8, v10 // luma taken from bytes 1 and 3 of each group (v9, v11), v_u from byte 0 (v8), v_v from byte 2 (v10)
+endfunc
+
+func ff_yuyvtoyuv422_rvv, zve32x // YUYV variant of the packed-to-planar macro
+ yuy2_to_i422p v8, v10, v9, v11 // luma taken from bytes 0 and 2 of each group (v8, v10), v_u from byte 1 (v9), v_v from byte 3 (v11)
+endfunc
+#endif