;******************************************************************************
;* SIMD-optimized UTVideo functions
;* Copyright (c) 2017 Paul B Mahol
;* Copyright (c) 2017 Jokyo Images
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************

%include "libavutil/x86/x86util.asm"

SECTION_RODATA

; Constants defined outside this file. Judging by their names they are packed
; bytes of 0x80 and packed words of 512 / 1023 -- confirm against the
; definitions.
cextern pb_80
cextern pw_512
cextern pw_1023

SECTION .text

;-------------------------------------------------------------------------------------------
; void restore_rgb_planes(uint8_t *src_r, uint8_t *src_g, uint8_t *src_b,
;                         ptrdiff_t linesize_r, ptrdiff_t linesize_g, ptrdiff_t linesize_b,
;                         int width, int height)
;
; In place, for every row and every byte column:
;     r += g - [pb_80]
;     b += g - [pb_80]
; The green plane is only read. All arithmetic is per-byte and wraps.
;
; Register roles:
;     src_r/g/b     point one row-width past the start of the current row
;     w             negated row width in bytes (kept in its stack slot on x86-32)
;     x             negative byte offset from the row end, counts up towards 0
;     m3            bias vector, m1 = biased green, m0/m2 = red/blue
;
; Notes for callers:
;   * Both loops test their condition at the bottom, so at least one vector of
;     one row is always processed.
;   * A row is handled in whole mmsize-byte vectors; if width is not a multiple
;     of mmsize the last vector extends past the row end.
;   * mova is the aligned move -- presumably every row start must be
;     mmsize-aligned; verify at the call site.
;-------------------------------------------------------------------------------------------
%macro RESTORE_RGB_PLANES 0
cglobal restore_rgb_planes, 7 + ARCH_X86_64, 7 + ARCH_X86_64 * 2, 4, src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, w, h, x
    mova                m3, [pb_80]             ; loop-invariant green bias
    movsxdifnidn        wq, wd                  ; width arrives as int; widen it
    add             src_bq, wq                  ; move each plane pointer to the
    add             src_gq, wq                  ;   end of its first row so the
    add             src_rq, wq                  ;   column index can run -w .. 0
    neg                 wq
%if ARCH_X86_64 == 0
    ; Only seven GPRs here: park -w in its own stack slot, hand its register
    ; over to x, and read h straight from the stack.
    mov                 wm, wq
DEFINE_ARGS src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, x
    %define wq r6m
    %define hd r7mp
%endif

.row:
    mov                 xq, wq                  ; x = -width

.pixels:
    mova                m1, [src_gq + xq]
    psubb               m1, m3                  ; g - bias
    mova                m0, [src_rq + xq]
    mova                m2, [src_bq + xq]
    paddb               m0, m1                  ; r += g - bias
    paddb               m2, m1                  ; b += g - bias
    mova     [src_rq + xq], m0
    mova     [src_bq + xq], m2
    add                 xq, mmsize
    jl .pixels                                  ; signed: x is negative until the row end

    add             src_rq, linesize_rq         ; step all three planes one row
    add             src_gq, linesize_gq
    add             src_bq, linesize_bq
    sub                 hd, 1
    jg .row
    RET
%endmacro

INIT_XMM sse2
RESTORE_RGB_PLANES

%if HAVE_AVX2_EXTERNAL
INIT_YMM avx2
RESTORE_RGB_PLANES
%endif

;-------------------------------------------------------------------------------------------
; void restore_rgb_planes10(uint16_t *src_r, uint16_t *src_g, uint16_t *src_b,
;                           ptrdiff_t linesize_r, ptrdiff_t linesize_g, ptrdiff_t linesize_b,
;                           int width, int height)
;
; 16-bit-sample counterpart of restore_rgb_planes. In place, per word:
;     r = (r + g - [pw_512]) & [pw_1023]
;     b = (b + g - [pw_512]) & [pw_1023]
; The green plane is only read.
;
; width and the three linesizes are doubled on entry before being used as
; byte offsets, i.e. they are taken in units of uint16_t samples.
;
; Register roles match restore_rgb_planes, plus m4 = result mask.
; The same caller notes apply: bottom-tested loops, whole-vector granularity,
; aligned moves.
;-------------------------------------------------------------------------------------------
%macro RESTORE_RGB_PLANES10 0
cglobal restore_rgb_planes10, 7 + ARCH_X86_64, 7 + ARCH_X86_64 * 2, 5, src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, w, h, x
    mova                m3, [pw_512]            ; loop-invariant green bias
    mova                m4, [pw_1023]           ; loop-invariant result mask
    add                 wd, wd                  ; samples -> bytes; the 32-bit write
                                                ;   also clears the upper half of wq
    add             src_bq, wq                  ; move each plane pointer to the
    add             src_gq, wq                  ;   end of its first row
    add             src_rq, wq
    neg                 wq
    add        linesize_rq, linesize_rq         ; samples -> bytes
    add        linesize_gq, linesize_gq
    add        linesize_bq, linesize_bq
%if ARCH_X86_64 == 0
    ; Only seven GPRs here: park -w in its own stack slot, hand its register
    ; over to x, and read h straight from the stack.
    mov                 wm, wq
DEFINE_ARGS src_r, src_g, src_b, linesize_r, linesize_g, linesize_b, x
    %define wq r6m
    %define hd r7mp
%endif

.row:
    mov                 xq, wq                  ; x = -(row width in bytes)

.pixels:
    mova                m1, [src_gq + xq]
    psubw               m1, m3                  ; g - bias
    mova                m0, [src_rq + xq]
    mova                m2, [src_bq + xq]
    paddw               m0, m1                  ; r += g - bias
    paddw               m2, m1                  ; b += g - bias
    pand                m0, m4                  ; keep only the bits set in the mask
    pand                m2, m4
    mova     [src_rq + xq], m0
    mova     [src_bq + xq], m2
    add                 xq, mmsize
    jl .pixels                                  ; signed: x is negative until the row end

    add             src_rq, linesize_rq         ; step all three planes one row
    add             src_gq, linesize_gq
    add             src_bq, linesize_bq
    sub                 hd, 1
    jg .row
    RET
%endmacro

INIT_XMM sse2
RESTORE_RGB_PLANES10

%if HAVE_AVX2_EXTERNAL
INIT_YMM avx2
RESTORE_RGB_PLANES10
%endif