diff options
Diffstat (limited to 'libswscale')
34 files changed, 1740 insertions, 996 deletions
diff --git a/libswscale/Makefile b/libswscale/Makefile index 0799b458be..dd00f7d708 100644 --- a/libswscale/Makefile +++ b/libswscale/Makefile @@ -1,3 +1,5 @@ +include $(SUBDIR)../config.mak + NAME = swscale FFLIBS = avutil diff --git a/libswscale/bfin/internal_bfin.S b/libswscale/bfin/internal_bfin.S index b007f07f53..eab30aa6ce 100644 --- a/libswscale/bfin/internal_bfin.S +++ b/libswscale/bfin/internal_bfin.S @@ -5,20 +5,20 @@ * Blackfin video color space converter operations * convert I420 YV12 to RGB in various formats * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ diff --git a/libswscale/bfin/swscale_bfin.c b/libswscale/bfin/swscale_bfin.c index f2fe871fc8..a51b2af56c 100644 --- a/libswscale/bfin/swscale_bfin.c +++ b/libswscale/bfin/swscale_bfin.c @@ -3,20 +3,20 @@ * * Blackfin software video scaler operations * - * This file is part of Libav. + * This file is part of FFmpeg. 
* - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ diff --git a/libswscale/bfin/yuv2rgb_bfin.c b/libswscale/bfin/yuv2rgb_bfin.c index 4078a18660..dae7f314e7 100644 --- a/libswscale/bfin/yuv2rgb_bfin.c +++ b/libswscale/bfin/yuv2rgb_bfin.c @@ -4,23 +4,24 @@ * Blackfin video color space converter operations * convert I420 YV12 to RGB in various formats * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. 
* * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#include "libavutil/pixdesc.h" #include <stdint.h> #include "config.h" @@ -195,7 +196,7 @@ SwsFunc ff_yuv2rgb_get_func_ptr_bfin(SwsContext *c) } av_log(c, AV_LOG_INFO, "BlackFin accelerated color space converter %s\n", - sws_format_name(c->dstFormat)); + av_get_pix_fmt_name(c->dstFormat)); return f; } diff --git a/libswscale/colorspace-test.c b/libswscale/colorspace-test.c index 135924c1ab..6d16785b12 100644 --- a/libswscale/colorspace-test.c +++ b/libswscale/colorspace-test.c @@ -1,20 +1,20 @@ /* * Copyright (C) 2002 Michael Niedermayer <michaelni@gmx.at> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. 
* * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -33,7 +33,7 @@ #define FUNC(s, d, n) { s, d, #n, n } -int main(void) +int main(int argc, char **argv) { int i, funcNum; uint8_t *srcBuffer = av_malloc(SIZE); @@ -54,6 +54,7 @@ int main(void) const char *name; void (*func)(const uint8_t *src, uint8_t *dst, int src_size); } func_info[] = { + FUNC(2, 2, rgb12to15), FUNC(2, 2, rgb15to16), FUNC(2, 3, rgb15to24), FUNC(2, 4, rgb15to32), @@ -66,6 +67,7 @@ int main(void) FUNC(4, 2, rgb32to16), FUNC(4, 3, rgb32to24), FUNC(2, 2, rgb16to15), + FUNC(2, 2, rgb12tobgr12), FUNC(2, 2, rgb15tobgr15), FUNC(2, 2, rgb15tobgr16), FUNC(2, 3, rgb15tobgr24), @@ -82,6 +84,12 @@ int main(void) FUNC(4, 2, rgb32tobgr16), FUNC(4, 3, rgb32tobgr24), FUNC(4, 4, shuffle_bytes_2103), /* rgb32tobgr32 */ + FUNC(6, 6, rgb48tobgr48_nobswap), + FUNC(6, 6, rgb48tobgr48_bswap), + FUNC(8, 6, rgb64to48_nobswap), + FUNC(8, 6, rgb64to48_bswap), + FUNC(8, 6, rgb64tobgr48_nobswap), + FUNC(8, 6, rgb64tobgr48_bswap), FUNC(0, 0, NULL) }; int width; diff --git a/libswscale/input.c b/libswscale/input.c index 40ed122427..9234646435 100644 --- a/libswscale/input.c +++ b/libswscale/input.c @@ -1,20 +1,20 @@ /* - * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at> + * Copyright (C) 2001-2012 Michael Niedermayer <michaelni@gmx.at> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. 
* - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -30,6 +30,7 @@ #include "libavutil/intreadwrite.h" #include "libavutil/mathematics.h" #include "libavutil/pixdesc.h" +#include "libavutil/avassert.h" #include "config.h" #include "rgb2rgb.h" #include "swscale.h" @@ -51,6 +52,86 @@ #define r ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? b_r : r_b) #define b ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? r_b : b_r) +static av_always_inline void +rgb64ToY_c_template(uint16_t *dst, const uint16_t *src, int width, + enum PixelFormat origin) +{ + int i; + for (i = 0; i < width; i++) { + unsigned int r_b = input_pixel(&src[i*4+0]); + unsigned int g = input_pixel(&src[i*4+1]); + unsigned int b_r = input_pixel(&src[i*4+2]); + + dst[i] = (RY*r + GY*g + BY*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; + } +} + +static av_always_inline void +rgb64ToUV_c_template(uint16_t *dstU, uint16_t *dstV, + const uint16_t *src1, const uint16_t *src2, + int width, enum PixelFormat origin) +{ + int i; + av_assert1(src1==src2); + for (i = 0; i < width; i++) { + int r_b = input_pixel(&src1[i*4+0]); + int g = input_pixel(&src1[i*4+1]); + int b_r = input_pixel(&src1[i*4+2]); + + dstU[i] = (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; + dstV[i] = (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; + } +} + +static av_always_inline void +rgb64ToUV_half_c_template(uint16_t *dstU, uint16_t 
*dstV, + const uint16_t *src1, const uint16_t *src2, + int width, enum PixelFormat origin) +{ + int i; + av_assert1(src1==src2); + for (i = 0; i < width; i++) { + int r_b = (input_pixel(&src1[8 * i + 0]) + input_pixel(&src1[8 * i + 4]) + 1) >> 1; + int g = (input_pixel(&src1[8 * i + 1]) + input_pixel(&src1[8 * i + 5]) + 1) >> 1; + int b_r = (input_pixel(&src1[8 * i + 2]) + input_pixel(&src1[8 * i + 6]) + 1) >> 1; + + dstU[i]= (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; + dstV[i]= (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT; + } +} + +#define rgb64funcs(pattern, BE_LE, origin) \ +static void pattern ## 64 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused0, const uint8_t *unused1,\ + int width, uint32_t *unused) \ +{ \ + const uint16_t *src = (const uint16_t *) _src; \ + uint16_t *dst = (uint16_t *) _dst; \ + rgb64ToY_c_template(dst, src, width, origin); \ +} \ + \ +static void pattern ## 64 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \ + const uint8_t *unused0, const uint8_t *_src1, const uint8_t *_src2, \ + int width, uint32_t *unused) \ +{ \ + const uint16_t *src1 = (const uint16_t *) _src1, \ + *src2 = (const uint16_t *) _src2; \ + uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \ + rgb64ToUV_c_template(dstU, dstV, src1, src2, width, origin); \ +} \ + \ +static void pattern ## 64 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, \ + const uint8_t *unused0, const uint8_t *_src1, const uint8_t *_src2, \ + int width, uint32_t *unused) \ +{ \ + const uint16_t *src1 = (const uint16_t *) _src1, \ + *src2 = (const uint16_t *) _src2; \ + uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \ + rgb64ToUV_half_c_template(dstU, dstV, src1, src2, width, origin); \ +} + +rgb64funcs(rgb, LE, PIX_FMT_RGBA64LE) +rgb64funcs(rgb, BE, PIX_FMT_RGBA64BE) + static av_always_inline void rgb48ToY_c_template(uint16_t *dst, const uint16_t *src, int width, enum PixelFormat 
origin) @@ -73,7 +154,7 @@ static av_always_inline void rgb48ToUV_c_template(uint16_t *dstU, enum PixelFormat origin) { int i; - assert(src1 == src2); + av_assert1(src1 == src2); for (i = 0; i < width; i++) { int r_b = input_pixel(&src1[i * 3 + 0]); int g = input_pixel(&src1[i * 3 + 1]); @@ -92,7 +173,7 @@ static av_always_inline void rgb48ToUV_half_c_template(uint16_t *dstU, enum PixelFormat origin) { int i; - assert(src1 == src2); + av_assert1(src1 == src2); for (i = 0; i < width; i++) { int r_b = (input_pixel(&src1[6 * i + 0]) + input_pixel(&src1[6 * i + 3]) + 1) >> 1; @@ -113,6 +194,7 @@ static av_always_inline void rgb48ToUV_half_c_template(uint16_t *dstU, #define rgb48funcs(pattern, BE_LE, origin) \ static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, \ const uint8_t *_src, \ + const uint8_t *unused0, const uint8_t *unused1,\ int width, \ uint32_t *unused) \ { \ @@ -123,6 +205,7 @@ static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, \ \ static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, \ uint8_t *_dstV, \ + const uint8_t *unused0, \ const uint8_t *_src1, \ const uint8_t *_src2, \ int width, \ @@ -137,6 +220,7 @@ static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, \ \ static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, \ uint8_t *_dstV, \ + const uint8_t *unused0, \ const uint8_t *_src1, \ const uint8_t *_src2, \ int width, \ @@ -162,7 +246,7 @@ rgb48funcs(bgr, BE, PIX_FMT_BGR48BE) : (isBE(origin) ? 
AV_RB16(&src[(i) * 2]) \ : AV_RL16(&src[(i) * 2]))) -static av_always_inline void rgb16_32ToY_c_template(uint8_t *dst, +static av_always_inline void rgb16_32ToY_c_template(int16_t *dst, const uint8_t *src, int width, enum PixelFormat origin, @@ -173,7 +257,7 @@ static av_always_inline void rgb16_32ToY_c_template(uint8_t *dst, int gsh, int bsh, int S) { const int ry = RY << rsh, gy = GY << gsh, by = BY << bsh; - const unsigned rnd = 33u << (S - 1); + const unsigned rnd = (32<<((S)-1)) + (1<<(S-7)); int i; for (i = 0; i < width; i++) { @@ -182,12 +266,12 @@ static av_always_inline void rgb16_32ToY_c_template(uint8_t *dst, int g = (px & maskg) >> shg; int r = (px & maskr) >> shr; - dst[i] = (ry * r + gy * g + by * b + rnd) >> S; + dst[i] = (ry * r + gy * g + by * b + rnd) >> ((S)-6); } } -static av_always_inline void rgb16_32ToUV_c_template(uint8_t *dstU, - uint8_t *dstV, +static av_always_inline void rgb16_32ToUV_c_template(int16_t *dstU, + int16_t *dstV, const uint8_t *src, int width, enum PixelFormat origin, @@ -199,7 +283,7 @@ static av_always_inline void rgb16_32ToUV_c_template(uint8_t *dstU, { const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh, rv = RV << rsh, gv = GV << gsh, bv = BV << bsh; - const unsigned rnd = 257u << (S - 1); + const unsigned rnd = (256u<<((S)-1)) + (1<<(S-7)); int i; for (i = 0; i < width; i++) { @@ -208,13 +292,13 @@ static av_always_inline void rgb16_32ToUV_c_template(uint8_t *dstU, int g = (px & maskg) >> shg; int r = (px & maskr) >> shr; - dstU[i] = (ru * r + gu * g + bu * b + rnd) >> S; - dstV[i] = (rv * r + gv * g + bv * b + rnd) >> S; + dstU[i] = (ru * r + gu * g + bu * b + rnd) >> ((S)-6); + dstV[i] = (rv * r + gv * g + bv * b + rnd) >> ((S)-6); } } -static av_always_inline void rgb16_32ToUV_half_c_template(uint8_t *dstU, - uint8_t *dstV, +static av_always_inline void rgb16_32ToUV_half_c_template(int16_t *dstU, + int16_t *dstV, const uint8_t *src, int width, enum PixelFormat origin, @@ -227,7 +311,7 @@ static av_always_inline 
void rgb16_32ToUV_half_c_template(uint8_t *dstU, const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh, rv = RV << rsh, gv = GV << gsh, bv = BV << bsh, maskgx = ~(maskr | maskb); - const unsigned rnd = 257u << S; + const unsigned rnd = (256U<<(S)) + (1<<(S-6)); int i; maskr |= maskr << 1; @@ -249,8 +333,8 @@ static av_always_inline void rgb16_32ToUV_half_c_template(uint8_t *dstU, } r = (rb & maskr) >> shr; - dstU[i] = (ru * r + gu * g + bu * b + rnd) >> (S + 1); - dstV[i] = (rv * r + gv * g + bv * b + rnd) >> (S + 1); + dstU[i] = (ru * r + gu * g + bu * b + (unsigned)rnd) >> ((S)-6+1); + dstV[i] = (rv * r + gv * g + bv * b + (unsigned)rnd) >> ((S)-6+1); } } @@ -258,28 +342,28 @@ static av_always_inline void rgb16_32ToUV_half_c_template(uint8_t *dstU, #define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \ maskg, maskb, rsh, gsh, bsh, S) \ -static void name ## ToY_c(uint8_t *dst, const uint8_t *src, \ +static void name ## ToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, \ int width, uint32_t *unused) \ { \ - rgb16_32ToY_c_template(dst, src, width, fmt, shr, shg, shb, shp, \ + rgb16_32ToY_c_template((int16_t*)dst, src, width, fmt, shr, shg, shb, shp, \ maskr, maskg, maskb, rsh, gsh, bsh, S); \ } \ \ static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \ - const uint8_t *src, const uint8_t *dummy, \ + const uint8_t *unused0, const uint8_t *src, const uint8_t *dummy, \ int width, uint32_t *unused) \ { \ - rgb16_32ToUV_c_template(dstU, dstV, src, width, fmt, \ + rgb16_32ToUV_c_template((int16_t*)dstU, (int16_t*)dstV, src, width, fmt, \ shr, shg, shb, shp, \ maskr, maskg, maskb, rsh, gsh, bsh, S); \ } \ \ static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \ - const uint8_t *src, \ + const uint8_t *unused0, const uint8_t *src, \ const uint8_t *dummy, \ int width, uint32_t *unused) \ { \ - rgb16_32ToUV_half_c_template(dstU, dstV, src, width, fmt, \ + rgb16_32ToUV_half_c_template((int16_t*)dstU, 
(int16_t*)dstV, src, width, fmt, \ shr, shg, shb, shp, \ maskr, maskg, maskb, \ rsh, gsh, bsh, S); \ @@ -302,71 +386,124 @@ rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0, 0, 0, 0xF800, 0x07E0, 0x0 rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT + 7) rgb16_32_wrapper(PIX_FMT_RGB444BE, rgb12be, 0, 0, 0, 0, 0x0F00, 0x00F0, 0x000F, 0, 4, 8, RGB2YUV_SHIFT + 4) -static void abgrToA_c(uint8_t *dst, const uint8_t *src, int width, - uint32_t *unused) +static void gbr24pToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, + const uint8_t *gsrc, const uint8_t *bsrc, const uint8_t *rsrc, + int width, uint32_t *unused) { + uint16_t *dstU = (uint16_t *)_dstU; + uint16_t *dstV = (uint16_t *)_dstV; int i; - for (i = 0; i < width; i++) - dst[i] = src[4 * i]; + for (i = 0; i < width; i++) { + unsigned int g = gsrc[2*i] + gsrc[2*i+1]; + unsigned int b = bsrc[2*i] + bsrc[2*i+1]; + unsigned int r = rsrc[2*i] + rsrc[2*i+1]; + + dstU[i] = (RU*r + GU*g + BU*b + (0x4001<<(RGB2YUV_SHIFT-6))) >> (RGB2YUV_SHIFT-6+1); + dstV[i] = (RV*r + GV*g + BV*b + (0x4001<<(RGB2YUV_SHIFT-6))) >> (RGB2YUV_SHIFT-6+1); + } } -static void rgbaToA_c(uint8_t *dst, const uint8_t *src, int width, - uint32_t *unused) +static void rgba64ToA_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused1, + const uint8_t *unused2, int width, uint32_t *unused) { + int16_t *dst = (int16_t *)_dst; + const uint16_t *src = (const uint16_t *)_src; int i; for (i = 0; i < width; i++) dst[i] = src[4 * i + 3]; } -static void palToY_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *pal) +static void abgrToA_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused) +{ + int16_t *dst = (int16_t *)_dst; + int i; + for (i=0; i<width; i++) { + dst[i]= src[4*i]<<6; + } +} + +static void rgbaToA_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused) +{ + int16_t *dst = 
(int16_t *)_dst; + int i; + for (i=0; i<width; i++) { + dst[i]= src[4*i+3]<<6; + } +} + +static void palToA_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *pal) +{ + int16_t *dst = (int16_t *)_dst; + int i; + for (i=0; i<width; i++) { + int d= src[i]; + + dst[i]= (pal[d] >> 24)<<6; + } +} + +static void palToY_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *pal) { + int16_t *dst = (int16_t *)_dst; int i; for (i = 0; i < width; i++) { int d = src[i]; - dst[i] = pal[d] & 0xFF; + dst[i] = (pal[d] & 0xFF)<<6; } } -static void palToUV_c(uint8_t *dstU, uint8_t *dstV, - const uint8_t *src1, const uint8_t *src2, +static void palToUV_c(uint8_t *_dstU, uint8_t *_dstV, + const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *pal) { + uint16_t *dstU = (uint16_t *)_dstU; + int16_t *dstV = (int16_t *)_dstV; int i; - assert(src1 == src2); + av_assert1(src1 == src2); for (i = 0; i < width; i++) { int p = pal[src1[i]]; - dstU[i] = p >> 8; - dstV[i] = p >> 16; + dstU[i] = (uint8_t)(p>> 8)<<6; + dstV[i] = (uint8_t)(p>>16)<<6; } } -static void monowhite2Y_c(uint8_t *dst, const uint8_t *src, - int width, uint32_t *unused) +static void monowhite2Y_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused) { + int16_t *dst = (int16_t *)_dst; int i, j; width = (width + 7) >> 3; for (i = 0; i < width; i++) { int d = ~src[i]; for (j = 0; j < 8; j++) - dst[8 * i + j] = ((d >> (7 - j)) & 1) * 255; + dst[8*i+j]= ((d>>(7-j))&1) * 16383; + } + if(width&7){ + int d= ~src[i]; + for (j = 0; j < (width&7); j++) + dst[8*i+j]= ((d>>(7-j))&1) * 16383; } } -static void monoblack2Y_c(uint8_t *dst, const uint8_t *src, - int width, uint32_t *unused) +static void monoblack2Y_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused) { + int16_t *dst = 
(int16_t *)_dst; int i, j; width = (width + 7) >> 3; for (i = 0; i < width; i++) { int d = src[i]; for (j = 0; j < 8; j++) - dst[8 * i + j] = ((d >> (7 - j)) & 1) * 255; + dst[8*i+j]= ((d>>(7-j))&1) * 16383; + } + if(width&7){ + int d = src[i]; + for (j = 0; j < (width&7); j++) + dst[8*i+j] = ((d>>(7-j))&1) * 16383; } } -static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width, +static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused) { int i; @@ -374,7 +511,7 @@ static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width, dst[i] = src[2 * i]; } -static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, +static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused) { int i; @@ -382,10 +519,10 @@ static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, dstU[i] = src1[4 * i + 1]; dstV[i] = src1[4 * i + 3]; } - assert(src1 == src2); + av_assert1(src1 == src2); } -static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, int width, +static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused) { int i; @@ -395,7 +532,7 @@ static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, int width, dst[i] = av_bswap16(src[i]); } -static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src1, +static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0, const uint8_t *_src1, const uint8_t *_src2, int width, uint32_t *unused) { int i; @@ -410,7 +547,7 @@ static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src1, /* This is almost identical to the previous, end exists only because * yuy2ToY/UV)(dst, src + 1, ...) would have 100% unaligned accesses. 
*/ -static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width, +static void uyvyToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused) { int i; @@ -418,7 +555,7 @@ static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width, dst[i] = src[2 * i + 1]; } -static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, +static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused) { int i; @@ -426,7 +563,7 @@ static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, dstU[i] = src1[4 * i + 0]; dstV[i] = src1[4 * i + 2]; } - assert(src1 == src2); + av_assert1(src1 == src2); } static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2, @@ -440,14 +577,14 @@ static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2, } static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV, - const uint8_t *src1, const uint8_t *src2, + const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused) { nvXXtoUV_c(dstU, dstV, src1, width); } static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV, - const uint8_t *src1, const uint8_t *src2, + const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused) { nvXXtoUV_c(dstV, dstU, src1, width); @@ -455,114 +592,127 @@ static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV, #define input_pixel(pos) (isBE(origin) ? 
AV_RB16(pos) : AV_RL16(pos)) -static void bgr24ToY_c(uint8_t *dst, const uint8_t *src, +static void bgr24ToY_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused) { + int16_t *dst = (int16_t *)_dst; int i; for (i = 0; i < width; i++) { int b = src[i * 3 + 0]; int g = src[i * 3 + 1]; int r = src[i * 3 + 2]; - dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT); + dst[i] = ((RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6)); } } -static void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, +static void bgr24ToUV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused) { + int16_t *dstU = (int16_t *)_dstU; + int16_t *dstV = (int16_t *)_dstV; int i; for (i = 0; i < width; i++) { int b = src1[3 * i + 0]; int g = src1[3 * i + 1]; int r = src1[3 * i + 2]; - dstU[i] = (RU * r + GU * g + BU * b + (257 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT; - dstV[i] = (RV * r + GV * g + BV * b + (257 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT; + dstU[i] = (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6); + dstV[i] = (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6); } - assert(src1 == src2); + av_assert1(src1 == src2); } -static void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, +static void bgr24ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused) { + int16_t *dstU = (int16_t *)_dstU; + int16_t *dstV = (int16_t *)_dstV; int i; for (i = 0; i < width; i++) { int b = src1[6 * i + 0] + src1[6 * i + 3]; int g = src1[6 * i + 1] + src1[6 * i + 4]; int r = src1[6 * i + 2] + src1[6 * i + 5]; - dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1); - dstV[i] = 
(RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1); + dstU[i] = (RU*r + GU*g + BU*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5); + dstV[i] = (RV*r + GV*g + BV*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5); } - assert(src1 == src2); + av_assert1(src1 == src2); } -static void rgb24ToY_c(uint8_t *dst, const uint8_t *src, int width, +static void rgb24ToY_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused) { + int16_t *dst = (int16_t *)_dst; int i; for (i = 0; i < width; i++) { int r = src[i * 3 + 0]; int g = src[i * 3 + 1]; int b = src[i * 3 + 2]; - dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT); + dst[i] = ((RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6)); } } -static void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, +static void rgb24ToUV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused) { + int16_t *dstU = (int16_t *)_dstU; + int16_t *dstV = (int16_t *)_dstV; int i; - assert(src1 == src2); + av_assert1(src1 == src2); for (i = 0; i < width; i++) { int r = src1[3 * i + 0]; int g = src1[3 * i + 1]; int b = src1[3 * i + 2]; - dstU[i] = (RU * r + GU * g + BU * b + (257 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT; - dstV[i] = (RV * r + GV * g + BV * b + (257 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT; + dstU[i] = (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6); + dstV[i] = (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6); } } -static void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, +static void rgb24ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused) { + int16_t *dstU = 
(int16_t *)_dstU; + int16_t *dstV = (int16_t *)_dstV; int i; - assert(src1 == src2); + av_assert1(src1 == src2); for (i = 0; i < width; i++) { int r = src1[6 * i + 0] + src1[6 * i + 3]; int g = src1[6 * i + 1] + src1[6 * i + 4]; int b = src1[6 * i + 2] + src1[6 * i + 5]; - dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1); - dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1); + dstU[i] = (RU*r + GU*g + BU*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5); + dstV[i] = (RV*r + GV*g + BV*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5); } } -static void planar_rgb_to_y(uint8_t *dst, const uint8_t *src[4], int width) +static void planar_rgb_to_y(uint8_t *_dst, const uint8_t *src[4], int width) { + uint16_t *dst = (uint16_t *)_dst; int i; for (i = 0; i < width; i++) { int g = src[0][i]; int b = src[1][i]; int r = src[2][i]; - dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT); + dst[i] = (RY*r + GY*g + BY*b + (0x801<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6); } } -static void planar_rgb_to_uv(uint8_t *dstU, uint8_t *dstV, const uint8_t *src[4], int width) +static void planar_rgb_to_uv(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *src[4], int width) { + uint16_t *dstU = (uint16_t *)_dstU; + uint16_t *dstV = (uint16_t *)_dstV; int i; for (i = 0; i < width; i++) { int g = src[0][i]; int b = src[1][i]; int r = src[2][i]; - dstU[i] = (RU * r + GU * g + BU * b + (257 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT; - dstV[i] = (RV * r + GV * g + BV * b + (257 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT; + dstU[i] = (RU*r + GU*g + BU*b + (0x4001<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6); + dstV[i] = (RV*r + GV*g + BV*b + (0x4001<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6); } } @@ -603,6 +753,26 @@ static void planar_rgb10be_to_y(uint8_t *dst, const uint8_t *src[4], int w) planar_rgb16_to_y(dst, src, w, 10, 1); } +static void 
planar_rgb12le_to_y(uint8_t *dst, const uint8_t *src[4], int w) +{ + planar_rgb16_to_y(dst, src, w, 12, 0); +} + +static void planar_rgb12be_to_y(uint8_t *dst, const uint8_t *src[4], int w) +{ + planar_rgb16_to_y(dst, src, w, 12, 1); +} + +static void planar_rgb14le_to_y(uint8_t *dst, const uint8_t *src[4], int w) +{ + planar_rgb16_to_y(dst, src, w, 14, 0); +} + +static void planar_rgb14be_to_y(uint8_t *dst, const uint8_t *src[4], int w) +{ + planar_rgb16_to_y(dst, src, w, 14, 1); +} + static void planar_rgb16le_to_y(uint8_t *dst, const uint8_t *src[4], int w) { planar_rgb16_to_y(dst, src, w, 16, 0); @@ -656,6 +826,30 @@ static void planar_rgb10be_to_uv(uint8_t *dstU, uint8_t *dstV, planar_rgb16_to_uv(dstU, dstV, src, w, 10, 1); } +static void planar_rgb12le_to_uv(uint8_t *dstU, uint8_t *dstV, + const uint8_t *src[4], int w) +{ + planar_rgb16_to_uv(dstU, dstV, src, w, 12, 0); +} + +static void planar_rgb12be_to_uv(uint8_t *dstU, uint8_t *dstV, + const uint8_t *src[4], int w) +{ + planar_rgb16_to_uv(dstU, dstV, src, w, 12, 1); +} + +static void planar_rgb14le_to_uv(uint8_t *dstU, uint8_t *dstV, + const uint8_t *src[4], int w) +{ + planar_rgb16_to_uv(dstU, dstV, src, w, 14, 0); +} + +static void planar_rgb14be_to_uv(uint8_t *dstU, uint8_t *dstV, + const uint8_t *src[4], int w) +{ + planar_rgb16_to_uv(dstU, dstV, src, w, 14, 1); +} + static void planar_rgb16le_to_uv(uint8_t *dstU, uint8_t *dstV, const uint8_t *src[4], int w) { @@ -699,6 +893,12 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) case PIX_FMT_GBRP10LE: c->readChrPlanar = planar_rgb10le_to_uv; break; + case PIX_FMT_GBRP12LE: + c->readChrPlanar = planar_rgb12le_to_uv; + break; + case PIX_FMT_GBRP14LE: + c->readChrPlanar = planar_rgb14le_to_uv; + break; case PIX_FMT_GBRP16LE: c->readChrPlanar = planar_rgb16le_to_uv; break; @@ -708,6 +908,12 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) case PIX_FMT_GBRP10BE: c->readChrPlanar = planar_rgb10be_to_uv; break; + case PIX_FMT_GBRP12BE: + 
c->readChrPlanar = planar_rgb12be_to_uv; + break; + case PIX_FMT_GBRP14BE: + c->readChrPlanar = planar_rgb14be_to_uv; + break; case PIX_FMT_GBRP16BE: c->readChrPlanar = planar_rgb16be_to_uv; break; @@ -721,6 +927,12 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) case PIX_FMT_YUV422P10LE: case PIX_FMT_YUV444P10LE: case PIX_FMT_YUV420P10LE: + case PIX_FMT_YUV422P12LE: + case PIX_FMT_YUV444P12LE: + case PIX_FMT_YUV420P12LE: + case PIX_FMT_YUV422P14LE: + case PIX_FMT_YUV444P14LE: + case PIX_FMT_YUV420P14LE: case PIX_FMT_YUV420P16LE: case PIX_FMT_YUV422P16LE: case PIX_FMT_YUV444P16LE: @@ -733,6 +945,12 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) case PIX_FMT_YUV444P10BE: case PIX_FMT_YUV422P10BE: case PIX_FMT_YUV420P10BE: + case PIX_FMT_YUV444P12BE: + case PIX_FMT_YUV422P12BE: + case PIX_FMT_YUV420P12BE: + case PIX_FMT_YUV444P14BE: + case PIX_FMT_YUV422P14BE: + case PIX_FMT_YUV420P14BE: case PIX_FMT_YUV420P16BE: case PIX_FMT_YUV422P16BE: case PIX_FMT_YUV444P16BE: @@ -742,6 +960,12 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) } if (c->chrSrcHSubSample) { switch (srcFormat) { + case PIX_FMT_RGBA64BE: + c->chrToYV12 = rgb64BEToUV_half_c; + break; + case PIX_FMT_RGBA64LE: + c->chrToYV12 = rgb64LEToUV_half_c; + break; case PIX_FMT_RGB48BE: c->chrToYV12 = rgb48BEToUV_half_c; break; @@ -775,6 +999,9 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_half_c; break; + case PIX_FMT_GBR24P : + c->chrToYV12 = gbr24pToUV_half_c; + break; case PIX_FMT_BGR444LE: c->chrToYV12 = bgr12leToUV_half_c; break; @@ -811,6 +1038,12 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) } } else { switch (srcFormat) { + case PIX_FMT_RGBA64BE: + c->chrToYV12 = rgb64BEToUV_c; + break; + case PIX_FMT_RGBA64LE: + c->chrToYV12 = rgb64LEToUV_c; + break; case PIX_FMT_RGB48BE: c->chrToYV12 = rgb48BEToUV_c; break; @@ -889,6 +1122,12 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) case PIX_FMT_GBRP10LE: 
c->readLumPlanar = planar_rgb10le_to_y; break; + case PIX_FMT_GBRP12LE: + c->readLumPlanar = planar_rgb12le_to_y; + break; + case PIX_FMT_GBRP14LE: + c->readLumPlanar = planar_rgb14le_to_y; + break; case PIX_FMT_GBRP16LE: c->readLumPlanar = planar_rgb16le_to_y; break; @@ -898,6 +1137,12 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) case PIX_FMT_GBRP10BE: c->readLumPlanar = planar_rgb10be_to_y; break; + case PIX_FMT_GBRP12BE: + c->readLumPlanar = planar_rgb12be_to_y; + break; + case PIX_FMT_GBRP14BE: + c->readLumPlanar = planar_rgb14be_to_y; + break; case PIX_FMT_GBRP16BE: c->readLumPlanar = planar_rgb16be_to_y; break; @@ -911,6 +1156,12 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) case PIX_FMT_YUV444P10LE: case PIX_FMT_YUV422P10LE: case PIX_FMT_YUV420P10LE: + case PIX_FMT_YUV444P12LE: + case PIX_FMT_YUV422P12LE: + case PIX_FMT_YUV420P12LE: + case PIX_FMT_YUV444P14LE: + case PIX_FMT_YUV422P14LE: + case PIX_FMT_YUV420P14LE: case PIX_FMT_YUV420P16LE: case PIX_FMT_YUV422P16LE: case PIX_FMT_YUV444P16LE: @@ -924,6 +1175,12 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) case PIX_FMT_YUV444P10BE: case PIX_FMT_YUV422P10BE: case PIX_FMT_YUV420P10BE: + case PIX_FMT_YUV444P12BE: + case PIX_FMT_YUV422P12BE: + case PIX_FMT_YUV420P12BE: + case PIX_FMT_YUV444P14BE: + case PIX_FMT_YUV422P14BE: + case PIX_FMT_YUV420P14BE: case PIX_FMT_YUV420P16BE: case PIX_FMT_YUV422P16BE: case PIX_FMT_YUV444P16BE: @@ -1017,9 +1274,17 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY_c; break; + case PIX_FMT_RGBA64BE: + c->lumToYV12 = rgb64BEToY_c; + break; + case PIX_FMT_RGBA64LE: + c->lumToYV12 = rgb64LEToY_c; + break; } if (c->alpPixBuf) { switch (srcFormat) { + case PIX_FMT_RGBA64LE: + case PIX_FMT_RGBA64BE: c->alpToYV12 = rgba64ToA_c; break; case PIX_FMT_BGRA: case PIX_FMT_RGBA: c->alpToYV12 = rgbaToA_c; @@ -1031,6 +1296,9 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) case PIX_FMT_Y400A: c->alpToYV12 = 
uyvyToY_c; break; + case PIX_FMT_PAL8 : + c->alpToYV12 = palToA_c; + break; } } } diff --git a/libswscale/options.c b/libswscale/options.c index 7ed5254aa8..c02c084a87 100644 --- a/libswscale/options.c +++ b/libswscale/options.c @@ -1,20 +1,20 @@ /* * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -66,7 +66,12 @@ static const AVOption options[] = { { NULL } }; -const AVClass sws_context_class = { "SWScaler", sws_context_to_name, options }; +const AVClass sws_context_class = { + .class_name = "SWScaler", + .item_name = sws_context_to_name, + .option = options, + .category = AV_CLASS_CATEGORY_SWSCALER, +}; const AVClass *sws_get_class(void) { diff --git a/libswscale/output.c b/libswscale/output.c index 43d5435b98..8955665f66 100644 --- a/libswscale/output.c +++ b/libswscale/output.c @@ -1,20 +1,20 @@ /* - * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at> + * Copyright (C) 2001-2012 Michael Niedermayer <michaelni@gmx.at> * - * This file is part of Libav. 
+ * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -26,6 +26,7 @@ #include "libavutil/attributes.h" #include "libavutil/avutil.h" +#include "libavutil/avassert.h" #include "libavutil/bswap.h" #include "libavutil/cpu.h" #include "libavutil/intreadwrite.h" @@ -36,24 +37,27 @@ #include "swscale.h" #include "swscale_internal.h" -DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={ +DECLARE_ALIGNED(8, const uint8_t, dither_2x2_4)[][8]={ { 1, 3, 1, 3, 1, 3, 1, 3, }, { 2, 0, 2, 0, 2, 0, 2, 0, }, +{ 1, 3, 1, 3, 1, 3, 1, 3, }, }; -DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={ +DECLARE_ALIGNED(8, const uint8_t, dither_2x2_8)[][8]={ { 6, 2, 6, 2, 6, 2, 6, 2, }, { 0, 4, 0, 4, 0, 4, 0, 4, }, +{ 6, 2, 6, 2, 6, 2, 6, 2, }, }; -DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={ +DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[][8]={ { 8, 4, 11, 7, 8, 4, 11, 7, }, { 2, 14, 1, 13, 2, 14, 1, 13, }, { 10, 6, 9, 5, 10, 6, 9, 5, }, { 0, 12, 3, 15, 0, 12, 3, 15, }, +{ 8, 4, 11, 7, 8, 4, 11, 7, }, }; -DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={ +DECLARE_ALIGNED(8, const 
uint8_t, dither_8x8_32)[][8]={ { 17, 9, 23, 15, 16, 8, 22, 14, }, { 5, 29, 3, 27, 4, 28, 2, 26, }, { 21, 13, 19, 11, 20, 12, 18, 10, }, @@ -62,9 +66,10 @@ DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={ { 4, 28, 2, 26, 5, 29, 3, 27, }, { 20, 12, 18, 10, 21, 13, 19, 11, }, { 1, 25, 7, 31, 0, 24, 6, 30, }, +{ 17, 9, 23, 15, 16, 8, 22, 14, }, }; -DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={ +DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[][8]={ { 0, 55, 14, 68, 3, 58, 17, 72, }, { 37, 18, 50, 32, 40, 22, 54, 35, }, { 9, 64, 5, 59, 13, 67, 8, 63, }, @@ -73,10 +78,11 @@ DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={ { 39, 21, 52, 34, 38, 19, 51, 33, }, { 11, 66, 7, 62, 10, 65, 6, 60, }, { 48, 30, 43, 25, 47, 29, 42, 24, }, +{ 0, 55, 14, 68, 3, 58, 17, 72, }, }; #if 1 -DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={ +DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[][8]={ {117, 62, 158, 103, 113, 58, 155, 100, }, { 34, 199, 21, 186, 31, 196, 17, 182, }, {144, 89, 131, 76, 141, 86, 127, 72, }, @@ -85,10 +91,11 @@ DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={ { 28, 193, 14, 179, 38, 203, 24, 189, }, {138, 83, 124, 69, 148, 93, 134, 79, }, { 7, 172, 48, 213, 3, 168, 45, 210, }, +{117, 62, 158, 103, 113, 58, 155, 100, }, }; #elif 1 // tries to correct a gamma of 1.5 -DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={ +DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[][8]={ { 0, 143, 18, 200, 2, 156, 25, 215, }, { 78, 28, 125, 64, 89, 36, 138, 74, }, { 10, 180, 3, 161, 16, 195, 8, 175, }, @@ -97,10 +104,11 @@ DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={ { 85, 33, 134, 71, 81, 30, 130, 67, }, { 14, 190, 6, 171, 12, 185, 5, 166, }, {117, 57, 101, 44, 113, 54, 97, 41, }, +{ 0, 143, 18, 200, 2, 156, 25, 215, }, }; #elif 1 // tries to correct a gamma of 2.0 -DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={ +DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[][8]={ { 0, 124, 8, 193, 0, 140, 12, 
213, }, { 55, 14, 104, 42, 66, 19, 119, 52, }, { 3, 168, 1, 145, 6, 187, 3, 162, }, @@ -109,10 +117,11 @@ DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={ { 62, 17, 114, 48, 58, 16, 109, 45, }, { 5, 181, 2, 157, 4, 175, 1, 151, }, { 95, 36, 78, 26, 90, 34, 74, 24, }, +{ 0, 124, 8, 193, 0, 140, 12, 213, }, }; #else // tries to correct a gamma of 2.5 -DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={ +DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[][8]={ { 0, 107, 3, 187, 0, 125, 6, 212, }, { 39, 7, 86, 28, 49, 11, 102, 36, }, { 1, 158, 0, 131, 3, 180, 1, 151, }, @@ -121,6 +130,7 @@ DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={ { 45, 9, 96, 33, 42, 8, 91, 30, }, { 2, 172, 1, 144, 2, 165, 0, 137, }, { 77, 23, 60, 15, 72, 21, 56, 14, }, +{ 0, 107, 3, 187, 0, 125, 6, 212, }, }; #endif @@ -136,7 +146,8 @@ yuv2plane1_16_c_template(const int32_t *src, uint16_t *dest, int dstW, int big_endian, int output_bits) { int i; - int shift = 19 - output_bits; + int shift = 3; + av_assert0(output_bits == 16); for (i = 0; i < dstW; i++) { int val = src[i] + (1 << (shift - 1)); @@ -150,10 +161,11 @@ yuv2planeX_16_c_template(const int16_t *filter, int filterSize, int big_endian, int output_bits) { int i; - int shift = 15 + 16 - output_bits; + int shift = 15; + av_assert0(output_bits == 16); for (i = 0; i < dstW; i++) { - int val = 1 << (30-output_bits); + int val = 1 << (shift - 1); int j; /* range of val is [0,0x7FFFFFFF], so 31 bits, but with lanczos/spline @@ -200,7 +212,7 @@ yuv2planeX_10_c_template(const int16_t *filter, int filterSize, int shift = 11 + 16 - output_bits; for (i = 0; i < dstW; i++) { - int val = 1 << (26-output_bits); + int val = 1 << (shift - 1); int j; for (j = 0; j < filterSize; j++) @@ -393,14 +405,14 @@ yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0, for (i = 0; i < dstW; i += 8) { int acc = 0; - accumulate_bit(acc, (buf0[i + 0] >> 7) + d128[0]); - accumulate_bit(acc, (buf0[i + 1] >> 7) + d128[1]); - accumulate_bit(acc, 
(buf0[i + 2] >> 7) + d128[2]); - accumulate_bit(acc, (buf0[i + 3] >> 7) + d128[3]); - accumulate_bit(acc, (buf0[i + 4] >> 7) + d128[4]); - accumulate_bit(acc, (buf0[i + 5] >> 7) + d128[5]); - accumulate_bit(acc, (buf0[i + 6] >> 7) + d128[6]); - accumulate_bit(acc, (buf0[i + 7] >> 7) + d128[7]); + accumulate_bit(acc, ((buf0[i + 0] + 64) >> 7) + d128[0]); + accumulate_bit(acc, ((buf0[i + 1] + 64) >> 7) + d128[1]); + accumulate_bit(acc, ((buf0[i + 2] + 64) >> 7) + d128[2]); + accumulate_bit(acc, ((buf0[i + 3] + 64) >> 7) + d128[3]); + accumulate_bit(acc, ((buf0[i + 4] + 64) >> 7) + d128[4]); + accumulate_bit(acc, ((buf0[i + 5] + 64) >> 7) + d128[5]); + accumulate_bit(acc, ((buf0[i + 6] + 64) >> 7) + d128[6]); + accumulate_bit(acc, ((buf0[i + 7] + 64) >> 7) + d128[7]); output_pixel(*dest++, acc); } @@ -516,10 +528,12 @@ yuv2422_2_c_template(SwsContext *c, const int16_t *buf[2], int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19; int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19; - Y1 = av_clip_uint8(Y1); - Y2 = av_clip_uint8(Y2); - U = av_clip_uint8(U); - V = av_clip_uint8(V); + if ((Y1 | Y2 | U | V) & 0x100) { + Y1 = av_clip_uint8(Y1); + Y2 = av_clip_uint8(Y2); + U = av_clip_uint8(U); + V = av_clip_uint8(V); + } output_pixels(i * 4, Y1, U, Y2, V); } @@ -536,10 +550,17 @@ yuv2422_1_c_template(SwsContext *c, const int16_t *buf0, if (uvalpha < 2048) { for (i = 0; i < ((dstW + 1) >> 1); i++) { - int Y1 = buf0[i * 2] >> 7; - int Y2 = buf0[i * 2 + 1] >> 7; - int U = ubuf0[i] >> 7; - int V = vbuf0[i] >> 7; + int Y1 = (buf0[i * 2 ]+64) >> 7; + int Y2 = (buf0[i * 2 + 1]+64) >> 7; + int U = (ubuf0[i] +64) >> 7; + int V = (vbuf0[i] +64) >> 7; + + if ((Y1 | Y2 | U | V) & 0x100) { + Y1 = av_clip_uint8(Y1); + Y2 = av_clip_uint8(Y2); + U = av_clip_uint8(U); + V = av_clip_uint8(V); + } Y1 = av_clip_uint8(Y1); Y2 = av_clip_uint8(Y2); @@ -551,10 +572,17 @@ yuv2422_1_c_template(SwsContext *c, const int16_t *buf0, } else { const int16_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1]; 
for (i = 0; i < ((dstW + 1) >> 1); i++) { - int Y1 = buf0[i * 2] >> 7; - int Y2 = buf0[i * 2 + 1] >> 7; - int U = (ubuf0[i] + ubuf1[i]) >> 8; - int V = (vbuf0[i] + vbuf1[i]) >> 8; + int Y1 = (buf0[i * 2 ] + 64) >> 7; + int Y2 = (buf0[i * 2 + 1] + 64) >> 7; + int U = (ubuf0[i] + ubuf1[i]+128) >> 8; + int V = (vbuf0[i] + vbuf1[i]+128) >> 8; + + if ((Y1 | Y2 | U | V) & 0x100) { + Y1 = av_clip_uint8(Y1); + Y2 = av_clip_uint8(Y2); + U = av_clip_uint8(U); + V = av_clip_uint8(V); + } Y1 = av_clip_uint8(Y1); Y2 = av_clip_uint8(Y2); @@ -812,7 +840,7 @@ YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48le, PIX_FMT_BGR48LE) * correct RGB values into the destination buffer. */ static av_always_inline void -yuv2rgb_write(uint8_t *_dest, int i, unsigned Y1, unsigned Y2, +yuv2rgb_write(uint8_t *_dest, int i, int Y1, int Y2, unsigned A1, unsigned A2, const void *_r, const void *_g, const void *_b, int y, enum PixelFormat target, int hasAlpha) @@ -848,6 +876,7 @@ yuv2rgb_write(uint8_t *_dest, int i, unsigned Y1, unsigned Y2, #define r_b ((target == PIX_FMT_RGB24) ? r : b) #define b_r ((target == PIX_FMT_RGB24) ? b : r) + dest[i * 6 + 0] = r_b[Y1]; dest[i * 6 + 1] = g[Y1]; dest[i * 6 + 2] = b_r[Y1]; @@ -953,12 +982,6 @@ yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter, Y2 >>= 19; U >>= 19; V >>= 19; - if ((Y1 | Y2 | U | V) & 0x100) { - Y1 = av_clip_uint8(Y1); - Y2 = av_clip_uint8(Y2); - U = av_clip_uint8(U); - V = av_clip_uint8(V); - } if (hasAlpha) { A1 = 1 << 18; A2 = 1 << 18; @@ -974,10 +997,9 @@ yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter, } } - /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/ - r = c->table_rV[V]; - g = (c->table_gU[U] + c->table_gV[V]); - b = c->table_bU[U]; + r = c->table_rV[V + YUVRGB_TABLE_HEADROOM]; + g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] + c->table_gV[V + YUVRGB_TABLE_HEADROOM]); + b = c->table_bU[U + YUVRGB_TABLE_HEADROOM]; yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? 
A2 : 0, r, g, b, y, target, hasAlpha); @@ -1006,16 +1028,9 @@ yuv2rgb_2_c_template(SwsContext *c, const int16_t *buf[2], int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19; int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19; int A1, A2; - const void *r, *g, *b; - - Y1 = av_clip_uint8(Y1); - Y2 = av_clip_uint8(Y2); - U = av_clip_uint8(U); - V = av_clip_uint8(V); - - r = c->table_rV[V]; - g = (c->table_gU[U] + c->table_gV[V]); - b = c->table_bU[U]; + const void *r = c->table_rV[V + YUVRGB_TABLE_HEADROOM], + *g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] + c->table_gV[V + YUVRGB_TABLE_HEADROOM]), + *b = c->table_bU[U + YUVRGB_TABLE_HEADROOM]; if (hasAlpha) { A1 = (abuf0[i * 2 ] * yalpha1 + abuf1[i * 2 ] * yalpha) >> 19; @@ -1041,25 +1056,18 @@ yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0, if (uvalpha < 2048) { for (i = 0; i < ((dstW + 1) >> 1); i++) { - int Y1 = buf0[i * 2] >> 7; - int Y2 = buf0[i * 2 + 1] >> 7; - int U = ubuf0[i] >> 7; - int V = vbuf0[i] >> 7; + int Y1 = (buf0[i * 2 ] + 64) >> 7; + int Y2 = (buf0[i * 2 + 1] + 64) >> 7; + int U = (ubuf0[i] + 64) >> 7; + int V = (vbuf0[i] + 64) >> 7; int A1, A2; - const void *r, *g, *b; - - Y1 = av_clip_uint8(Y1); - Y2 = av_clip_uint8(Y2); - U = av_clip_uint8(U); - V = av_clip_uint8(V); - - r = c->table_rV[V]; - g = (c->table_gU[U] + c->table_gV[V]); - b = c->table_bU[U]; + const void *r = c->table_rV[V + YUVRGB_TABLE_HEADROOM], + *g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] + c->table_gV[V + YUVRGB_TABLE_HEADROOM]), + *b = c->table_bU[U + YUVRGB_TABLE_HEADROOM]; if (hasAlpha) { - A1 = abuf0[i * 2 ] >> 7; - A2 = abuf0[i * 2 + 1] >> 7; + A1 = abuf0[i * 2 ] * 255 + 16384 >> 15; + A2 = abuf0[i * 2 + 1] * 255 + 16384 >> 15; A1 = av_clip_uint8(A1); A2 = av_clip_uint8(A2); } @@ -1070,25 +1078,18 @@ yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0, } else { const int16_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1]; for (i = 0; i < ((dstW + 1) >> 1); i++) { - int Y1 = buf0[i * 2] >> 7; - int Y2 = buf0[i * 2 
+ 1] >> 7; - int U = (ubuf0[i] + ubuf1[i]) >> 8; - int V = (vbuf0[i] + vbuf1[i]) >> 8; + int Y1 = (buf0[i * 2 ] + 64) >> 7; + int Y2 = (buf0[i * 2 + 1] + 64) >> 7; + int U = (ubuf0[i] + ubuf1[i] + 128) >> 8; + int V = (vbuf0[i] + vbuf1[i] + 128) >> 8; int A1, A2; - const void *r, *g, *b; - - Y1 = av_clip_uint8(Y1); - Y2 = av_clip_uint8(Y2); - U = av_clip_uint8(U); - V = av_clip_uint8(V); - - r = c->table_rV[V]; - g = (c->table_gU[U] + c->table_gV[V]); - b = c->table_bU[U]; + const void *r = c->table_rV[V + YUVRGB_TABLE_HEADROOM], + *g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] + c->table_gV[V + YUVRGB_TABLE_HEADROOM]), + *b = c->table_bU[U + YUVRGB_TABLE_HEADROOM]; if (hasAlpha) { - A1 = abuf0[i * 2 ] >> 7; - A2 = abuf0[i * 2 + 1] >> 7; + A1 = (abuf0[i * 2 ] + 64) >> 7; + A2 = (abuf0[i * 2 + 1] + 64) >> 7; A1 = av_clip_uint8(A1); A2 = av_clip_uint8(A2); } @@ -1164,9 +1165,9 @@ yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter, for (i = 0; i < dstW; i++) { int j; - int Y = 0; - int U = -128 << 19; - int V = -128 << 19; + int Y = 1<<9; + int U = (1<<9)-(128 << 19); + int V = (1<<9)-(128 << 19); int R, G, B, A; for (j = 0; j < lumFilterSize; j++) { @@ -1180,7 +1181,7 @@ yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter, U >>= 10; V >>= 10; if (hasAlpha) { - A = 1 << 21; + A = 1 << 18; for (j = 0; j < lumFilterSize; j++) { A += alpSrc[j][i] * lumFilter[j]; } @@ -1223,7 +1224,6 @@ yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter, dest[1] = B >> 22; dest[2] = G >> 22; dest[3] = R >> 22; - dest += 4; break; case PIX_FMT_BGR24: dest[0] = B >> 22; @@ -1354,7 +1354,10 @@ av_cold void ff_sws_init_output_funcs(SwsContext *c, *yuv2packedX = yuv2bgr24_full_X_c; break; } + if(!*yuv2packedX) + goto YUV_PACKED; } else { + YUV_PACKED: switch (dstFormat) { case PIX_FMT_RGB48LE: *yuv2packed1 = yuv2rgb48le_1_c; diff --git a/libswscale/ppc/swscale_altivec.c b/libswscale/ppc/swscale_altivec.c index 0e66ec1f7b..c621866eed 100644 --- 
a/libswscale/ppc/swscale_altivec.c +++ b/libswscale/ppc/swscale_altivec.c @@ -4,20 +4,20 @@ * Copyright (C) 2004 Romain Dolbeau <romain@dolbeau.org> * based on the equivalent C code in swscale.c * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -318,7 +318,7 @@ av_cold void ff_sws_init_swScale_altivec(SwsContext *c) if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC)) return; - if (c->srcBpc == 8 && c->dstBpc <= 10) { + if (c->srcBpc == 8 && c->dstBpc <= 14) { c->hyScale = c->hcScale = hScale_altivec_real; } if (!is16BPS(dstFormat) && !is9_OR_10BPS(dstFormat) && diff --git a/libswscale/ppc/yuv2rgb_altivec.c b/libswscale/ppc/yuv2rgb_altivec.c index 380c76f4d1..ade134d779 100644 --- a/libswscale/ppc/yuv2rgb_altivec.c +++ b/libswscale/ppc/yuv2rgb_altivec.c @@ -3,20 +3,20 @@ * * copyright (C) 2004 Marc Hoffman <marc.hoffman@analog.com> * - * This file is part of Libav. + * This file is part of FFmpeg. 
* - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -97,6 +97,7 @@ #include "libswscale/swscale_internal.h" #include "libavutil/attributes.h" #include "libavutil/cpu.h" +#include "libavutil/pixdesc.h" #include "yuv2rgb_altivec.h" #undef PROFILE_THE_BEAST @@ -317,12 +318,7 @@ static int altivec_ ## name(SwsContext *c, const unsigned char **in, \ const ubyte *ui = in[1]; \ const ubyte *vi = in[2]; \ \ - vector unsigned char *oute = \ - (vector unsigned char *) \ - (oplanes[0] + srcSliceY * outstrides[0]); \ - vector unsigned char *outo = \ - (vector unsigned char *) \ - (oplanes[0] + srcSliceY * outstrides[0] + outstrides[0]); \ + vector unsigned char *oute, *outo; \ \ /* loop moves y{1, 2}i by w */ \ instrides_scl[0] = instrides[0] * 2 - w; \ @@ -332,6 +328,9 @@ static int altivec_ ## name(SwsContext *c, const unsigned char **in, \ instrides_scl[2] = instrides[2] - w / 2; \ \ for (i = 0; i < h / 2; i++) { \ + oute = (vector unsigned char *)(oplanes[0] + outstrides[0] * \ + (srcSliceY + i * 2)); \ + outo = oute + (outstrides[0] >> 4); \ vec_dstst(outo, (0x02000002 | (((w * 3 + 32) / 32) << 16)), 0); \ vec_dstst(oute, (0x02000002 
| (((w * 3 + 32) / 32) << 16)), 1); \ \ @@ -429,9 +428,6 @@ static int altivec_ ## name(SwsContext *c, const unsigned char **in, \ vi += 8; \ } \ \ - outo += (outstrides[0]) >> 4; \ - oute += (outstrides[0]) >> 4; \ - \ ui += instrides_scl[1]; \ vi += instrides_scl[2]; \ y1i += instrides_scl[0]; \ @@ -736,7 +732,7 @@ static av_always_inline void ff_yuv2packedX_altivec(SwsContext *c, if (!printed_error_message) { av_log(c, AV_LOG_ERROR, "altivec_yuv2packedX doesn't support %s output\n", - sws_format_name(c->dstFormat)); + av_get_pix_fmt_name(c->dstFormat)); printed_error_message = 1; } return; @@ -824,7 +820,7 @@ static av_always_inline void ff_yuv2packedX_altivec(SwsContext *c, /* Unreachable, I think. */ av_log(c, AV_LOG_ERROR, "altivec_yuv2packedX doesn't support %s output\n", - sws_format_name(c->dstFormat)); + av_get_pix_fmt_name(c->dstFormat)); return; } diff --git a/libswscale/ppc/yuv2rgb_altivec.h b/libswscale/ppc/yuv2rgb_altivec.h index 2c5e7ed876..aa52a4743e 100644 --- a/libswscale/ppc/yuv2rgb_altivec.h +++ b/libswscale/ppc/yuv2rgb_altivec.h @@ -4,20 +4,20 @@ * Copyright (C) 2004 Romain Dolbeau <romain@dolbeau.org> * based on the equivalent C code in swscale.c * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. 
* * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ diff --git a/libswscale/ppc/yuv2yuv_altivec.c b/libswscale/ppc/yuv2yuv_altivec.c index 45d766bd02..af78782b54 100644 --- a/libswscale/ppc/yuv2yuv_altivec.c +++ b/libswscale/ppc/yuv2yuv_altivec.c @@ -4,20 +4,20 @@ * Copyright (C) 2004 Romain Dolbeau <romain@dolbeau.org> * based on the equivalent C code in swscale.c * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ diff --git a/libswscale/rgb2rgb.c b/libswscale/rgb2rgb.c index 26ef64879c..321e5ff1b8 100644 --- a/libswscale/rgb2rgb.c +++ b/libswscale/rgb2rgb.c @@ -6,20 +6,20 @@ * Written by Nick Kurshev. * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at) * - * This file is part of Libav. + * This file is part of FFmpeg. 
* - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -181,13 +181,13 @@ void rgb16tobgr32(const uint8_t *src, uint8_t *dst, int src_size) register uint16_t bgr = *s++; #if HAVE_BIGENDIAN *d++ = 255; - *d++ = (bgr & 0x1F) << 3; - *d++ = (bgr & 0x7E0) >> 3; - *d++ = (bgr & 0xF800) >> 8; + *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2); + *d++ = ((bgr&0x07E0)>>3) | ((bgr&0x07E0)>> 9); + *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13); #else - *d++ = (bgr & 0xF800) >> 8; - *d++ = (bgr & 0x7E0) >> 3; - *d++ = (bgr & 0x1F) << 3; + *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13); + *d++ = ((bgr&0x07E0)>>3) | ((bgr&0x07E0)>> 9); + *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2); *d++ = 255; #endif } @@ -220,9 +220,9 @@ void rgb16to24(const uint8_t *src, uint8_t *dst, int src_size) while (s < end) { register uint16_t bgr = *s++; - *d++ = (bgr & 0xF800) >> 8; - *d++ = (bgr & 0x7E0) >> 3; - *d++ = (bgr & 0x1F) << 3; + *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13); + *d++ = ((bgr&0x07E0)>>3) | ((bgr&0x07E0)>> 9); + *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2); } } @@ -256,13 +256,13 @@ void rgb15tobgr32(const uint8_t *src, uint8_t *dst, int 
src_size) register uint16_t bgr = *s++; #if HAVE_BIGENDIAN *d++ = 255; - *d++ = (bgr & 0x1F) << 3; - *d++ = (bgr & 0x3E0) >> 2; - *d++ = (bgr & 0x7C00) >> 7; + *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2); + *d++ = ((bgr&0x03E0)>>2) | ((bgr&0x03E0)>> 7); + *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12); #else - *d++ = (bgr & 0x7C00) >> 7; - *d++ = (bgr & 0x3E0) >> 2; - *d++ = (bgr & 0x1F) << 3; + *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12); + *d++ = ((bgr&0x03E0)>>2) | ((bgr&0x03E0)>> 7); + *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2); *d++ = 255; #endif } @@ -276,9 +276,9 @@ void rgb15to24(const uint8_t *src, uint8_t *dst, int src_size) while (s < end) { register uint16_t bgr = *s++; - *d++ = (bgr & 0x7C00) >> 7; - *d++ = (bgr & 0x3E0) >> 2; - *d++ = (bgr & 0x1F) << 3; + *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12); + *d++ = ((bgr&0x03E0)>>2) | ((bgr&0x03E0)>> 7); + *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2); } } @@ -315,18 +315,6 @@ void rgb12tobgr12(const uint8_t *src, uint8_t *dst, int src_size) } } -void bgr8torgb8(const uint8_t *src, uint8_t *dst, int src_size) -{ - int i, num_pixels = src_size; - - for (i = 0; i < num_pixels; i++) { - register uint8_t rgb = src[i]; - unsigned r = (rgb & 0x07); - unsigned g = (rgb & 0x38) >> 3; - unsigned b = (rgb & 0xC0) >> 6; - dst[i] = ((b << 1) & 0x07) | ((g & 0x07) << 3) | ((r & 0x03) << 6); - } -} #define DEFINE_SHUFFLE_BYTES(a, b, c, d) \ void shuffle_bytes_ ## a ## b ## c ## d(const uint8_t *src, \ @@ -346,3 +334,57 @@ DEFINE_SHUFFLE_BYTES(0, 3, 2, 1) DEFINE_SHUFFLE_BYTES(1, 2, 3, 0) DEFINE_SHUFFLE_BYTES(3, 0, 1, 2) DEFINE_SHUFFLE_BYTES(3, 2, 1, 0) + +#define DEFINE_RGB48TOBGR48(need_bswap, swap) \ +void rgb48tobgr48_ ## need_bswap(const uint8_t *src, \ + uint8_t *dst, int src_size) \ +{ \ + uint16_t *d = (uint16_t *)dst; \ + uint16_t *s = (uint16_t *)src; \ + int i, num_pixels = src_size >> 1; \ + \ + for (i = 0; i < num_pixels; i += 3) { \ + d[i ] = swap ? 
av_bswap16(s[i + 2]) : s[i + 2]; \ + d[i + 1] = swap ? av_bswap16(s[i + 1]) : s[i + 1]; \ + d[i + 2] = swap ? av_bswap16(s[i ]) : s[i ]; \ + } \ +} + +DEFINE_RGB48TOBGR48(nobswap, 0) +DEFINE_RGB48TOBGR48(bswap, 1) + +#define DEFINE_RGB64TOBGR48(need_bswap, swap) \ +void rgb64tobgr48_ ## need_bswap(const uint8_t *src, \ + uint8_t *dst, int src_size) \ +{ \ + uint16_t *d = (uint16_t *)dst; \ + uint16_t *s = (uint16_t *)src; \ + int i, num_pixels = src_size >> 3; \ + \ + for (i = 0; i < num_pixels; i++) { \ + d[3 * i ] = swap ? av_bswap16(s[4 * i + 2]) : s[4 * i + 2]; \ + d[3 * i + 1] = swap ? av_bswap16(s[4 * i + 1]) : s[4 * i + 1]; \ + d[3 * i + 2] = swap ? av_bswap16(s[4 * i ]) : s[4 * i ]; \ + } \ +} + +DEFINE_RGB64TOBGR48(nobswap, 0) +DEFINE_RGB64TOBGR48(bswap, 1) + +#define DEFINE_RGB64TO48(need_bswap, swap) \ +void rgb64to48_ ## need_bswap(const uint8_t *src, \ + uint8_t *dst, int src_size) \ +{ \ + uint16_t *d = (uint16_t *)dst; \ + uint16_t *s = (uint16_t *)src; \ + int i, num_pixels = src_size >> 3; \ + \ + for (i = 0; i < num_pixels; i++) { \ + d[3 * i ] = swap ? av_bswap16(s[4 * i ]) : s[4 * i ]; \ + d[3 * i + 1] = swap ? av_bswap16(s[4 * i + 1]) : s[4 * i + 1]; \ + d[3 * i + 2] = swap ? av_bswap16(s[4 * i + 2]) : s[4 * i + 2]; \ + } \ +} + +DEFINE_RGB64TO48(nobswap, 0) +DEFINE_RGB64TO48(bswap, 1) diff --git a/libswscale/rgb2rgb.h b/libswscale/rgb2rgb.h index 42f468fe21..e37f0fbbbe 100644 --- a/libswscale/rgb2rgb.h +++ b/libswscale/rgb2rgb.h @@ -6,20 +6,20 @@ * Written by Nick Kurshev. * YUV & runtime CPU stuff by Michael (michaelni@gmx.at) * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. 
* - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -52,6 +52,12 @@ extern void (*rgb32tobgr15)(const uint8_t *src, uint8_t *dst, int src_size); extern void (*shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, int src_size); +void rgb64tobgr48_nobswap(const uint8_t *src, uint8_t *dst, int src_size); +void rgb64tobgr48_bswap(const uint8_t *src, uint8_t *dst, int src_size); +void rgb48tobgr48_nobswap(const uint8_t *src, uint8_t *dst, int src_size); +void rgb48tobgr48_bswap(const uint8_t *src, uint8_t *dst, int src_size); +void rgb64to48_nobswap(const uint8_t *src, uint8_t *dst, int src_size); +void rgb64to48_bswap(const uint8_t *src, uint8_t *dst, int src_size); void rgb24to32(const uint8_t *src, uint8_t *dst, int src_size); void rgb32to24(const uint8_t *src, uint8_t *dst, int src_size); void rgb16tobgr32(const uint8_t *src, uint8_t *dst, int src_size); @@ -64,7 +70,6 @@ void rgb15tobgr16(const uint8_t *src, uint8_t *dst, int src_size); void rgb15tobgr15(const uint8_t *src, uint8_t *dst, int src_size); void rgb12tobgr12(const uint8_t *src, uint8_t *dst, int src_size); void rgb12to15(const uint8_t *src, uint8_t *dst, int src_size); -void bgr8torgb8(const uint8_t *src, uint8_t *dst, int src_size); void shuffle_bytes_0321(const uint8_t *src, uint8_t *dst, int src_size); void shuffle_bytes_1230(const uint8_t *src, uint8_t *dst, int src_size); diff --git a/libswscale/rgb2rgb_template.c b/libswscale/rgb2rgb_template.c index 
d1a43e01cb..c05cdc8549 100644 --- a/libswscale/rgb2rgb_template.c +++ b/libswscale/rgb2rgb_template.c @@ -7,20 +7,20 @@ * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at) * lot of big-endian byte order fixes by Alex Beregszaszi * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -238,27 +238,6 @@ static inline void rgb24to15_c(const uint8_t *src, uint8_t *dst, int src_size) } } -/* - * I use less accurate approximation here by simply left-shifting the input - * value and filling the low order bits with zeroes. This method improves PNG - * compression but this scheme cannot reproduce white exactly, since it does - * not generate an all-ones maximum value; the net effect is to darken the - * image slightly. 
- * - * The better method should be "left bit replication": - * - * 4 3 2 1 0 - * --------- - * 1 1 0 1 1 - * - * 7 6 5 4 3 2 1 0 - * ---------------- - * 1 1 0 1 1 1 1 0 - * |=======| |===| - * | leftmost bits repeated to fill open bits - * | - * original bits - */ static inline void rgb15tobgr24_c(const uint8_t *src, uint8_t *dst, int src_size) { @@ -268,9 +247,9 @@ static inline void rgb15tobgr24_c(const uint8_t *src, uint8_t *dst, while (s < end) { register uint16_t bgr = *s++; - *d++ = (bgr & 0x1F) << 3; - *d++ = (bgr & 0x3E0) >> 2; - *d++ = (bgr & 0x7C00) >> 7; + *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2); + *d++ = ((bgr&0x03E0)>>2) | ((bgr&0x03E0)>> 7); + *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12); } } @@ -283,9 +262,9 @@ static inline void rgb16tobgr24_c(const uint8_t *src, uint8_t *dst, while (s < end) { register uint16_t bgr = *s++; - *d++ = (bgr & 0x1F) << 3; - *d++ = (bgr & 0x7E0) >> 3; - *d++ = (bgr & 0xF800) >> 8; + *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2); + *d++ = ((bgr&0x07E0)>>3) | ((bgr&0x07E0)>> 9); + *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13); } } @@ -299,13 +278,13 @@ static inline void rgb15to32_c(const uint8_t *src, uint8_t *dst, int src_size) register uint16_t bgr = *s++; #if HAVE_BIGENDIAN *d++ = 255; - *d++ = (bgr & 0x7C00) >> 7; - *d++ = (bgr & 0x3E0) >> 2; - *d++ = (bgr & 0x1F) << 3; + *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12); + *d++ = ((bgr&0x03E0)>>2) | ((bgr&0x03E0)>> 7); + *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2); #else - *d++ = (bgr & 0x1F) << 3; - *d++ = (bgr & 0x3E0) >> 2; - *d++ = (bgr & 0x7C00) >> 7; + *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2); + *d++ = ((bgr&0x03E0)>>2) | ((bgr&0x03E0)>> 7); + *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12); *d++ = 255; #endif } @@ -321,13 +300,13 @@ static inline void rgb16to32_c(const uint8_t *src, uint8_t *dst, int src_size) register uint16_t bgr = *s++; #if HAVE_BIGENDIAN *d++ = 255; - *d++ = (bgr & 0xF800) >> 8; - *d++ = (bgr & 0x7E0) >> 3; - *d++ = (bgr & 0x1F) 
<< 3; + *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13); + *d++ = ((bgr&0x07E0)>>3) | ((bgr&0x07E0)>> 9); + *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2); #else - *d++ = (bgr & 0x1F) << 3; - *d++ = (bgr & 0x7E0) >> 3; - *d++ = (bgr & 0xF800) >> 8; + *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2); + *d++ = ((bgr&0x07E0)>>3) | ((bgr&0x07E0)>> 9); + *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13); *d++ = 255; #endif } @@ -665,6 +644,9 @@ void rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, ydst += lumStride; src += srcStride; + if (y+1 == height) + break; + for (i = 0; i < chromWidth; i++) { unsigned int b = src[6 * i + 0]; unsigned int g = src[6 * i + 1]; diff --git a/libswscale/sparc/yuv2rgb_vis.c b/libswscale/sparc/yuv2rgb_vis.c index d7102a3a45..bb9ab22dbe 100644 --- a/libswscale/sparc/yuv2rgb_vis.c +++ b/libswscale/sparc/yuv2rgb_vis.c @@ -2,20 +2,20 @@ * VIS optimized software YUV to RGB converter * Copyright (c) 2007 Denes Balatoni <dbalatoni@programozo.hu> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. 
* * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ diff --git a/libswscale/swscale-test.c b/libswscale/swscale-test.c index 3497dffbe0..ef6c55ce02 100644 --- a/libswscale/swscale-test.c +++ b/libswscale/swscale-test.c @@ -1,20 +1,20 @@ /* - * Copyright (C) 2003 Michael Niedermayer <michaelni@gmx.at> + * Copyright (C) 2003-2011 Michael Niedermayer <michaelni@gmx.at> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -103,6 +103,7 @@ static int doTest(uint8_t *ref[4], int refStride[4], int w, int h, av_image_fill_linesizes(srcStride, srcFormat, srcW); for (p = 0; p < 4; p++) { + srcStride[p] = FFALIGN(srcStride[p], 16); if (srcStride[p]) src[p] = av_mallocz(srcStride[p] * srcH + 16); if (srcStride[p] && !src[p]) { @@ -136,6 +137,7 @@ static int doTest(uint8_t *ref[4], int refStride[4], int w, int h, * allocated with av_malloc). 
*/ /* An extra 16 bytes is being allocated because some scalers may write * out of bounds. */ + dstStride[i] = FFALIGN(dstStride[i], 16); if (dstStride[i]) dst[i] = av_mallocz(dstStride[i] * dstH + 16); if (dstStride[i] && !dst[i]) { @@ -175,6 +177,7 @@ static int doTest(uint8_t *ref[4], int refStride[4], int w, int h, ssdA = r->ssdA; } else { for (i = 0; i < 4; i++) { + refStride[i] = FFALIGN(refStride[i], 16); if (refStride[i]) out[i] = av_mallocz(refStride[i] * h); if (refStride[i] && !out[i]) { diff --git a/libswscale/swscale.c b/libswscale/swscale.c index 9da250e1d1..ae79eb66e8 100644 --- a/libswscale/swscale.c +++ b/libswscale/swscale.c @@ -1,20 +1,20 @@ /* - * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at> + * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. 
* * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -24,6 +24,7 @@ #include <stdio.h> #include <string.h> +#include "libavutil/avassert.h" #include "libavutil/avutil.h" #include "libavutil/bswap.h" #include "libavutil/cpu.h" @@ -71,6 +72,9 @@ static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1; int sh = bits - 4; + if((isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8) && av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1<15) + sh= 9; + for (i = 0; i < dstW; i++) { int j; int srcPos = filterPos[i]; @@ -92,6 +96,9 @@ static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint16_t *src = (const uint16_t *) _src; int sh = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1; + if(sh<15) + sh= isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8 ? 
13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1; + for (i = 0; i < dstW; i++) { int j; int srcPos = filterPos[i]; @@ -208,7 +215,7 @@ static void lumRangeFromJpeg16_c(int16_t *_dst, int width) int i; int32_t *dst = (int32_t *) _dst; for (i = 0; i < width; i++) - dst[i] = (dst[i] * 14071 + (33561947 << 4)) >> 14; + dst[i] = (dst[i]*(14071/4) + (33561947<<4)/4)>>12; } static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth, @@ -222,6 +229,8 @@ static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth, dst[i] = (src[xx] << 7) + (src[xx + 1] - src[xx]) * xalpha; xpos += xInc; } + for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) + dst[i] = src[srcW-1]*128; } // *** horizontal scale Y line to temp buffer @@ -234,13 +243,13 @@ static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth, uint8_t *formatConvBuffer, uint32_t *pal, int isAlpha) { - void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = + void (*toYV12)(uint8_t *, const uint8_t *, const uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12; void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange; const uint8_t *src = src_in[isAlpha ? 
3 : 0]; if (toYV12) { - toYV12(formatConvBuffer, src, srcW, pal); + toYV12(formatConvBuffer, src, src_in[1], src_in[2], srcW, pal); src = formatConvBuffer; } else if (c->readLumPlanar && !isAlpha) { c->readLumPlanar(formatConvBuffer, src_in, srcW); @@ -271,6 +280,10 @@ static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2, dst2[i] = (src2[xx] * (xalpha ^ 127) + src2[xx + 1] * xalpha); xpos += xInc; } + for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) { + dst1[i] = src1[srcW-1]*128; + dst2[i] = src2[srcW-1]*128; + } } static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, @@ -285,13 +298,13 @@ static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, const uint8_t *src1 = src_in[1], *src2 = src_in[2]; if (c->chrToYV12) { uint8_t *buf2 = formatConvBuffer + - FFALIGN(srcW * FFALIGN(c->srcBpc, 8) >> 3, 16); - c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal); - src1 = formatConvBuffer; - src2 = buf2; + FFALIGN(srcW*2+78, 16); + c->chrToYV12(formatConvBuffer, buf2, src_in[0], src1, src2, srcW, pal); + src1= formatConvBuffer; + src2= buf2; } else if (c->readChrPlanar) { uint8_t *buf2 = formatConvBuffer + - FFALIGN(srcW * FFALIGN(c->srcBpc, 8) >> 3, 16); + FFALIGN(srcW*2+78, 16); c->readChrPlanar(formatConvBuffer, buf2, src_in, srcW); src1 = formatConvBuffer; src2 = buf2; @@ -392,8 +405,8 @@ static int swScale(SwsContext *c, const uint8_t *src[], DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n", vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize); - if (dstStride[0] % 8 != 0 || dstStride[1] % 8 != 0 || - dstStride[2] % 8 != 0 || dstStride[3] % 8 != 0) { + if (dstStride[0]%16 !=0 || dstStride[1]%16 !=0 || + dstStride[2]%16 !=0 || dstStride[3]%16 != 0) { static int warnedAlready = 0; // FIXME maybe move this into the context if (flags & SWS_PRINT_INFO && !warnedAlready) { av_log(c, AV_LOG_WARNING, @@ -403,6 +416,18 @@ static int swScale(SwsContext *c, const uint8_t *src[], } } + if 
((int)dst[0]%16 || (int)dst[1]%16 || (int)dst[2]%16 || (int)src[0]%16 || (int)src[1]%16 || (int)src[2]%16 + || dstStride[0]%16 || dstStride[1]%16 || dstStride[2]%16 || dstStride[3]%16 + || srcStride[0]%16 || srcStride[1]%16 || srcStride[2]%16 || srcStride[3]%16 + ) { + static int warnedAlready=0; + int cpu_flags = av_get_cpu_flags(); + if (HAVE_MMX2 && (cpu_flags & AV_CPU_FLAG_SSE2) && !warnedAlready){ + av_log(c, AV_LOG_WARNING, "Warning: data is not aligned! This can lead to a speedloss\n"); + warnedAlready=1; + } + } + /* Note the user might start scaling the picture in the middle so this * will not get executed. This is not really intended but works * currently, so people might do it. */ @@ -427,6 +452,7 @@ static int swScale(SwsContext *c, const uint8_t *src[], dst[2] + dstStride[2] * chrDstY, (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL, }; + int use_mmx_vfilter= c->use_mmx_vfilter; // First line needed as input const int firstLumSrcY = FFMAX(1 - vLumFilterSize, vLumFilterPos[dstY]); @@ -531,98 +557,74 @@ static int swScale(SwsContext *c, const uint8_t *src[], * this array's tail */ ff_sws_init_output_funcs(c, &yuv2plane1, &yuv2planeX, &yuv2nv12cX, &yuv2packed1, &yuv2packed2, &yuv2packedX); + use_mmx_vfilter= 0; } { - const int16_t **lumSrcPtr = (const int16_t **)lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; - const int16_t **chrUSrcPtr = (const int16_t **)chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; - const int16_t **chrVSrcPtr = (const int16_t **)chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; + const int16_t **lumSrcPtr = (const int16_t **)(void*) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; + const int16_t **chrUSrcPtr = (const int16_t **)(void*) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; + const int16_t **chrVSrcPtr = (const int16_t **)(void*) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + 
vChrBufSize; const int16_t **alpSrcPtr = (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? - (const int16_t **)alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL; - - if (firstLumSrcY < 0 || firstLumSrcY + vLumFilterSize > c->srcH) { - const int16_t **tmpY = (const int16_t **)lumPixBuf + - 2 * vLumBufSize; - int neg = -firstLumSrcY, i; - int end = FFMIN(c->srcH - firstLumSrcY, vLumFilterSize); - for (i = 0; i < neg; i++) - tmpY[i] = lumSrcPtr[neg]; - for (; i < end; i++) - tmpY[i] = lumSrcPtr[i]; - for (; i < vLumFilterSize; i++) - tmpY[i] = tmpY[i - 1]; - lumSrcPtr = tmpY; - - if (alpSrcPtr) { - const int16_t **tmpA = (const int16_t **)alpPixBuf + - 2 * vLumBufSize; - for (i = 0; i < neg; i++) - tmpA[i] = alpSrcPtr[neg]; - for (; i < end; i++) - tmpA[i] = alpSrcPtr[i]; - for (; i < vLumFilterSize; i++) - tmpA[i] = tmpA[i - 1]; - alpSrcPtr = tmpA; - } - } - if (firstChrSrcY < 0 || - firstChrSrcY + vChrFilterSize > c->chrSrcH) { - const int16_t **tmpU = (const int16_t **)chrUPixBuf + 2 * vChrBufSize, - **tmpV = (const int16_t **)chrVPixBuf + 2 * vChrBufSize; - int neg = -firstChrSrcY, i; - int end = FFMIN(c->chrSrcH - firstChrSrcY, vChrFilterSize); - for (i = 0; i < neg; i++) { - tmpU[i] = chrUSrcPtr[neg]; - tmpV[i] = chrVSrcPtr[neg]; - } - for (; i < end; i++) { - tmpU[i] = chrUSrcPtr[i]; - tmpV[i] = chrVSrcPtr[i]; - } - for (; i < vChrFilterSize; i++) { - tmpU[i] = tmpU[i - 1]; - tmpV[i] = tmpV[i - 1]; - } - chrUSrcPtr = tmpU; - chrVSrcPtr = tmpV; - } + (const int16_t **)(void*) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL; + int16_t *vLumFilter = c->vLumFilter; + int16_t *vChrFilter = c->vChrFilter; if (isPlanarYUV(dstFormat) || (isGray(dstFormat) && !isALPHA(dstFormat))) { // YV12 like const int chrSkipMask = (1 << c->chrDstVSubSample) - 1; + vLumFilter += dstY * vLumFilterSize; + vChrFilter += chrDstY * vChrFilterSize; + +// av_assert0(use_mmx_vfilter != ( +// yuv2planeX == yuv2planeX_10BE_c +// || yuv2planeX == 
yuv2planeX_10LE_c +// || yuv2planeX == yuv2planeX_9BE_c +// || yuv2planeX == yuv2planeX_9LE_c +// || yuv2planeX == yuv2planeX_16BE_c +// || yuv2planeX == yuv2planeX_16LE_c +// || yuv2planeX == yuv2planeX_8_c) || !ARCH_X86); + + if(use_mmx_vfilter){ + vLumFilter= c->lumMmxFilter; + vChrFilter= c->chrMmxFilter; + } + if (vLumFilterSize == 1) { yuv2plane1(lumSrcPtr[0], dest[0], dstW, c->lumDither8, 0); } else { - yuv2planeX(vLumFilter + dstY * vLumFilterSize, - vLumFilterSize, lumSrcPtr, dest[0], + yuv2planeX(vLumFilter, vLumFilterSize, + lumSrcPtr, dest[0], dstW, c->lumDither8, 0); } if (!((dstY & chrSkipMask) || isGray(dstFormat))) { if (yuv2nv12cX) { - yuv2nv12cX(c, vChrFilter + chrDstY * vChrFilterSize, + yuv2nv12cX(c, vChrFilter, vChrFilterSize, chrUSrcPtr, chrVSrcPtr, dest[1], chrDstW); } else if (vChrFilterSize == 1) { yuv2plane1(chrUSrcPtr[0], dest[1], chrDstW, c->chrDither8, 0); yuv2plane1(chrVSrcPtr[0], dest[2], chrDstW, c->chrDither8, 3); } else { - yuv2planeX(vChrFilter + chrDstY * vChrFilterSize, + yuv2planeX(vChrFilter, vChrFilterSize, chrUSrcPtr, dest[1], chrDstW, c->chrDither8, 0); - yuv2planeX(vChrFilter + chrDstY * vChrFilterSize, + yuv2planeX(vChrFilter, vChrFilterSize, chrVSrcPtr, dest[2], - chrDstW, c->chrDither8, 3); + chrDstW, c->chrDither8, use_mmx_vfilter ? 
(c->uv_offx2 >> 1) : 3); } } if (CONFIG_SWSCALE_ALPHA && alpPixBuf) { + if(use_mmx_vfilter){ + vLumFilter= c->alpMmxFilter; + } if (vLumFilterSize == 1) { yuv2plane1(alpSrcPtr[0], dest[3], dstW, c->lumDither8, 0); } else { - yuv2planeX(vLumFilter + dstY * vLumFilterSize, + yuv2planeX(vLumFilter, vLumFilterSize, alpSrcPtr, dest[3], dstW, c->lumDither8, 0); } @@ -687,8 +689,9 @@ static av_cold void sws_init_swScale_c(SwsContext *c) ff_sws_init_input_funcs(c); + if (c->srcBpc == 8) { - if (c->dstBpc <= 10) { + if (c->dstBpc <= 14) { c->hyScale = c->hcScale = hScale8To15_c; if (c->flags & SWS_FAST_BILINEAR) { c->hyscale_fast = hyscale_fast_c; @@ -698,12 +701,12 @@ static av_cold void sws_init_swScale_c(SwsContext *c) c->hyScale = c->hcScale = hScale8To19_c; } } else { - c->hyScale = c->hcScale = c->dstBpc > 10 ? hScale16To19_c + c->hyScale = c->hcScale = c->dstBpc > 14 ? hScale16To19_c : hScale16To15_c; } if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) { - if (c->dstBpc <= 10) { + if (c->dstBpc <= 14) { if (c->srcRange) { c->lumConvertRange = lumRangeFromJpeg_c; c->chrConvertRange = chrRangeFromJpeg_c; diff --git a/libswscale/swscale.h b/libswscale/swscale.h index c3efd48b1a..da29d47e3a 100644 --- a/libswscale/swscale.h +++ b/libswscale/swscale.h @@ -1,20 +1,20 @@ /* - * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at> + * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. 
* - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -215,7 +215,13 @@ int sws_scale(struct SwsContext *c, const uint8_t *const srcSlice[], uint8_t *const dst[], const int dstStride[]); /** - * @param inv_table the yuv2rgb coefficients, normally ff_yuv2rgb_coeffs[x] + * @param dstRange flag indicating the white-black range of the output (1=jpeg / 0=mpeg) + * @param srcRange flag indicating the white-black range of the input (1=jpeg / 0=mpeg) + * @param table the yuv2rgb coefficients describing the output yuv space, normally ff_yuv2rgb_coeffs[x] + * @param inv_table the yuv2rgb coefficients describing the input yuv space, normally ff_yuv2rgb_coeffs[x] + * @param brightness 16.16 fixed point brightness correction + * @param contrast 16.16 fixed point contrast correction + * @param saturation 16.16 fixed point saturation correction * @return -1 if not supported */ int sws_setColorspaceDetails(struct SwsContext *c, const int inv_table[4], diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h index 2a7d2dbd1d..5a584f04ca 100644 --- a/libswscale/swscale_internal.h +++ b/libswscale/swscale_internal.h @@ -1,20 +1,20 @@ /* - * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at> + * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at> * - * This file is part of Libav. + * This file is part of FFmpeg. 
* - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -34,10 +34,14 @@ #define STR(s) AV_TOSTRING(s) // AV_STRINGIFY is too long +#define YUVRGB_TABLE_HEADROOM 128 + #define FAST_BGR2YV12 // use 7-bit instead of 15-bit coefficients #define MAX_FILTER_SIZE 256 +#define DITHER1XBPP + #if HAVE_BIGENDIAN #define ALT32_CORR (-1) #else @@ -315,10 +319,10 @@ typedef struct SwsContext { int dstY; ///< Last destination vertical line output from last slice. int flags; ///< Flags passed by the user to select scaler algorithm, optimizations, subsampling, etc... 
void *yuvTable; // pointer to the yuv->rgb table start so it can be freed() - uint8_t *table_rV[256]; - uint8_t *table_gU[256]; - int table_gV[256]; - uint8_t *table_bU[256]; + uint8_t *table_rV[256 + 2*YUVRGB_TABLE_HEADROOM]; + uint8_t *table_gU[256 + 2*YUVRGB_TABLE_HEADROOM]; + int table_gV[256 + 2*YUVRGB_TABLE_HEADROOM]; + uint8_t *table_bU[256 + 2*YUVRGB_TABLE_HEADROOM]; //Colorspace stuff int contrast, brightness, saturation; // for sws_getColorspaceDetails @@ -326,6 +330,8 @@ typedef struct SwsContext { int dstColorspaceTable[4]; int srcRange; ///< 0 = MPG YUV range, 1 = JPG YUV range (source image). int dstRange; ///< 0 = MPG YUV range, 1 = JPG YUV range (destination image). + int src0Alpha; + int dst0Alpha; int yuv2rgb_y_offset; int yuv2rgb_y_coeff; int yuv2rgb_v2r_coeff; @@ -382,8 +388,8 @@ typedef struct SwsContext { // alignment of these values is not necessary, but merely here // to maintain the same offset across x8632 and x86-64. Once we // use proper offset macros in the asm, they can be removed. - DECLARE_ALIGNED(8, ptrdiff_t, uv_off_px); ///< offset (in pixels) between u and v planes - DECLARE_ALIGNED(8, ptrdiff_t, uv_off_byte); ///< offset (in bytes) between u and v planes + DECLARE_ALIGNED(8, ptrdiff_t, uv_off); ///< offset (in pixels) between u and v planes + DECLARE_ALIGNED(8, ptrdiff_t, uv_offx2); ///< offset (in bytes) between u and v planes DECLARE_ALIGNED(8, uint16_t, dither16)[8]; DECLARE_ALIGNED(8, uint32_t, dither32)[8]; @@ -417,6 +423,7 @@ typedef struct SwsContext { #if HAVE_VIS DECLARE_ALIGNED(8, uint64_t, sparc_coeffs)[10]; #endif + int use_mmx_vfilter; /* function pointers for swScale() */ yuv2planar1_fn yuv2plane1; @@ -427,14 +434,14 @@ typedef struct SwsContext { yuv2packedX_fn yuv2packedX; /// Unscaled conversion of luma plane to YV12 for horizontal scaler. 
- void (*lumToYV12)(uint8_t *dst, const uint8_t *src, + void (*lumToYV12)(uint8_t *dst, const uint8_t *src, const uint8_t *src2, const uint8_t *src3, int width, uint32_t *pal); /// Unscaled conversion of alpha plane to YV12 for horizontal scaler. - void (*alpToYV12)(uint8_t *dst, const uint8_t *src, + void (*alpToYV12)(uint8_t *dst, const uint8_t *src, const uint8_t *src2, const uint8_t *src3, int width, uint32_t *pal); /// Unscaled conversion of chroma planes to YV12 for horizontal scaler. void (*chrToYV12)(uint8_t *dstU, uint8_t *dstV, - const uint8_t *src1, const uint8_t *src2, + const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, int width, uint32_t *pal); /** @@ -539,14 +546,22 @@ SwsFunc ff_yuv2rgb_init_altivec(SwsContext *c); SwsFunc ff_yuv2rgb_get_func_ptr_bfin(SwsContext *c); void ff_bfin_get_unscaled_swscale(SwsContext *c); +#if FF_API_SWS_FORMAT_NAME +/** + * @deprecated Use av_get_pix_fmt_name() instead. + */ +attribute_deprecated const char *sws_format_name(enum PixelFormat format); +#endif #define is16BPS(x) \ (av_pix_fmt_descriptors[x].comp[0].depth_minus1 == 15) #define is9_OR_10BPS(x) \ - (av_pix_fmt_descriptors[x].comp[0].depth_minus1 == 8 || \ - av_pix_fmt_descriptors[x].comp[0].depth_minus1 == 9) + (av_pix_fmt_descriptors[x].comp[0].depth_minus1 >= 8 && \ + av_pix_fmt_descriptors[x].comp[0].depth_minus1 <= 13) + +#define isNBPS(x) is9_OR_10BPS(x) #define isBE(x) \ (av_pix_fmt_descriptors[x].flags & PIX_FMT_BE) @@ -561,7 +576,6 @@ const char *sws_format_name(enum PixelFormat format); #define isRGB(x) \ (av_pix_fmt_descriptors[x].flags & PIX_FMT_RGB) - #if 0 // FIXME #define isGray(x) \ (!(av_pix_fmt_descriptors[x].flags & PIX_FMT_PAL) && \ @@ -574,55 +588,95 @@ const char *sws_format_name(enum PixelFormat format); (x) == PIX_FMT_GRAY16LE) #endif -#define isRGBinInt(x) \ - ((x) == PIX_FMT_RGB48BE || \ - (x) == PIX_FMT_RGB48LE || \ - (x) == PIX_FMT_RGB32 || \ - (x) == PIX_FMT_RGB32_1 || \ - (x) == PIX_FMT_RGB24 || \ - (x) == 
PIX_FMT_RGB565BE || \ - (x) == PIX_FMT_RGB565LE || \ - (x) == PIX_FMT_RGB555BE || \ - (x) == PIX_FMT_RGB555LE || \ - (x) == PIX_FMT_RGB444BE || \ - (x) == PIX_FMT_RGB444LE || \ - (x) == PIX_FMT_RGB8 || \ - (x) == PIX_FMT_RGB4 || \ - (x) == PIX_FMT_RGB4_BYTE || \ - (x) == PIX_FMT_MONOBLACK || \ - (x) == PIX_FMT_MONOWHITE) - -#define isBGRinInt(x) \ - ((x) == PIX_FMT_BGR48BE || \ - (x) == PIX_FMT_BGR48LE || \ - (x) == PIX_FMT_BGR32 || \ - (x) == PIX_FMT_BGR32_1 || \ - (x) == PIX_FMT_BGR24 || \ - (x) == PIX_FMT_BGR565BE || \ - (x) == PIX_FMT_BGR565LE || \ - (x) == PIX_FMT_BGR555BE || \ - (x) == PIX_FMT_BGR555LE || \ - (x) == PIX_FMT_BGR444BE || \ - (x) == PIX_FMT_BGR444LE || \ - (x) == PIX_FMT_BGR8 || \ - (x) == PIX_FMT_BGR4 || \ - (x) == PIX_FMT_BGR4_BYTE || \ - (x) == PIX_FMT_MONOBLACK || \ - (x) == PIX_FMT_MONOWHITE) - -#define isAnyRGB(x) \ - (isRGBinInt(x) || \ - isBGRinInt(x)) +#define isRGBinInt(x) \ + ( \ + (x)==PIX_FMT_RGB48BE || \ + (x)==PIX_FMT_RGB48LE || \ + (x)==PIX_FMT_RGBA64BE || \ + (x)==PIX_FMT_RGBA64LE || \ + (x)==PIX_FMT_RGB32 || \ + (x)==PIX_FMT_RGB32_1 || \ + (x)==PIX_FMT_RGB24 || \ + (x)==PIX_FMT_RGB565BE || \ + (x)==PIX_FMT_RGB565LE || \ + (x)==PIX_FMT_RGB555BE || \ + (x)==PIX_FMT_RGB555LE || \ + (x)==PIX_FMT_RGB444BE || \ + (x)==PIX_FMT_RGB444LE || \ + (x)==PIX_FMT_RGB8 || \ + (x)==PIX_FMT_RGB4 || \ + (x)==PIX_FMT_RGB4_BYTE || \ + (x)==PIX_FMT_MONOBLACK || \ + (x)==PIX_FMT_MONOWHITE \ + ) +#define isBGRinInt(x) \ + ( \ + (x)==PIX_FMT_BGR48BE || \ + (x)==PIX_FMT_BGR48LE || \ + (x)==PIX_FMT_BGRA64BE || \ + (x)==PIX_FMT_BGRA64LE || \ + (x)==PIX_FMT_BGR32 || \ + (x)==PIX_FMT_BGR32_1 || \ + (x)==PIX_FMT_BGR24 || \ + (x)==PIX_FMT_BGR565BE || \ + (x)==PIX_FMT_BGR565LE || \ + (x)==PIX_FMT_BGR555BE || \ + (x)==PIX_FMT_BGR555LE || \ + (x)==PIX_FMT_BGR444BE || \ + (x)==PIX_FMT_BGR444LE || \ + (x)==PIX_FMT_BGR8 || \ + (x)==PIX_FMT_BGR4 || \ + (x)==PIX_FMT_BGR4_BYTE|| \ + (x)==PIX_FMT_MONOBLACK|| \ + (x)==PIX_FMT_MONOWHITE \ + ) + +#define isRGBinBytes(x) ( 
\ + (x)==PIX_FMT_RGB48BE \ + || (x)==PIX_FMT_RGB48LE \ + || (x)==PIX_FMT_RGBA64BE \ + || (x)==PIX_FMT_RGBA64LE \ + || (x)==PIX_FMT_RGBA \ + || (x)==PIX_FMT_ARGB \ + || (x)==PIX_FMT_RGB24 \ + ) +#define isBGRinBytes(x) ( \ + (x)==PIX_FMT_BGR48BE \ + || (x)==PIX_FMT_BGR48LE \ + || (x)==PIX_FMT_BGRA64BE \ + || (x)==PIX_FMT_BGRA64LE \ + || (x)==PIX_FMT_BGRA \ + || (x)==PIX_FMT_ABGR \ + || (x)==PIX_FMT_BGR24 \ + ) + +#define isAnyRGB(x) \ + ( \ + isRGBinInt(x) || \ + isBGRinInt(x) || \ + (x)==PIX_FMT_GBR24P \ + ) #define isALPHA(x) \ (av_pix_fmt_descriptors[x].nb_components == 2 || \ av_pix_fmt_descriptors[x].nb_components == 4) +#if 1 +#define isPacked(x) ( \ + (x)==PIX_FMT_PAL8 \ + || (x)==PIX_FMT_YUYV422 \ + || (x)==PIX_FMT_UYVY422 \ + || (x)==PIX_FMT_Y400A \ + || isRGBinInt(x) \ + || isBGRinInt(x) \ + ) +#else #define isPacked(x) \ ((av_pix_fmt_descriptors[x].nb_components >= 2 && \ !(av_pix_fmt_descriptors[x].flags & PIX_FMT_PLANAR)) || \ (x) == PIX_FMT_PAL8) +#endif #define isPlanar(x) \ (av_pix_fmt_descriptors[x].nb_components >= 2 && \ (av_pix_fmt_descriptors[x].flags & PIX_FMT_PLANAR)) @@ -641,6 +695,9 @@ const char *sws_format_name(enum PixelFormat format); extern const uint64_t ff_dither4[2]; extern const uint64_t ff_dither8[2]; +extern const uint8_t dithers[8][8][8]; +extern const uint16_t dither_scale[15][16]; + extern const AVClass sws_context_class; diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c index 595edf8ae1..9180f2eb5c 100644 --- a/libswscale/swscale_unscaled.c +++ b/libswscale/swscale_unscaled.c @@ -1,20 +1,20 @@ /* - * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at> + * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at> * - * This file is part of Libav. + * This file is part of FFmpeg. 
* - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -23,7 +23,6 @@ #include <math.h> #include <stdio.h> #include "config.h" -#include <assert.h> #include "swscale.h" #include "swscale_internal.h" #include "rgb2rgb.h" @@ -33,48 +32,7 @@ #include "libavutil/mathematics.h" #include "libavutil/bswap.h" #include "libavutil/pixdesc.h" - -DECLARE_ALIGNED(8, const uint8_t, dither_8x8_1)[8][8] = { - { 0, 1, 0, 1, 0, 1, 0, 1,}, - { 1, 0, 1, 0, 1, 0, 1, 0,}, - { 0, 1, 0, 1, 0, 1, 0, 1,}, - { 1, 0, 1, 0, 1, 0, 1, 0,}, - { 0, 1, 0, 1, 0, 1, 0, 1,}, - { 1, 0, 1, 0, 1, 0, 1, 0,}, - { 0, 1, 0, 1, 0, 1, 0, 1,}, - { 1, 0, 1, 0, 1, 0, 1, 0,}, -}; -DECLARE_ALIGNED(8, const uint8_t, dither_8x8_3)[8][8] = { - { 1, 2, 1, 2, 1, 2, 1, 2,}, - { 3, 0, 3, 0, 3, 0, 3, 0,}, - { 1, 2, 1, 2, 1, 2, 1, 2,}, - { 3, 0, 3, 0, 3, 0, 3, 0,}, - { 1, 2, 1, 2, 1, 2, 1, 2,}, - { 3, 0, 3, 0, 3, 0, 3, 0,}, - { 1, 2, 1, 2, 1, 2, 1, 2,}, - { 3, 0, 3, 0, 3, 0, 3, 0,}, -}; -DECLARE_ALIGNED(8, const uint8_t, dither_8x8_64)[8][8] = { - { 18, 34, 30, 46, 17, 33, 29, 45,}, - { 50, 2, 62, 14, 49, 1, 61, 13,}, - { 26, 42, 22, 38, 25, 41, 21, 37,}, - { 58, 10, 54, 6, 57, 9, 53, 5,}, 
- { 16, 32, 28, 44, 19, 35, 31, 47,}, - { 48, 0, 60, 12, 51, 3, 63, 15,}, - { 24, 40, 20, 36, 27, 43, 23, 39,}, - { 56, 8, 52, 4, 59, 11, 55, 7,}, -}; -extern const uint8_t dither_8x8_128[8][8]; -DECLARE_ALIGNED(8, const uint8_t, dither_8x8_256)[8][8] = { - { 72, 136, 120, 184, 68, 132, 116, 180,}, - { 200, 8, 248, 56, 196, 4, 244, 52,}, - { 104, 168, 88, 152, 100, 164, 84, 148,}, - { 232, 40, 216, 24, 228, 36, 212, 20,}, - { 64, 128, 102, 176, 76, 140, 124, 188,}, - { 192, 0, 240, 48, 204, 12, 252, 60,}, - { 96, 160, 80, 144, 108, 172, 92, 156,}, - { 224, 32, 208, 16, 236, 44, 220, 28,}, -}; +#include "libavutil/avassert.h" #define RGB2YUV_SHIFT 15 #define BY ( (int) (0.114 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)) @@ -87,6 +45,102 @@ DECLARE_ALIGNED(8, const uint8_t, dither_8x8_256)[8][8] = { #define RV ( (int) (0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)) #define RU (-(int) (0.169 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)) +DECLARE_ALIGNED(8, const uint8_t, dithers)[8][8][8]={ +{ + { 0, 1, 0, 1, 0, 1, 0, 1,}, + { 1, 0, 1, 0, 1, 0, 1, 0,}, + { 0, 1, 0, 1, 0, 1, 0, 1,}, + { 1, 0, 1, 0, 1, 0, 1, 0,}, + { 0, 1, 0, 1, 0, 1, 0, 1,}, + { 1, 0, 1, 0, 1, 0, 1, 0,}, + { 0, 1, 0, 1, 0, 1, 0, 1,}, + { 1, 0, 1, 0, 1, 0, 1, 0,}, +},{ + { 1, 2, 1, 2, 1, 2, 1, 2,}, + { 3, 0, 3, 0, 3, 0, 3, 0,}, + { 1, 2, 1, 2, 1, 2, 1, 2,}, + { 3, 0, 3, 0, 3, 0, 3, 0,}, + { 1, 2, 1, 2, 1, 2, 1, 2,}, + { 3, 0, 3, 0, 3, 0, 3, 0,}, + { 1, 2, 1, 2, 1, 2, 1, 2,}, + { 3, 0, 3, 0, 3, 0, 3, 0,}, +},{ + { 2, 4, 3, 5, 2, 4, 3, 5,}, + { 6, 0, 7, 1, 6, 0, 7, 1,}, + { 3, 5, 2, 4, 3, 5, 2, 4,}, + { 7, 1, 6, 0, 7, 1, 6, 0,}, + { 2, 4, 3, 5, 2, 4, 3, 5,}, + { 6, 0, 7, 1, 6, 0, 7, 1,}, + { 3, 5, 2, 4, 3, 5, 2, 4,}, + { 7, 1, 6, 0, 7, 1, 6, 0,}, +},{ + { 4, 8, 7, 11, 4, 8, 7, 11,}, + { 12, 0, 15, 3, 12, 0, 15, 3,}, + { 6, 10, 5, 9, 6, 10, 5, 9,}, + { 14, 2, 13, 1, 14, 2, 13, 1,}, + { 4, 8, 7, 11, 4, 8, 7, 11,}, + { 12, 0, 15, 3, 12, 0, 15, 3,}, + { 6, 10, 5, 9, 6, 10, 5, 9,}, + { 14, 2, 13, 1, 14, 2, 13, 
1,}, +},{ + { 9, 17, 15, 23, 8, 16, 14, 22,}, + { 25, 1, 31, 7, 24, 0, 30, 6,}, + { 13, 21, 11, 19, 12, 20, 10, 18,}, + { 29, 5, 27, 3, 28, 4, 26, 2,}, + { 8, 16, 14, 22, 9, 17, 15, 23,}, + { 24, 0, 30, 6, 25, 1, 31, 7,}, + { 12, 20, 10, 18, 13, 21, 11, 19,}, + { 28, 4, 26, 2, 29, 5, 27, 3,}, +},{ + { 18, 34, 30, 46, 17, 33, 29, 45,}, + { 50, 2, 62, 14, 49, 1, 61, 13,}, + { 26, 42, 22, 38, 25, 41, 21, 37,}, + { 58, 10, 54, 6, 57, 9, 53, 5,}, + { 16, 32, 28, 44, 19, 35, 31, 47,}, + { 48, 0, 60, 12, 51, 3, 63, 15,}, + { 24, 40, 20, 36, 27, 43, 23, 39,}, + { 56, 8, 52, 4, 59, 11, 55, 7,}, +},{ + { 18, 34, 30, 46, 17, 33, 29, 45,}, + { 50, 2, 62, 14, 49, 1, 61, 13,}, + { 26, 42, 22, 38, 25, 41, 21, 37,}, + { 58, 10, 54, 6, 57, 9, 53, 5,}, + { 16, 32, 28, 44, 19, 35, 31, 47,}, + { 48, 0, 60, 12, 51, 3, 63, 15,}, + { 24, 40, 20, 36, 27, 43, 23, 39,}, + { 56, 8, 52, 4, 59, 11, 55, 7,}, +},{ + { 36, 68, 60, 92, 34, 66, 58, 90,}, + { 100, 4,124, 28, 98, 2,122, 26,}, + { 52, 84, 44, 76, 50, 82, 42, 74,}, + { 116, 20,108, 12,114, 18,106, 10,}, + { 32, 64, 56, 88, 38, 70, 62, 94,}, + { 96, 0,120, 24,102, 6,126, 30,}, + { 48, 80, 40, 72, 54, 86, 46, 78,}, + { 112, 16,104, 8,118, 22,110, 14,}, +}}; + +static const uint8_t flat64[8]={64,64,64,64,64,64,64,64}; + +const uint16_t dither_scale[15][16]={ +{ 2, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,}, +{ 2, 3, 7, 7, 13, 13, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,}, +{ 3, 3, 4, 15, 15, 29, 57, 57, 57, 113, 113, 113, 113, 113, 113, 113,}, +{ 3, 4, 4, 5, 31, 31, 61, 121, 241, 241, 241, 241, 481, 481, 481, 481,}, +{ 3, 4, 5, 5, 6, 63, 63, 125, 249, 497, 993, 993, 993, 993, 993, 1985,}, +{ 3, 5, 6, 6, 6, 7, 127, 127, 253, 505, 1009, 2017, 4033, 4033, 4033, 4033,}, +{ 3, 5, 6, 7, 7, 7, 8, 255, 255, 509, 1017, 2033, 4065, 8129,16257,16257,}, +{ 3, 5, 6, 8, 8, 8, 8, 9, 511, 511, 1021, 2041, 4081, 8161,16321,32641,}, +{ 3, 5, 7, 8, 9, 9, 9, 9, 10, 1023, 1023, 2045, 4089, 8177,16353,32705,}, +{ 3, 5, 7, 8, 10, 10, 10, 10, 10, 11, 2047, 
2047, 4093, 8185,16369,32737,}, +{ 3, 5, 7, 8, 10, 11, 11, 11, 11, 11, 12, 4095, 4095, 8189,16377,32753,}, +{ 3, 5, 7, 9, 10, 12, 12, 12, 12, 12, 12, 13, 8191, 8191,16381,32761,}, +{ 3, 5, 7, 9, 10, 12, 13, 13, 13, 13, 13, 13, 14,16383,16383,32765,}, +{ 3, 5, 7, 9, 10, 12, 14, 14, 14, 14, 14, 14, 14, 15,32767,32767,}, +{ 3, 5, 7, 9, 11, 12, 14, 15, 15, 15, 15, 15, 15, 15, 16,65535,}, +}; + + static void fillPlane(uint8_t *plane, int stride, int width, int height, int y, uint8_t val) { @@ -98,6 +152,20 @@ static void fillPlane(uint8_t *plane, int stride, int width, int height, int y, } } +static void fillPlane16(uint8_t *plane, int stride, int width, int height, int y, + int alpha, int bits) +{ + int i, j; + uint8_t *ptr = plane + stride * y; + int v = alpha ? -1 : (1<<bits); + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) { + AV_WN16(ptr+2*j, v); + } + ptr += stride; + } +} + static void copyPlane(const uint8_t *src, int srcStride, int srcSliceY, int srcSliceH, int width, uint8_t *dst, int dstStride) @@ -310,7 +378,7 @@ static int palToRgbWrapper(SwsContext *c, const uint8_t *src[], int srcStride[], uint8_t *dstPtr = dst[0] + dstStride[0] * srcSliceY; const uint8_t *srcPtr = src[0]; - if (srcFormat == PIX_FMT_Y400A) { + if (srcFormat == PIX_FMT_GRAY8A) { switch (dstFormat) { case PIX_FMT_RGB32 : conv = gray8aToPacked32; break; case PIX_FMT_BGR32 : conv = gray8aToPacked32; break; @@ -332,7 +400,7 @@ static int palToRgbWrapper(SwsContext *c, const uint8_t *src[], int srcStride[], if (!conv) av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n", - sws_format_name(srcFormat), sws_format_name(dstFormat)); + av_get_pix_fmt_name(srcFormat), av_get_pix_fmt_name(dstFormat)); else { for (i = 0; i < srcSliceH; i++) { conv(srcPtr, dstPtr, c->srcW, (uint8_t *) c->pal_rgb); @@ -453,6 +521,20 @@ static int planarRgbToRgbWrapper(SwsContext *c, const uint8_t *src[], || (x) == PIX_FMT_ABGR \ ) +#define isRGBA64(x) ( \ + (x) == PIX_FMT_RGBA64LE \ + || (x) == 
PIX_FMT_RGBA64BE \ + || (x) == PIX_FMT_BGRA64LE \ + || (x) == PIX_FMT_BGRA64BE \ + ) + +#define isRGB48(x) ( \ + (x) == PIX_FMT_RGB48LE \ + || (x) == PIX_FMT_RGB48BE \ + || (x) == PIX_FMT_BGR48LE \ + || (x) == PIX_FMT_BGR48BE \ + ) + /* {RGB,BGR}{15,16,24,32,32_1} -> {RGB,BGR}{15,16,24,32} */ typedef void (* rgbConvFn) (const uint8_t *, uint8_t *, int); static rgbConvFn findRgbConvFn(SwsContext *c) @@ -467,10 +549,6 @@ static rgbConvFn findRgbConvFn(SwsContext *c) (((bpp + 7) >> 3) == 2 && \ (!(av_pix_fmt_descriptors[fmt].flags & PIX_FMT_BE) != !HAVE_BIGENDIAN)) - /* if this is non-native rgb444/555/565, don't handle it here. */ - if (IS_NOT_NE(srcId, srcFormat) || IS_NOT_NE(dstId, dstFormat)) - return NULL; - #define CONV_IS(src, dst) (srcFormat == PIX_FMT_##src && dstFormat == PIX_FMT_##dst) if (isRGBA32(srcFormat) && isRGBA32(dstFormat)) { @@ -486,6 +564,32 @@ static rgbConvFn findRgbConvFn(SwsContext *c) || CONV_IS(RGBA, BGRA)) conv = shuffle_bytes_2103; else if (CONV_IS(BGRA, ABGR) || CONV_IS(RGBA, ARGB)) conv = shuffle_bytes_3012; + } else if (isRGB48(srcFormat) && isRGB48(dstFormat)) { + if (CONV_IS(RGB48LE, BGR48LE) + || CONV_IS(BGR48LE, RGB48LE) + || CONV_IS(RGB48BE, BGR48BE) + || CONV_IS(BGR48BE, RGB48BE)) conv = rgb48tobgr48_nobswap; + else if (CONV_IS(RGB48LE, BGR48BE) + || CONV_IS(BGR48LE, RGB48BE) + || CONV_IS(RGB48BE, BGR48LE) + || CONV_IS(BGR48BE, RGB48LE)) conv = rgb48tobgr48_bswap; + } else if (isRGBA64(srcFormat) && isRGB48(dstFormat)) { + if (CONV_IS(RGBA64LE, BGR48LE) + || CONV_IS(BGRA64LE, RGB48LE) + || CONV_IS(RGBA64BE, BGR48BE) + || CONV_IS(BGRA64BE, RGB48BE)) conv = rgb64tobgr48_nobswap; + else if (CONV_IS(RGBA64LE, BGR48BE) + || CONV_IS(BGRA64LE, RGB48BE) + || CONV_IS(RGBA64BE, BGR48LE) + || CONV_IS(BGRA64BE, RGB48LE)) conv = rgb64tobgr48_bswap; + else if (CONV_IS(RGBA64LE, RGB48LE) + || CONV_IS(BGRA64LE, BGR48LE) + || CONV_IS(RGBA64BE, RGB48BE) + || CONV_IS(BGRA64BE, BGR48BE)) conv = rgb64to48_nobswap; + else if (CONV_IS(RGBA64LE, 
RGB48BE) + || CONV_IS(BGRA64LE, BGR48BE) + || CONV_IS(RGBA64BE, RGB48LE) + || CONV_IS(BGRA64BE, BGR48LE)) conv = rgb64to48_bswap; } else /* BGR -> BGR */ if ((isBGRinInt(srcFormat) && isBGRinInt(dstFormat)) || @@ -544,10 +648,13 @@ static int rgbToRgbWrapper(SwsContext *c, const uint8_t *src[], int srcStride[], if (!conv) { av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n", - sws_format_name(srcFormat), sws_format_name(dstFormat)); + av_get_pix_fmt_name(srcFormat), av_get_pix_fmt_name(dstFormat)); } else { const uint8_t *srcPtr = src[0]; uint8_t *dstPtr = dst[0]; + int src_bswap = IS_NOT_NE(c->srcFormatBpp, srcFormat); + int dst_bswap = IS_NOT_NE(c->dstFormatBpp, dstFormat); + if ((srcFormat == PIX_FMT_RGB32_1 || srcFormat == PIX_FMT_BGR32_1) && !isRGBA32(dstFormat)) srcPtr += ALT32_CORR; @@ -557,15 +664,23 @@ static int rgbToRgbWrapper(SwsContext *c, const uint8_t *src[], int srcStride[], dstPtr += ALT32_CORR; if (dstStride[0] * srcBpp == srcStride[0] * dstBpp && srcStride[0] > 0 && - !(srcStride[0] % srcBpp)) + !(srcStride[0] % srcBpp) && !dst_bswap && !src_bswap) conv(srcPtr, dstPtr + dstStride[0] * srcSliceY, srcSliceH * srcStride[0]); else { - int i; + int i, j; dstPtr += dstStride[0] * srcSliceY; for (i = 0; i < srcSliceH; i++) { - conv(srcPtr, dstPtr, c->srcW * srcBpp); + if(src_bswap) { + for(j=0; j<c->srcW; j++) + ((uint16_t*)c->formatConvBuffer)[j] = av_bswap16(((uint16_t*)srcPtr)[j]); + conv(c->formatConvBuffer, dstPtr, c->srcW * srcBpp); + }else + conv(srcPtr, dstPtr, c->srcW * srcBpp); + if(dst_bswap) + for(j=0; j<c->srcW; j++) + ((uint16_t*)dstPtr)[j] = av_bswap16(((uint16_t*)dstPtr)[j]); srcPtr += srcStride[0]; dstPtr += dstStride[0]; } @@ -623,7 +738,7 @@ static int packedCopyWrapper(SwsContext *c, const uint8_t *src[], while (length + c->srcW <= FFABS(dstStride[0]) && length + c->srcW <= FFABS(srcStride[0])) length += c->srcW; - assert(length != 0); + av_assert1(length != 0); for (i = 0; i < srcSliceH; i++) { memcpy(dstPtr, srcPtr, 
length); @@ -634,25 +749,25 @@ static int packedCopyWrapper(SwsContext *c, const uint8_t *src[], return srcSliceH; } -#define clip9(x) av_clip_uintp2(x, 9) -#define clip10(x) av_clip_uintp2(x, 10) -#define DITHER_COPY(dst, dstStride, wfunc, src, srcStride, rfunc, dithers, shift, clip) \ - for (i = 0; i < height; i++) { \ - const uint8_t *dither = dithers[i & 7]; \ - for (j = 0; j < length - 7; j += 8) { \ - wfunc(&dst[j + 0], clip((rfunc(&src[j + 0]) + dither[0]) >> shift)); \ - wfunc(&dst[j + 1], clip((rfunc(&src[j + 1]) + dither[1]) >> shift)); \ - wfunc(&dst[j + 2], clip((rfunc(&src[j + 2]) + dither[2]) >> shift)); \ - wfunc(&dst[j + 3], clip((rfunc(&src[j + 3]) + dither[3]) >> shift)); \ - wfunc(&dst[j + 4], clip((rfunc(&src[j + 4]) + dither[4]) >> shift)); \ - wfunc(&dst[j + 5], clip((rfunc(&src[j + 5]) + dither[5]) >> shift)); \ - wfunc(&dst[j + 6], clip((rfunc(&src[j + 6]) + dither[6]) >> shift)); \ - wfunc(&dst[j + 7], clip((rfunc(&src[j + 7]) + dither[7]) >> shift)); \ - } \ - for (; j < length; j++) \ - wfunc(&dst[j], (rfunc(&src[j]) + dither[j & 7]) >> shift); \ - dst += dstStride; \ - src += srcStride; \ +#define DITHER_COPY(dst, dstStride, src, srcStride, bswap, dbswap)\ + uint16_t scale= dither_scale[dst_depth-1][src_depth-1];\ + int shift= src_depth-dst_depth + dither_scale[src_depth-2][dst_depth-1];\ + for (i = 0; i < height; i++) {\ + const uint8_t *dither= dithers[src_depth-9][i&7];\ + for (j = 0; j < length-7; j+=8){\ + dst[j+0] = dbswap((bswap(src[j+0]) + dither[0])*scale>>shift);\ + dst[j+1] = dbswap((bswap(src[j+1]) + dither[1])*scale>>shift);\ + dst[j+2] = dbswap((bswap(src[j+2]) + dither[2])*scale>>shift);\ + dst[j+3] = dbswap((bswap(src[j+3]) + dither[3])*scale>>shift);\ + dst[j+4] = dbswap((bswap(src[j+4]) + dither[4])*scale>>shift);\ + dst[j+5] = dbswap((bswap(src[j+5]) + dither[5])*scale>>shift);\ + dst[j+6] = dbswap((bswap(src[j+6]) + dither[6])*scale>>shift);\ + dst[j+7] = dbswap((bswap(src[j+7]) + dither[7])*scale>>shift);\ + }\ + for 
(; j < length; j++)\ + dst[j] = dbswap((bswap(src[j]) + dither[j&7])*scale>>shift);\ + dst += dstStride;\ + src += srcStride;\ } static int planarCopyWrapper(SwsContext *c, const uint8_t *src[], @@ -666,162 +781,126 @@ static int planarCopyWrapper(SwsContext *c, const uint8_t *src[], int height = (plane == 0 || plane == 3) ? srcSliceH: -((-srcSliceH) >> c->chrDstVSubSample); const uint8_t *srcPtr = src[plane]; uint8_t *dstPtr = dst[plane] + dstStride[plane] * y; + int shiftonly= plane==1 || plane==2 || (!c->srcRange && plane==0); if (!dst[plane]) continue; // ignore palette for GRAY8 if (plane == 1 && !dst[2]) continue; if (!src[plane] || (plane == 1 && !src[2])) { - if (is16BPS(c->dstFormat)) - length *= 2; - fillPlane(dst[plane], dstStride[plane], length, height, y, - (plane == 3) ? 255 : 128); + if (is16BPS(c->dstFormat) || isNBPS(c->dstFormat)) { + fillPlane16(dst[plane], dstStride[plane], length, height, y, + plane == 3, av_pix_fmt_descriptors[c->dstFormat].comp[plane].depth_minus1); + } else { + fillPlane(dst[plane], dstStride[plane], length, height, y, + (plane == 3) ? 
255 : 128); + } } else { - if (is9_OR_10BPS(c->srcFormat)) { + if(isNBPS(c->srcFormat) || isNBPS(c->dstFormat) + || (is16BPS(c->srcFormat) != is16BPS(c->dstFormat)) + ) { const int src_depth = av_pix_fmt_descriptors[c->srcFormat].comp[plane].depth_minus1 + 1; const int dst_depth = av_pix_fmt_descriptors[c->dstFormat].comp[plane].depth_minus1 + 1; const uint16_t *srcPtr2 = (const uint16_t *) srcPtr; + uint16_t *dstPtr2 = (uint16_t*)dstPtr; - if (is16BPS(c->dstFormat)) { - uint16_t *dstPtr2 = (uint16_t *) dstPtr; -#define COPY9_OR_10TO16(rfunc, wfunc) \ - for (i = 0; i < height; i++) { \ - for (j = 0; j < length; j++) { \ - int srcpx = rfunc(&srcPtr2[j]); \ - wfunc(&dstPtr2[j], (srcpx << (16 - src_depth)) | (srcpx >> (2 * src_depth - 16))); \ - } \ - dstPtr2 += dstStride[plane] / 2; \ - srcPtr2 += srcStride[plane] / 2; \ + if (dst_depth == 8) { + if(isBE(c->srcFormat) == HAVE_BIGENDIAN){ + DITHER_COPY(dstPtr, dstStride[plane], srcPtr2, srcStride[plane]/2, , ) + } else { + DITHER_COPY(dstPtr, dstStride[plane], srcPtr2, srcStride[plane]/2, av_bswap16, ) } - if (isBE(c->dstFormat)) { - if (isBE(c->srcFormat)) { - COPY9_OR_10TO16(AV_RB16, AV_WB16); - } else { - COPY9_OR_10TO16(AV_RL16, AV_WB16); + } else if (src_depth == 8) { + for (i = 0; i < height; i++) { + #define COPY816(w)\ + if(shiftonly){\ + for (j = 0; j < length; j++)\ + w(&dstPtr2[j], srcPtr[j]<<(dst_depth-8));\ + }else{\ + for (j = 0; j < length; j++)\ + w(&dstPtr2[j], (srcPtr[j]<<(dst_depth-8)) |\ + (srcPtr[j]>>(2*8-dst_depth)));\ } - } else { - if (isBE(c->srcFormat)) { - COPY9_OR_10TO16(AV_RB16, AV_WL16); + if(isBE(c->dstFormat)){ + COPY816(AV_WB16) } else { - COPY9_OR_10TO16(AV_RL16, AV_WL16); + COPY816(AV_WL16) } + dstPtr2 += dstStride[plane]/2; + srcPtr += srcStride[plane]; } - } else if (is9_OR_10BPS(c->dstFormat)) { - uint16_t *dstPtr2 = (uint16_t *) dstPtr; -#define COPY9_OR_10TO9_OR_10(loop) \ - for (i = 0; i < height; i++) { \ - for (j = 0; j < length; j++) { \ - loop; \ - } \ - dstPtr2 += 
dstStride[plane] / 2; \ - srcPtr2 += srcStride[plane] / 2; \ - } -#define COPY9_OR_10TO9_OR_10_2(rfunc, wfunc) \ - if (dst_depth > src_depth) { \ - COPY9_OR_10TO9_OR_10(int srcpx = rfunc(&srcPtr2[j]); \ - wfunc(&dstPtr2[j], (srcpx << 1) | (srcpx >> 9))); \ - } else if (dst_depth < src_depth) { \ - DITHER_COPY(dstPtr2, dstStride[plane] / 2, wfunc, \ - srcPtr2, srcStride[plane] / 2, rfunc, \ - dither_8x8_1, 1, clip9); \ - } else { \ - COPY9_OR_10TO9_OR_10(wfunc(&dstPtr2[j], rfunc(&srcPtr2[j]))); \ - } - if (isBE(c->dstFormat)) { - if (isBE(c->srcFormat)) { - COPY9_OR_10TO9_OR_10_2(AV_RB16, AV_WB16); - } else { - COPY9_OR_10TO9_OR_10_2(AV_RL16, AV_WB16); + } else if (src_depth <= dst_depth) { + int orig_length = length; + for (i = 0; i < height; i++) { + if(isBE(c->srcFormat) == HAVE_BIGENDIAN && + isBE(c->dstFormat) == HAVE_BIGENDIAN && + shiftonly) { + unsigned shift = dst_depth - src_depth; + length = orig_length; +#if HAVE_FAST_64BIT +#define FAST_COPY_UP(shift) \ + for (j = 0; j < length - 3; j += 4) { \ + uint64_t v = AV_RN64A(srcPtr2 + j); \ + AV_WN64A(dstPtr2 + j, v << shift); \ + } \ + length &= 3; +#else +#define FAST_COPY_UP(shift) \ + for (j = 0; j < length - 1; j += 2) { \ + uint32_t v = AV_RN32A(srcPtr2 + j); \ + AV_WN32A(dstPtr2 + j, v << shift); \ + } \ + length &= 1; +#endif + switch (shift) + { + case 6: FAST_COPY_UP(6); break; + case 7: FAST_COPY_UP(7); break; + } } - } else { - if (isBE(c->srcFormat)) { - COPY9_OR_10TO9_OR_10_2(AV_RB16, AV_WL16); +#define COPY_UP(r,w) \ + if(shiftonly){\ + for (j = 0; j < length; j++){ \ + unsigned int v= r(&srcPtr2[j]);\ + w(&dstPtr2[j], v<<(dst_depth-src_depth));\ + }\ + }else{\ + for (j = 0; j < length; j++){ \ + unsigned int v= r(&srcPtr2[j]);\ + w(&dstPtr2[j], (v<<(dst_depth-src_depth)) | \ + (v>>(2*src_depth-dst_depth)));\ + }\ + } + if(isBE(c->srcFormat)){ + if(isBE(c->dstFormat)){ + COPY_UP(AV_RB16, AV_WB16) + } else { + COPY_UP(AV_RB16, AV_WL16) + } } else { - COPY9_OR_10TO9_OR_10_2(AV_RL16, AV_WL16); + 
if(isBE(c->dstFormat)){ + COPY_UP(AV_RL16, AV_WB16) + } else { + COPY_UP(AV_RL16, AV_WL16) + } } + dstPtr2 += dstStride[plane]/2; + srcPtr2 += srcStride[plane]/2; } } else { -#define W8(a, b) { *(a) = (b); } -#define COPY9_OR_10TO8(rfunc) \ - if (src_depth == 9) { \ - DITHER_COPY(dstPtr, dstStride[plane], W8, \ - srcPtr2, srcStride[plane] / 2, rfunc, \ - dither_8x8_1, 1, av_clip_uint8); \ - } else { \ - DITHER_COPY(dstPtr, dstStride[plane], W8, \ - srcPtr2, srcStride[plane] / 2, rfunc, \ - dither_8x8_3, 2, av_clip_uint8); \ - } - if (isBE(c->srcFormat)) { - COPY9_OR_10TO8(AV_RB16); - } else { - COPY9_OR_10TO8(AV_RL16); - } - } - } else if (is9_OR_10BPS(c->dstFormat)) { - const int dst_depth = av_pix_fmt_descriptors[c->dstFormat].comp[plane].depth_minus1 + 1; - uint16_t *dstPtr2 = (uint16_t *) dstPtr; - - if (is16BPS(c->srcFormat)) { - const uint16_t *srcPtr2 = (const uint16_t *) srcPtr; -#define COPY16TO9_OR_10(rfunc, wfunc) \ - if (dst_depth == 9) { \ - DITHER_COPY(dstPtr2, dstStride[plane] / 2, wfunc, \ - srcPtr2, srcStride[plane] / 2, rfunc, \ - dither_8x8_128, 7, clip9); \ - } else { \ - DITHER_COPY(dstPtr2, dstStride[plane] / 2, wfunc, \ - srcPtr2, srcStride[plane] / 2, rfunc, \ - dither_8x8_64, 6, clip10); \ - } - if (isBE(c->dstFormat)) { - if (isBE(c->srcFormat)) { - COPY16TO9_OR_10(AV_RB16, AV_WB16); + if(isBE(c->srcFormat) == HAVE_BIGENDIAN){ + if(isBE(c->dstFormat) == HAVE_BIGENDIAN){ + DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, , ) } else { - COPY16TO9_OR_10(AV_RL16, AV_WB16); + DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, , av_bswap16) } - } else { - if (isBE(c->srcFormat)) { - COPY16TO9_OR_10(AV_RB16, AV_WL16); + }else{ + if(isBE(c->dstFormat) == HAVE_BIGENDIAN){ + DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, av_bswap16, ) } else { - COPY16TO9_OR_10(AV_RL16, AV_WL16); + DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, av_bswap16, av_bswap16) } } - } else 
/* 8bit */ { -#define COPY8TO9_OR_10(wfunc) \ - for (i = 0; i < height; i++) { \ - for (j = 0; j < length; j++) { \ - const int srcpx = srcPtr[j]; \ - wfunc(&dstPtr2[j], (srcpx << (dst_depth - 8)) | (srcpx >> (16 - dst_depth))); \ - } \ - dstPtr2 += dstStride[plane] / 2; \ - srcPtr += srcStride[plane]; \ - } - if (isBE(c->dstFormat)) { - COPY8TO9_OR_10(AV_WB16); - } else { - COPY8TO9_OR_10(AV_WL16); - } - } - } else if (is16BPS(c->srcFormat) && !is16BPS(c->dstFormat)) { - const uint16_t *srcPtr2 = (const uint16_t *) srcPtr; -#define COPY16TO8(rfunc) \ - DITHER_COPY(dstPtr, dstStride[plane], W8, \ - srcPtr2, srcStride[plane] / 2, rfunc, \ - dither_8x8_256, 8, av_clip_uint8); - if (isBE(c->srcFormat)) { - COPY16TO8(AV_RB16); - } else { - COPY16TO8(AV_RL16); - } - } else if (!is16BPS(c->srcFormat) && is16BPS(c->dstFormat)) { - for (i = 0; i < height; i++) { - for (j = 0; j < length; j++) { - dstPtr[ j << 1 ] = srcPtr[j]; - dstPtr[(j << 1) + 1] = srcPtr[j]; - } - srcPtr += srcStride[plane]; - dstPtr += dstStride[plane]; } } else if (is16BPS(c->srcFormat) && is16BPS(c->dstFormat) && isBE(c->srcFormat) != isBE(c->dstFormat)) { @@ -899,28 +978,32 @@ void ff_get_unscaled_swscale(SwsContext *c) && (!needsDither || (c->flags&(SWS_FAST_BILINEAR|SWS_POINT)))) c->swScale= rgbToRgbWrapper; - if (isPlanarRGB(srcFormat) && isPackedRGB(dstFormat)) +#define isByteRGB(f) (\ + f == PIX_FMT_RGB32 ||\ + f == PIX_FMT_RGB32_1 ||\ + f == PIX_FMT_RGB24 ||\ + f == PIX_FMT_BGR32 ||\ + f == PIX_FMT_BGR32_1 ||\ + f == PIX_FMT_BGR24) + + if (isAnyRGB(srcFormat) && isPlanar(srcFormat) && isByteRGB(dstFormat)) c->swScale = planarRgbToRgbWrapper; /* bswap 16 bits per pixel/component packed formats */ if (IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, PIX_FMT_BGR444) || IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, PIX_FMT_BGR48) || + IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, PIX_FMT_BGRA64) || IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, PIX_FMT_BGR555) || IS_DIFFERENT_ENDIANESS(srcFormat, 
dstFormat, PIX_FMT_BGR565) || IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, PIX_FMT_GRAY16) || IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, PIX_FMT_RGB444) || IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, PIX_FMT_RGB48) || + IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, PIX_FMT_RGBA64) || IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, PIX_FMT_RGB555) || IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, PIX_FMT_RGB565)) c->swScale = packed_16bpc_bswap; - if ((usePal(srcFormat) && ( - dstFormat == PIX_FMT_RGB32 || - dstFormat == PIX_FMT_RGB32_1 || - dstFormat == PIX_FMT_RGB24 || - dstFormat == PIX_FMT_BGR32 || - dstFormat == PIX_FMT_BGR32_1 || - dstFormat == PIX_FMT_BGR24))) + if (usePal(srcFormat) && isByteRGB(dstFormat)) c->swScale = palToRgbWrapper; if (srcFormat == PIX_FMT_YUV422P) { @@ -951,13 +1034,14 @@ void ff_get_unscaled_swscale(SwsContext *c) if (srcFormat == PIX_FMT_UYVY422 && dstFormat == PIX_FMT_YUV422P) c->swScale = uyvyToYuv422Wrapper; +#define isPlanarGray(x) (isGray(x) && (x) != PIX_FMT_GRAY8A) /* simple copy */ if ( srcFormat == dstFormat || (srcFormat == PIX_FMT_YUVA420P && dstFormat == PIX_FMT_YUV420P) || (srcFormat == PIX_FMT_YUV420P && dstFormat == PIX_FMT_YUVA420P) || - (isPlanarYUV(srcFormat) && isGray(dstFormat)) || - (isPlanarYUV(dstFormat) && isGray(srcFormat)) || - (isGray(dstFormat) && isGray(srcFormat)) || + (isPlanarYUV(srcFormat) && isPlanarGray(dstFormat)) || + (isPlanarYUV(dstFormat) && isPlanarGray(srcFormat)) || + (isPlanarGray(dstFormat) && isPlanarGray(srcFormat)) || (isPlanarYUV(srcFormat) && isPlanarYUV(dstFormat) && c->chrDstHSubSample == c->chrSrcHSubSample && c->chrDstVSubSample == c->chrSrcVSubSample && @@ -988,7 +1072,7 @@ static void reset_ptr(const uint8_t *src[], int format) } } -static int check_image_pointers(uint8_t *data[4], enum PixelFormat pix_fmt, +static int check_image_pointers(const uint8_t * const data[4], enum PixelFormat pix_fmt, const int linesizes[4]) { const AVPixFmtDescriptor *desc = 
&av_pix_fmt_descriptors[pix_fmt]; @@ -1013,9 +1097,10 @@ int attribute_align_arg sws_scale(struct SwsContext *c, int srcSliceH, uint8_t *const dst[], const int dstStride[]) { - int i; + int i, ret; const uint8_t *src2[4] = { srcSlice[0], srcSlice[1], srcSlice[2], srcSlice[3] }; uint8_t *dst2[4] = { dst[0], dst[1], dst[2], dst[3] }; + uint8_t *rgb0_tmp = NULL; // do not mess up sliceDir if we have a "trailing" 0-size slice if (srcSliceH == 0) @@ -1025,7 +1110,7 @@ int attribute_align_arg sws_scale(struct SwsContext *c, av_log(c, AV_LOG_ERROR, "bad src image pointers\n"); return 0; } - if (!check_image_pointers(dst, c->dstFormat, dstStride)) { + if (!check_image_pointers((const uint8_t* const*)dst, c->dstFormat, dstStride)) { av_log(c, AV_LOG_ERROR, "bad dst image pointers\n"); return 0; } @@ -1040,9 +1125,10 @@ int attribute_align_arg sws_scale(struct SwsContext *c, if (usePal(c->srcFormat)) { for (i = 0; i < 256; i++) { - int p, r, g, b, y, u, v; + int p, r, g, b, y, u, v, a = 0xff; if (c->srcFormat == PIX_FMT_PAL8) { p = ((const uint32_t *)(srcSlice[1]))[i]; + a = (p >> 24) & 0xFF; r = (p >> 16) & 0xFF; g = (p >> 8) & 0xFF; b = p & 0xFF; @@ -1058,11 +1144,10 @@ int attribute_align_arg sws_scale(struct SwsContext *c, r = ( i >> 3 ) * 255; g = ((i >> 1) & 3) * 85; b = ( i & 1) * 255; - } else if (c->srcFormat == PIX_FMT_GRAY8 || - c->srcFormat == PIX_FMT_Y400A) { + } else if (c->srcFormat == PIX_FMT_GRAY8 || c->srcFormat == PIX_FMT_GRAY8A) { r = g = b = i; } else { - assert(c->srcFormat == PIX_FMT_BGR4_BYTE); + av_assert1(c->srcFormat == PIX_FMT_BGR4_BYTE); b = ( i >> 3 ) * 255; g = ((i >> 1) & 3) * 85; r = ( i & 1) * 255; @@ -1070,37 +1155,51 @@ int attribute_align_arg sws_scale(struct SwsContext *c, y = av_clip_uint8((RY * r + GY * g + BY * b + ( 33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT); u = av_clip_uint8((RU * r + GU * g + BU * b + (257 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT); v = av_clip_uint8((RV * r + GV * g + BV * b + (257 << (RGB2YUV_SHIFT - 1))) >> 
RGB2YUV_SHIFT); - c->pal_yuv[i] = y + (u << 8) + (v << 16); + c->pal_yuv[i]= y + (u<<8) + (v<<16) + (a<<24); switch (c->dstFormat) { case PIX_FMT_BGR32: #if !HAVE_BIGENDIAN case PIX_FMT_RGB24: #endif - c->pal_rgb[i] = r + (g << 8) + (b << 16); + c->pal_rgb[i]= r + (g<<8) + (b<<16) + (a<<24); break; case PIX_FMT_BGR32_1: #if HAVE_BIGENDIAN case PIX_FMT_BGR24: #endif - c->pal_rgb[i] = (r + (g << 8) + (b << 16)) << 8; + c->pal_rgb[i]= a + (r<<8) + (g<<16) + (b<<24); break; case PIX_FMT_RGB32_1: #if HAVE_BIGENDIAN case PIX_FMT_RGB24: #endif - c->pal_rgb[i] = (b + (g << 8) + (r << 16)) << 8; + c->pal_rgb[i]= a + (b<<8) + (g<<16) + (r<<24); break; case PIX_FMT_RGB32: #if !HAVE_BIGENDIAN case PIX_FMT_BGR24: #endif default: - c->pal_rgb[i] = b + (g << 8) + (r << 16); + c->pal_rgb[i]= b + (g<<8) + (r<<16) + (a<<24); } } } + if (c->src0Alpha && !c->dst0Alpha && isALPHA(c->dstFormat)) { + uint8_t *base; + int x,y; + rgb0_tmp = av_malloc(FFABS(srcStride[0]) * srcSliceH + 32); + base = srcStride[0] < 0 ? 
rgb0_tmp - srcStride[0] * (srcSliceH-1) : rgb0_tmp; + for (y=0; y<srcSliceH; y++){ + memcpy(base + srcStride[0]*y, src2[0] + srcStride[0]*y, 4*c->srcW); + for (x=c->src0Alpha-1; x<4*c->srcW; x+=4) { + base[ srcStride[0]*y + x] = 0xFF; + } + } + src2[0] = base; + } + // copy strides, so they can safely be modified if (c->sliceDir == 1) { // slices go from top to bottom @@ -1110,13 +1209,13 @@ int attribute_align_arg sws_scale(struct SwsContext *c, dstStride[3] }; reset_ptr(src2, c->srcFormat); - reset_ptr((const uint8_t **) dst2, c->dstFormat); + reset_ptr((void*)dst2, c->dstFormat); /* reset slice direction at end of frame */ if (srcSliceY + srcSliceH == c->srcH) c->sliceDir = 0; - return c->swScale(c, src2, srcStride2, srcSliceY, srcSliceH, dst2, + ret = c->swScale(c, src2, srcStride2, srcSliceY, srcSliceH, dst2, dstStride2); } else { // slices go from bottom to top => we flip the image internally @@ -1136,15 +1235,18 @@ int attribute_align_arg sws_scale(struct SwsContext *c, dst2[3] += ( c->dstH - 1) * dstStride[3]; reset_ptr(src2, c->srcFormat); - reset_ptr((const uint8_t **) dst2, c->dstFormat); + reset_ptr((void*)dst2, c->dstFormat); /* reset slice direction at end of frame */ if (!srcSliceY) c->sliceDir = 0; - return c->swScale(c, src2, srcStride2, c->srcH-srcSliceY-srcSliceH, + ret = c->swScale(c, src2, srcStride2, c->srcH-srcSliceY-srcSliceH, srcSliceH, dst2, dstStride2); } + + av_free(rgb0_tmp); + return ret; } /* Convert the palette to the same packed 32-bit format as the palette */ diff --git a/libswscale/utils.c b/libswscale/utils.c index f890b5cee1..1e3b718232 100644 --- a/libswscale/utils.c +++ b/libswscale/utils.c @@ -1,27 +1,27 @@ /* * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at> * - * This file is part of Libav. + * This file is part of FFmpeg. 
* - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #include "config.h" #define _SVID_SOURCE // needed for MAP_ANONYMOUS -#include <assert.h> +#define _DARWIN_C_SOURCE // needed for MAP_ANON #include <inttypes.h> #include <math.h> #include <stdio.h> @@ -38,6 +38,7 @@ #endif #include "libavutil/attributes.h" +#include "libavutil/avassert.h" #include "libavutil/avutil.h" #include "libavutil/bswap.h" #include "libavutil/cpu.h" @@ -52,18 +53,19 @@ unsigned swscale_version(void) { + av_assert0(LIBSWSCALE_VERSION_MICRO >= 100); return LIBSWSCALE_VERSION_INT; } const char *swscale_configuration(void) { - return LIBAV_CONFIGURATION; + return FFMPEG_CONFIGURATION; } const char *swscale_license(void) { #define LICENSE_PREFIX "libswscale license: " - return LICENSE_PREFIX LIBAV_LICENSE + sizeof(LICENSE_PREFIX) - 1; + return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1; } #define RET 0xC3 // near return opcode for x86 @@ -102,13 +104,21 @@ static const FormatEntry format_entries[PIX_FMT_NB] = { [PIX_FMT_RGBA] = { 1, 1 }, [PIX_FMT_ABGR] = { 1, 1 }, [PIX_FMT_BGRA] = { 1, 1 }, + [PIX_FMT_0RGB] = { 1, 1 }, + [PIX_FMT_RGB0] = { 1, 1 
}, + [PIX_FMT_0BGR] = { 1, 1 }, + [PIX_FMT_BGR0] = { 1, 1 }, [PIX_FMT_GRAY16BE] = { 1, 1 }, [PIX_FMT_GRAY16LE] = { 1, 1 }, [PIX_FMT_YUV440P] = { 1, 1 }, [PIX_FMT_YUVJ440P] = { 1, 1 }, [PIX_FMT_YUVA420P] = { 1, 1 }, + [PIX_FMT_YUVA422P] = { 1, 1 }, + [PIX_FMT_YUVA444P] = { 1, 1 }, [PIX_FMT_RGB48BE] = { 1, 1 }, [PIX_FMT_RGB48LE] = { 1, 1 }, + [PIX_FMT_RGBA64BE] = { 1, 0 }, + [PIX_FMT_RGBA64LE] = { 1, 0 }, [PIX_FMT_RGB565BE] = { 1, 1 }, [PIX_FMT_RGB565LE] = { 1, 1 }, [PIX_FMT_RGB555BE] = { 1, 1 }, @@ -130,23 +140,41 @@ static const FormatEntry format_entries[PIX_FMT_NB] = { [PIX_FMT_Y400A] = { 1, 0 }, [PIX_FMT_BGR48BE] = { 1, 1 }, [PIX_FMT_BGR48LE] = { 1, 1 }, + [PIX_FMT_BGRA64BE] = { 0, 0 }, + [PIX_FMT_BGRA64LE] = { 0, 0 }, [PIX_FMT_YUV420P9BE] = { 1, 1 }, [PIX_FMT_YUV420P9LE] = { 1, 1 }, [PIX_FMT_YUV420P10BE] = { 1, 1 }, [PIX_FMT_YUV420P10LE] = { 1, 1 }, + [PIX_FMT_YUV420P12BE] = { 1, 1 }, + [PIX_FMT_YUV420P12LE] = { 1, 1 }, + [PIX_FMT_YUV420P14BE] = { 1, 1 }, + [PIX_FMT_YUV420P14LE] = { 1, 1 }, [PIX_FMT_YUV422P9BE] = { 1, 1 }, [PIX_FMT_YUV422P9LE] = { 1, 1 }, [PIX_FMT_YUV422P10BE] = { 1, 1 }, [PIX_FMT_YUV422P10LE] = { 1, 1 }, + [PIX_FMT_YUV422P12BE] = { 1, 1 }, + [PIX_FMT_YUV422P12LE] = { 1, 1 }, + [PIX_FMT_YUV422P14BE] = { 1, 1 }, + [PIX_FMT_YUV422P14LE] = { 1, 1 }, [PIX_FMT_YUV444P9BE] = { 1, 1 }, [PIX_FMT_YUV444P9LE] = { 1, 1 }, [PIX_FMT_YUV444P10BE] = { 1, 1 }, [PIX_FMT_YUV444P10LE] = { 1, 1 }, + [PIX_FMT_YUV444P12BE] = { 1, 1 }, + [PIX_FMT_YUV444P12LE] = { 1, 1 }, + [PIX_FMT_YUV444P14BE] = { 1, 1 }, + [PIX_FMT_YUV444P14LE] = { 1, 1 }, [PIX_FMT_GBRP] = { 1, 0 }, [PIX_FMT_GBRP9LE] = { 1, 0 }, [PIX_FMT_GBRP9BE] = { 1, 0 }, [PIX_FMT_GBRP10LE] = { 1, 0 }, [PIX_FMT_GBRP10BE] = { 1, 0 }, + [PIX_FMT_GBRP12LE] = { 1, 0 }, + [PIX_FMT_GBRP12BE] = { 1, 0 }, + [PIX_FMT_GBRP14LE] = { 1, 0 }, + [PIX_FMT_GBRP14BE] = { 1, 0 }, [PIX_FMT_GBRP16LE] = { 1, 0 }, [PIX_FMT_GBRP16BE] = { 1, 0 }, }; @@ -165,13 +193,12 @@ int sws_isSupportedOutput(enum PixelFormat pix_fmt) extern const 
int32_t ff_yuv2rgb_coeffs[8][4]; +#if FF_API_SWS_FORMAT_NAME const char *sws_format_name(enum PixelFormat format) { - if ((unsigned)format < PIX_FMT_NB && av_pix_fmt_descriptors[format].name) - return av_pix_fmt_descriptors[format].name; - else - return "Unknown format"; + return av_get_pix_fmt_name(format); } +#endif static double getSplineCoeff(double a, double b, double c, double d, double dist) @@ -190,7 +217,7 @@ static int initFilter(int16_t **outFilter, int32_t **filterPos, int *outFilterSize, int xInc, int srcW, int dstW, int filterAlign, int one, int flags, int cpu_flags, SwsVector *srcFilter, SwsVector *dstFilter, - double param[2], int is_horizontal) + double param[2]) { int i; int filterSize; @@ -218,7 +245,7 @@ static int initFilter(int16_t **outFilter, int32_t **filterPos, } } else if (flags & SWS_POINT) { // lame looking point sampling mode int i; - int xDstInSrc; + int64_t xDstInSrc; filterSize = 1; FF_ALLOC_OR_GOTO(NULL, filter, dstW * sizeof(*filter) * filterSize, fail); @@ -234,7 +261,7 @@ static int initFilter(int16_t **outFilter, int32_t **filterPos, } else if ((xInc <= (1 << 16) && (flags & SWS_AREA)) || (flags & SWS_FAST_BILINEAR)) { // bilinear upscale int i; - int xDstInSrc; + int64_t xDstInSrc; filterSize = 2; FF_ALLOC_OR_GOTO(NULL, filter, dstW * sizeof(*filter) * filterSize, fail); @@ -247,8 +274,7 @@ static int initFilter(int16_t **outFilter, int32_t **filterPos, (*filterPos)[i] = xx; // bilinear upscale / linear interpolate / area averaging for (j = 0; j < filterSize; j++) { - int64_t coeff = fone - FFABS((xx << 16) - xDstInSrc) * - (fone >> 16); + int64_t coeff= fone - FFABS(((int64_t)xx<<16) - xDstInSrc)*(fone>>16); if (coeff < 0) coeff = 0; filter[i * filterSize + j] = coeff; @@ -277,8 +303,7 @@ static int initFilter(int16_t **outFilter, int32_t **filterPos, else if (flags & SWS_BILINEAR) sizeFactor = 2; else { - sizeFactor = 0; // GCC warning killer - assert(0); + av_assert0(0); } if (xInc <= 1 << 16) @@ -377,8 +402,7 @@ static int 
initFilter(int16_t **outFilter, int32_t **filterPos, double p = -2.196152422706632; coeff = getSplineCoeff(1.0, 0.0, p, -p - 1.0, floatd) * fone; } else { - coeff = 0.0; // GCC warning killer - assert(0); + av_assert0(0); } filter[i * filterSize + j] = coeff; @@ -391,13 +415,13 @@ static int initFilter(int16_t **outFilter, int32_t **filterPos, /* apply src & dst Filter to filter -> filter2 * av_free(filter); */ - assert(filterSize > 0); + av_assert0(filterSize > 0); filter2Size = filterSize; if (srcFilter) filter2Size += srcFilter->length - 1; if (dstFilter) filter2Size += dstFilter->length - 1; - assert(filter2Size > 0); + av_assert0(filter2Size > 0); FF_ALLOCZ_OR_GOTO(NULL, filter2, filter2Size * dstW * sizeof(*filter2), fail); for (i = 0; i < dstW; i++) { @@ -479,9 +503,9 @@ static int initFilter(int16_t **outFilter, int32_t **filterPos, filterAlign = 1; } - assert(minFilterSize > 0); + av_assert0(minFilterSize > 0); filterSize = (minFilterSize + (filterAlign - 1)) & (~(filterAlign - 1)); - assert(filterSize > 0); + av_assert0(filterSize > 0); filter = av_malloc(filterSize * dstW * sizeof(*filter)); if (filterSize >= MAX_FILTER_SIZE * 16 / ((flags & SWS_ACCURATE_RND) ? 
APCK_SIZE : 16) || !filter) @@ -509,29 +533,27 @@ static int initFilter(int16_t **outFilter, int32_t **filterPos, // FIXME try to align filterPos if possible // fix borders - if (is_horizontal) { - for (i = 0; i < dstW; i++) { - int j; - if ((*filterPos)[i] < 0) { - // move filter coefficients left to compensate for filterPos - for (j = 1; j < filterSize; j++) { - int left = FFMAX(j + (*filterPos)[i], 0); - filter[i * filterSize + left] += filter[i * filterSize + j]; - filter[i * filterSize + j] = 0; - } - (*filterPos)[i] = 0; + for (i = 0; i < dstW; i++) { + int j; + if ((*filterPos)[i] < 0) { + // move filter coefficients left to compensate for filterPos + for (j = 1; j < filterSize; j++) { + int left = FFMAX(j + (*filterPos)[i], 0); + filter[i * filterSize + left] += filter[i * filterSize + j]; + filter[i * filterSize + j] = 0; } + (*filterPos)[i]= 0; + } - if ((*filterPos)[i] + filterSize > srcW) { - int shift = (*filterPos)[i] + filterSize - srcW; - // move filter coefficients right to compensate for filterPos - for (j = filterSize - 2; j >= 0; j--) { - int right = FFMIN(j + shift, filterSize - 1); - filter[i * filterSize + right] += filter[i * filterSize + j]; - filter[i * filterSize + j] = 0; - } - (*filterPos)[i] = srcW - filterSize; + if ((*filterPos)[i] + filterSize > srcW) { + int shift = (*filterPos)[i] + filterSize - srcW; + // move filter coefficients right to compensate for filterPos + for (j = filterSize - 2; j >= 0; j--) { + int right = FFMIN(j + shift, filterSize - 1); + filter[i * filterSize + right] += filter[i * filterSize + j]; + filter[i * filterSize + j] = 0; } + (*filterPos)[i]= srcW - filterSize; } } @@ -780,7 +802,7 @@ int sws_getColorspaceDetails(struct SwsContext *c, int **inv_table, int *srcRange, int **table, int *dstRange, int *brightness, int *contrast, int *saturation) { - if (isYUV(c->dstFormat) || isGray(c->dstFormat)) + if (!c || isYUV(c->dstFormat) || isGray(c->dstFormat)) return -1; *inv_table = c->srcColorspaceTable; @@ 
-814,6 +836,17 @@ static int handle_jpeg(enum PixelFormat *format) } } +static int handle_0alpha(enum PixelFormat *format) +{ + switch (*format) { + case PIX_FMT_0BGR : *format = PIX_FMT_ABGR ; return 1; + case PIX_FMT_BGR0 : *format = PIX_FMT_BGRA ; return 4; + case PIX_FMT_0RGB : *format = PIX_FMT_ARGB ; return 1; + case PIX_FMT_RGB0 : *format = PIX_FMT_RGBA ; return 4; + default: return 0; + } +} + SwsContext *sws_alloc_context(void) { SwsContext *c = av_mallocz(sizeof(SwsContext)); @@ -827,7 +860,7 @@ SwsContext *sws_alloc_context(void) av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) { - int i; + int i, j; int usesVFilter, usesHFilter; int unscaled; SwsFilter dummyFilter = { NULL, NULL, NULL, NULL }; @@ -835,8 +868,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, int srcH = c->srcH; int dstW = c->dstW; int dstH = c->dstH; - int dst_stride = FFALIGN(dstW * sizeof(int16_t) + 16, 16); - int dst_stride_px = dst_stride >> 1; + int dst_stride = FFALIGN(dstW * sizeof(int16_t) + 66, 16); int flags, cpu_flags; enum PixelFormat srcFormat = c->srcFormat; enum PixelFormat dstFormat = c->dstFormat; @@ -849,14 +881,25 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, unscaled = (srcW == dstW && srcH == dstH); + handle_jpeg(&srcFormat); + handle_jpeg(&dstFormat); + handle_0alpha(&srcFormat); + handle_0alpha(&dstFormat); + + if(srcFormat!=c->srcFormat || dstFormat!=c->dstFormat){ + av_log(c, AV_LOG_WARNING, "deprecated pixel format used, make sure you did set range correctly\n"); + c->srcFormat= srcFormat; + c->dstFormat= dstFormat; + } + if (!sws_isSupportedInput(srcFormat)) { av_log(c, AV_LOG_ERROR, "%s is not supported as input pixel format\n", - sws_format_name(srcFormat)); + av_get_pix_fmt_name(srcFormat)); return AVERROR(EINVAL); } if (!sws_isSupportedOutput(dstFormat)) { av_log(c, AV_LOG_ERROR, "%s is not supported as output pixel format\n", - sws_format_name(dstFormat)); + 
av_get_pix_fmt_name(dstFormat)); return AVERROR(EINVAL); } @@ -872,8 +915,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SWS_SPLINE | SWS_BICUBLIN); if (!i || (i & (i - 1))) { - av_log(c, AV_LOG_ERROR, - "Exactly one scaler algorithm must be chosen\n"); + av_log(c, AV_LOG_ERROR, "Exactly one scaler algorithm must be chosen, got %X\n", i); return AVERROR(EINVAL); } /* sanity check */ @@ -908,6 +950,14 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, getSubSampleFactors(&c->chrSrcHSubSample, &c->chrSrcVSubSample, srcFormat); getSubSampleFactors(&c->chrDstHSubSample, &c->chrDstVSubSample, dstFormat); + + if (isAnyRGB(dstFormat) && !(flags&SWS_FULL_CHR_H_INT)) { + if (dstW&1) { + av_log(c, AV_LOG_DEBUG, "Forcing full internal H chroma due to odd output size\n"); + flags |= SWS_FULL_CHR_H_INT; + c->flags = flags; + } + } /* reuse chroma for 2 pixels RGB/BGR unless user wants full * chroma interpolation */ if (flags & SWS_FULL_CHR_H_INT && @@ -918,9 +968,9 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, dstFormat != PIX_FMT_ABGR && dstFormat != PIX_FMT_RGB24 && dstFormat != PIX_FMT_BGR24) { - av_log(c, AV_LOG_ERROR, + av_log(c, AV_LOG_WARNING, "full chroma interpolation for destination format '%s' not yet implemented\n", - sws_format_name(dstFormat)); + av_get_pix_fmt_name(dstFormat)); flags &= ~SWS_FULL_CHR_H_INT; c->flags = flags; } @@ -948,6 +998,8 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, c->chrDstW = -((-dstW) >> c->chrDstHSubSample); c->chrDstH = -((-dstH) >> c->chrDstVSubSample); + FF_ALLOC_OR_GOTO(c, c->formatConvBuffer, FFALIGN(srcW*2+78, 16) * 2, fail); + /* unscaled special cases */ if (unscaled && !usesHFilter && !usesVFilter && (c->srcRange == c->dstRange || isAnyRGB(dstFormat))) { @@ -957,7 +1009,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, if (flags & SWS_PRINT_INFO) av_log(c, AV_LOG_INFO, "using unscaled %s -> %s special converter\n", - 
sws_format_name(srcFormat), sws_format_name(dstFormat)); + av_get_pix_fmt_name(srcFormat), av_get_pix_fmt_name(dstFormat)); return 0; } } @@ -968,13 +1020,12 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, c->dstBpc = 1 + av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1; if (c->dstBpc < 8) c->dstBpc = 8; + if (isAnyRGB(srcFormat) || srcFormat == PIX_FMT_PAL8) + c->srcBpc = 16; if (c->dstBpc == 16) dst_stride <<= 1; - FF_ALLOC_OR_GOTO(c, c->formatConvBuffer, - (FFALIGN(srcW, 16) * 2 * FFALIGN(c->srcBpc, 8) >> 3) + 16, - fail); if (HAVE_MMXEXT && HAVE_INLINE_ASM && cpu_flags & AV_CPU_FLAG_MMXEXT && - c->srcBpc == 8 && c->dstBpc <= 10) { + c->srcBpc == 8 && c->dstBpc <= 14) { c->canMMX2BeUsed = (dstW >= srcW && (dstW & 31) == 0 && (srcW & 15) == 0) ? 1 : 0; if (!c->canMMX2BeUsed && dstW >= srcW && (srcW & 15) == 0 @@ -983,8 +1034,8 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, av_log(c, AV_LOG_INFO, "output width is not a multiple of 32 -> no MMX2 scaler\n"); } - if (usesHFilter) - c->canMMX2BeUsed = 0; + if (usesHFilter || isNBPS(c->srcFormat) || is16BPS(c->srcFormat) || isAnyRGB(c->srcFormat)) + c->canMMX2BeUsed=0; } else c->canMMX2BeUsed = 0; @@ -1004,7 +1055,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, c->chrXInc += 20; } // we don't use the x86 asm scaler if MMX is available - else if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) { + else if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX && c->dstBpc <= 14) { c->lumXInc = ((int64_t)(srcW - 2) << 16) / (dstW - 2) - 20; c->chrXInc = ((int64_t)(c->chrSrcW - 2) << 16) / (c->chrDstW - 2) - 20; } @@ -1031,17 +1082,25 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, c->chrMmx2FilterCode = av_malloc(c->chrMmx2FilterCodeSize); #endif +#ifdef MAP_ANONYMOUS + if (c->lumMmx2FilterCode == MAP_FAILED || c->chrMmx2FilterCode == MAP_FAILED) +#else if (!c->lumMmx2FilterCode || !c->chrMmx2FilterCode) +#endif + { + av_log(c, AV_LOG_ERROR, "Failed 
to allocate MMX2FilterCode\n"); return AVERROR(ENOMEM); + } + FF_ALLOCZ_OR_GOTO(c, c->hLumFilter, (dstW / 8 + 8) * sizeof(int16_t), fail); FF_ALLOCZ_OR_GOTO(c, c->hChrFilter, (c->chrDstW / 4 + 8) * sizeof(int16_t), fail); FF_ALLOCZ_OR_GOTO(c, c->hLumFilterPos, (dstW / 2 / 8 + 8) * sizeof(int32_t), fail); FF_ALLOCZ_OR_GOTO(c, c->hChrFilterPos, (c->chrDstW / 2 / 4 + 8) * sizeof(int32_t), fail); - initMMX2HScaler(dstW, c->lumXInc, c->lumMmx2FilterCode, - c->hLumFilter, c->hLumFilterPos, 8); + initMMX2HScaler( dstW, c->lumXInc, c->lumMmx2FilterCode, + c->hLumFilter, (uint32_t*)c->hLumFilterPos, 8); initMMX2HScaler(c->chrDstW, c->chrXInc, c->chrMmx2FilterCode, - c->hChrFilter, c->hChrFilterPos, 4); + c->hChrFilter, (uint32_t*)c->hChrFilterPos, 4); #ifdef MAP_ANONYMOUS mprotect(c->lumMmx2FilterCode, c->lumMmx2FilterCodeSize, PROT_EXEC | PROT_READ); @@ -1060,14 +1119,14 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, srcW, dstW, filterAlign, 1 << 14, (flags & SWS_BICUBLIN) ? (flags | SWS_BICUBIC) : flags, cpu_flags, srcFilter->lumH, dstFilter->lumH, - c->param, 1) < 0) + c->param) < 0) goto fail; if (initFilter(&c->hChrFilter, &c->hChrFilterPos, &c->hChrFilterSize, c->chrXInc, c->chrSrcW, c->chrDstW, filterAlign, 1 << 14, (flags & SWS_BICUBLIN) ? (flags | SWS_BILINEAR) : flags, cpu_flags, srcFilter->chrH, dstFilter->chrH, - c->param, 1) < 0) + c->param) < 0) goto fail; } } // initialize horizontal stuff @@ -1083,14 +1142,14 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, c->lumYInc, srcH, dstH, filterAlign, (1 << 12), (flags & SWS_BICUBLIN) ? (flags | SWS_BICUBIC) : flags, cpu_flags, srcFilter->lumV, dstFilter->lumV, - c->param, 0) < 0) + c->param) < 0) goto fail; if (initFilter(&c->vChrFilter, &c->vChrFilterPos, &c->vChrFilterSize, c->chrYInc, c->chrSrcH, c->chrDstH, filterAlign, (1 << 12), (flags & SWS_BICUBLIN) ? 
(flags | SWS_BILINEAR) : flags, cpu_flags, srcFilter->chrV, dstFilter->chrV, - c->param, 0) < 0) + c->param) < 0) goto fail; #if HAVE_ALTIVEC @@ -1147,9 +1206,9 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, dst_stride + 16, fail); c->lumPixBuf[i] = c->lumPixBuf[i + c->vLumBufSize]; } - // 64 / (c->dstBpc & ~7) is the same as 16 / sizeof(scaling_intermediate) - c->uv_off_px = dst_stride_px + 64 / (c->dstBpc & ~7); - c->uv_off_byte = dst_stride + 16; + // 64 / c->scalingBpp is the same as 16 / sizeof(scaling_intermediate) + c->uv_off = (dst_stride>>1) + 64 / (c->dstBpc &~ 7); + c->uv_offx2 = dst_stride + 16; for (i = 0; i < c->vChrBufSize; i++) { FF_ALLOC_OR_GOTO(c, c->chrUPixBuf[i + c->vChrBufSize], dst_stride * 2 + 32, fail); @@ -1166,9 +1225,15 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, // try to avoid drawing green stuff between the right end and the stride end for (i = 0; i < c->vChrBufSize; i++) - memset(c->chrUPixBuf[i], 64, dst_stride * 2 + 1); + if(av_pix_fmt_descriptors[c->dstFormat].comp[0].depth_minus1 == 15){ + av_assert0(c->dstBpc > 14); + for(j=0; j<dst_stride/2+1; j++) + ((int32_t*)(c->chrUPixBuf[i]))[j] = 1<<18; + } else + for(j=0; j<dst_stride+1; j++) + ((int16_t*)(c->chrUPixBuf[i]))[j] = 1<<14; - assert(c->chrDstH <= dstH); + av_assert0(c->chrDstH <= dstH); if (flags & SWS_PRINT_INFO) { if (flags & SWS_FAST_BILINEAR) @@ -1197,7 +1262,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, av_log(c, AV_LOG_INFO, "ehh flags invalid?! 
"); av_log(c, AV_LOG_INFO, "from %s to %s%s ", - sws_format_name(srcFormat), + av_get_pix_fmt_name(srcFormat), #ifdef DITHER1XBPP dstFormat == PIX_FMT_BGR555 || dstFormat == PIX_FMT_BGR565 || dstFormat == PIX_FMT_RGB444BE || dstFormat == PIX_FMT_RGB444LE || @@ -1206,7 +1271,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, #else "", #endif - sws_format_name(dstFormat)); + av_get_pix_fmt_name(dstFormat)); if (HAVE_MMXEXT && cpu_flags & AV_CPU_FLAG_MMXEXT) av_log(c, AV_LOG_INFO, "using MMX2\n"); @@ -1253,6 +1318,8 @@ SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, c->dstH = dstH; c->srcRange = handle_jpeg(&srcFormat); c->dstRange = handle_jpeg(&dstFormat); + c->src0Alpha = handle_0alpha(&srcFormat); + c->dst0Alpha = handle_0alpha(&dstFormat); c->srcFormat = srcFormat; c->dstFormat = dstFormat; @@ -1641,7 +1708,7 @@ void sws_freeContext(SwsContext *c) #endif /* HAVE_MMX */ av_freep(&c->yuvTable); - av_free(c->formatConvBuffer); + av_freep(&c->formatConvBuffer); av_free(c); } @@ -1680,10 +1747,12 @@ struct SwsContext *sws_getCachedContext(struct SwsContext *context, int srcW, context->srcW = srcW; context->srcH = srcH; context->srcRange = handle_jpeg(&srcFormat); + context->src0Alpha = handle_0alpha(&srcFormat); context->srcFormat = srcFormat; context->dstW = dstW; context->dstH = dstH; context->dstRange = handle_jpeg(&dstFormat); + context->dst0Alpha = handle_0alpha(&dstFormat); context->dstFormat = dstFormat; context->flags = flags; context->param[0] = param[0]; diff --git a/libswscale/version.h b/libswscale/version.h index acbdf6b012..37dcc96572 100644 --- a/libswscale/version.h +++ b/libswscale/version.h @@ -1,18 +1,18 @@ /* - * This file is part of Libav. + * This file is part of FFmpeg. 
* - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -28,7 +28,7 @@ #define LIBSWSCALE_VERSION_MAJOR 2 #define LIBSWSCALE_VERSION_MINOR 1 -#define LIBSWSCALE_VERSION_MICRO 1 +#define LIBSWSCALE_VERSION_MICRO 101 #define LIBSWSCALE_VERSION_INT AV_VERSION_INT(LIBSWSCALE_VERSION_MAJOR, \ LIBSWSCALE_VERSION_MINOR, \ @@ -52,5 +52,8 @@ #ifndef FF_API_SWS_CPU_CAPS #define FF_API_SWS_CPU_CAPS (LIBSWSCALE_VERSION_MAJOR < 3) #endif +#ifndef FF_API_SWS_FORMAT_NAME +#define FF_API_SWS_FORMAT_NAME (LIBSWSCALE_VERSION_MAJOR < 3) +#endif #endif /* SWSCALE_VERSION_H */ diff --git a/libswscale/x86/Makefile b/libswscale/x86/Makefile index 5416d48a4c..7d219b458b 100644 --- a/libswscale/x86/Makefile +++ b/libswscale/x86/Makefile @@ -1,3 +1,5 @@ +$(SUBDIR)x86/swscale_mmx.o: CFLAGS += $(NOREDZONE_FLAGS) + OBJS-$(CONFIG_XMM_CLOBBER_TEST) += x86/w64xmmtest.o MMX-OBJS += x86/rgb2rgb.o \ diff --git a/libswscale/x86/input.asm b/libswscale/x86/input.asm index 66d88458bb..c4174ee65e 100644 --- a/libswscale/x86/input.asm +++ b/libswscale/x86/input.asm @@ -36,8 +36,8 @@ SECTION_RODATA %define GV 0xD0E3 %define BV 0xF6E4 -rgb_Yrnd: times 4 dd 0x84000 ; 16.5 << 
15 -rgb_UVrnd: times 4 dd 0x404000 ; 128.5 << 15 +rgb_Yrnd: times 4 dd 0x80100 ; 16.5 << 15 +rgb_UVrnd: times 4 dd 0x400100 ; 128.5 << 15 bgr_Ycoeff_12x4: times 2 dw BY, GY, 0, BY bgr_Ycoeff_3x56: times 2 dw RY, 0, GY, RY rgb_Ycoeff_12x4: times 2 dw RY, GY, 0, RY @@ -83,7 +83,7 @@ SECTION .text ; %1 = nr. of XMM registers ; %2 = rgb or bgr %macro RGB24_TO_Y_FN 2-3 -cglobal %2 %+ 24ToY, 3, 3, %1, dst, src, w +cglobal %2 %+ 24ToY, 6, 6, %1, dst, src, u1, u2, w, u3 %if mmsize == 8 mova m5, [%2_Ycoeff_12x4] mova m6, [%2_Ycoeff_3x56] @@ -115,6 +115,7 @@ cglobal %2 %+ 24ToY, 3, 3, %1, dst, src, w %if ARCH_X86_64 movsxd wq, wd %endif + add wq, wq add dstq, wq neg wq %if notcpuflag(ssse3) @@ -158,12 +159,11 @@ cglobal %2 %+ 24ToY, 3, 3, %1, dst, src, w paddd m2, m3 ; (dword) { Bx*BY + Gx*GY + Rx*RY }[4-7] paddd m0, m4 ; += rgb_Yrnd, i.e. (dword) { Y[0-3] } paddd m2, m4 ; += rgb_Yrnd, i.e. (dword) { Y[4-7] } - psrad m0, 15 - psrad m2, 15 + psrad m0, 9 + psrad m2, 9 packssdw m0, m2 ; (word) { Y[0-7] } - packuswb m0, m0 ; (byte) { Y[0-7] } - movh [dstq+wq], m0 - add wq, mmsize / 2 + mova [dstq+wq], m0 + add wq, mmsize jl .loop REP_RET %endif ; (ARCH_X86_64 && %0 == 3) || mmsize == 8 @@ -172,7 +172,7 @@ cglobal %2 %+ 24ToY, 3, 3, %1, dst, src, w ; %1 = nr. of XMM registers ; %2 = rgb or bgr %macro RGB24_TO_UV_FN 2-3 -cglobal %2 %+ 24ToUV, 3, 4, %1, dstU, dstV, src, w +cglobal %2 %+ 24ToUV, 7, 7, %1, dstU, dstV, u1, src, u2, w, u3 %if ARCH_X86_64 mova m8, [%2_Ucoeff_12x4] mova m9, [%2_Ucoeff_3x56] @@ -203,10 +203,11 @@ cglobal %2 %+ 24ToUV, 3, 4, %1, dstU, dstV, src, w %endif ; x86-32/64 %endif ; cpuflag(ssse3) %if ARCH_X86_64 - movsxd wq, dword r4m + movsxd wq, dword r5m %else ; x86-32 - mov wq, r4m + mov wq, r5m %endif + add wq, wq add dstUq, wq add dstVq, wq neg wq @@ -264,23 +265,20 @@ cglobal %2 %+ 24ToUV, 3, 4, %1, dstU, dstV, src, w paddd m2, m6 ; += rgb_UVrnd, i.e. (dword) { V[0-3] } paddd m1, m6 ; += rgb_UVrnd, i.e. (dword) { U[4-7] } paddd m4, m6 ; += rgb_UVrnd, i.e. 
(dword) { V[4-7] } - psrad m0, 15 - psrad m2, 15 - psrad m1, 15 - psrad m4, 15 + psrad m0, 9 + psrad m2, 9 + psrad m1, 9 + psrad m4, 9 packssdw m0, m1 ; (word) { U[0-7] } packssdw m2, m4 ; (word) { V[0-7] } %if mmsize == 8 - packuswb m0, m0 ; (byte) { U[0-3] } - packuswb m2, m2 ; (byte) { V[0-3] } - movh [dstUq+wq], m0 - movh [dstVq+wq], m2 + mova [dstUq+wq], m0 + mova [dstVq+wq], m2 %else ; mmsize == 16 - packuswb m0, m2 ; (byte) { U[0-7], V[0-7] } - movh [dstUq+wq], m0 - movhps [dstVq+wq], m0 + mova [dstUq+wq], m0 + mova [dstVq+wq], m2 %endif ; mmsize == 8/16 - add wq, mmsize / 2 + add wq, mmsize jl .loop REP_RET %endif ; ARCH_X86_64 && %0 == 3 @@ -306,13 +304,15 @@ RGB24_FUNCS 10, 12 INIT_XMM ssse3 RGB24_FUNCS 11, 13 +%if HAVE_AVX INIT_XMM avx RGB24_FUNCS 11, 13 +%endif ; %1 = nr. of XMM registers ; %2-5 = rgba, bgra, argb or abgr (in individual characters) %macro RGB32_TO_Y_FN 5-6 -cglobal %2%3%4%5 %+ ToY, 3, 3, %1, dst, src, w +cglobal %2%3%4%5 %+ ToY, 6, 6, %1, dst, src, u1, u2, w, u3 mova m5, [rgba_Ycoeff_%2%4] mova m6, [rgba_Ycoeff_%3%5] %if %0 == 6 @@ -323,6 +323,7 @@ cglobal %2%3%4%5 %+ ToY, 3, 3, %1, dst, src, w movsxd wq, wd %endif lea srcq, [srcq+wq*4] + add wq, wq add dstq, wq neg wq mova m4, [rgb_Yrnd] @@ -330,8 +331,8 @@ cglobal %2%3%4%5 %+ ToY, 3, 3, %1, dst, src, w psrlw m7, 8 ; (word) { 0x00ff } x4 .loop: ; FIXME check alignment and use mova - movu m0, [srcq+wq*4+0] ; (byte) { Bx, Gx, Rx, xx }[0-3] - movu m2, [srcq+wq*4+mmsize] ; (byte) { Bx, Gx, Rx, xx }[4-7] + movu m0, [srcq+wq*2+0] ; (byte) { Bx, Gx, Rx, xx }[0-3] + movu m2, [srcq+wq*2+mmsize] ; (byte) { Bx, Gx, Rx, xx }[4-7] DEINTB 1, 0, 3, 2, 7 ; (word) { Gx, xx (m0/m2) or Bx, Rx (m1/m3) }[0-3]/[4-7] pmaddwd m1, m5 ; (dword) { Bx*BY + Rx*RY }[0-3] pmaddwd m0, m6 ; (dword) { Gx*GY }[0-3] @@ -341,12 +342,11 @@ cglobal %2%3%4%5 %+ ToY, 3, 3, %1, dst, src, w paddd m2, m4 ; += rgb_Yrnd paddd m0, m1 ; (dword) { Y[0-3] } paddd m2, m3 ; (dword) { Y[4-7] } - psrad m0, 15 - psrad m2, 15 + psrad m0, 9 
+ psrad m2, 9 packssdw m0, m2 ; (word) { Y[0-7] } - packuswb m0, m0 ; (byte) { Y[0-7] } - movh [dstq+wq], m0 - add wq, mmsize / 2 + mova [dstq+wq], m0 + add wq, mmsize jl .loop REP_RET %endif ; %0 == 3 @@ -355,7 +355,7 @@ cglobal %2%3%4%5 %+ ToY, 3, 3, %1, dst, src, w ; %1 = nr. of XMM registers ; %2-5 = rgba, bgra, argb or abgr (in individual characters) %macro RGB32_TO_UV_FN 5-6 -cglobal %2%3%4%5 %+ ToUV, 3, 4, %1, dstU, dstV, src, w +cglobal %2%3%4%5 %+ ToUV, 7, 7, %1, dstU, dstV, u1, src, u2, w, u3 %if ARCH_X86_64 mova m8, [rgba_Ucoeff_%2%4] mova m9, [rgba_Ucoeff_%3%5] @@ -376,21 +376,22 @@ cglobal %2%3%4%5 %+ ToUV, 3, 4, %1, dstU, dstV, src, w %else ; ARCH_X86_64 && %0 == 6 .body: %if ARCH_X86_64 - movsxd wq, dword r4m + movsxd wq, dword r5m %else ; x86-32 - mov wq, r4m + mov wq, r5m %endif + add wq, wq add dstUq, wq add dstVq, wq - lea srcq, [srcq+wq*4] + lea srcq, [srcq+wq*2] neg wq pcmpeqb m7, m7 psrlw m7, 8 ; (word) { 0x00ff } x4 mova m6, [rgb_UVrnd] .loop: ; FIXME check alignment and use mova - movu m0, [srcq+wq*4+0] ; (byte) { Bx, Gx, Rx, xx }[0-3] - movu m4, [srcq+wq*4+mmsize] ; (byte) { Bx, Gx, Rx, xx }[4-7] + movu m0, [srcq+wq*2+0] ; (byte) { Bx, Gx, Rx, xx }[0-3] + movu m4, [srcq+wq*2+mmsize] ; (byte) { Bx, Gx, Rx, xx }[4-7] DEINTB 1, 0, 5, 4, 7 ; (word) { Gx, xx (m0/m4) or Bx, Rx (m1/m5) }[0-3]/[4-7] pmaddwd m3, m1, coeffV1 ; (dword) { Bx*BV + Rx*RV }[0-3] pmaddwd m2, m0, coeffV2 ; (dword) { Gx*GV }[0-3] @@ -406,25 +407,22 @@ cglobal %2%3%4%5 %+ ToUV, 3, 4, %1, dstU, dstV, src, w pmaddwd m4, coeffU2 ; (dword) { Gx*GU }[4-7] paddd m3, m6 ; += rgb_UVrnd paddd m5, m6 ; += rgb_UVrnd - psrad m0, 15 + psrad m0, 9 paddd m1, m3 ; (dword) { V[4-7] } paddd m4, m5 ; (dword) { U[4-7] } - psrad m2, 15 - psrad m4, 15 - psrad m1, 15 + psrad m2, 9 + psrad m4, 9 + psrad m1, 9 packssdw m0, m4 ; (word) { U[0-7] } packssdw m2, m1 ; (word) { V[0-7] } %if mmsize == 8 - packuswb m0, m0 ; (byte) { U[0-7] } - packuswb m2, m2 ; (byte) { V[0-7] } - movh [dstUq+wq], m0 - movh 
[dstVq+wq], m2 + mova [dstUq+wq], m0 + mova [dstVq+wq], m2 %else ; mmsize == 16 - packuswb m0, m2 ; (byte) { U[0-7], V[0-7] } - movh [dstUq+wq], m0 - movhps [dstVq+wq], m0 + mova [dstUq+wq], m0 + mova [dstVq+wq], m2 %endif ; mmsize == 8/16 - add wq, mmsize / 2 + add wq, mmsize jl .loop REP_RET %endif ; ARCH_X86_64 && %0 == 3 @@ -452,8 +450,10 @@ RGB32_FUNCS 0, 0 INIT_XMM sse2 RGB32_FUNCS 8, 12 +%if HAVE_AVX INIT_XMM avx RGB32_FUNCS 8, 12 +%endif ;----------------------------------------------------------------------------- ; YUYV/UYVY/NV12/NV21 packed pixel shuffling. @@ -490,7 +490,7 @@ RGB32_FUNCS 8, 12 ; will be the same (i.e. YUYV+AVX), and thus we don't need to ; split the loop in an aligned and unaligned case %macro YUYV_TO_Y_FN 2-3 -cglobal %2ToY, 3, 3, %1, dst, src, w +cglobal %2ToY, 5, 5, %1, dst, unused0, unused1, src, w %if ARCH_X86_64 movsxd wq, wd %endif @@ -560,11 +560,11 @@ cglobal %2ToY, 3, 3, %1, dst, src, w ; will be the same (i.e. UYVY+AVX), and thus we don't need to ; split the loop in an aligned and unaligned case %macro YUYV_TO_UV_FN 2-3 -cglobal %2ToUV, 3, 4, %1, dstU, dstV, src, w +cglobal %2ToUV, 4, 5, %1, dstU, dstV, unused, src, w %if ARCH_X86_64 - movsxd wq, dword r4m + movsxd wq, dword r5m %else ; x86-32 - mov wq, r4m + mov wq, r5m %endif add dstUq, wq add dstVq, wq @@ -594,8 +594,8 @@ cglobal %2ToUV, 3, 4, %1, dstU, dstV, src, w .loop_%1: mov%1 m0, [srcq+wq*2] ; (byte) { U0, V0, U1, V1, ... } mov%1 m1, [srcq+wq*2+mmsize] ; (byte) { U8, V8, U9, V9, ... } - pand m2, m0, m4 ; (word) { U0, U1, ..., U7 } - pand m3, m1, m4 ; (word) { U8, U9, ..., U15 } + pand m2, m0, m5 ; (word) { U0, U1, ..., U7 } + pand m3, m1, m5 ; (word) { U8, U9, ..., U15 } psrlw m0, 8 ; (word) { V0, V1, ..., V7 } psrlw m1, 8 ; (word) { V8, V9, ..., V15 } packuswb m2, m3 ; (byte) { U0, ..., U15 } @@ -615,11 +615,11 @@ cglobal %2ToUV, 3, 4, %1, dstU, dstV, src, w ; %1 = nr. 
of XMM registers ; %2 = nv12 or nv21 %macro NVXX_TO_UV_FN 2 -cglobal %2ToUV, 3, 4, %1, dstU, dstV, src, w +cglobal %2ToUV, 4, 5, %1, dstU, dstV, unused, src, w %if ARCH_X86_64 - movsxd wq, dword r4m + movsxd wq, dword r5m %else ; x86-32 - mov wq, r4m + mov wq, r5m %endif add dstUq, wq add dstVq, wq @@ -627,8 +627,8 @@ cglobal %2ToUV, 3, 4, %1, dstU, dstV, src, w test srcq, 15 %endif lea srcq, [srcq+wq*2] - pcmpeqb m4, m4 ; (byte) { 0xff } x 16 - psrlw m4, 8 ; (word) { 0x00ff } x 8 + pcmpeqb m5, m5 ; (byte) { 0xff } x 16 + psrlw m5, 8 ; (word) { 0x00ff } x 8 %if mmsize == 16 jnz .loop_u_start neg wq @@ -660,6 +660,7 @@ YUYV_TO_UV_FN 3, uyvy NVXX_TO_UV_FN 5, nv12 NVXX_TO_UV_FN 5, nv21 +%if HAVE_AVX INIT_XMM avx ; in theory, we could write a yuy2-to-y using vpand (i.e. AVX), but ; that's not faster in practice @@ -667,3 +668,4 @@ YUYV_TO_UV_FN 3, yuyv YUYV_TO_UV_FN 3, uyvy, 1 NVXX_TO_UV_FN 5, nv12 NVXX_TO_UV_FN 5, nv21 +%endif diff --git a/libswscale/x86/output.asm b/libswscale/x86/output.asm index 9b0b01253a..01a946f7b3 100644 --- a/libswscale/x86/output.asm +++ b/libswscale/x86/output.asm @@ -267,10 +267,12 @@ yuv2planeX_fn 9, 7, 5 yuv2planeX_fn 10, 7, 5 yuv2planeX_fn 16, 8, 5 +%if HAVE_AVX INIT_XMM avx yuv2planeX_fn 8, 10, 7 yuv2planeX_fn 9, 7, 5 yuv2planeX_fn 10, 7, 5 +%endif ; %1=outout-bpc, %2=alignment (u/a) %macro yuv2plane1_mainloop 2 @@ -405,8 +407,10 @@ yuv2plane1_fn 16, 6, 3 INIT_XMM sse4 yuv2plane1_fn 16, 5, 3 +%if HAVE_AVX INIT_XMM avx yuv2plane1_fn 8, 5, 5 yuv2plane1_fn 9, 5, 3 yuv2plane1_fn 10, 5, 3 yuv2plane1_fn 16, 5, 3 +%endif diff --git a/libswscale/x86/rgb2rgb.c b/libswscale/x86/rgb2rgb.c index f201281fac..24b284eec7 100644 --- a/libswscale/x86/rgb2rgb.c +++ b/libswscale/x86/rgb2rgb.c @@ -6,20 +6,20 @@ * Written by Nick Kurshev. * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at) * - * This file is part of Libav. + * This file is part of FFmpeg. 
* - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -71,6 +71,9 @@ DECLARE_ASM_CONST(8, uint64_t, blue_16mask) = 0x0000001f0000001fULL; DECLARE_ASM_CONST(8, uint64_t, red_15mask) = 0x00007c0000007c00ULL; DECLARE_ASM_CONST(8, uint64_t, green_15mask) = 0x000003e0000003e0ULL; DECLARE_ASM_CONST(8, uint64_t, blue_15mask) = 0x0000001f0000001fULL; +DECLARE_ASM_CONST(8, uint64_t, mul15_mid) = 0x4200420042004200ULL; +DECLARE_ASM_CONST(8, uint64_t, mul15_hi) = 0x0210021002100210ULL; +DECLARE_ASM_CONST(8, uint64_t, mul16_mid) = 0x2080208020802080ULL; #define RGB2YUV_SHIFT 8 #define BY ((int)( 0.098*(1<<RGB2YUV_SHIFT)+0.5)) diff --git a/libswscale/x86/rgb2rgb_template.c b/libswscale/x86/rgb2rgb_template.c index 7a641e1814..7e5ffdf8d1 100644 --- a/libswscale/x86/rgb2rgb_template.c +++ b/libswscale/x86/rgb2rgb_template.c @@ -7,20 +7,20 @@ * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at) * lot of big-endian byte order fixes by Alex Beregszaszi * - * This file is part of Libav. + * This file is part of FFmpeg. 
* - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -713,27 +713,6 @@ static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, int src_s } } -/* - I use less accurate approximation here by simply left-shifting the input - value and filling the low order bits with zeroes. This method improves PNG - compression but this scheme cannot reproduce white exactly, since it does - not generate an all-ones maximum value; the net effect is to darken the - image slightly. 
- - The better method should be "left bit replication": - - 4 3 2 1 0 - --------- - 1 1 0 1 1 - - 7 6 5 4 3 2 1 0 - ---------------- - 1 1 0 1 1 1 1 0 - |=======| |===| - | leftmost bits repeated to fill open bits - | - original bits -*/ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int src_size) { const uint16_t *end; @@ -752,9 +731,10 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr "pand %2, %%mm0 \n\t" "pand %3, %%mm1 \n\t" "pand %4, %%mm2 \n\t" - "psllq $3, %%mm0 \n\t" - "psrlq $2, %%mm1 \n\t" - "psrlq $7, %%mm2 \n\t" + "psllq $5, %%mm0 \n\t" + "pmulhw "MANGLE(mul15_mid)", %%mm0 \n\t" + "pmulhw "MANGLE(mul15_mid)", %%mm1 \n\t" + "pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t" "movq %%mm0, %%mm3 \n\t" "movq %%mm1, %%mm4 \n\t" "movq %%mm2, %%mm5 \n\t" @@ -782,9 +762,10 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr "pand %2, %%mm0 \n\t" "pand %3, %%mm1 \n\t" "pand %4, %%mm2 \n\t" - "psllq $3, %%mm0 \n\t" - "psrlq $2, %%mm1 \n\t" - "psrlq $7, %%mm2 \n\t" + "psllq $5, %%mm0 \n\t" + "pmulhw "MANGLE(mul15_mid)", %%mm0 \n\t" + "pmulhw "MANGLE(mul15_mid)", %%mm1 \n\t" + "pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t" "movq %%mm0, %%mm3 \n\t" "movq %%mm1, %%mm4 \n\t" "movq %%mm2, %%mm5 \n\t" @@ -830,9 +811,9 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr while (s < end) { register uint16_t bgr; bgr = *s++; - *d++ = (bgr&0x1F)<<3; - *d++ = (bgr&0x3E0)>>2; - *d++ = (bgr&0x7C00)>>7; + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); + *d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7); + *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12); } } @@ -854,9 +835,11 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr "pand %2, %%mm0 \n\t" "pand %3, %%mm1 \n\t" "pand %4, %%mm2 \n\t" - "psllq $3, %%mm0 \n\t" - "psrlq $3, %%mm1 \n\t" - "psrlq $8, %%mm2 \n\t" + "psllq $5, %%mm0 \n\t" + "psrlq $1, %%mm2 \n\t" + "pmulhw "MANGLE(mul15_mid)", %%mm0 \n\t" 
+ "pmulhw "MANGLE(mul16_mid)", %%mm1 \n\t" + "pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t" "movq %%mm0, %%mm3 \n\t" "movq %%mm1, %%mm4 \n\t" "movq %%mm2, %%mm5 \n\t" @@ -884,9 +867,11 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr "pand %2, %%mm0 \n\t" "pand %3, %%mm1 \n\t" "pand %4, %%mm2 \n\t" - "psllq $3, %%mm0 \n\t" - "psrlq $3, %%mm1 \n\t" - "psrlq $8, %%mm2 \n\t" + "psllq $5, %%mm0 \n\t" + "psrlq $1, %%mm2 \n\t" + "pmulhw "MANGLE(mul15_mid)", %%mm0 \n\t" + "pmulhw "MANGLE(mul16_mid)", %%mm1 \n\t" + "pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t" "movq %%mm0, %%mm3 \n\t" "movq %%mm1, %%mm4 \n\t" "movq %%mm2, %%mm5 \n\t" @@ -931,9 +916,9 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr while (s < end) { register uint16_t bgr; bgr = *s++; - *d++ = (bgr&0x1F)<<3; - *d++ = (bgr&0x7E0)>>3; - *d++ = (bgr&0xF800)>>8; + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); + *d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9); + *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13); } } @@ -976,11 +961,12 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_s "pand %2, %%mm0 \n\t" "pand %3, %%mm1 \n\t" "pand %4, %%mm2 \n\t" - "psllq $3, %%mm0 \n\t" - "psrlq $2, %%mm1 \n\t" - "psrlq $7, %%mm2 \n\t" + "psllq $5, %%mm0 \n\t" + "pmulhw %5, %%mm0 \n\t" + "pmulhw %5, %%mm1 \n\t" + "pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t" PACK_RGB32 - ::"r"(d),"r"(s),"m"(mask15b),"m"(mask15g),"m"(mask15r) + ::"r"(d),"r"(s),"m"(mask15b),"m"(mask15g),"m"(mask15r) ,"m"(mul15_mid) :"memory"); d += 16; s += 4; @@ -990,9 +976,9 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_s while (s < end) { register uint16_t bgr; bgr = *s++; - *d++ = (bgr&0x1F)<<3; - *d++ = (bgr&0x3E0)>>2; - *d++ = (bgr&0x7C00)>>7; + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); + *d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7); + *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12); *d++ = 255; } } @@ -1017,11 +1003,13 @@ static inline void 
RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, int src_s "pand %2, %%mm0 \n\t" "pand %3, %%mm1 \n\t" "pand %4, %%mm2 \n\t" - "psllq $3, %%mm0 \n\t" - "psrlq $3, %%mm1 \n\t" - "psrlq $8, %%mm2 \n\t" + "psllq $5, %%mm0 \n\t" + "psrlq $1, %%mm2 \n\t" + "pmulhw %5, %%mm0 \n\t" + "pmulhw "MANGLE(mul16_mid)", %%mm1 \n\t" + "pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t" PACK_RGB32 - ::"r"(d),"r"(s),"m"(mask16b),"m"(mask16g),"m"(mask16r) + ::"r"(d),"r"(s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mul15_mid) :"memory"); d += 16; s += 4; @@ -1031,9 +1019,9 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, int src_s while (s < end) { register uint16_t bgr; bgr = *s++; - *d++ = (bgr&0x1F)<<3; - *d++ = (bgr&0x7E0)>>3; - *d++ = (bgr&0xF800)>>8; + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); + *d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9); + *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13); *d++ = 255; } } diff --git a/libswscale/x86/scale.asm b/libswscale/x86/scale.asm index d56e253afa..6282ab2c69 100644 --- a/libswscale/x86/scale.asm +++ b/libswscale/x86/scale.asm @@ -408,11 +408,15 @@ SCALE_FUNC %1, %2, X, X8, 7, %3 SCALE_FUNCS 8, 15, %1 SCALE_FUNCS 9, 15, %2 SCALE_FUNCS 10, 15, %2 +SCALE_FUNCS 12, 15, %2 +SCALE_FUNCS 14, 15, %2 SCALE_FUNCS 16, 15, %3 %endif ; !sse4 SCALE_FUNCS 8, 19, %1 SCALE_FUNCS 9, 19, %2 SCALE_FUNCS 10, 19, %2 +SCALE_FUNCS 12, 19, %2 +SCALE_FUNCS 14, 19, %2 SCALE_FUNCS 16, 19, %3 %endmacro diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c index 9683c0cedd..67cda51ca5 100644 --- a/libswscale/x86/swscale.c +++ b/libswscale/x86/swscale.c @@ -1,20 +1,20 @@ /* - * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at> + * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at> * - * This file is part of Libav. + * This file is part of FFmpeg. 
* - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -23,6 +23,7 @@ #include "libswscale/swscale.h" #include "libswscale/swscale_internal.h" #include "libavutil/attributes.h" +#include "libavutil/avassert.h" #include "libavutil/intreadwrite.h" #include "libavutil/x86/asm.h" #include "libavutil/cpu.h" @@ -69,6 +70,7 @@ DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YOffset) = 0x1010101010101010ULL; DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL; DECLARE_ALIGNED(8, const uint64_t, ff_w1111) = 0x0001000100010001ULL; + //MMX versions #if HAVE_MMX #undef RENAME @@ -116,9 +118,9 @@ void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufI c->greenDither= ff_dither4[dstY&1]; c->redDither= ff_dither8[(dstY+1)&1]; if (dstY < dstH - 2) { - const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; - const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; - const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? 
(const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL; + const int16_t **lumSrcPtr= (const int16_t **)(void*) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; + const int16_t **chrUSrcPtr= (const int16_t **)(void*) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; + const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **)(void*) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL; int i; if (firstLumSrcY < 0 || firstLumSrcY + vLumFilterSize > c->srcH) { @@ -202,6 +204,67 @@ void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufI } } +#if HAVE_MMX2 +static void yuv2yuvX_sse3(const int16_t *filter, int filterSize, + const int16_t **src, uint8_t *dest, int dstW, + const uint8_t *dither, int offset) +{ + if(((int)dest) & 15){ + return yuv2yuvX_MMX2(filter, filterSize, src, dest, dstW, dither, offset); + } + if (offset) { + __asm__ volatile("movq (%0), %%xmm3\n\t" + "movdqa %%xmm3, %%xmm4\n\t" + "psrlq $24, %%xmm3\n\t" + "psllq $40, %%xmm4\n\t" + "por %%xmm4, %%xmm3\n\t" + :: "r"(dither) + ); + } else { + __asm__ volatile("movq (%0), %%xmm3\n\t" + :: "r"(dither) + ); + } + __asm__ volatile( + "pxor %%xmm0, %%xmm0\n\t" + "punpcklbw %%xmm0, %%xmm3\n\t" + "psraw $4, %%xmm3\n\t" + "movdqa %%xmm3, %%xmm4\n\t" + "movdqa %%xmm3, %%xmm7\n\t" + "movl %3, %%ecx\n\t" + "mov %0, %%"REG_d" \n\t"\ + "mov (%%"REG_d"), %%"REG_S" \n\t"\ + ".p2align 4 \n\t" /* FIXME Unroll? 
*/\ + "1: \n\t"\ + "movddup 8(%%"REG_d"), %%xmm0 \n\t" /* filterCoeff */\ + "movdqa (%%"REG_S", %%"REG_c", 2), %%xmm2 \n\t" /* srcData */\ + "movdqa 16(%%"REG_S", %%"REG_c", 2), %%xmm5 \n\t" /* srcData */\ + "add $16, %%"REG_d" \n\t"\ + "mov (%%"REG_d"), %%"REG_S" \n\t"\ + "test %%"REG_S", %%"REG_S" \n\t"\ + "pmulhw %%xmm0, %%xmm2 \n\t"\ + "pmulhw %%xmm0, %%xmm5 \n\t"\ + "paddw %%xmm2, %%xmm3 \n\t"\ + "paddw %%xmm5, %%xmm4 \n\t"\ + " jnz 1b \n\t"\ + "psraw $3, %%xmm3 \n\t"\ + "psraw $3, %%xmm4 \n\t"\ + "packuswb %%xmm4, %%xmm3 \n\t" + "movntdq %%xmm3, (%1, %%"REG_c")\n\t" + "add $16, %%"REG_c" \n\t"\ + "cmp %2, %%"REG_c" \n\t"\ + "movdqa %%xmm7, %%xmm3\n\t" + "movdqa %%xmm7, %%xmm4\n\t" + "mov %0, %%"REG_d" \n\t"\ + "mov (%%"REG_d"), %%"REG_S" \n\t"\ + "jb 1b \n\t"\ + :: "g" (filter), + "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset) + : "%"REG_d, "%"REG_S, "%"REG_c + ); +} +#endif + #endif /* HAVE_INLINE_ASM */ #define SCALE_FUNC(filter_n, from_bpc, to_bpc, opt) \ @@ -215,10 +278,14 @@ extern void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( SCALE_FUNC(filter_n, 8, 15, opt); \ SCALE_FUNC(filter_n, 9, 15, opt); \ SCALE_FUNC(filter_n, 10, 15, opt); \ + SCALE_FUNC(filter_n, 12, 15, opt); \ + SCALE_FUNC(filter_n, 14, 15, opt); \ SCALE_FUNC(filter_n, 16, 15, opt); \ SCALE_FUNC(filter_n, 8, 19, opt); \ SCALE_FUNC(filter_n, 9, 19, opt); \ SCALE_FUNC(filter_n, 10, 19, opt); \ + SCALE_FUNC(filter_n, 12, 19, opt); \ + SCALE_FUNC(filter_n, 14, 19, opt); \ SCALE_FUNC(filter_n, 16, 19, opt) #define SCALE_FUNCS_MMX(opt) \ @@ -311,22 +378,33 @@ av_cold void ff_sws_init_swScale_mmx(SwsContext *c) #if HAVE_MMXEXT if (cpu_flags & AV_CPU_FLAG_MMXEXT) sws_init_swScale_MMX2(c); + if (cpu_flags & AV_CPU_FLAG_SSE3){ + if(c->use_mmx_vfilter && !(c->flags & SWS_ACCURATE_RND)) + c->yuv2planeX = yuv2yuvX_sse3; + } #endif #endif /* HAVE_INLINE_ASM */ #if HAVE_YASM #define ASSIGN_SCALE_FUNC2(hscalefn, filtersize, opt1, opt2) do { \ if (c->srcBpc == 8) { \ 
- hscalefn = c->dstBpc <= 10 ? ff_hscale8to15_ ## filtersize ## _ ## opt2 : \ + hscalefn = c->dstBpc <= 14 ? ff_hscale8to15_ ## filtersize ## _ ## opt2 : \ ff_hscale8to19_ ## filtersize ## _ ## opt1; \ } else if (c->srcBpc == 9) { \ - hscalefn = c->dstBpc <= 10 ? ff_hscale9to15_ ## filtersize ## _ ## opt2 : \ + hscalefn = c->dstBpc <= 14 ? ff_hscale9to15_ ## filtersize ## _ ## opt2 : \ ff_hscale9to19_ ## filtersize ## _ ## opt1; \ } else if (c->srcBpc == 10) { \ - hscalefn = c->dstBpc <= 10 ? ff_hscale10to15_ ## filtersize ## _ ## opt2 : \ + hscalefn = c->dstBpc <= 14 ? ff_hscale10to15_ ## filtersize ## _ ## opt2 : \ ff_hscale10to19_ ## filtersize ## _ ## opt1; \ - } else /* c->srcBpc == 16 */ { \ - hscalefn = c->dstBpc <= 10 ? ff_hscale16to15_ ## filtersize ## _ ## opt2 : \ + } else if (c->srcBpc == 12) { \ + hscalefn = c->dstBpc <= 14 ? ff_hscale12to15_ ## filtersize ## _ ## opt2 : \ + ff_hscale12to19_ ## filtersize ## _ ## opt1; \ + } else if (c->srcBpc == 14 || ((c->srcFormat==PIX_FMT_PAL8||isAnyRGB(c->srcFormat)) && av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1<15)) { \ + hscalefn = c->dstBpc <= 14 ? ff_hscale14to15_ ## filtersize ## _ ## opt2 : \ + ff_hscale14to19_ ## filtersize ## _ ## opt1; \ + } else { /* c->srcBpc == 16 */ \ + av_assert0(c->srcBpc == 16);\ + hscalefn = c->dstBpc <= 14 ? 
ff_hscale16to15_ ## filtersize ## _ ## opt2 : \ ff_hscale16to19_ ## filtersize ## _ ## opt1; \ } \ } while (0) @@ -341,14 +419,15 @@ switch(c->dstBpc){ \ case 16: do_16_case; break; \ case 10: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2planeX_10_ ## opt; break; \ case 9: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2planeX_9_ ## opt; break; \ - default: if (condition_8bit) vscalefn = ff_yuv2planeX_8_ ## opt; break; \ + default: if (condition_8bit) /*vscalefn = ff_yuv2planeX_8_ ## opt;*/ break; \ } #define ASSIGN_VSCALE_FUNC(vscalefn, opt1, opt2, opt2chk) \ switch(c->dstBpc){ \ case 16: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2plane1_16_ ## opt1; break; \ case 10: if (!isBE(c->dstFormat) && opt2chk) vscalefn = ff_yuv2plane1_10_ ## opt2; break; \ case 9: if (!isBE(c->dstFormat) && opt2chk) vscalefn = ff_yuv2plane1_9_ ## opt2; break; \ - default: vscalefn = ff_yuv2plane1_8_ ## opt1; break; \ + case 8: vscalefn = ff_yuv2plane1_8_ ## opt1; break; \ + default: av_assert0(c->dstBpc>8); \ } #define case_rgb(x, X, opt) \ case PIX_FMT_ ## X: \ @@ -462,7 +541,7 @@ switch(c->dstBpc){ \ c->yuv2plane1 = ff_yuv2plane1_16_sse4; } - if (cpu_flags & AV_CPU_FLAG_AVX) { + if (HAVE_AVX && cpu_flags & AV_CPU_FLAG_AVX) { ASSIGN_VSCALEX_FUNC(c->yuv2planeX, avx, , HAVE_ALIGNED_STACK || ARCH_X86_64); ASSIGN_VSCALE_FUNC(c->yuv2plane1, avx, avx, 1); diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c index e9816cf0a6..370a0ebe1b 100644 --- a/libswscale/x86/swscale_template.c +++ b/libswscale/x86/swscale_template.c @@ -1,25 +1,26 @@ /* - * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at> + * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at> * - * This file is part of Libav. + * This file is part of FFmpeg. 
* - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #undef REAL_MOVNTQ #undef MOVNTQ +#undef MOVNTQ2 #undef PREFETCH #if COMPILE_TEMPLATE_MMXEXT @@ -30,11 +31,84 @@ #if COMPILE_TEMPLATE_MMXEXT #define REAL_MOVNTQ(a,b) "movntq " #a ", " #b " \n\t" +#define MOVNTQ2 "movntq " #else #define REAL_MOVNTQ(a,b) "movq " #a ", " #b " \n\t" +#define MOVNTQ2 "movq " #endif #define MOVNTQ(a,b) REAL_MOVNTQ(a,b) +#if !COMPILE_TEMPLATE_MMXEXT +static av_always_inline void +dither_8to16(const uint8_t *srcDither, int rot) +{ + if (rot) { + __asm__ volatile("pxor %%mm0, %%mm0\n\t" + "movq (%0), %%mm3\n\t" + "movq %%mm3, %%mm4\n\t" + "psrlq $24, %%mm3\n\t" + "psllq $40, %%mm4\n\t" + "por %%mm4, %%mm3\n\t" + "movq %%mm3, %%mm4\n\t" + "punpcklbw %%mm0, %%mm3\n\t" + "punpckhbw %%mm0, %%mm4\n\t" + :: "r"(srcDither) + ); + } else { + __asm__ volatile("pxor %%mm0, %%mm0\n\t" + "movq (%0), %%mm3\n\t" + "movq %%mm3, %%mm4\n\t" + "punpcklbw %%mm0, %%mm3\n\t" + "punpckhbw %%mm0, %%mm4\n\t" + :: "r"(srcDither) + ); + } +} +#endif + +static void RENAME(yuv2yuvX)(const int16_t *filter, int filterSize, + const int16_t **src, uint8_t *dest, int dstW, + const 
uint8_t *dither, int offset) +{ + dither_8to16(dither, offset); + __asm__ volatile(\ + "psraw $4, %%mm3\n\t" + "psraw $4, %%mm4\n\t" + "movq %%mm3, %%mm6\n\t" + "movq %%mm4, %%mm7\n\t" + "movl %3, %%ecx\n\t" + "mov %0, %%"REG_d" \n\t"\ + "mov (%%"REG_d"), %%"REG_S" \n\t"\ + ".p2align 4 \n\t" /* FIXME Unroll? */\ + "1: \n\t"\ + "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\ + "movq (%%"REG_S", %%"REG_c", 2), %%mm2 \n\t" /* srcData */\ + "movq 8(%%"REG_S", %%"REG_c", 2), %%mm5 \n\t" /* srcData */\ + "add $16, %%"REG_d" \n\t"\ + "mov (%%"REG_d"), %%"REG_S" \n\t"\ + "test %%"REG_S", %%"REG_S" \n\t"\ + "pmulhw %%mm0, %%mm2 \n\t"\ + "pmulhw %%mm0, %%mm5 \n\t"\ + "paddw %%mm2, %%mm3 \n\t"\ + "paddw %%mm5, %%mm4 \n\t"\ + " jnz 1b \n\t"\ + "psraw $3, %%mm3 \n\t"\ + "psraw $3, %%mm4 \n\t"\ + "packuswb %%mm4, %%mm3 \n\t" + MOVNTQ2 " %%mm3, (%1, %%"REG_c")\n\t" + "add $8, %%"REG_c" \n\t"\ + "cmp %2, %%"REG_c" \n\t"\ + "movq %%mm6, %%mm3\n\t" + "movq %%mm7, %%mm4\n\t" + "mov %0, %%"REG_d" \n\t"\ + "mov (%%"REG_d"), %%"REG_S" \n\t"\ + "jb 1b \n\t"\ + :: "g" (filter), + "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset) + : "%"REG_d, "%"REG_S, "%"REG_c + ); +} + #define YSCALEYUV2PACKEDX_UV \ __asm__ volatile(\ "xor %%"REG_a", %%"REG_a" \n\t"\ @@ -260,7 +334,7 @@ static void RENAME(yuv2rgb32_X_ar)(SwsContext *c, const int16_t *lumFilter, { x86_reg dummy=0; x86_reg dstW_reg = dstW; - x86_reg uv_off = c->uv_off_byte; + x86_reg uv_off = c->uv_offx2; if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) { YSCALEYUV2PACKEDX_ACCURATE @@ -293,7 +367,7 @@ static void RENAME(yuv2rgb32_X)(SwsContext *c, const int16_t *lumFilter, { x86_reg dummy=0; x86_reg dstW_reg = dstW; - x86_reg uv_off = c->uv_off_byte; + x86_reg uv_off = c->uv_offx2; if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) { YSCALEYUV2PACKEDX @@ -350,7 +424,7 @@ static void RENAME(yuv2rgb565_X_ar)(SwsContext *c, const int16_t *lumFilter, { x86_reg dummy=0; x86_reg dstW_reg = dstW; - x86_reg uv_off = c->uv_off_byte; + x86_reg 
uv_off = c->uv_offx2; YSCALEYUV2PACKEDX_ACCURATE YSCALEYUV2RGBX @@ -374,7 +448,7 @@ static void RENAME(yuv2rgb565_X)(SwsContext *c, const int16_t *lumFilter, { x86_reg dummy=0; x86_reg dstW_reg = dstW; - x86_reg uv_off = c->uv_off_byte; + x86_reg uv_off = c->uv_offx2; YSCALEYUV2PACKEDX YSCALEYUV2RGBX @@ -427,7 +501,7 @@ static void RENAME(yuv2rgb555_X_ar)(SwsContext *c, const int16_t *lumFilter, { x86_reg dummy=0; x86_reg dstW_reg = dstW; - x86_reg uv_off = c->uv_off_byte; + x86_reg uv_off = c->uv_offx2; YSCALEYUV2PACKEDX_ACCURATE YSCALEYUV2RGBX @@ -451,7 +525,7 @@ static void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter, { x86_reg dummy=0; x86_reg dstW_reg = dstW; - x86_reg uv_off = c->uv_off_byte; + x86_reg uv_off = c->uv_offx2; YSCALEYUV2PACKEDX YSCALEYUV2RGBX @@ -584,7 +658,7 @@ static void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilter, { x86_reg dummy=0; x86_reg dstW_reg = dstW; - x86_reg uv_off = c->uv_off_byte; + x86_reg uv_off = c->uv_offx2; YSCALEYUV2PACKEDX_ACCURATE YSCALEYUV2RGBX @@ -608,7 +682,7 @@ static void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter, { x86_reg dummy=0; x86_reg dstW_reg = dstW; - x86_reg uv_off = c->uv_off_byte; + x86_reg uv_off = c->uv_offx2; YSCALEYUV2PACKEDX YSCALEYUV2RGBX @@ -649,7 +723,7 @@ static void RENAME(yuv2yuyv422_X_ar)(SwsContext *c, const int16_t *lumFilter, { x86_reg dummy=0; x86_reg dstW_reg = dstW; - x86_reg uv_off = c->uv_off_byte; + x86_reg uv_off = c->uv_offx2; YSCALEYUV2PACKEDX_ACCURATE /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ @@ -670,7 +744,7 @@ static void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter, { x86_reg dummy=0; x86_reg dstW_reg = dstW; - x86_reg uv_off = c->uv_off_byte; + x86_reg uv_off = c->uv_offx2; YSCALEYUV2PACKEDX /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ @@ -786,8 +860,8 @@ static void RENAME(yuv2rgb32_2)(SwsContext *c, const int16_t *buf[2], : "%r8" ); #else - *(const uint16_t **)(&c->u_temp)=abuf0; - *(const uint16_t 
**)(&c->v_temp)=abuf1; + c->u_temp=(intptr_t)abuf0; + c->v_temp=(intptr_t)abuf1; __asm__ volatile( "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %4, %%"REG_b" \n\t" @@ -1559,9 +1633,9 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c) { enum PixelFormat dstFormat = c->dstFormat; - if (!is16BPS(dstFormat) && !is9_OR_10BPS(dstFormat) && - dstFormat != PIX_FMT_NV12 && dstFormat != PIX_FMT_NV21) { - if (!(c->flags & SWS_BITEXACT)) { + c->use_mmx_vfilter= 0; + if (!is16BPS(dstFormat) && !is9_OR_10BPS(dstFormat) && dstFormat != PIX_FMT_NV12 + && dstFormat != PIX_FMT_NV21 && !(c->flags & SWS_BITEXACT)) { if (c->flags & SWS_ACCURATE_RND) { if (!(c->flags & SWS_FULL_CHR_H_INT)) { switch (c->dstFormat) { @@ -1574,6 +1648,8 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c) } } } else { + c->use_mmx_vfilter= 1; + c->yuv2planeX = RENAME(yuv2yuvX ); if (!(c->flags & SWS_FULL_CHR_H_INT)) { switch (c->dstFormat) { case PIX_FMT_RGB32: c->yuv2packedX = RENAME(yuv2rgb32_X); break; @@ -1585,7 +1661,6 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c) } } } - } if (!(c->flags & SWS_FULL_CHR_H_INT)) { switch (c->dstFormat) { case PIX_FMT_RGB32: @@ -1614,7 +1689,7 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c) } } - if (c->srcBpc == 8 && c->dstBpc <= 10) { + if (c->srcBpc == 8 && c->dstBpc <= 14) { // Use the new MMX scaler if the MMX2 one can't be used (it is faster than the x86 ASM one). #if COMPILE_TEMPLATE_MMXEXT if (c->flags & SWS_FAST_BILINEAR && c->canMMX2BeUsed) diff --git a/libswscale/x86/yuv2rgb.c b/libswscale/x86/yuv2rgb.c index 93755493ab..9445d08e84 100644 --- a/libswscale/x86/yuv2rgb.c +++ b/libswscale/x86/yuv2rgb.c @@ -7,20 +7,20 @@ * 1,4,8bpp support and context / deglobalize stuff * by Michael Niedermayer (michaelni@gmx.at) * - * This file is part of Libav. + * This file is part of FFmpeg. 
* - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -74,10 +74,6 @@ av_cold SwsFunc ff_yuv2rgb_init_mmx(SwsContext *c) #if HAVE_INLINE_ASM int cpu_flags = av_get_cpu_flags(); - if (c->srcFormat != PIX_FMT_YUV420P && - c->srcFormat != PIX_FMT_YUVA420P) - return NULL; - #if HAVE_MMXEXT if (cpu_flags & AV_CPU_FLAG_MMXEXT) { switch (c->dstFormat) { diff --git a/libswscale/x86/yuv2rgb_template.c b/libswscale/x86/yuv2rgb_template.c index a71fd13862..79e48fd7f0 100644 --- a/libswscale/x86/yuv2rgb_template.c +++ b/libswscale/x86/yuv2rgb_template.c @@ -4,20 +4,20 @@ * Copyright (C) 2001-2007 Michael Niedermayer * (c) 2010 Konstantin Shishkov * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. 
* - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -43,17 +43,14 @@ if (h_size * depth > FFABS(dstStride[0])) \ h_size -= 8; \ \ - if (c->srcFormat == PIX_FMT_YUV422P) { \ - srcStride[1] *= 2; \ - srcStride[2] *= 2; \ - } \ + vshift = c->srcFormat != PIX_FMT_YUV422P; \ \ __asm__ volatile ("pxor %mm4, %mm4\n\t"); \ for (y = 0; y < srcSliceH; y++) { \ uint8_t *image = dst[0] + (y + srcSliceY) * dstStride[0]; \ const uint8_t *py = src[0] + y * srcStride[0]; \ - const uint8_t *pu = src[1] + (y >> 1) * srcStride[1]; \ - const uint8_t *pv = src[2] + (y >> 1) * srcStride[2]; \ + const uint8_t *pu = src[1] + (y >> vshift) * srcStride[1]; \ + const uint8_t *pv = src[2] + (y >> vshift) * srcStride[2]; \ x86_reg index = -h_size / 2; \ #define YUV2RGB_INITIAL_LOAD \ @@ -141,6 +138,7 @@ : "+r" (index), "+r" (image) \ : "r" (pu - index), "r" (pv - index), "r"(&c->redDither), \ "r" (py - 2*index) \ + : "memory" \ ); \ } \ @@ -148,6 +146,7 @@ : "+r" (index), "+r" (image) \ : "r" (pu - index), "r" (pv - index), "r"(&c->redDither), \ "r" (py - 2*index), "r" (pa - 2*index) \ + : "memory" \ ); \ } \ @@ -188,7 +187,7 @@ static inline int RENAME(yuv420_rgb15)(SwsContext *c, const uint8_t *src[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[]) { - int y, h_size; + int y, h_size, vshift; YUV2RGB_LOOP(2) @@ -216,7 +215,7 @@ static inline int RENAME(yuv420_rgb16)(SwsContext *c, const uint8_t *src[], int srcSliceY, int srcSliceH, uint8_t *dst[], 
int dstStride[]) { - int y, h_size; + int y, h_size, vshift; YUV2RGB_LOOP(2) @@ -306,7 +305,7 @@ static inline int RENAME(yuv420_rgb24)(SwsContext *c, const uint8_t *src[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[]) { - int y, h_size; + int y, h_size, vshift; YUV2RGB_LOOP(3) @@ -324,7 +323,7 @@ static inline int RENAME(yuv420_bgr24)(SwsContext *c, const uint8_t *src[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[]) { - int y, h_size; + int y, h_size, vshift; YUV2RGB_LOOP(3) @@ -368,7 +367,7 @@ static inline int RENAME(yuv420_rgb32)(SwsContext *c, const uint8_t *src[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[]) { - int y, h_size; + int y, h_size, vshift; YUV2RGB_LOOP(4) @@ -389,7 +388,7 @@ static inline int RENAME(yuva420_rgb32)(SwsContext *c, const uint8_t *src[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[]) { - int y, h_size; + int y, h_size, vshift; YUV2RGB_LOOP(4) @@ -411,7 +410,7 @@ static inline int RENAME(yuv420_bgr32)(SwsContext *c, const uint8_t *src[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[]) { - int y, h_size; + int y, h_size, vshift; YUV2RGB_LOOP(4) @@ -432,7 +431,7 @@ static inline int RENAME(yuva420_bgr32)(SwsContext *c, const uint8_t *src[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[]) { - int y, h_size; + int y, h_size, vshift; YUV2RGB_LOOP(4) diff --git a/libswscale/yuv2rgb.c b/libswscale/yuv2rgb.c index 1c44a2f544..67cf19ec03 100644 --- a/libswscale/yuv2rgb.c +++ b/libswscale/yuv2rgb.c @@ -6,27 +6,26 @@ * 1,4,8bpp support and context / deglobalize stuff * by Michael Niedermayer (michaelni@gmx.at) * - * This file is part of Libav. + * This file is part of FFmpeg. 
* - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #include <stdio.h> #include <stdlib.h> #include <inttypes.h> -#include <assert.h> #include "libavutil/cpu.h" #include "libavutil/bswap.h" @@ -34,7 +33,10 @@ #include "rgb2rgb.h" #include "swscale.h" #include "swscale_internal.h" +#include "libavutil/pixdesc.h" +extern const uint8_t dither_2x2_4[2][8]; +extern const uint8_t dither_2x2_8[2][8]; extern const uint8_t dither_4x4_16[4][8]; extern const uint8_t dither_8x8_32[8][8]; extern const uint8_t dither_8x8_73[8][8]; @@ -61,9 +63,9 @@ const int *sws_getCoefficients(int colorspace) #define LOADCHROMA(i) \ U = pu[i]; \ V = pv[i]; \ - r = (void *)c->table_rV[V]; \ - g = (void *)(c->table_gU[U] + c->table_gV[V]); \ - b = (void *)c->table_bU[U]; + r = (void *)c->table_rV[V+YUVRGB_TABLE_HEADROOM]; \ + g = (void *)(c->table_gU[U+YUVRGB_TABLE_HEADROOM] + c->table_gV[V+YUVRGB_TABLE_HEADROOM]); \ + b = (void *)c->table_bU[U+YUVRGB_TABLE_HEADROOM]; #define PUTRGB(dst, src, i) \ Y = src[2 * i]; \ @@ -355,24 +357,65 @@ ENDYUV2RGBLINE(24) PUTBGR24(dst_1, py_1, 1); ENDYUV2RGBFUNC() -// This is exactly the same code as yuv2rgb_c_32 except for the 
types of -// r, g, b, dst_1, dst_2 -YUV2RGBFUNC(yuv2rgb_c_16, uint16_t, 0) +YUV2RGBFUNC(yuv2rgb_c_16_ordered_dither, uint16_t, 0) + const uint8_t *d16 = dither_2x2_8[y & 1]; + const uint8_t *e16 = dither_2x2_4[y & 1]; + const uint8_t *f16 = dither_2x2_8[(y & 1)^1]; + +#define PUTRGB16(dst, src, i, o) \ + Y = src[2 * i]; \ + dst[2 * i] = r[Y + d16[0 + o]] + \ + g[Y + e16[0 + o]] + \ + b[Y + f16[0 + o]]; \ + Y = src[2 * i + 1]; \ + dst[2 * i + 1] = r[Y + d16[1 + o]] + \ + g[Y + e16[1 + o]] + \ + b[Y + f16[1 + o]]; LOADCHROMA(0); - PUTRGB(dst_1, py_1, 0); - PUTRGB(dst_2, py_2, 0); + PUTRGB16(dst_1, py_1, 0, 0); + PUTRGB16(dst_2, py_2, 0, 0 + 8); LOADCHROMA(1); - PUTRGB(dst_2, py_2, 1); - PUTRGB(dst_1, py_1, 1); + PUTRGB16(dst_2, py_2, 1, 2 + 8); + PUTRGB16(dst_1, py_1, 1, 2); LOADCHROMA(2); - PUTRGB(dst_1, py_1, 2); - PUTRGB(dst_2, py_2, 2); + PUTRGB16(dst_1, py_1, 2, 4); + PUTRGB16(dst_2, py_2, 2, 4 + 8); LOADCHROMA(3); - PUTRGB(dst_2, py_2, 3); - PUTRGB(dst_1, py_1, 3); + PUTRGB16(dst_2, py_2, 3, 6 + 8); + PUTRGB16(dst_1, py_1, 3, 6); +CLOSEYUV2RGBFUNC(8) + +YUV2RGBFUNC(yuv2rgb_c_15_ordered_dither, uint16_t, 0) + const uint8_t *d16 = dither_2x2_8[y & 1]; + const uint8_t *e16 = dither_2x2_8[(y & 1)^1]; + +#define PUTRGB15(dst, src, i, o) \ + Y = src[2 * i]; \ + dst[2 * i] = r[Y + d16[0 + o]] + \ + g[Y + d16[1 + o]] + \ + b[Y + e16[0 + o]]; \ + Y = src[2 * i + 1]; \ + dst[2 * i + 1] = r[Y + d16[1 + o]] + \ + g[Y + d16[0 + o]] + \ + b[Y + e16[1 + o]]; + LOADCHROMA(0); + PUTRGB15(dst_1, py_1, 0, 0); + PUTRGB15(dst_2, py_2, 0, 0 + 8); + + LOADCHROMA(1); + PUTRGB15(dst_2, py_2, 1, 2 + 8); + PUTRGB15(dst_1, py_1, 1, 2); + + LOADCHROMA(2); + PUTRGB15(dst_1, py_1, 2, 4); + PUTRGB15(dst_2, py_2, 2, 4 + 8); + + LOADCHROMA(3); + PUTRGB15(dst_2, py_2, 3, 6 + 8); + PUTRGB15(dst_1, py_1, 3, 6); CLOSEYUV2RGBFUNC(8) // r, g, b, dst_1, dst_2 @@ -505,7 +548,7 @@ CLOSEYUV2RGBFUNC(8) YUV2RGBFUNC(yuv2rgb_c_1_ordered_dither, uint8_t, 0) const uint8_t *d128 = dither_8x8_220[y & 7]; char 
out_1 = 0, out_2 = 0; - g = c->table_gU[128] + c->table_gV[128]; + g = c->table_gU[128 + YUVRGB_TABLE_HEADROOM] + c->table_gV[128 + YUVRGB_TABLE_HEADROOM]; #define PUTRGB1(out, src, i, o) \ Y = src[2 * i]; \ @@ -547,7 +590,7 @@ SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c) av_log(c, AV_LOG_WARNING, "No accelerated colorspace conversion found from %s to %s.\n", - sws_format_name(c->srcFormat), sws_format_name(c->dstFormat)); + av_get_pix_fmt_name(c->srcFormat), av_get_pix_fmt_name(c->dstFormat)); switch (c->dstFormat) { case PIX_FMT_BGR48BE: @@ -558,23 +601,21 @@ SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c) return yuv2rgb_c_48; case PIX_FMT_ARGB: case PIX_FMT_ABGR: - if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P) + if (CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat)) return yuva2argb_c; case PIX_FMT_RGBA: case PIX_FMT_BGRA: - if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P) - return yuva2rgba_c; - else - return yuv2rgb_c_32; + return (CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat)) ? 
yuva2rgba_c : yuv2rgb_c_32; case PIX_FMT_RGB24: return yuv2rgb_c_24_rgb; case PIX_FMT_BGR24: return yuv2rgb_c_24_bgr; case PIX_FMT_RGB565: case PIX_FMT_BGR565: + return yuv2rgb_c_16_ordered_dither; case PIX_FMT_RGB555: case PIX_FMT_BGR555: - return yuv2rgb_c_16; + return yuv2rgb_c_15_ordered_dither; case PIX_FMT_RGB444: case PIX_FMT_BGR444: return yuv2rgb_c_12_ordered_dither; @@ -589,36 +630,32 @@ SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c) return yuv2rgb_c_4b_ordered_dither; case PIX_FMT_MONOBLACK: return yuv2rgb_c_1_ordered_dither; - default: - assert(0); } return NULL; } -static void fill_table(uint8_t *table[256], const int elemsize, +static void fill_table(uint8_t* table[256 + 2*YUVRGB_TABLE_HEADROOM], const int elemsize, const int inc, void *y_tab) { int i; - int64_t cb = 0; uint8_t *y_table = y_tab; y_table -= elemsize * (inc >> 9); - for (i = 0; i < 256; i++) { + for (i = 0; i < 256 + 2*YUVRGB_TABLE_HEADROOM; i++) { + int64_t cb = av_clip(i-YUVRGB_TABLE_HEADROOM, 0, 255)*inc; table[i] = y_table + elemsize * (cb >> 16); - cb += inc; } } -static void fill_gv_table(int table[256], const int elemsize, const int inc) +static void fill_gv_table(int table[256 + 2*YUVRGB_TABLE_HEADROOM], const int elemsize, const int inc) { int i; - int64_t cb = 0; int off = -(inc >> 9); - for (i = 0; i < 256; i++) { + for (i = 0; i < 256 + 2*YUVRGB_TABLE_HEADROOM; i++) { + int64_t cb = av_clip(i-YUVRGB_TABLE_HEADROOM, 0, 255)*inc; table[i] = elemsize * (off + (cb >> 16)); - cb += inc; } } @@ -661,7 +698,7 @@ av_cold int ff_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4], uint8_t *y_table; uint16_t *y_table16; uint32_t *y_table32; - int i, base, rbase, gbase, bbase, abase, needAlpha; + int i, base, rbase, gbase, bbase, av_uninit(abase), needAlpha; const int yoffs = fullRange ? 384 : 326; int64_t crv = inv_table[0]; |