diff options
Diffstat (limited to 'libswscale')
29 files changed, 1044 insertions, 674 deletions
diff --git a/libswscale/Makefile b/libswscale/Makefile index 5671b2e1b3..942545d282 100644 --- a/libswscale/Makefile +++ b/libswscale/Makefile @@ -1,3 +1,5 @@ +include $(SUBDIR)../config.mak + NAME = swscale FFLIBS = avutil @@ -19,6 +21,8 @@ OBJS-$(HAVE_MMX) += x86/rgb2rgb.o \ OBJS-$(HAVE_VIS) += sparc/yuv2rgb_vis.o OBJS-$(HAVE_YASM) += x86/scale.o +$(SUBDIR)x86/swscale_mmx.o: CFLAGS += $(NOREDZONE_FLAGS) + TESTPROGS = colorspace swscale DIRS = bfin mlib ppc sparc x86 diff --git a/libswscale/bfin/internal_bfin.S b/libswscale/bfin/internal_bfin.S index 9f985e7824..cb8d71253c 100644 --- a/libswscale/bfin/internal_bfin.S +++ b/libswscale/bfin/internal_bfin.S @@ -5,20 +5,20 @@ * Blackfin video color space converter operations * convert I420 YV12 to RGB in various formats * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ diff --git a/libswscale/bfin/swscale_bfin.c b/libswscale/bfin/swscale_bfin.c index 0c5f004930..870636ea05 100644 --- a/libswscale/bfin/swscale_bfin.c +++ b/libswscale/bfin/swscale_bfin.c @@ -3,20 +3,20 @@ * * Blackfin software video scaler operations * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ diff --git a/libswscale/bfin/yuv2rgb_bfin.c b/libswscale/bfin/yuv2rgb_bfin.c index 68af522642..7a7dc7f0e6 100644 --- a/libswscale/bfin/yuv2rgb_bfin.c +++ b/libswscale/bfin/yuv2rgb_bfin.c @@ -4,20 +4,20 @@ * Blackfin video color space converter operations * convert I420 YV12 to RGB in various formats * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -28,6 +28,7 @@ #include <assert.h> #include "config.h" #include <unistd.h> +#include "libavutil/pixdesc.h" #include "libswscale/rgb2rgb.h" #include "libswscale/swscale.h" #include "libswscale/swscale_internal.h" @@ -197,7 +198,7 @@ SwsFunc ff_yuv2rgb_get_func_ptr_bfin(SwsContext *c) } av_log(c, AV_LOG_INFO, "BlackFin accelerated color space converter %s\n", - sws_format_name (c->dstFormat)); + av_get_pix_fmt_name(c->dstFormat)); return f; } diff --git a/libswscale/colorspace-test.c b/libswscale/colorspace-test.c index 07c1cbd803..34095d8532 100644 --- a/libswscale/colorspace-test.c +++ b/libswscale/colorspace-test.c @@ -1,20 +1,20 @@ /* * Copyright (C) 2002 Michael Niedermayer <michaelni@gmx.at> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ diff --git a/libswscale/mlib/yuv2rgb_mlib.c b/libswscale/mlib/yuv2rgb_mlib.c index 526c97505c..e9f11494ee 100644 --- a/libswscale/mlib/yuv2rgb_mlib.c +++ b/libswscale/mlib/yuv2rgb_mlib.c @@ -3,20 +3,20 @@ * * Copyright (C) 2003 Michael Niedermayer <michaelni@gmx.at> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ diff --git a/libswscale/options.c b/libswscale/options.c index 9ba6e5e714..65095d5fd1 100644 --- a/libswscale/options.c +++ b/libswscale/options.c @@ -1,20 +1,20 @@ /* * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ diff --git a/libswscale/ppc/swscale_altivec.c b/libswscale/ppc/swscale_altivec.c index 87059d9430..f2a8600925 100644 --- a/libswscale/ppc/swscale_altivec.c +++ b/libswscale/ppc/swscale_altivec.c @@ -4,20 +4,20 @@ * Copyright (C) 2004 Romain Dolbeau <romain@dolbeau.org> * based on the equivalent C code in swscale.c * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ diff --git a/libswscale/ppc/yuv2rgb_altivec.c b/libswscale/ppc/yuv2rgb_altivec.c index 73c02e9494..8e84c26382 100644 --- a/libswscale/ppc/yuv2rgb_altivec.c +++ b/libswscale/ppc/yuv2rgb_altivec.c @@ -3,20 +3,20 @@ * * copyright (C) 2004 Marc Hoffman <marc.hoffman@analog.com> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -95,6 +95,7 @@ adjustment. #include "libswscale/swscale.h" #include "libswscale/swscale_internal.h" #include "libavutil/cpu.h" +#include "libavutil/pixdesc.h" #include "yuv2rgb_altivec.h" #undef PROFILE_THE_BEAST @@ -298,7 +299,7 @@ static int altivec_##name (SwsContext *c, \ vector signed short R1,G1,B1; \ vector unsigned char R,G,B; \ \ - vector unsigned char *y1ivP, *y2ivP, *uivP, *vivP; \ + const vector unsigned char *y1ivP, *y2ivP, *uivP, *vivP; \ vector unsigned char align_perm; \ \ vector signed short \ @@ -335,10 +336,10 @@ static int altivec_##name (SwsContext *c, \ \ for (j=0;j<w/16;j++) { \ \ - y1ivP = (vector unsigned char *)y1i; \ - y2ivP = (vector unsigned char *)y2i; \ - uivP = (vector unsigned char *)ui; \ - vivP = (vector unsigned char *)vi; \ + y1ivP = (const vector unsigned char *)y1i; \ + y2ivP = (const vector unsigned char *)y2i; \ + uivP = (const vector unsigned char *)ui; \ + vivP = (const vector unsigned char *)vi; \ \ align_perm = vec_lvsl (0, y1i); \ y0 = (vector unsigned char) \ @@ -720,7 +721,7 @@ ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter, static int printed_error_message; if (!printed_error_message) { av_log(c, AV_LOG_ERROR, "altivec_yuv2packedX doesn't support %s output\n", - sws_format_name(c->dstFormat)); + av_get_pix_fmt_name(c->dstFormat)); printed_error_message=1; } return; @@ -795,7 +796,7 @@ ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter, default: /* Unreachable, I think. */ av_log(c, AV_LOG_ERROR, "altivec_yuv2packedX doesn't support %s output\n", - sws_format_name(c->dstFormat)); + av_get_pix_fmt_name(c->dstFormat)); return; } diff --git a/libswscale/ppc/yuv2rgb_altivec.h b/libswscale/ppc/yuv2rgb_altivec.h index b809fe13fe..163eba6eb7 100644 --- a/libswscale/ppc/yuv2rgb_altivec.h +++ b/libswscale/ppc/yuv2rgb_altivec.h @@ -4,20 +4,20 @@ * Copyright (C) 2004 Romain Dolbeau <romain@dolbeau.org> * based on the equivalent C code in swscale.c * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ diff --git a/libswscale/ppc/yuv2yuv_altivec.c b/libswscale/ppc/yuv2yuv_altivec.c index 4cd02ffe1d..82c265afd2 100644 --- a/libswscale/ppc/yuv2yuv_altivec.c +++ b/libswscale/ppc/yuv2yuv_altivec.c @@ -4,20 +4,20 @@ * Copyright (C) 2004 Romain Dolbeau <romain@dolbeau.org> * based on the equivalent C code in swscale.c * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ diff --git a/libswscale/rgb2rgb.c b/libswscale/rgb2rgb.c index 51cb600e6b..adb3005753 100644 --- a/libswscale/rgb2rgb.c +++ b/libswscale/rgb2rgb.c @@ -6,20 +6,20 @@ * Written by Nick Kurshev. * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at) * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #include <inttypes.h> @@ -171,13 +171,13 @@ void rgb16tobgr32(const uint8_t *src, uint8_t *dst, int src_size) bgr = *s++; #if HAVE_BIGENDIAN *d++ = 255; - *d++ = (bgr&0x1F)<<3; - *d++ = (bgr&0x7E0)>>3; - *d++ = (bgr&0xF800)>>8; + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); + *d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9); + *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13); #else - *d++ = (bgr&0xF800)>>8; - *d++ = (bgr&0x7E0)>>3; - *d++ = (bgr&0x1F)<<3; + *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13); + *d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9); + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); *d++ = 255; #endif } @@ -192,9 +192,9 @@ void rgb16to24(const uint8_t *src, uint8_t *dst, int src_size) while (s < end) { register uint16_t bgr; bgr = *s++; - *d++ = (bgr&0xF800)>>8; - *d++ = (bgr&0x7E0)>>3; - *d++ = (bgr&0x1F)<<3; + *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13); + *d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9); + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); } } @@ -231,13 +231,13 @@ void rgb15tobgr32(const uint8_t *src, uint8_t *dst, int src_size) bgr = *s++; #if HAVE_BIGENDIAN *d++ = 255; - *d++ = (bgr&0x1F)<<3; - *d++ = (bgr&0x3E0)>>2; - *d++ = (bgr&0x7C00)>>7; + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); + *d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7); + *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12); #else - *d++ = (bgr&0x7C00)>>7; - *d++ = (bgr&0x3E0)>>2; - *d++ = (bgr&0x1F)<<3; + *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12); + *d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7); + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); *d++ = 255; #endif } @@ -252,9 +252,9 @@ void rgb15to24(const uint8_t *src, uint8_t *dst, int src_size) while (s < end) { register uint16_t bgr; bgr = *s++; - *d++ = (bgr&0x7C00)>>7; - *d++ = (bgr&0x3E0)>>2; - *d++ = (bgr&0x1F)<<3; + *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12); + *d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7); + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); } } diff --git a/libswscale/rgb2rgb.h b/libswscale/rgb2rgb.h index 9d051de4f6..e3edac88d4 100644 --- a/libswscale/rgb2rgb.h +++ b/libswscale/rgb2rgb.h @@ -6,20 +6,20 @@ * Written by Nick Kurshev. * YUV & runtime CPU stuff by Michael (michaelni@gmx.at) * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ diff --git a/libswscale/rgb2rgb_template.c b/libswscale/rgb2rgb_template.c index c02015e5b3..6363bc19aa 100644 --- a/libswscale/rgb2rgb_template.c +++ b/libswscale/rgb2rgb_template.c @@ -7,20 +7,20 @@ * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at) * lot of big-endian byte order fixes by Alex Beregszaszi * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -227,27 +227,6 @@ static inline void rgb24to15_c(const uint8_t *src, uint8_t *dst, int src_size) } } -/* - I use less accurate approximation here by simply left-shifting the input - value and filling the low order bits with zeroes. This method improves PNG - compression but this scheme cannot reproduce white exactly, since it does - not generate an all-ones maximum value; the net effect is to darken the - image slightly. - - The better method should be "left bit replication": - - 4 3 2 1 0 - --------- - 1 1 0 1 1 - - 7 6 5 4 3 2 1 0 - ---------------- - 1 1 0 1 1 1 1 0 - |=======| |===| - | leftmost bits repeated to fill open bits - | - original bits -*/ static inline void rgb15tobgr24_c(const uint8_t *src, uint8_t *dst, int src_size) { const uint16_t *end; @@ -257,9 +236,9 @@ static inline void rgb15tobgr24_c(const uint8_t *src, uint8_t *dst, int src_size while (s < end) { register uint16_t bgr; bgr = *s++; - *d++ = (bgr&0x1F)<<3; - *d++ = (bgr&0x3E0)>>2; - *d++ = (bgr&0x7C00)>>7; + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); + *d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7); + *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12); } } @@ -272,9 +251,9 @@ static inline void rgb16tobgr24_c(const uint8_t *src, uint8_t *dst, int src_size while (s < end) { register uint16_t bgr; bgr = *s++; - *d++ = (bgr&0x1F)<<3; - *d++ = (bgr&0x7E0)>>3; - *d++ = (bgr&0xF800)>>8; + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); + *d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9); + *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13); } } @@ -289,13 +268,13 @@ static inline void rgb15to32_c(const uint8_t *src, uint8_t *dst, int src_size) bgr = *s++; #if HAVE_BIGENDIAN *d++ = 255; - *d++ = (bgr&0x7C00)>>7; - *d++ = (bgr&0x3E0)>>2; - *d++ = (bgr&0x1F)<<3; + *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12); + *d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7); + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); #else - *d++ = (bgr&0x1F)<<3; - *d++ = (bgr&0x3E0)>>2; - *d++ = (bgr&0x7C00)>>7; + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); + *d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7); + *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12); *d++ = 255; #endif } @@ -312,13 +291,13 @@ static inline void rgb16to32_c(const uint8_t *src, uint8_t *dst, int src_size) bgr = *s++; #if HAVE_BIGENDIAN *d++ = 255; - *d++ = (bgr&0xF800)>>8; - *d++ = (bgr&0x7E0)>>3; - *d++ = (bgr&0x1F)<<3; + *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13); + *d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9); + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); #else - *d++ = (bgr&0x1F)<<3; - *d++ = (bgr&0x7E0)>>3; - *d++ = (bgr&0xF800)>>8; + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); + *d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9); + *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13); *d++ = 255; #endif } @@ -658,6 +637,9 @@ void rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst, ydst += lumStride; src += srcStride; + if(y+1 == height) + break; + for (i=0; i<chromWidth; i++) { unsigned int b = src[6*i+0]; unsigned int g = src[6*i+1]; diff --git a/libswscale/sparc/yuv2rgb_vis.c b/libswscale/sparc/yuv2rgb_vis.c index 2111ea8f64..cc98f04053 100644 --- a/libswscale/sparc/yuv2rgb_vis.c +++ b/libswscale/sparc/yuv2rgb_vis.c @@ -2,20 +2,20 @@ * VIS optimized software YUV to RGB converter * Copyright (c) 2007 Denes Balatoni <dbalatoni@programozo.hu> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ diff --git a/libswscale/swscale-test.c b/libswscale/swscale-test.c index b5cf1d202e..5dd2e34870 100644 --- a/libswscale/swscale-test.c +++ b/libswscale/swscale-test.c @@ -1,20 +1,20 @@ /* - * Copyright (C) 2003 Michael Niedermayer <michaelni@gmx.at> + * Copyright (C) 2003-2011 Michael Niedermayer <michaelni@gmx.at> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -104,6 +104,7 @@ static int doTest(uint8_t *ref[4], int refStride[4], int w, int h, av_image_fill_linesizes(srcStride, srcFormat, srcW); for (p = 0; p < 4; p++) { + srcStride[p] = FFALIGN(srcStride[p], 16); if (srcStride[p]) src[p] = av_mallocz(srcStride[p]*srcH+16); if (srcStride[p] && !src[p]) { @@ -139,6 +140,7 @@ static int doTest(uint8_t *ref[4], int refStride[4], int w, int h, * allocated with av_malloc). */ /* An extra 16 bytes is being allocated because some scalers may write * out of bounds. */ + dstStride[i] = FFALIGN(dstStride[i], 16); if (dstStride[i]) dst[i]= av_mallocz(dstStride[i]*dstH+16); if (dstStride[i] && !dst[i]) { @@ -178,6 +180,7 @@ static int doTest(uint8_t *ref[4], int refStride[4], int w, int h, ssdA = r->ssdA; } else { for (i=0; i<4; i++) { + refStride[i] = FFALIGN(refStride[i], 16); if (refStride[i]) out[i]= av_mallocz(refStride[i]*h); if (refStride[i] && !out[i]) { diff --git a/libswscale/swscale.c b/libswscale/swscale.c index 975a0bdf1f..71033d3df8 100644 --- a/libswscale/swscale.c +++ b/libswscale/swscale.c @@ -1,20 +1,20 @@ /* - * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at> + * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -60,6 +60,7 @@ untested special converters #include "swscale.h" #include "swscale_internal.h" #include "rgb2rgb.h" +#include "libavutil/avassert.h" #include "libavutil/intreadwrite.h" #include "libavutil/cpu.h" #include "libavutil/avutil.h" @@ -67,7 +68,6 @@ untested special converters #include "libavutil/bswap.h" #include "libavutil/pixdesc.h" -#define DITHER1XBPP #define RGB2YUV_SHIFT 15 #define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5)) @@ -195,6 +195,101 @@ DECLARE_ALIGNED(8, const uint8_t, dither_8x8_128)[8][8] = { DECLARE_ALIGNED(8, const uint8_t, ff_sws_pb_64)[8] = { 64, 64, 64, 64, 64, 64, 64, 64 }; +DECLARE_ALIGNED(8, const uint8_t, dithers)[8][8][8]={ +{ + { 0, 1, 0, 1, 0, 1, 0, 1,}, + { 1, 0, 1, 0, 1, 0, 1, 0,}, + { 0, 1, 0, 1, 0, 1, 0, 1,}, + { 1, 0, 1, 0, 1, 0, 1, 0,}, + { 0, 1, 0, 1, 0, 1, 0, 1,}, + { 1, 0, 1, 0, 1, 0, 1, 0,}, + { 0, 1, 0, 1, 0, 1, 0, 1,}, + { 1, 0, 1, 0, 1, 0, 1, 0,}, +},{ + { 1, 2, 1, 2, 1, 2, 1, 2,}, + { 3, 0, 3, 0, 3, 0, 3, 0,}, + { 1, 2, 1, 2, 1, 2, 1, 2,}, + { 3, 0, 3, 0, 3, 0, 3, 0,}, + { 1, 2, 1, 2, 1, 2, 1, 2,}, + { 3, 0, 3, 0, 3, 0, 3, 0,}, + { 1, 2, 1, 2, 1, 2, 1, 2,}, + { 3, 0, 3, 0, 3, 0, 3, 0,}, +},{ + { 2, 4, 3, 5, 2, 4, 3, 5,}, + { 6, 0, 7, 1, 6, 0, 7, 1,}, + { 3, 5, 2, 4, 3, 5, 2, 4,}, + { 7, 1, 6, 0, 7, 1, 6, 0,}, + { 2, 4, 3, 5, 2, 4, 3, 5,}, + { 6, 0, 7, 1, 6, 0, 7, 1,}, + { 3, 5, 2, 4, 3, 5, 2, 4,}, + { 7, 1, 6, 0, 7, 1, 6, 0,}, +},{ + { 4, 8, 7, 11, 4, 8, 7, 11,}, + { 12, 0, 15, 3, 12, 0, 15, 3,}, + { 6, 10, 5, 9, 6, 10, 5, 9,}, + { 14, 2, 13, 1, 14, 2, 13, 1,}, + { 4, 8, 7, 11, 4, 8, 7, 11,}, + { 12, 0, 15, 3, 12, 0, 15, 3,}, + { 6, 10, 5, 9, 6, 10, 5, 9,}, + { 14, 2, 13, 1, 14, 2, 13, 1,}, +},{ + { 9, 17, 15, 23, 8, 16, 14, 22,}, + { 25, 1, 31, 7, 24, 0, 30, 6,}, + { 13, 21, 11, 19, 12, 20, 10, 18,}, + { 29, 5, 27, 3, 28, 4, 26, 2,}, + { 8, 16, 14, 22, 9, 17, 15, 23,}, + { 24, 0, 30, 6, 25, 1, 31, 7,}, + { 12, 20, 10, 18, 13, 21, 11, 19,}, + { 28, 4, 26, 2, 29, 5, 27, 3,}, +},{ + { 18, 34, 30, 46, 17, 33, 29, 45,}, + { 50, 2, 62, 14, 49, 1, 61, 13,}, + { 26, 42, 22, 38, 25, 41, 21, 37,}, + { 58, 10, 54, 6, 57, 9, 53, 5,}, + { 16, 32, 28, 44, 19, 35, 31, 47,}, + { 48, 0, 60, 12, 51, 3, 63, 15,}, + { 24, 40, 20, 36, 27, 43, 23, 39,}, + { 56, 8, 52, 4, 59, 11, 55, 7,}, +},{ + { 18, 34, 30, 46, 17, 33, 29, 45,}, + { 50, 2, 62, 14, 49, 1, 61, 13,}, + { 26, 42, 22, 38, 25, 41, 21, 37,}, + { 58, 10, 54, 6, 57, 9, 53, 5,}, + { 16, 32, 28, 44, 19, 35, 31, 47,}, + { 48, 0, 60, 12, 51, 3, 63, 15,}, + { 24, 40, 20, 36, 27, 43, 23, 39,}, + { 56, 8, 52, 4, 59, 11, 55, 7,}, +},{ + { 36, 68, 60, 92, 34, 66, 58, 90,}, + { 100, 4,124, 28, 98, 2,122, 26,}, + { 52, 84, 44, 76, 50, 82, 42, 74,}, + { 116, 20,108, 12,114, 18,106, 10,}, + { 32, 64, 56, 88, 38, 70, 62, 94,}, + { 96, 0,120, 24,102, 6,126, 30,}, + { 48, 80, 40, 72, 54, 86, 46, 78,}, + { 112, 16,104, 8,118, 22,110, 14,}, +}}; + +static const uint8_t flat64[8]={64,64,64,64,64,64,64,64}; + +const uint16_t dither_scale[15][16]={ +{ 2, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,}, +{ 2, 3, 7, 7, 13, 13, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,}, +{ 3, 3, 4, 15, 15, 29, 57, 57, 57, 113, 113, 113, 113, 113, 113, 113,}, +{ 3, 4, 4, 5, 31, 31, 61, 121, 241, 241, 241, 241, 481, 481, 481, 481,}, +{ 3, 4, 5, 5, 6, 63, 63, 125, 249, 497, 993, 993, 993, 993, 993, 1985,}, +{ 3, 5, 6, 6, 6, 7, 127, 127, 253, 505, 1009, 2017, 4033, 4033, 4033, 4033,}, +{ 3, 5, 6, 7, 7, 7, 8, 255, 255, 509, 1017, 2033, 4065, 8129,16257,16257,}, +{ 3, 5, 6, 8, 8, 8, 8, 9, 511, 511, 1021, 2041, 4081, 8161,16321,32641,}, +{ 3, 5, 7, 8, 9, 9, 9, 9, 10, 1023, 1023, 2045, 4089, 8177,16353,32705,}, +{ 3, 5, 7, 8, 10, 10, 10, 10, 10, 11, 2047, 2047, 4093, 8185,16369,32737,}, +{ 3, 5, 7, 8, 10, 11, 11, 11, 11, 11, 12, 4095, 4095, 8189,16377,32753,}, +{ 3, 5, 7, 9, 10, 12, 12, 12, 12, 12, 12, 13, 8191, 8191,16381,32761,}, +{ 3, 5, 7, 9, 10, 12, 13, 13, 13, 13, 13, 13, 14,16383,16383,32765,}, +{ 3, 5, 7, 9, 10, 12, 14, 14, 14, 14, 14, 14, 14, 15,32767,32767,}, +{ 3, 5, 7, 9, 11, 12, 14, 15, 15, 15, 15, 15, 15, 15, 16,65535,}, +}; + #define output_pixel(pos, val, bias, signedness) \ if (big_endian) { \ AV_WB16(pos, bias + av_clip_ ## signedness ## 16(val >> shift)); \ @@ -207,7 +302,8 @@ yuv2plane1_16_c_template(const int32_t *src, uint16_t *dest, int dstW, int big_endian, int output_bits) { int i; - int shift = 19 - output_bits; + int shift = 3; + av_assert0(output_bits == 16); for (i = 0; i < dstW; i++) { int val = src[i] + (1 << (shift - 1)); @@ -221,10 +317,11 @@ yuv2planeX_16_c_template(const int16_t *filter, int filterSize, int big_endian, int output_bits) { int i; - int shift = 15 + 16 - output_bits; + int shift = 15; + av_assert0(output_bits == 16); for (i = 0; i < dstW; i++) { - int val = 1 << (30-output_bits); + int val = 1 << (shift - 1); int j; /* range of val is [0,0x7FFFFFFF], so 31 bits, but with lanczos/spline @@ -271,7 +368,7 @@ yuv2planeX_10_c_template(const int16_t *filter, int filterSize, int shift = 11 + 16 - output_bits; for (i = 0; i < dstW; i++) { - int val = 1 << (26-output_bits); + int val = 1 << (shift - 1); int j; for (j = 0; j < filterSize; j++) @@ -433,8 +530,8 @@ yuv2gray16_1_c_template(SwsContext *c, const int32_t *buf0, int i; for (i = 0; i < (dstW >> 1); i++) { - int Y1 = buf0[i * 2 ] << 1; - int Y2 = buf0[i * 2 + 1] << 1; + int Y1 = (buf0[i * 2 ]+4)>>3; + int Y2 = (buf0[i * 2 + 1]+4)>>3; output_pixel(&dest[i * 2 + 0], Y1); output_pixel(&dest[i * 2 + 1], Y2); @@ -882,8 +979,8 @@ yuv2rgb48_1_c_template(SwsContext *c, const int32_t *buf0, for (i = 0; i < (dstW >> 1); i++) { int Y1 = (buf0[i * 2] ) >> 2; int Y2 = (buf0[i * 2 + 1]) >> 2; - int U = (ubuf0[i] + ubuf1[i] + (-128 << 11)) >> 3; - int V = (vbuf0[i] + vbuf1[i] + (-128 << 11)) >> 3; + int U = (ubuf0[i] + ubuf1[i] + (-128 << 12)) >> 3; + int V = (vbuf0[i] + vbuf1[i] + (-128 << 12)) >> 3; int R, G, B; Y1 -= c->yuv2rgb_y_offset; @@ -954,6 +1051,7 @@ yuv2rgb_write(uint8_t *_dest, int i, int Y1, int Y2, #define r_b ((target == PIX_FMT_RGB24) ? r : b) #define b_r ((target == PIX_FMT_RGB24) ? b : r) + dest[i * 6 + 0] = r_b[Y1]; dest[i * 6 + 1] = g[Y1]; dest[i * 6 + 2] = b_r[Y1]; @@ -1244,9 +1342,9 @@ yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter, for (i = 0; i < dstW; i++) { int j; - int Y = 0; - int U = -128 << 19; - int V = -128 << 19; + int Y = 1<<9; + int U = (1<<9)-(128 << 19); + int V = (1<<9)-(128 << 19); int av_unused A; int R, G, B; @@ -1261,7 +1359,7 @@ yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter, U >>= 10; V >>= 10; if (hasAlpha) { - A = 1 << 21; + A = 1 << 18; for (j = 0; j < lumFilterSize; j++) { A += alpSrc[j][i] * lumFilter[j]; } @@ -1304,7 +1402,6 @@ yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter, dest[1] = B >> 22; dest[2] = G >> 22; dest[3] = R >> 22; - dest += 4; break; case PIX_FMT_BGR24: dest[0] = B >> 22; @@ -1412,7 +1509,7 @@ rgb48ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV, #undef input_pixel #define rgb48funcs(pattern, BE_LE, origin) \ -static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, \ +static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused0, const uint8_t *unused1,\ int width, uint32_t *unused) \ { \ const uint16_t *src = (const uint16_t *) _src; \ @@ -1421,7 +1518,7 @@ static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, } \ \ static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \ - const uint8_t *_src1, const uint8_t *_src2, \ + const uint8_t *unused0, const uint8_t *_src1, const uint8_t *_src2, \ int width, uint32_t *unused) \ { \ const uint16_t *src1 = (const uint16_t *) _src1, \ @@ -1431,7 +1528,7 @@ static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \ } \ \ static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, \ - const uint8_t *_src1, const uint8_t *_src2, \ + const uint8_t *unused0, const uint8_t *_src1, const uint8_t *_src2, \ int width, uint32_t *unused) \ { \ const uint16_t *src1 = (const uint16_t *) _src1, \ @@ -1450,14 +1547,14 @@ rgb48funcs(bgr, BE, PIX_FMT_BGR48BE); (isBE(origin) ? AV_RB16(&src[(i)*2]) : AV_RL16(&src[(i)*2]))) static av_always_inline void -rgb16_32ToY_c_template(uint8_t *dst, const uint8_t *src, +rgb16_32ToY_c_template(int16_t *dst, const uint8_t *src, int width, enum PixelFormat origin, int shr, int shg, int shb, int shp, int maskr, int maskg, int maskb, int rsh, int gsh, int bsh, int S) { const int ry = RY << rsh, gy = GY << gsh, by = BY << bsh, - rnd = 33 << (S - 1); + rnd = (32<<((S)-1)) + (1<<(S-7)); int i; for (i = 0; i < width; i++) { @@ -1466,12 +1563,12 @@ rgb16_32ToY_c_template(uint8_t *dst, const uint8_t *src, int g = (px & maskg) >> shg; int r = (px & maskr) >> shr; - dst[i] = (ry * r + gy * g + by * b + rnd) >> S; + dst[i] = (ry * r + gy * g + by * b + rnd) >> ((S)-6); } } static av_always_inline void -rgb16_32ToUV_c_template(uint8_t *dstU, uint8_t *dstV, +rgb16_32ToUV_c_template(int16_t *dstU, int16_t *dstV, const uint8_t *src, int width, enum PixelFormat origin, int shr, int shg, int shb, int shp, @@ -1480,7 +1577,7 @@ rgb16_32ToUV_c_template(uint8_t *dstU, uint8_t *dstV, { const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh, rv = RV << rsh, gv = GV << gsh, bv = BV << bsh, - rnd = 257 << (S - 1); + rnd = (256<<((S)-1)) + (1<<(S-7)); int i; for (i = 0; i < width; i++) { @@ -1489,13 +1586,13 @@ rgb16_32ToUV_c_template(uint8_t *dstU, uint8_t *dstV, int g = (px & maskg) >> shg; int r = (px & maskr) >> shr; - dstU[i] = (ru * r + gu * g + bu * b + rnd) >> S; - dstV[i] = (rv * r + gv * g + bv * b + rnd) >> S; + dstU[i] = (ru * r + gu * g + bu * b + rnd) >> ((S)-6); + dstV[i] = (rv * r + gv * g + bv * b + rnd) >> ((S)-6); } } static av_always_inline void -rgb16_32ToUV_half_c_template(uint8_t *dstU, uint8_t *dstV, +rgb16_32ToUV_half_c_template(int16_t *dstU, int16_t *dstV, const uint8_t *src, int width, enum PixelFormat origin, int shr, int shg, int shb, int shp, @@ -1504,7 +1601,7 @@ rgb16_32ToUV_half_c_template(uint8_t *dstU, uint8_t *dstV, { const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh, rv = RV << rsh, gv = GV << gsh, bv = BV << bsh, - rnd = 257 << S, maskgx = ~(maskr | maskb); + rnd = (256U<<(S)) + (1<<(S-6)), maskgx = ~(maskr | maskb); int i; maskr |= maskr << 1; maskb |= maskb << 1; maskg |= maskg << 1; @@ -1523,8 +1620,8 @@ rgb16_32ToUV_half_c_template(uint8_t *dstU, uint8_t *dstV, } r = (rb & maskr) >> shr; - dstU[i] = (ru * r + gu * g + bu * b + rnd) >> (S + 1); - dstV[i] = (rv * r + gv * g + bv * b + rnd) >> (S + 1); + dstU[i] = (ru * r + gu * g + bu * b + (unsigned)rnd) >> ((S)-6+1); + dstV[i] = (rv * r + gv * g + bv * b + (unsigned)rnd) >> ((S)-6+1); } } @@ -1532,26 +1629,29 @@ rgb16_32ToUV_half_c_template(uint8_t *dstU, uint8_t *dstV, #define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \ maskg, maskb, rsh, gsh, bsh, S) \ -static void name ## ToY_c(uint8_t *dst, const uint8_t *src, \ +static void name ## ToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, \ int width, uint32_t *unused) \ { \ - rgb16_32ToY_c_template(dst, src, width, fmt, shr, shg, shb, shp, \ + rgb16_32ToY_c_template((int16_t*)dst, src, width, fmt, \ + shr, shg, shb, shp, \ maskr, maskg, maskb, rsh, gsh, bsh, S); \ } \ \ static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \ - const uint8_t *src, const uint8_t *dummy, \ + const uint8_t *unused0, const uint8_t *src, const uint8_t *dummy, \ int width, uint32_t *unused) \ { \ - rgb16_32ToUV_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \ + rgb16_32ToUV_c_template((int16_t*)dstU, (int16_t*)dstV, src, width, fmt, \ + shr, shg, shb, shp, \ maskr, maskg, maskb, rsh, gsh, bsh, S); \ } \ \ static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \ - const uint8_t *src, const uint8_t *dummy, \ + const uint8_t *unused0, const uint8_t *src, const uint8_t *dummy, \ int width, uint32_t *unused) \ { \ - rgb16_32ToUV_half_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \ + rgb16_32ToUV_half_c_template((int16_t*)dstU, (int16_t*)dstV, src, width, fmt, \ + shr, shg, shb, shp, \ maskr, maskg, maskb, rsh, gsh, bsh, S); \ } @@ -1568,71 +1668,104 @@ rgb16_32_wrapper(PIX_FMT_BGR555BE, bgr15be, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7 rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8); rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7); -static void abgrToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused) +static void gbr24pToUV_half_c(uint16_t *dstU, uint16_t *dstV, + const uint8_t *gsrc, const uint8_t *bsrc, const uint8_t *rsrc, + int width, enum PixelFormat origin) +{ + int i; + for (i = 0; i < width; i++) { + unsigned int g = gsrc[2*i] + gsrc[2*i+1]; + unsigned int b = bsrc[2*i] + bsrc[2*i+1]; + unsigned int r = rsrc[2*i] + rsrc[2*i+1]; + + dstU[i] = (RU*r + GU*g + BU*b + (0x4001<<(RGB2YUV_SHIFT-6))) >> (RGB2YUV_SHIFT-6+1); + dstV[i] = (RV*r + GV*g + BV*b + (0x4001<<(RGB2YUV_SHIFT-6))) >> (RGB2YUV_SHIFT-6+1); + } +} + +static void abgrToA_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused) +{ + int i; + for (i=0; i<width; i++) { + dst[i]= src[4*i]<<6; + } +} + +static void rgbaToA_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused) { int i; for (i=0; i<width; i++) { - dst[i]= src[4*i]; + dst[i]= src[4*i+3]<<6; } } -static void rgbaToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused) +static void palToA_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *pal) { int i; for (i=0; i<width; i++) { - dst[i]= src[4*i+3]; + int d= src[i]; + + dst[i]= (pal[d] >> 24)<<6; } } -static void palToY_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *pal) +static void palToY_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, long width, uint32_t *pal) { int i; for (i=0; i<width; i++) { int d= src[i]; - dst[i]= pal[d] & 0xFF; + dst[i]= (pal[d] & 0xFF)<<6; } } -static void palToUV_c(uint8_t *dstU, uint8_t *dstV, - const uint8_t *src1, const uint8_t *src2, - int width, uint32_t *pal) +static void palToUV_c(uint16_t *dstU, int16_t *dstV, + const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2, + int width, uint32_t *pal) { int i; assert(src1 == src2); for (i=0; i<width; i++) { int p= pal[src1[i]]; - dstU[i]= p>>8; - dstV[i]= p>>16; + dstU[i]= (uint8_t)(p>> 8)<<6; + dstV[i]= (uint8_t)(p>>16)<<6; } } -static void monowhite2Y_c(uint8_t *dst, const uint8_t *src, - int width, uint32_t *unused) +static void monowhite2Y_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused) { int i, j; for (i=0; i<width/8; i++) { int d= ~src[i]; for(j=0; j<8; j++) - dst[8*i+j]= ((d>>(7-j))&1)*255; + dst[8*i+j]= ((d>>(7-j))&1)*16383; + } + if(width&7){ + int d= ~src[i]; + for(j=0; j<(width&7); j++) + dst[8*i+j]= ((d>>(7-j))&1)*16383; } } -static void monoblack2Y_c(uint8_t *dst, const uint8_t *src, - int width, uint32_t *unused) +static void monoblack2Y_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused) { int i, j; for (i=0; i<width/8; i++) { int d= src[i]; for(j=0; j<8; j++) - dst[8*i+j]= ((d>>(7-j))&1)*255; + dst[8*i+j]= ((d>>(7-j))&1)*16383; + } + if(width&7){ + int d= src[i]; + for(j=0; j<(width&7); j++) + dst[8*i+j]= ((d>>(7-j))&1)*16383; } } //FIXME yuy2* can read up to 7 samples too much -static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width, +static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused) { int i; @@ -1640,7 +1773,7 @@ static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width, dst[i]= src[2*i]; } -static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, +static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused) { int i; @@ -1651,7 +1784,7 @@ static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, assert(src1 == src2); } -static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, int width, uint32_t *unused) +static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused) { int i; const uint16_t *src = (const uint16_t *) _src; @@ -1661,7 +1794,7 @@ static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, int width, uint32_t * } } -static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src1, +static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0, const uint8_t *_src1, const uint8_t *_src2, int width, uint32_t *unused) { int i; @@ -1676,7 +1809,7 @@ static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src1, /* This is almost identical to the previous, end exists only because * yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. */ -static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width, +static void uyvyToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused) { int i; @@ -1684,7 +1817,7 @@ static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width, dst[i]= src[2*i+1]; } -static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, +static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused) { int i; @@ -1706,14 +1839,14 @@ static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2, } static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV, - const uint8_t *src1, const uint8_t *src2, + const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused) { nvXXtoUV_c(dstU, dstV, src1, width); } static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV, - const uint8_t *src1, const uint8_t *src2, + const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused) { nvXXtoUV_c(dstV, dstU, src1, width); @@ -1721,7 +1854,7 @@ static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV, #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos)) -static void bgr24ToY_c(uint8_t *dst, const uint8_t *src, +static void bgr24ToY_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused) { int i; @@ -1730,11 +1863,11 @@ static void bgr24ToY_c(uint8_t *dst, const uint8_t *src, int g= src[i*3+1]; int r= src[i*3+2]; - dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT); + dst[i]= ((RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6)); } } -static void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, +static void bgr24ToUV_c(int16_t *dstU, int16_t *dstV, const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused) { int i; @@ -1743,13 +1876,13 @@ static void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, int g= src1[3*i + 1]; int r= src1[3*i + 2]; - dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT; - dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT; + dstU[i]= (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6); + dstV[i]= (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6); } assert(src1 == src2); } -static void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, +static void bgr24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused) { int i; @@ -1758,13 +1891,13 @@ static void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, int g= src1[6*i + 1] + src1[6*i + 4]; int r= src1[6*i + 2] + src1[6*i + 5]; - dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1); - dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1); + dstU[i]= (RU*r + GU*g + BU*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5); + dstV[i]= (RV*r + GV*g + BV*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5); } assert(src1 == src2); } -static void rgb24ToY_c(uint8_t *dst, const uint8_t *src, int width, +static void rgb24ToY_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused) { int i; @@ -1773,11 +1906,11 @@ static void rgb24ToY_c(uint8_t *dst, const uint8_t *src, int width, int g= src[i*3+1]; int b= src[i*3+2]; - dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT); + dst[i]= ((RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6)); } } -static void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, +static void rgb24ToUV_c(int16_t *dstU, int16_t *dstV, const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused) { int i; @@ -1787,13 +1920,13 @@ static void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, int g= src1[3*i + 1]; int b= src1[3*i + 2]; - dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT; - dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT; + dstU[i]= (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6); + dstV[i]= (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6); } } -static void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, - const uint8_t *src2, int width, uint32_t *unused) +static void rgb24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t *unused0, const uint8_t *src1, + const uint8_t *src2, int width, uint32_t *unused) { int i; assert(src1==src2); @@ -1802,12 +1935,12 @@ static void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, int g= src1[6*i + 1] + src1[6*i + 4]; int b= src1[6*i + 2] + src1[6*i + 5]; - dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1); - dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1); + dstU[i]= (RU*r + GU*g + BU*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5); + dstV[i]= (RV*r + GV*g + BV*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5); } } -static void planar_rgb_to_y(uint8_t *dst, const uint8_t *src[4], int width) +static void planar_rgb_to_y(uint16_t *dst, const uint8_t *src[4], int width) { int i; for (i = 0; i < width; i++) { @@ -1815,7 +1948,7 @@ static void planar_rgb_to_y(uint8_t *dst, const uint8_t *src[4], int width) int b = src[1][i]; int r = src[2][i]; - dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT); + dst[i] = (RY*r + GY*g + BY*b + (0x801<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6); } } @@ -1847,7 +1980,7 @@ static void planar_rgb16be_to_y(uint8_t *_dst, const uint8_t *_src[4], int width } } -static void planar_rgb_to_uv(uint8_t *dstU, uint8_t *dstV, const uint8_t *src[4], int width) +static void planar_rgb_to_uv(uint16_t *dstU, uint16_t *dstV, const uint8_t *src[4], int width) { int i; for (i = 0; i < width; i++) { @@ -1855,8 +1988,8 @@ static void planar_rgb_to_uv(uint8_t *dstU, uint8_t *dstV, const uint8_t *src[4] int b = src[1][i]; int r = src[2][i]; - dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1); - dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1); + dstU[i] = (RU*r + GU*g + BU*b + (0x4001<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6); + dstV[i] = (RV*r + GV*g + BV*b + (0x4001<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6); } } @@ -1902,6 +2035,9 @@ static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1; int sh = bits - 4; + if((isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8) && av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1<15) + sh= 9; + for (i = 0; i < dstW; i++) { int j; int srcPos = filterPos[i]; @@ -1923,6 +2059,9 @@ static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t const uint16_t *src = (const uint16_t *) _src; int sh = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1; + if(sh<15) + sh= isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8 ? 13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1; + for (i = 0; i < dstW; i++) { int j; int srcPos = filterPos[i]; @@ -2037,7 +2176,7 @@ static void lumRangeFromJpeg16_c(int16_t *_dst, int width) int i; int32_t *dst = (int32_t *) _dst; for (i = 0; i < width; i++) - dst[i] = (dst[i]*14071 + (33561947<<4))>>14; + dst[i] = (dst[i]*(14071/4) + (33561947<<4)/4)>>12; } static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth, @@ -2051,6 +2190,8 @@ static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth, dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha; xpos+=xInc; } + for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) + dst[i] = src[srcW-1]*128; } // *** horizontal scale Y line to temp buffer @@ -2061,12 +2202,12 @@ static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth, uint8_t *formatConvBuffer, uint32_t *pal, int isAlpha) { - void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12; + void (*toYV12)(uint8_t *, const uint8_t *, const uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12; void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange; const uint8_t *src = src_in[isAlpha ? 3 : 0]; if (toYV12) { - toYV12(formatConvBuffer, src, srcW, pal); + toYV12(formatConvBuffer, src, src_in[1], src_in[2], srcW, pal); src= formatConvBuffer; } else if (c->readLumPlanar && !isAlpha) { c->readLumPlanar(formatConvBuffer, src_in, srcW); @@ -2096,6 +2237,10 @@ static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2, dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha); xpos+=xInc; } + for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) { + dst1[i] = src1[srcW-1]*128; + dst2[i] = src2[srcW-1]*128; + } } static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth, @@ -2106,12 +2251,12 @@ static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2 { const uint8_t *src1 = src_in[1], *src2 = src_in[2]; if (c->chrToYV12) { - uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * FFALIGN(c->srcBpc, 8) >> 3, 16); - c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal); + uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW*2+78, 16); + c->chrToYV12(formatConvBuffer, buf2, src_in[0], src1, src2, srcW, pal); src1= formatConvBuffer; src2= buf2; } else if (c->readChrPlanar) { - uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * FFALIGN(c->srcBpc, 8) >> 3, 16); + uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW*2+78, 16); c->readChrPlanar(formatConvBuffer, buf2, src_in, srcW); src1= formatConvBuffer; src2= buf2; @@ -2220,7 +2365,10 @@ find_c_packed_planar_out_funcs(SwsContext *c, *yuv2packedX = yuv2bgr24_full_X_c; break; } + if(!*yuv2packedX) + goto YUV_PACKED; } else { + YUV_PACKED: switch (dstFormat) { case PIX_FMT_GRAY16BE: *yuv2packed1 = yuv2gray16BE_1_c; @@ -2391,8 +2539,6 @@ static int swScale(SwsContext *c, const uint8_t* src[], int16_t *vChrFilterPos= c->vChrFilterPos; int16_t *hLumFilterPos= c->hLumFilterPos; int16_t *hChrFilterPos= c->hChrFilterPos; - int16_t *vLumFilter= c->vLumFilter; - int16_t *vChrFilter= c->vChrFilter; int16_t *hLumFilter= c->hLumFilter; int16_t *hChrFilter= c->hChrFilter; int32_t *lumMmxFilter= c->lumMmxFilter; @@ -2413,13 +2559,14 @@ static int swScale(SwsContext *c, const uint8_t* src[], const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample); int lastDstY; uint32_t *pal=c->pal_yuv; + int should_dither= isNBPS(c->srcFormat) || is16BPS(c->srcFormat); + yuv2planar1_fn yuv2plane1 = c->yuv2plane1; yuv2planarX_fn yuv2planeX = c->yuv2planeX; yuv2interleavedX_fn yuv2nv12cX = c->yuv2nv12cX; yuv2packed1_fn yuv2packed1 = c->yuv2packed1; yuv2packed2_fn yuv2packed2 = c->yuv2packed2; yuv2packedX_fn yuv2packedX = c->yuv2packedX; - int should_dither = is9_OR_10BPS(c->srcFormat) || is16BPS(c->srcFormat); /* vars which will change and which we need to store back in the context */ int dstY= c->dstY; @@ -2449,7 +2596,7 @@ static int swScale(SwsContext *c, const uint8_t* src[], DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n", vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize); - if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) { + if (dstStride[0]%16 !=0 || dstStride[1]%16 !=0 || dstStride[2]%16 !=0 || dstStride[3]%16 != 0) { static int warnedAlready=0; //FIXME move this into the context perhaps if (flags & SWS_PRINT_INFO && !warnedAlready) { av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n" @@ -2458,6 +2605,18 @@ static int swScale(SwsContext *c, const uint8_t* src[], } } + if ((int)dst[0]%16 || (int)dst[1]%16 || (int)dst[2]%16 || (int)src[0]%16 || (int)src[1]%16 || (int)src[2]%16 + || dstStride[0]%16 || dstStride[1]%16 || dstStride[2]%16 || dstStride[3]%16 + || srcStride[0]%16 || srcStride[1]%16 || srcStride[2]%16 || srcStride[3]%16 + ) { + static int warnedAlready=0; + int cpu_flags = av_get_cpu_flags(); + if (HAVE_MMX2 && (cpu_flags & AV_CPU_FLAG_SSE2) && !warnedAlready){ + av_log(c, AV_LOG_WARNING, "Warning: data is not aligned! This can lead to a speedloss\n"); + warnedAlready=1; + } + } + /* Note the user might start scaling the picture in the middle so this will not get executed. This is not really intended but works currently, so people might do it. */ @@ -2482,6 +2641,7 @@ static int swScale(SwsContext *c, const uint8_t* src[], dst[2] + dstStride[2] * chrDstY, (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL, }; + int use_mmx_vfilter= c->use_mmx_vfilter; const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)]; @@ -2577,6 +2737,7 @@ static int swScale(SwsContext *c, const uint8_t* src[], // hmm looks like we can't use MMX here without overwriting this array's tail find_c_packed_planar_out_funcs(c, &yuv2plane1, &yuv2planeX, &yuv2nv12cX, &yuv2packed1, &yuv2packed2, &yuv2packedX); + use_mmx_vfilter= 0; } { @@ -2584,35 +2745,58 @@ static int swScale(SwsContext *c, const uint8_t* src[], const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL; + int16_t *vLumFilter= c->vLumFilter; + int16_t *vChrFilter= c->vChrFilter; + if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like const int chrSkipMask= (1<<c->chrDstVSubSample)-1; + vLumFilter += dstY * vLumFilterSize; + vChrFilter += chrDstY * vChrFilterSize; + + av_assert0(use_mmx_vfilter != ( + yuv2planeX == yuv2planeX_10BE_c + || yuv2planeX == yuv2planeX_10LE_c + || yuv2planeX == yuv2planeX_9BE_c + || yuv2planeX == yuv2planeX_9LE_c + || yuv2planeX == yuv2planeX_16BE_c + || yuv2planeX == yuv2planeX_16LE_c + || yuv2planeX == yuv2planeX_8_c) || !ARCH_X86); + + if(use_mmx_vfilter){ + vLumFilter= c->lumMmxFilter; + vChrFilter= c->chrMmxFilter; + } + if (vLumFilterSize == 1) { yuv2plane1(lumSrcPtr[0], dest[0], dstW, c->lumDither8, 0); } else { - yuv2planeX(vLumFilter + dstY * vLumFilterSize, vLumFilterSize, + yuv2planeX(vLumFilter, vLumFilterSize, lumSrcPtr, dest[0], dstW, c->lumDither8, 0); } if (!((dstY&chrSkipMask) || isGray(dstFormat))) { if (yuv2nv12cX) { - yuv2nv12cX(c, vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize, chrUSrcPtr, chrVSrcPtr, dest[1], chrDstW); + yuv2nv12cX(c, vChrFilter, vChrFilterSize, chrUSrcPtr, chrVSrcPtr, dest[1], chrDstW); } else if (vChrFilterSize == 1) { yuv2plane1(chrUSrcPtr[0], dest[1], chrDstW, c->chrDither8, 0); yuv2plane1(chrVSrcPtr[0], dest[2], chrDstW, c->chrDither8, 3); } else { - yuv2planeX(vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize, + yuv2planeX(vChrFilter, vChrFilterSize, chrUSrcPtr, dest[1], chrDstW, c->chrDither8, 0); - yuv2planeX(vChrFilter + chrDstY * vChrFilterSize, vChrFilterSize, - chrVSrcPtr, dest[2], chrDstW, c->chrDither8, 3); + yuv2planeX(vChrFilter, vChrFilterSize, + chrVSrcPtr, dest[2], chrDstW, c->chrDither8, use_mmx_vfilter ? (c->uv_offx2 >> 1) : 3); } } if (CONFIG_SWSCALE_ALPHA && alpPixBuf){ + if(use_mmx_vfilter){ + vLumFilter= c->alpMmxFilter; + } if (vLumFilterSize == 1) { yuv2plane1(alpSrcPtr[0], dest[3], dstW, c->lumDither8, 0); } else { - yuv2planeX(vLumFilter + dstY * vLumFilterSize, vLumFilterSize, + yuv2planeX(vLumFilter, vLumFilterSize, alpSrcPtr, dest[3], dstW, c->lumDither8, 0); } } @@ -2695,8 +2879,8 @@ static av_cold void sws_init_swScale_c(SwsContext *c) case PIX_FMT_YUV422P9LE: case PIX_FMT_YUV420P9LE: case PIX_FMT_YUV422P10LE: - case PIX_FMT_YUV444P10LE: case PIX_FMT_YUV420P10LE: + case PIX_FMT_YUV444P10LE: case PIX_FMT_YUV420P16LE: case PIX_FMT_YUV422P16LE: case PIX_FMT_YUV444P16LE: c->chrToYV12 = bswap16UV_c; break; @@ -2732,6 +2916,7 @@ static av_cold void sws_init_swScale_c(SwsContext *c) case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_half_c; break; case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_half_c; break; case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_half_c; break; + case PIX_FMT_GBR24P : c->chrToYV12 = gbr24pToUV_half_c; break; } } else { switch(srcFormat) { @@ -2770,9 +2955,9 @@ static av_cold void sws_init_swScale_c(SwsContext *c) case PIX_FMT_YUV444P9LE: case PIX_FMT_YUV422P9LE: case PIX_FMT_YUV420P9LE: - case PIX_FMT_YUV444P10LE: case PIX_FMT_YUV422P10LE: case PIX_FMT_YUV420P10LE: + case PIX_FMT_YUV444P10LE: case PIX_FMT_YUV420P16LE: case PIX_FMT_YUV422P16LE: case PIX_FMT_YUV444P16LE: @@ -2825,9 +3010,11 @@ static av_cold void sws_init_swScale_c(SwsContext *c) case PIX_FMT_ABGR: case PIX_FMT_ARGB: c->alpToYV12 = abgrToA_c; break; case PIX_FMT_Y400A: c->alpToYV12 = uyvyToY_c; break; + case PIX_FMT_PAL8 : c->alpToYV12 = palToA_c; break; } } + if (c->srcBpc == 8) { if (c->dstBpc <= 10) { c->hyScale = c->hcScale = hScale8To15_c; diff --git a/libswscale/swscale.h b/libswscale/swscale.h index 91378a40c4..845121a0c0 100644 --- a/libswscale/swscale.h +++ b/libswscale/swscale.h @@ -1,20 +1,20 @@ /* - * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at> + * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -55,6 +55,9 @@ #ifndef FF_API_SWS_CPU_CAPS #define FF_API_SWS_CPU_CAPS (LIBSWSCALE_VERSION_MAJOR < 3) #endif +#ifndef FF_API_SWS_FORMAT_NAME +#define FF_API_SWS_FORMAT_NAME (LIBSWSCALE_VERSION_MAJOR < 3) +#endif /** * Returns the LIBSWSCALE_VERSION_INT constant. diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h index 7bfdc7eb6b..44ff166e55 100644 --- a/libswscale/swscale_internal.h +++ b/libswscale/swscale_internal.h @@ -1,20 +1,20 @@ /* - * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at> + * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -38,6 +38,8 @@ #define MAX_FILTER_SIZE 256 +#define DITHER1XBPP + #if HAVE_BIGENDIAN #define ALT32_CORR (-1) #else @@ -347,8 +349,8 @@ typedef struct SwsContext { #define V_TEMP "11*8+4*4*256*2+32" #define Y_TEMP "11*8+4*4*256*2+40" #define ALP_MMX_FILTER_OFFSET "11*8+4*4*256*2+48" -#define UV_OFF_PX "11*8+4*4*256*3+48" -#define UV_OFF_BYTE "11*8+4*4*256*3+56" +#define UV_OFF "11*8+4*4*256*3+48" +#define UV_OFFx2 "11*8+4*4*256*3+56" #define DITHER16 "11*8+4*4*256*3+64" #define DITHER32 "11*8+4*4*256*3+80" @@ -376,8 +378,8 @@ typedef struct SwsContext { // alignment of these values is not necessary, but merely here // to maintain the same offset across x8632 and x86-64. Once we // use proper offset macros in the asm, they can be removed. - DECLARE_ALIGNED(8, ptrdiff_t, uv_off_px); ///< offset (in pixels) between u and v planes - DECLARE_ALIGNED(8, ptrdiff_t, uv_off_byte); ///< offset (in bytes) between u and v planes + DECLARE_ALIGNED(8, ptrdiff_t, uv_off); ///< offset (in pixels) between u and v planes + DECLARE_ALIGNED(8, ptrdiff_t, uv_offx2); ///< offset (in bytes) between u and v planes DECLARE_ALIGNED(8, uint16_t, dither16)[8]; DECLARE_ALIGNED(8, uint32_t, dither32)[8]; @@ -411,6 +413,7 @@ typedef struct SwsContext { #if HAVE_VIS DECLARE_ALIGNED(8, uint64_t, sparc_coeffs)[10]; #endif + int use_mmx_vfilter; /* function pointers for swScale() */ yuv2planar1_fn yuv2plane1; @@ -420,12 +423,12 @@ typedef struct SwsContext { yuv2packed2_fn yuv2packed2; yuv2packedX_fn yuv2packedX; - void (*lumToYV12)(uint8_t *dst, const uint8_t *src, + void (*lumToYV12)(uint8_t *dst, const uint8_t *src, const uint8_t *src2, const uint8_t *src3, int width, uint32_t *pal); ///< Unscaled conversion of luma plane to YV12 for horizontal scaler. - void (*alpToYV12)(uint8_t *dst, const uint8_t *src, + void (*alpToYV12)(uint8_t *dst, const uint8_t *src, const uint8_t *src2, const uint8_t *src3, int width, uint32_t *pal); ///< Unscaled conversion of alpha plane to YV12 for horizontal scaler. void (*chrToYV12)(uint8_t *dstU, uint8_t *dstV, - const uint8_t *src1, const uint8_t *src2, + const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, int width, uint32_t *pal); ///< Unscaled conversion of chroma planes to YV12 for horizontal scaler. /** @@ -529,7 +532,13 @@ SwsFunc ff_yuv2rgb_init_altivec(SwsContext *c); SwsFunc ff_yuv2rgb_get_func_ptr_bfin(SwsContext *c); void ff_bfin_get_unscaled_swscale(SwsContext *c); +#if FF_API_SWS_FORMAT_NAME +/** + * @deprecated Use av_get_pix_fmt_name() instead. + */ +attribute_deprecated const char *sws_format_name(enum PixelFormat format); +#endif #define is16BPS(x) \ (av_pix_fmt_descriptors[x].comp[0].depth_minus1 == 15) @@ -538,6 +547,8 @@ const char *sws_format_name(enum PixelFormat format); (av_pix_fmt_descriptors[x].comp[0].depth_minus1 == 8 || \ av_pix_fmt_descriptors[x].comp[0].depth_minus1 == 9) +#define isNBPS(x) is9_OR_10BPS(x) + #define isBE(x) \ (av_pix_fmt_descriptors[x].flags & PIX_FMT_BE) @@ -551,7 +562,6 @@ const char *sws_format_name(enum PixelFormat format); #define isRGB(x) \ (av_pix_fmt_descriptors[x].flags & PIX_FMT_RGB) - #if 0 // FIXME #define isGray(x) \ (!(av_pix_fmt_descriptors[x].flags & PIX_FMT_PAL) && \ @@ -559,7 +569,7 @@ const char *sws_format_name(enum PixelFormat format); #else #define isGray(x) ( \ (x)==PIX_FMT_GRAY8 \ - || (x)==PIX_FMT_Y400A \ + || (x)==PIX_FMT_GRAY8A \ || (x)==PIX_FMT_GRAY16BE \ || (x)==PIX_FMT_GRAY16LE \ ) @@ -568,6 +578,8 @@ const char *sws_format_name(enum PixelFormat format); #define isRGBinInt(x) ( \ (x)==PIX_FMT_RGB48BE \ || (x)==PIX_FMT_RGB48LE \ + || (x)==PIX_FMT_RGBA64BE \ + || (x)==PIX_FMT_RGBA64LE \ || (x)==PIX_FMT_RGB32 \ || (x)==PIX_FMT_RGB32_1 \ || (x)==PIX_FMT_RGB24 \ @@ -586,6 +598,8 @@ const char *sws_format_name(enum PixelFormat format); #define isBGRinInt(x) ( \ (x)==PIX_FMT_BGR48BE \ || (x)==PIX_FMT_BGR48LE \ + || (x)==PIX_FMT_BGRA64BE \ + || (x)==PIX_FMT_BGRA64LE \ || (x)==PIX_FMT_BGR32 \ || (x)==PIX_FMT_BGR32_1 \ || (x)==PIX_FMT_BGR24 \ @@ -601,18 +615,51 @@ const char *sws_format_name(enum PixelFormat format); || (x)==PIX_FMT_MONOBLACK \ || (x)==PIX_FMT_MONOWHITE \ ) + +#define isRGBinBytes(x) ( \ + (x)==PIX_FMT_RGB48BE \ + || (x)==PIX_FMT_RGB48LE \ + || (x)==PIX_FMT_RGBA64BE \ + || (x)==PIX_FMT_RGBA64LE \ + || (x)==PIX_FMT_RGBA \ + || (x)==PIX_FMT_ARGB \ + || (x)==PIX_FMT_RGB24 \ + ) +#define isBGRinBytes(x) ( \ + (x)==PIX_FMT_BGR48BE \ + || (x)==PIX_FMT_BGR48LE \ + || (x)==PIX_FMT_BGRA64BE \ + || (x)==PIX_FMT_BGRA64LE \ + || (x)==PIX_FMT_BGRA \ + || (x)==PIX_FMT_ABGR \ + || (x)==PIX_FMT_BGR24 \ + ) + #define isAnyRGB(x) ( \ isRGBinInt(x) \ || isBGRinInt(x) \ + || (x)==PIX_FMT_GBR24P \ ) + #define isALPHA(x) \ (av_pix_fmt_descriptors[x].nb_components == 2 || \ av_pix_fmt_descriptors[x].nb_components == 4) +#if 1 +#define isPacked(x) ( \ + (x)==PIX_FMT_PAL8 \ + || (x)==PIX_FMT_YUYV422 \ + || (x)==PIX_FMT_UYVY422 \ + || (x)==PIX_FMT_Y400A \ + || isRGBinInt(x) \ + || isBGRinInt(x) \ + ) +#else #define isPacked(x) \ (av_pix_fmt_descriptors[x].nb_components >= 2 && \ !(av_pix_fmt_descriptors[x].flags & PIX_FMT_PLANAR)) +#endif #define isPlanar(x) \ (av_pix_fmt_descriptors[x].nb_components >= 2 && \ (av_pix_fmt_descriptors[x].flags & PIX_FMT_PLANAR)) @@ -621,6 +668,9 @@ const char *sws_format_name(enum PixelFormat format); extern const uint64_t ff_dither4[2]; extern const uint64_t ff_dither8[2]; +extern const uint8_t dithers[8][8][8]; +extern const uint16_t dither_scale[15][16]; + extern const AVClass sws_context_class; diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c index 86bc5acda6..80ae5fc287 100644 --- a/libswscale/swscale_unscaled.c +++ b/libswscale/swscale_unscaled.c @@ -1,20 +1,20 @@ /* - * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at> + * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -34,48 +34,6 @@ #include "libavutil/bswap.h" #include "libavutil/pixdesc.h" -DECLARE_ALIGNED(8, const uint8_t, dither_8x8_1)[8][8] = { - { 0, 1, 0, 1, 0, 1, 0, 1,}, - { 1, 0, 1, 0, 1, 0, 1, 0,}, - { 0, 1, 0, 1, 0, 1, 0, 1,}, - { 1, 0, 1, 0, 1, 0, 1, 0,}, - { 0, 1, 0, 1, 0, 1, 0, 1,}, - { 1, 0, 1, 0, 1, 0, 1, 0,}, - { 0, 1, 0, 1, 0, 1, 0, 1,}, - { 1, 0, 1, 0, 1, 0, 1, 0,}, -}; -DECLARE_ALIGNED(8, const uint8_t, dither_8x8_3)[8][8] = { - { 1, 2, 1, 2, 1, 2, 1, 2,}, - { 3, 0, 3, 0, 3, 0, 3, 0,}, - { 1, 2, 1, 2, 1, 2, 1, 2,}, - { 3, 0, 3, 0, 3, 0, 3, 0,}, - { 1, 2, 1, 2, 1, 2, 1, 2,}, - { 3, 0, 3, 0, 3, 0, 3, 0,}, - { 1, 2, 1, 2, 1, 2, 1, 2,}, - { 3, 0, 3, 0, 3, 0, 3, 0,}, -}; -DECLARE_ALIGNED(8, const uint8_t, dither_8x8_64)[8][8] = { - { 18, 34, 30, 46, 17, 33, 29, 45,}, - { 50, 2, 62, 14, 49, 1, 61, 13,}, - { 26, 42, 22, 38, 25, 41, 21, 37,}, - { 58, 10, 54, 6, 57, 9, 53, 5,}, - { 16, 32, 28, 44, 19, 35, 31, 47,}, - { 48, 0, 60, 12, 51, 3, 63, 15,}, - { 24, 40, 20, 36, 27, 43, 23, 39,}, - { 56, 8, 52, 4, 59, 11, 55, 7,}, -}; -extern const uint8_t dither_8x8_128[8][8]; -DECLARE_ALIGNED(8, const uint8_t, dither_8x8_256)[8][8] = { - { 72, 136, 120, 184, 68, 132, 116, 180,}, - { 200, 8, 248, 56, 196, 4, 244, 52,}, - { 104, 168, 88, 152, 100, 164, 84, 148,}, - { 232, 40, 216, 24, 228, 36, 212, 20,}, - { 64, 128, 102, 176, 76, 140, 124, 188,}, - { 192, 0, 240, 48, 204, 12, 252, 60,}, - { 96, 160, 80, 144, 108, 172, 92, 156,}, - { 224, 32, 208, 16, 236, 44, 220, 28,}, -}; - #define RGB2YUV_SHIFT 15 #define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5)) #define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5)) @@ -263,7 +221,7 @@ static int palToRgbWrapper(SwsContext *c, const uint8_t* src[], int srcStride[], uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY; const uint8_t *srcPtr= src[0]; - if (srcFormat == PIX_FMT_Y400A) { + if (srcFormat == PIX_FMT_GRAY8A) { switch (dstFormat) { case PIX_FMT_RGB32 : conv = gray8aToPacked32; break; case PIX_FMT_BGR32 : conv = gray8aToPacked32; break; @@ -285,7 +243,7 @@ static int palToRgbWrapper(SwsContext *c, const uint8_t* src[], int srcStride[], if (!conv) av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n", - sws_format_name(srcFormat), sws_format_name(dstFormat)); + av_get_pix_fmt_name(srcFormat), av_get_pix_fmt_name(dstFormat)); else { for (i=0; i<srcSliceH; i++) { conv(srcPtr, dstPtr, c->srcW, (uint8_t *) c->pal_rgb); @@ -297,6 +255,92 @@ static int palToRgbWrapper(SwsContext *c, const uint8_t* src[], int srcStride[], return srcSliceH; } +static void gbr24ptopacked24(const uint8_t* src[], int srcStride[], uint8_t* dst, int dstStride, int srcSliceH, int width) +{ + int x, h, i; + for (h = 0; h < srcSliceH; h++) { + uint8_t *dest = dst + dstStride * h; + for (x = 0; x < width; x++) { + *dest++ = src[0][x]; + *dest++ = src[1][x]; + *dest++ = src[2][x]; + } + + for (i = 0; i < 3; i++) + src[i] += srcStride[i]; + } +} + +static void gbr24ptopacked32(const uint8_t* src[], int srcStride[], uint8_t* dst, int dstStride, int srcSliceH, int alpha_first, int width) +{ + int x, h, i; + for (h = 0; h < srcSliceH; h++) { + uint8_t *dest = dst + dstStride * h; + + if (alpha_first) { + for (x = 0; x < width; x++) { + *dest++ = 0xff; + *dest++ = src[0][x]; + *dest++ = src[1][x]; + *dest++ = src[2][x]; + } + } else { + for (x = 0; x < width; x++) { + *dest++ = src[0][x]; + *dest++ = src[1][x]; + *dest++ = src[2][x]; + *dest++ = 0xff; + } + } + + for (i = 0; i < 3; i++) + src[i] += srcStride[i]; + } +} + +static int planarRgbToRgbWrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY, + int srcSliceH, uint8_t* dst[], int dstStride[]) +{ + int alpha_first = 0; + if (c->srcFormat != PIX_FMT_GBR24P) { + av_log(c, AV_LOG_ERROR, "unsupported planar RGB conversion %s -> %s\n", + av_get_pix_fmt_name(c->srcFormat), av_get_pix_fmt_name(c->dstFormat)); + return srcSliceH; + } + + switch (c->dstFormat) { + case PIX_FMT_BGR24: + gbr24ptopacked24((const uint8_t* []) {src[1], src[0], src[2]}, (int []) {srcStride[1], srcStride[0], srcStride[2]}, + dst[0] + srcSliceY * dstStride[0], dstStride[0], srcSliceH, c->srcW); + break; + + case PIX_FMT_RGB24: + gbr24ptopacked24((const uint8_t* []) {src[2], src[0], src[1]}, (int []) {srcStride[2], srcStride[0], srcStride[1]}, + dst[0] + srcSliceY * dstStride[0], dstStride[0], srcSliceH, c->srcW); + break; + + case PIX_FMT_ARGB: + alpha_first = 1; + case PIX_FMT_RGBA: + gbr24ptopacked32((const uint8_t* []) {src[2], src[0], src[1]}, (int []) {srcStride[2], srcStride[0], srcStride[1]}, + dst[0] + srcSliceY * dstStride[0], dstStride[0], srcSliceH, alpha_first, c->srcW); + break; + + case PIX_FMT_ABGR: + alpha_first = 1; + case PIX_FMT_BGRA: + gbr24ptopacked32((const uint8_t* []) {src[1], src[0], src[2]}, (int []) {srcStride[1], srcStride[0], srcStride[2]}, + dst[0] + srcSliceY * dstStride[0], dstStride[0], srcSliceH, alpha_first, c->srcW); + break; + + default: + av_log(c, AV_LOG_ERROR, "unsupported planar RGB conversion %s -> %s\n", + av_get_pix_fmt_name(c->srcFormat), av_get_pix_fmt_name(c->dstFormat)); + } + + return srcSliceH; +} + #define isRGBA32(x) ( \ (x) == PIX_FMT_ARGB \ || (x) == PIX_FMT_RGBA \ @@ -372,7 +416,7 @@ static int rgbToRgbWrapper(SwsContext *c, const uint8_t* src[], int srcStride[], if (!conv) { av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n", - sws_format_name(srcFormat), sws_format_name(dstFormat)); + av_get_pix_fmt_name(srcFormat), av_get_pix_fmt_name(dstFormat)); } else { const uint8_t *srcPtr= src[0]; uint8_t *dstPtr= dst[0]; @@ -454,27 +498,28 @@ static int packedCopyWrapper(SwsContext *c, const uint8_t* src[], int srcStride[ return srcSliceH; } -#define clip9(x) av_clip_uintp2(x, 9) -#define clip10(x) av_clip_uintp2(x, 10) -#define DITHER_COPY(dst, dstStride, wfunc, src, srcStride, rfunc, dithers, shift, clip) \ - for (i = 0; i < height; i++) { \ - const uint8_t *dither = dithers[i & 7]; \ - for (j = 0; j < length - 7; j += 8) { \ - wfunc(&dst[j + 0], clip((rfunc(&src[j + 0]) + dither[0]) >> shift)); \ - wfunc(&dst[j + 1], clip((rfunc(&src[j + 1]) + dither[1]) >> shift)); \ - wfunc(&dst[j + 2], clip((rfunc(&src[j + 2]) + dither[2]) >> shift)); \ - wfunc(&dst[j + 3], clip((rfunc(&src[j + 3]) + dither[3]) >> shift)); \ - wfunc(&dst[j + 4], clip((rfunc(&src[j + 4]) + dither[4]) >> shift)); \ - wfunc(&dst[j + 5], clip((rfunc(&src[j + 5]) + dither[5]) >> shift)); \ - wfunc(&dst[j + 6], clip((rfunc(&src[j + 6]) + dither[6]) >> shift)); \ - wfunc(&dst[j + 7], clip((rfunc(&src[j + 7]) + dither[7]) >> shift)); \ - } \ - for (; j < length; j++) \ - wfunc(&dst[j], (rfunc(&src[j]) + dither[j & 7]) >> shift); \ - dst += dstStride; \ - src += srcStride; \ +#define DITHER_COPY(dst, dstStride, src, srcStride, bswap, dbswap)\ + uint16_t scale= dither_scale[dst_depth-1][src_depth-1];\ + int shift= src_depth-dst_depth + dither_scale[src_depth-2][dst_depth-1];\ + for (i = 0; i < height; i++) {\ + const uint8_t *dither= dithers[src_depth-9][i&7];\ + for (j = 0; j < length-7; j+=8){\ + dst[j+0] = dbswap((bswap(src[j+0]) + dither[0])*scale>>shift);\ + dst[j+1] = dbswap((bswap(src[j+1]) + dither[1])*scale>>shift);\ + dst[j+2] = dbswap((bswap(src[j+2]) + dither[2])*scale>>shift);\ + dst[j+3] = dbswap((bswap(src[j+3]) + dither[3])*scale>>shift);\ + dst[j+4] = dbswap((bswap(src[j+4]) + dither[4])*scale>>shift);\ + dst[j+5] = dbswap((bswap(src[j+5]) + dither[5])*scale>>shift);\ + dst[j+6] = dbswap((bswap(src[j+6]) + dither[6])*scale>>shift);\ + dst[j+7] = dbswap((bswap(src[j+7]) + dither[7])*scale>>shift);\ + }\ + for (; j < length; j++)\ + dst[j] = dbswap((bswap(src[j]) + dither[j&7])*scale>>shift);\ + dst += dstStride;\ + src += srcStride;\ } + static int planarCopyWrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[]) { @@ -485,6 +530,7 @@ static int planarCopyWrapper(SwsContext *c, const uint8_t* src[], int srcStride[ int height= (plane==0 || plane==3) ? srcSliceH: -((-srcSliceH)>>c->chrDstVSubSample); const uint8_t *srcPtr= src[plane]; uint8_t *dstPtr= dst[plane] + dstStride[plane]*y; + int shiftonly= plane==1 || plane==2 || (!c->srcRange && plane==0); if (!dst[plane]) continue; // ignore palette for GRAY8 @@ -494,151 +540,84 @@ static int planarCopyWrapper(SwsContext *c, const uint8_t* src[], int srcStride[ length*=2; fillPlane(dst[plane], dstStride[plane], length, height, y, (plane==3) ? 255 : 128); } else { - if(is9_OR_10BPS(c->srcFormat)) { + if(isNBPS(c->srcFormat) || isNBPS(c->dstFormat) + || (is16BPS(c->srcFormat) != is16BPS(c->dstFormat)) + ) { const int src_depth = av_pix_fmt_descriptors[c->srcFormat].comp[plane].depth_minus1+1; const int dst_depth = av_pix_fmt_descriptors[c->dstFormat].comp[plane].depth_minus1+1; const uint16_t *srcPtr2 = (const uint16_t*)srcPtr; + uint16_t *dstPtr2 = (uint16_t*)dstPtr; - if (is16BPS(c->dstFormat)) { - uint16_t *dstPtr2 = (uint16_t*)dstPtr; -#define COPY9_OR_10TO16(rfunc, wfunc) \ - for (i = 0; i < height; i++) { \ - for (j = 0; j < length; j++) { \ - int srcpx = rfunc(&srcPtr2[j]); \ - wfunc(&dstPtr2[j], (srcpx<<(16-src_depth)) | (srcpx>>(2*src_depth-16))); \ - } \ - dstPtr2 += dstStride[plane]/2; \ - srcPtr2 += srcStride[plane]/2; \ + if (dst_depth == 8) { + if(isBE(c->srcFormat) == HAVE_BIGENDIAN){ + DITHER_COPY(dstPtr, dstStride[plane], srcPtr2, srcStride[plane]/2, , ) + } else { + DITHER_COPY(dstPtr, dstStride[plane], srcPtr2, srcStride[plane]/2, av_bswap16, ) } - if (isBE(c->dstFormat)) { - if (isBE(c->srcFormat)) { - COPY9_OR_10TO16(AV_RB16, AV_WB16); - } else { - COPY9_OR_10TO16(AV_RL16, AV_WB16); + } else if (src_depth == 8) { + for (i = 0; i < height; i++) { + #define COPY816(w)\ + if(shiftonly){\ + for (j = 0; j < length; j++)\ + w(&dstPtr2[j], srcPtr[j]<<(dst_depth-8));\ + }else{\ + for (j = 0; j < length; j++)\ + w(&dstPtr2[j], (srcPtr[j]<<(dst_depth-8)) |\ + (srcPtr[j]>>(2*8-dst_depth)));\ } - } else { - if (isBE(c->srcFormat)) { - COPY9_OR_10TO16(AV_RB16, AV_WL16); + if(isBE(c->dstFormat)){ + COPY816(AV_WB16) } else { - COPY9_OR_10TO16(AV_RL16, AV_WL16); + COPY816(AV_WL16) } + dstPtr2 += dstStride[plane]/2; + srcPtr += srcStride[plane]; } - } else if (is9_OR_10BPS(c->dstFormat)) { - uint16_t *dstPtr2 = (uint16_t*)dstPtr; -#define COPY9_OR_10TO9_OR_10(loop) \ - for (i = 0; i < height; i++) { \ - for (j = 0; j < length; j++) { \ - loop; \ - } \ - dstPtr2 += dstStride[plane]/2; \ - srcPtr2 += srcStride[plane]/2; \ - } -#define COPY9_OR_10TO9_OR_10_2(rfunc, wfunc) \ - if (dst_depth > src_depth) { \ - COPY9_OR_10TO9_OR_10(int srcpx = rfunc(&srcPtr2[j]); \ - wfunc(&dstPtr2[j], (srcpx << 1) | (srcpx >> 9))); \ - } else if (dst_depth < src_depth) { \ - DITHER_COPY(dstPtr2, dstStride[plane]/2, wfunc, \ - srcPtr2, srcStride[plane]/2, rfunc, \ - dither_8x8_1, 1, clip9); \ - } else { \ - COPY9_OR_10TO9_OR_10(wfunc(&dstPtr2[j], rfunc(&srcPtr2[j]))); \ - } - if (isBE(c->dstFormat)) { - if (isBE(c->srcFormat)) { - COPY9_OR_10TO9_OR_10_2(AV_RB16, AV_WB16); - } else { - COPY9_OR_10TO9_OR_10_2(AV_RL16, AV_WB16); - } - } else { - if (isBE(c->srcFormat)) { - COPY9_OR_10TO9_OR_10_2(AV_RB16, AV_WL16); + } else if (src_depth <= dst_depth) { + for (i = 0; i < height; i++) { +#define COPY_UP(r,w) \ + if(shiftonly){\ + for (j = 0; j < length; j++){ \ + unsigned int v= r(&srcPtr2[j]);\ + w(&dstPtr2[j], v<<(dst_depth-src_depth));\ + }\ + }else{\ + for (j = 0; j < length; j++){ \ + unsigned int v= r(&srcPtr2[j]);\ + w(&dstPtr2[j], (v<<(dst_depth-src_depth)) | \ + (v>>(2*src_depth-dst_depth)));\ + }\ + } + if(isBE(c->srcFormat)){ + if(isBE(c->dstFormat)){ + COPY_UP(AV_RB16, AV_WB16) + } else { + COPY_UP(AV_RB16, AV_WL16) + } } else { - COPY9_OR_10TO9_OR_10_2(AV_RL16, AV_WL16); + if(isBE(c->dstFormat)){ + COPY_UP(AV_RL16, AV_WB16) + } else { + COPY_UP(AV_RL16, AV_WL16) + } } + dstPtr2 += dstStride[plane]/2; + srcPtr2 += srcStride[plane]/2; } } else { -#define W8(a, b) { *(a) = (b); } -#define COPY9_OR_10TO8(rfunc) \ - if (src_depth == 9) { \ - DITHER_COPY(dstPtr, dstStride[plane], W8, \ - srcPtr2, srcStride[plane]/2, rfunc, \ - dither_8x8_1, 1, av_clip_uint8); \ - } else { \ - DITHER_COPY(dstPtr, dstStride[plane], W8, \ - srcPtr2, srcStride[plane]/2, rfunc, \ - dither_8x8_3, 2, av_clip_uint8); \ - } - if (isBE(c->srcFormat)) { - COPY9_OR_10TO8(AV_RB16); - } else { - COPY9_OR_10TO8(AV_RL16); - } - } - } else if(is9_OR_10BPS(c->dstFormat)) { - const int dst_depth = av_pix_fmt_descriptors[c->dstFormat].comp[plane].depth_minus1+1; - uint16_t *dstPtr2 = (uint16_t*)dstPtr; - - if (is16BPS(c->srcFormat)) { - const uint16_t *srcPtr2 = (const uint16_t*)srcPtr; -#define COPY16TO9_OR_10(rfunc, wfunc) \ - if (dst_depth == 9) { \ - DITHER_COPY(dstPtr2, dstStride[plane]/2, wfunc, \ - srcPtr2, srcStride[plane]/2, rfunc, \ - dither_8x8_128, 7, clip9); \ - } else { \ - DITHER_COPY(dstPtr2, dstStride[plane]/2, wfunc, \ - srcPtr2, srcStride[plane]/2, rfunc, \ - dither_8x8_64, 6, clip10); \ - } - if (isBE(c->dstFormat)) { - if (isBE(c->srcFormat)) { - COPY16TO9_OR_10(AV_RB16, AV_WB16); + if(isBE(c->srcFormat) == HAVE_BIGENDIAN){ + if(isBE(c->dstFormat) == HAVE_BIGENDIAN){ + DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, , ) } else { - COPY16TO9_OR_10(AV_RL16, AV_WB16); + DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, , av_bswap16) } - } else { - if (isBE(c->srcFormat)) { - COPY16TO9_OR_10(AV_RB16, AV_WL16); + }else{ + if(isBE(c->dstFormat) == HAVE_BIGENDIAN){ + DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, av_bswap16, ) } else { - COPY16TO9_OR_10(AV_RL16, AV_WL16); + DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, av_bswap16, av_bswap16) } } - } else /* 8bit */ { -#define COPY8TO9_OR_10(wfunc) \ - for (i = 0; i < height; i++) { \ - for (j = 0; j < length; j++) { \ - const int srcpx = srcPtr[j]; \ - wfunc(&dstPtr2[j], (srcpx<<(dst_depth-8)) | (srcpx >> (16-dst_depth))); \ - } \ - dstPtr2 += dstStride[plane]/2; \ - srcPtr += srcStride[plane]; \ - } - if (isBE(c->dstFormat)) { - COPY8TO9_OR_10(AV_WB16); - } else { - COPY8TO9_OR_10(AV_WL16); - } - } - } else if(is16BPS(c->srcFormat) && !is16BPS(c->dstFormat)) { - const uint16_t *srcPtr2 = (const uint16_t*)srcPtr; -#define COPY16TO8(rfunc) \ - DITHER_COPY(dstPtr, dstStride[plane], W8, \ - srcPtr2, srcStride[plane]/2, rfunc, \ - dither_8x8_256, 8, av_clip_uint8); - if (isBE(c->srcFormat)) { - COPY16TO8(AV_RB16); - } else { - COPY16TO8(AV_RL16); - } - } else if(!is16BPS(c->srcFormat) && is16BPS(c->dstFormat)) { - for (i=0; i<height; i++) { - for (j=0; j<length; j++) { - dstPtr[ j<<1 ] = srcPtr[j]; - dstPtr[(j<<1)+1] = srcPtr[j]; - } - srcPtr+= srcStride[plane]; - dstPtr+= dstStride[plane]; } } else if(is16BPS(c->srcFormat) && is16BPS(c->dstFormat) && isBE(c->srcFormat) != isBE(c->dstFormat)) { @@ -715,13 +694,18 @@ void ff_get_unscaled_swscale(SwsContext *c) && (!needsDither || (c->flags&(SWS_FAST_BILINEAR|SWS_POINT)))) c->swScale= rgbToRgbWrapper; - if ((usePal(srcFormat) && ( - dstFormat == PIX_FMT_RGB32 || - dstFormat == PIX_FMT_RGB32_1 || - dstFormat == PIX_FMT_RGB24 || - dstFormat == PIX_FMT_BGR32 || - dstFormat == PIX_FMT_BGR32_1 || - dstFormat == PIX_FMT_BGR24))) +#define isByteRGB(f) (\ + f == PIX_FMT_RGB32 ||\ + f == PIX_FMT_RGB32_1 ||\ + f == PIX_FMT_RGB24 ||\ + f == PIX_FMT_BGR32 ||\ + f == PIX_FMT_BGR32_1 ||\ + f == PIX_FMT_BGR24) + + if (isAnyRGB(srcFormat) && isPlanar(srcFormat) && isByteRGB(dstFormat)) + c->swScale= planarRgbToRgbWrapper; + + if (usePal(srcFormat) && isByteRGB(dstFormat)) c->swScale= palToRgbWrapper; if (srcFormat == PIX_FMT_YUV422P) { @@ -787,7 +771,7 @@ static void reset_ptr(const uint8_t* src[], int format) } } -static int check_image_pointers(uint8_t *data[4], enum PixelFormat pix_fmt, +static int check_image_pointers(const uint8_t * const data[4], enum PixelFormat pix_fmt, const int linesizes[4]) { const AVPixFmtDescriptor *desc = &av_pix_fmt_descriptors[pix_fmt]; @@ -822,7 +806,7 @@ int attribute_align_arg sws_scale(struct SwsContext *c, const uint8_t* const src av_log(c, AV_LOG_ERROR, "bad src image pointers\n"); return 0; } - if (!check_image_pointers(dst, c->dstFormat, dstStride)) { + if (!check_image_pointers((const uint8_t* const*)dst, c->dstFormat, dstStride)) { av_log(c, AV_LOG_ERROR, "bad dst image pointers\n"); return 0; } @@ -837,9 +821,10 @@ int attribute_align_arg sws_scale(struct SwsContext *c, const uint8_t* const src if (usePal(c->srcFormat)) { for (i=0; i<256; i++) { - int p, r, g, b,y,u,v; + int p, r, g, b, y, u, v, a = 0xff; if(c->srcFormat == PIX_FMT_PAL8) { p=((const uint32_t*)(srcSlice[1]))[i]; + a= (p>>24)&0xFF; r= (p>>16)&0xFF; g= (p>> 8)&0xFF; b= p &0xFF; @@ -855,7 +840,7 @@ int attribute_align_arg sws_scale(struct SwsContext *c, const uint8_t* const src r= (i>>3 )*255; g= ((i>>1)&3)*85; b= (i&1 )*255; - } else if(c->srcFormat == PIX_FMT_GRAY8 || c->srcFormat == PIX_FMT_Y400A) { + } else if(c->srcFormat == PIX_FMT_GRAY8 || c->srcFormat == PIX_FMT_GRAY8A) { r = g = b = i; } else { assert(c->srcFormat == PIX_FMT_BGR4_BYTE); @@ -866,33 +851,33 @@ int attribute_align_arg sws_scale(struct SwsContext *c, const uint8_t* const src y= av_clip_uint8((RY*r + GY*g + BY*b + ( 33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT); u= av_clip_uint8((RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT); v= av_clip_uint8((RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT); - c->pal_yuv[i]= y + (u<<8) + (v<<16); + c->pal_yuv[i]= y + (u<<8) + (v<<16) + (a<<24); switch(c->dstFormat) { case PIX_FMT_BGR32: #if !HAVE_BIGENDIAN case PIX_FMT_RGB24: #endif - c->pal_rgb[i]= r + (g<<8) + (b<<16); + c->pal_rgb[i]= r + (g<<8) + (b<<16) + (a<<24); break; case PIX_FMT_BGR32_1: #if HAVE_BIGENDIAN case PIX_FMT_BGR24: #endif - c->pal_rgb[i]= (r + (g<<8) + (b<<16)) << 8; + c->pal_rgb[i]= a + (r<<8) + (g<<16) + (b<<24); break; case PIX_FMT_RGB32_1: #if HAVE_BIGENDIAN case PIX_FMT_RGB24: #endif - c->pal_rgb[i]= (b + (g<<8) + (r<<16)) << 8; + c->pal_rgb[i]= a + (b<<8) + (g<<16) + (r<<24); break; case PIX_FMT_RGB32: #if !HAVE_BIGENDIAN case PIX_FMT_BGR24: #endif default: - c->pal_rgb[i]= b + (g<<8) + (r<<16); + c->pal_rgb[i]= b + (g<<8) + (r<<16) + (a<<24); } } } @@ -904,7 +889,7 @@ int attribute_align_arg sws_scale(struct SwsContext *c, const uint8_t* const src int dstStride2[4]= {dstStride[0], dstStride[1], dstStride[2], dstStride[3]}; reset_ptr(src2, c->srcFormat); - reset_ptr((const uint8_t**)dst2, c->dstFormat); + reset_ptr((void*)dst2, c->dstFormat); /* reset slice direction at end of frame */ if (srcSliceY + srcSliceH == c->srcH) @@ -927,7 +912,7 @@ int attribute_align_arg sws_scale(struct SwsContext *c, const uint8_t* const src dst2[3] += ( c->dstH -1)*dstStride[3]; reset_ptr(src2, c->srcFormat); - reset_ptr((const uint8_t**)dst2, c->dstFormat); + reset_ptr((void*)dst2, c->dstFormat); /* reset slice direction at end of frame */ if (!srcSliceY) diff --git a/libswscale/utils.c b/libswscale/utils.c index b644ed9610..3240b96030 100644 --- a/libswscale/utils.c +++ b/libswscale/utils.c @@ -1,24 +1,25 @@ /* * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #define _SVID_SOURCE //needed for MAP_ANONYMOUS +#define _DARWIN_C_SOURCE // needed for MAP_ANON #include <inttypes.h> #include <string.h> #include <math.h> @@ -46,6 +47,7 @@ #include "libavutil/mathematics.h" #include "libavutil/opt.h" #include "libavutil/pixdesc.h" +#include "libavutil/avassert.h" unsigned swscale_version(void) { @@ -54,13 +56,13 @@ unsigned swscale_version(void) const char *swscale_configuration(void) { - return LIBAV_CONFIGURATION; + return FFMPEG_CONFIGURATION; } const char *swscale_license(void) { #define LICENSE_PREFIX "libswscale license: " - return LICENSE_PREFIX LIBAV_LICENSE + sizeof(LICENSE_PREFIX) - 1; + return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1; } #define RET 0xC3 //near return opcode for x86 @@ -99,6 +101,10 @@ const static FormatEntry format_entries[PIX_FMT_NB] = { [PIX_FMT_RGBA] = { 1 , 1 }, [PIX_FMT_ABGR] = { 1 , 1 }, [PIX_FMT_BGRA] = { 1 , 1 }, + [PIX_FMT_0RGB] = { 1 , 1 }, + [PIX_FMT_RGB0] = { 1 , 1 }, + [PIX_FMT_0BGR] = { 1 , 1 }, + [PIX_FMT_BGR0] = { 1 , 1 }, [PIX_FMT_GRAY16BE] = { 1 , 1 }, [PIX_FMT_GRAY16LE] = { 1 , 1 }, [PIX_FMT_YUV440P] = { 1 , 1 }, @@ -106,6 +112,8 @@ const static FormatEntry format_entries[PIX_FMT_NB] = { [PIX_FMT_YUVA420P] = { 1 , 1 }, [PIX_FMT_RGB48BE] = { 1 , 1 }, [PIX_FMT_RGB48LE] = { 1 , 1 }, + [PIX_FMT_RGBA64BE] = { 0 , 0 }, + [PIX_FMT_RGBA64LE] = { 0 , 0 }, [PIX_FMT_RGB565BE] = { 1 , 1 }, [PIX_FMT_RGB565LE] = { 1 , 1 }, [PIX_FMT_RGB555BE] = { 1 , 1 }, @@ -127,6 +135,8 @@ const static FormatEntry format_entries[PIX_FMT_NB] = { [PIX_FMT_Y400A] = { 1 , 0 }, [PIX_FMT_BGR48BE] = { 1 , 1 }, [PIX_FMT_BGR48LE] = { 1 , 1 }, + [PIX_FMT_BGRA64BE] = { 0 , 0 }, + [PIX_FMT_BGRA64LE] = { 0 , 0 }, [PIX_FMT_YUV420P9BE] = { 1 , 1 }, [PIX_FMT_YUV420P9LE] = { 1 , 1 }, [PIX_FMT_YUV420P10BE] = { 1 , 1 }, @@ -139,6 +149,7 @@ const static FormatEntry format_entries[PIX_FMT_NB] = { [PIX_FMT_YUV444P9LE] = { 1 , 1 }, [PIX_FMT_YUV444P10BE] = { 1 , 1 }, [PIX_FMT_YUV444P10LE] = { 1 , 1 }, + [PIX_FMT_GBR24P] = { 1 , 0 }, [PIX_FMT_GBRP] = { 1 , 0 }, [PIX_FMT_GBRP9LE] = { 1 , 0 }, [PIX_FMT_GBRP9BE] = { 1 , 0 }, @@ -162,13 +173,12 @@ int sws_isSupportedOutput(enum PixelFormat pix_fmt) extern const int32_t ff_yuv2rgb_coeffs[8][4]; +#if FF_API_SWS_FORMAT_NAME const char *sws_format_name(enum PixelFormat format) { - if ((unsigned)format < PIX_FMT_NB && av_pix_fmt_descriptors[format].name) - return av_pix_fmt_descriptors[format].name; - else - return "Unknown format"; + return av_get_pix_fmt_name(format); } +#endif static double getSplineCoeff(double a, double b, double c, double d, double dist) { @@ -717,7 +727,7 @@ int sws_getColorspaceDetails(struct SwsContext *c, int **inv_table, int *srcRange, int **table, int *dstRange, int *brightness, int *contrast, int *saturation) { - if (isYUV(c->dstFormat) || isGray(c->dstFormat)) return -1; + if (!c || isYUV(c->dstFormat) || isGray(c->dstFormat)) return -1; *inv_table = c->srcColorspaceTable; *table = c->dstColorspaceTable; @@ -737,6 +747,10 @@ static int handle_jpeg(enum PixelFormat *format) case PIX_FMT_YUVJ422P: *format = PIX_FMT_YUV422P; return 1; case PIX_FMT_YUVJ444P: *format = PIX_FMT_YUV444P; return 1; case PIX_FMT_YUVJ440P: *format = PIX_FMT_YUV440P; return 1; + case PIX_FMT_0BGR : *format = PIX_FMT_ABGR ; return 0; + case PIX_FMT_BGR0 : *format = PIX_FMT_BGRA ; return 0; + case PIX_FMT_0RGB : *format = PIX_FMT_ARGB ; return 0; + case PIX_FMT_RGB0 : *format = PIX_FMT_RGBA ; return 0; default: return 0; } } @@ -753,7 +767,7 @@ SwsContext *sws_alloc_context(void) int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) { - int i; + int i, j; int usesVFilter, usesHFilter; int unscaled; SwsFilter dummyFilter= {NULL, NULL, NULL, NULL}; @@ -761,7 +775,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) int srcH= c->srcH; int dstW= c->dstW; int dstH= c->dstH; - int dst_stride = FFALIGN(dstW * sizeof(int16_t) + 16, 16), dst_stride_px = dst_stride >> 1; + int dst_stride = FFALIGN(dstW * sizeof(int16_t)+66, 16); int flags, cpu_flags; enum PixelFormat srcFormat= c->srcFormat; enum PixelFormat dstFormat= c->dstFormat; @@ -773,12 +787,21 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) unscaled = (srcW == dstW && srcH == dstH); + handle_jpeg(&srcFormat); + handle_jpeg(&dstFormat); + + if(srcFormat!=c->srcFormat || dstFormat!=c->dstFormat){ + av_log(c, AV_LOG_WARNING, "deprecated pixel format used, make sure you did set range correctly\n"); + c->srcFormat= srcFormat; + c->dstFormat= dstFormat; + } + if (!sws_isSupportedInput(srcFormat)) { - av_log(c, AV_LOG_ERROR, "%s is not supported as input pixel format\n", sws_format_name(srcFormat)); + av_log(c, AV_LOG_ERROR, "%s is not supported as input pixel format\n", av_get_pix_fmt_name(srcFormat)); return AVERROR(EINVAL); } if (!sws_isSupportedOutput(dstFormat)) { - av_log(c, AV_LOG_ERROR, "%s is not supported as output pixel format\n", sws_format_name(dstFormat)); + av_log(c, AV_LOG_ERROR, "%s is not supported as output pixel format\n", av_get_pix_fmt_name(dstFormat)); return AVERROR(EINVAL); } @@ -826,19 +849,6 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) getSubSampleFactors(&c->chrDstHSubSample, &c->chrDstVSubSample, dstFormat); // reuse chroma for 2 pixels RGB/BGR unless user wants full chroma interpolation - if (flags & SWS_FULL_CHR_H_INT && - dstFormat != PIX_FMT_RGBA && - dstFormat != PIX_FMT_ARGB && - dstFormat != PIX_FMT_BGRA && - dstFormat != PIX_FMT_ABGR && - dstFormat != PIX_FMT_RGB24 && - dstFormat != PIX_FMT_BGR24) { - av_log(c, AV_LOG_ERROR, - "full chroma interpolation for destination format '%s' not yet implemented\n", - sws_format_name(dstFormat)); - flags &= ~SWS_FULL_CHR_H_INT; - c->flags = flags; - } if (isAnyRGB(dstFormat) && !(flags&SWS_FULL_CHR_H_INT)) c->chrDstHSubSample=1; // drop some chroma lines if the user wants it @@ -866,7 +876,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) if (c->swScale) { if (flags&SWS_PRINT_INFO) av_log(c, AV_LOG_INFO, "using unscaled %s -> %s special converter\n", - sws_format_name(srcFormat), sws_format_name(dstFormat)); + av_get_pix_fmt_name(srcFormat), av_get_pix_fmt_name(dstFormat)); return 0; } } @@ -877,18 +887,18 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) c->dstBpc = 1 + av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1; if (c->dstBpc < 8) c->dstBpc = 8; + if (isAnyRGB(srcFormat) || srcFormat == PIX_FMT_PAL8) + c->srcBpc = 16; if (c->dstBpc == 16) dst_stride <<= 1; - FF_ALLOC_OR_GOTO(c, c->formatConvBuffer, - (FFALIGN(srcW, 16) * 2 * FFALIGN(c->srcBpc, 8) >> 3) + 16, - fail); + FF_ALLOC_OR_GOTO(c, c->formatConvBuffer, FFALIGN(srcW*2+78, 16) * 2, fail); if (HAVE_MMX2 && cpu_flags & AV_CPU_FLAG_MMX2 && c->srcBpc == 8 && c->dstBpc <= 10) { c->canMMX2BeUsed= (dstW >=srcW && (dstW&31)==0 && (srcW&15)==0) ? 1 : 0; if (!c->canMMX2BeUsed && dstW >=srcW && (srcW&15)==0 && (flags&SWS_FAST_BILINEAR)) { if (flags&SWS_PRINT_INFO) av_log(c, AV_LOG_INFO, "output width is not a multiple of 32 -> no MMX2 scaler\n"); } - if (usesHFilter) c->canMMX2BeUsed=0; + if (usesHFilter || isNBPS(c->srcFormat) || is16BPS(c->srcFormat) || isAnyRGB(c->srcFormat)) c->canMMX2BeUsed=0; } else c->canMMX2BeUsed=0; @@ -908,7 +918,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) c->chrXInc+= 20; } //we don't use the x86 asm scaler if MMX is available - else if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) { + else if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX && c->dstBpc <= 10) { c->lumXInc = ((srcW-2)<<16)/(dstW-2) - 20; c->chrXInc = ((c->chrSrcW-2)<<16)/(c->chrDstW-2) - 20; } @@ -933,15 +943,19 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) c->chrMmx2FilterCode = av_malloc(c->chrMmx2FilterCodeSize); #endif +#ifdef MAP_ANONYMOUS + if (c->lumMmx2FilterCode == MAP_FAILED || c->chrMmx2FilterCode == MAP_FAILED) +#else if (!c->lumMmx2FilterCode || !c->chrMmx2FilterCode) +#endif return AVERROR(ENOMEM); FF_ALLOCZ_OR_GOTO(c, c->hLumFilter , (dstW /8+8)*sizeof(int16_t), fail); FF_ALLOCZ_OR_GOTO(c, c->hChrFilter , (c->chrDstW /4+8)*sizeof(int16_t), fail); FF_ALLOCZ_OR_GOTO(c, c->hLumFilterPos, (dstW /2/8+8)*sizeof(int32_t), fail); FF_ALLOCZ_OR_GOTO(c, c->hChrFilterPos, (c->chrDstW/2/4+8)*sizeof(int32_t), fail); - initMMX2HScaler( dstW, c->lumXInc, c->lumMmx2FilterCode, c->hLumFilter, c->hLumFilterPos, 8); - initMMX2HScaler(c->chrDstW, c->chrXInc, c->chrMmx2FilterCode, c->hChrFilter, c->hChrFilterPos, 4); + initMMX2HScaler( dstW, c->lumXInc, c->lumMmx2FilterCode, c->hLumFilter, (uint32_t*)c->hLumFilterPos, 8); + initMMX2HScaler(c->chrDstW, c->chrXInc, c->chrMmx2FilterCode, c->hChrFilter, (uint32_t*)c->hChrFilterPos, 4); #ifdef MAP_ANONYMOUS mprotect(c->lumMmx2FilterCode, c->lumMmx2FilterCodeSize, PROT_EXEC | PROT_READ); @@ -1010,7 +1024,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) c->vLumBufSize= c->vLumFilterSize; c->vChrBufSize= c->vChrFilterSize; for (i=0; i<dstH; i++) { - int chrI= i*c->chrDstH / dstH; + int chrI= (int64_t)i*c->chrDstH / dstH; int nextSlice= FFMAX(c->vLumFilterPos[i ] + c->vLumFilterSize - 1, ((c->vChrFilterPos[chrI] + c->vChrFilterSize - 1)<<c->chrSrcVSubSample)); @@ -1035,9 +1049,9 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) FF_ALLOCZ_OR_GOTO(c, c->lumPixBuf[i+c->vLumBufSize], dst_stride+16, fail); c->lumPixBuf[i] = c->lumPixBuf[i+c->vLumBufSize]; } - // 64 / (c->dstBpc & ~7) is the same as 16 / sizeof(scaling_intermediate) - c->uv_off_px = dst_stride_px + 64 / (c->dstBpc &~ 7); - c->uv_off_byte = dst_stride + 16; + // 64 / c->scalingBpp is the same as 16 / sizeof(scaling_intermediate) + c->uv_off = (dst_stride>>1) + 64 / (c->dstBpc &~ 7); + c->uv_offx2 = dst_stride + 16; for (i=0; i<c->vChrBufSize; i++) { FF_ALLOC_OR_GOTO(c, c->chrUPixBuf[i+c->vChrBufSize], dst_stride*2+32, fail); c->chrUPixBuf[i] = c->chrUPixBuf[i+c->vChrBufSize]; @@ -1051,7 +1065,13 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) //try to avoid drawing green stuff between the right end and the stride end for (i=0; i<c->vChrBufSize; i++) - memset(c->chrUPixBuf[i], 64, dst_stride*2+1); + if(av_pix_fmt_descriptors[c->dstFormat].comp[0].depth_minus1 == 15){ + av_assert0(c->dstBpc > 10); + for(j=0; j<dst_stride/2+1; j++) + ((int32_t*)(c->chrUPixBuf[i]))[j] = 1<<18; + } else + for(j=0; j<dst_stride+1; j++) + ((int16_t*)(c->chrUPixBuf[i]))[j] = 1<<14; assert(c->chrDstH <= dstH); @@ -1070,7 +1090,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) else av_log(c, AV_LOG_INFO, "ehh flags invalid?! "); av_log(c, AV_LOG_INFO, "from %s to %s%s ", - sws_format_name(srcFormat), + av_get_pix_fmt_name(srcFormat), #ifdef DITHER1XBPP dstFormat == PIX_FMT_BGR555 || dstFormat == PIX_FMT_BGR565 || dstFormat == PIX_FMT_RGB444BE || dstFormat == PIX_FMT_RGB444LE || @@ -1078,7 +1098,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) #else "", #endif - sws_format_name(dstFormat)); + av_get_pix_fmt_name(dstFormat)); if (HAVE_MMX2 && cpu_flags & AV_CPU_FLAG_MMX2) av_log(c, AV_LOG_INFO, "using MMX2\n"); else if (HAVE_AMD3DNOW && cpu_flags & AV_CPU_FLAG_3DNOW) av_log(c, AV_LOG_INFO, "using 3DNOW\n"); @@ -1479,7 +1499,7 @@ void sws_freeContext(SwsContext *c) #endif /* HAVE_MMX */ av_freep(&c->yuvTable); - av_free(c->formatConvBuffer); + av_freep(&c->formatConvBuffer); av_free(c); } diff --git a/libswscale/x86/rgb2rgb.c b/libswscale/x86/rgb2rgb.c index 282618c301..9359f0b1f3 100644 --- a/libswscale/x86/rgb2rgb.c +++ b/libswscale/x86/rgb2rgb.c @@ -6,20 +6,20 @@ * Written by Nick Kurshev. * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at) * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -68,6 +68,9 @@ DECLARE_ASM_CONST(8, uint64_t, blue_16mask) = 0x0000001f0000001fULL; DECLARE_ASM_CONST(8, uint64_t, red_15mask) = 0x00007c0000007c00ULL; DECLARE_ASM_CONST(8, uint64_t, green_15mask) = 0x000003e0000003e0ULL; DECLARE_ASM_CONST(8, uint64_t, blue_15mask) = 0x0000001f0000001fULL; +DECLARE_ASM_CONST(8, uint64_t, mul15_mid) = 0x4200420042004200ULL; +DECLARE_ASM_CONST(8, uint64_t, mul15_hi) = 0x0210021002100210ULL; +DECLARE_ASM_CONST(8, uint64_t, mul16_mid) = 0x2080208020802080ULL; #define RGB2YUV_SHIFT 8 #define BY ((int)( 0.098*(1<<RGB2YUV_SHIFT)+0.5)) diff --git a/libswscale/x86/rgb2rgb_template.c b/libswscale/x86/rgb2rgb_template.c index c255610193..3bca43c42e 100644 --- a/libswscale/x86/rgb2rgb_template.c +++ b/libswscale/x86/rgb2rgb_template.c @@ -7,20 +7,20 @@ * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at) * lot of big-endian byte order fixes by Alex Beregszaszi * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -801,27 +801,6 @@ static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, int src_s } } -/* - I use less accurate approximation here by simply left-shifting the input - value and filling the low order bits with zeroes. This method improves PNG - compression but this scheme cannot reproduce white exactly, since it does - not generate an all-ones maximum value; the net effect is to darken the - image slightly. - - The better method should be "left bit replication": - - 4 3 2 1 0 - --------- - 1 1 0 1 1 - - 7 6 5 4 3 2 1 0 - ---------------- - 1 1 0 1 1 1 1 0 - |=======| |===| - | leftmost bits repeated to fill open bits - | - original bits -*/ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int src_size) { const uint16_t *end; @@ -840,9 +819,10 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr "pand %2, %%mm0 \n\t" "pand %3, %%mm1 \n\t" "pand %4, %%mm2 \n\t" - "psllq $3, %%mm0 \n\t" - "psrlq $2, %%mm1 \n\t" - "psrlq $7, %%mm2 \n\t" + "psllq $5, %%mm0 \n\t" + "pmulhw %6, %%mm0 \n\t" + "pmulhw %6, %%mm1 \n\t" + "pmulhw %7, %%mm2 \n\t" "movq %%mm0, %%mm3 \n\t" "movq %%mm1, %%mm4 \n\t" "movq %%mm2, %%mm5 \n\t" @@ -870,9 +850,10 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr "pand %2, %%mm0 \n\t" "pand %3, %%mm1 \n\t" "pand %4, %%mm2 \n\t" - "psllq $3, %%mm0 \n\t" - "psrlq $2, %%mm1 \n\t" - "psrlq $7, %%mm2 \n\t" + "psllq $5, %%mm0 \n\t" + "pmulhw %6, %%mm0 \n\t" + "pmulhw %6, %%mm1 \n\t" + "pmulhw %7, %%mm2 \n\t" "movq %%mm0, %%mm3 \n\t" "movq %%mm1, %%mm4 \n\t" "movq %%mm2, %%mm5 \n\t" @@ -892,7 +873,7 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr "por %%mm5, %%mm3 \n\t" :"=m"(*d) - :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null) + :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r),"m"(mmx_null),"m"(mul15_mid),"m"(mul15_hi) :"memory"); /* borrowed 32 to 24 */ __asm__ volatile( @@ -919,9 +900,9 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr while (s < end) { register uint16_t bgr; bgr = *s++; - *d++ = (bgr&0x1F)<<3; - *d++ = (bgr&0x3E0)>>2; - *d++ = (bgr&0x7C00)>>7; + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); + *d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7); + *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12); } } @@ -943,9 +924,11 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr "pand %2, %%mm0 \n\t" "pand %3, %%mm1 \n\t" "pand %4, %%mm2 \n\t" - "psllq $3, %%mm0 \n\t" - "psrlq $3, %%mm1 \n\t" - "psrlq $8, %%mm2 \n\t" + "psllq $5, %%mm0 \n\t" + "psrlq $1, %%mm2 \n\t" + "pmulhw %6, %%mm0 \n\t" + "pmulhw %8, %%mm1 \n\t" + "pmulhw %7, %%mm2 \n\t" "movq %%mm0, %%mm3 \n\t" "movq %%mm1, %%mm4 \n\t" "movq %%mm2, %%mm5 \n\t" @@ -973,9 +956,11 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr "pand %2, %%mm0 \n\t" "pand %3, %%mm1 \n\t" "pand %4, %%mm2 \n\t" - "psllq $3, %%mm0 \n\t" - "psrlq $3, %%mm1 \n\t" - "psrlq $8, %%mm2 \n\t" + "psllq $5, %%mm0 \n\t" + "psrlq $1, %%mm2 \n\t" + "pmulhw %6, %%mm0 \n\t" + "pmulhw %8, %%mm1 \n\t" + "pmulhw %7, %%mm2 \n\t" "movq %%mm0, %%mm3 \n\t" "movq %%mm1, %%mm4 \n\t" "movq %%mm2, %%mm5 \n\t" @@ -994,7 +979,7 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr "por %%mm4, %%mm3 \n\t" "por %%mm5, %%mm3 \n\t" :"=m"(*d) - :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null) + :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null),"m"(mul15_mid),"m"(mul15_hi),"m"(mul16_mid) :"memory"); /* borrowed 32 to 24 */ __asm__ volatile( @@ -1021,9 +1006,9 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr while (s < end) { register uint16_t bgr; bgr = *s++; - *d++ = (bgr&0x1F)<<3; - *d++ = (bgr&0x7E0)>>3; - *d++ = (bgr&0xF800)>>8; + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); + *d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9); + *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13); } } @@ -1066,12 +1051,13 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_s "pand %2, %%mm0 \n\t" "pand %3, %%mm1 \n\t" "pand %4, %%mm2 \n\t" - "psllq $3, %%mm0 \n\t" - "psrlq $2, %%mm1 \n\t" - "psrlq $7, %%mm2 \n\t" + "psllq $5, %%mm0 \n\t" + "pmulhw %5, %%mm0 \n\t" + "pmulhw %5, %%mm1 \n\t" + "pmulhw %6, %%mm2 \n\t" PACK_RGB32 :"=m"(*d) - :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r) + :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r),"m"(mul15_mid),"m"(mul15_hi) :"memory"); d += 16; s += 4; @@ -1081,9 +1067,9 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_s while (s < end) { register uint16_t bgr; bgr = *s++; - *d++ = (bgr&0x1F)<<3; - *d++ = (bgr&0x3E0)>>2; - *d++ = (bgr&0x7C00)>>7; + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); + *d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7); + *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12); *d++ = 255; } } @@ -1108,12 +1094,14 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, int src_s "pand %2, %%mm0 \n\t" "pand %3, %%mm1 \n\t" "pand %4, %%mm2 \n\t" - "psllq $3, %%mm0 \n\t" - "psrlq $3, %%mm1 \n\t" - "psrlq $8, %%mm2 \n\t" + "psllq $5, %%mm0 \n\t" + "psrlq $1, %%mm2 \n\t" + "pmulhw %5, %%mm0 \n\t" + "pmulhw %7, %%mm1 \n\t" + "pmulhw %6, %%mm2 \n\t" PACK_RGB32 :"=m"(*d) - :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r) + :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mul15_mid),"m"(mul15_hi),"m"(mul16_mid) :"memory"); d += 16; s += 4; @@ -1123,9 +1111,9 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, int src_s while (s < end) { register uint16_t bgr; bgr = *s++; - *d++ = (bgr&0x1F)<<3; - *d++ = (bgr&0x7E0)>>3; - *d++ = (bgr&0xF800)>>8; + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2); + *d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9); + *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13); *d++ = 255; } } diff --git a/libswscale/x86/scale.asm b/libswscale/x86/scale.asm index 3bdf71b542..e8a5e5cd7c 100644 --- a/libswscale/x86/scale.asm +++ b/libswscale/x86/scale.asm @@ -423,11 +423,13 @@ SCALE_FUNC %1, %2, X, X8, %3, 7, %4 SCALE_FUNCS 8, 15, %1, %2 SCALE_FUNCS 9, 15, %1, %3 SCALE_FUNCS 10, 15, %1, %3 +SCALE_FUNCS 14, 15, %1, %3 SCALE_FUNCS 16, 15, %1, %4 %endif ; !sse4 SCALE_FUNCS 8, 19, %1, %2 SCALE_FUNCS 9, 19, %1, %3 SCALE_FUNCS 10, 19, %1, %3 +SCALE_FUNCS 14, 19, %1, %3 SCALE_FUNCS 16, 19, %1, %4 %endmacro diff --git a/libswscale/x86/swscale_mmx.c b/libswscale/x86/swscale_mmx.c index b3f1deae60..360ec2a1c7 100644 --- a/libswscale/x86/swscale_mmx.c +++ b/libswscale/x86/swscale_mmx.c @@ -1,20 +1,20 @@ /* - * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at> + * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -72,14 +72,14 @@ DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toY1Coeff) = 0x0C88000040870C88ULL; DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toY2Coeff) = 0x20DE4087000020DEULL; DECLARE_ASM_CONST(8, uint64_t, ff_rgb24toY1Coeff) = 0x20DE0000408720DEULL; DECLARE_ASM_CONST(8, uint64_t, ff_rgb24toY2Coeff) = 0x0C88408700000C88ULL; -DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toYOffset) = 0x0008400000084000ULL; +DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toYOffset) = 0x0008010000080100ULL; DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUV)[2][4] = { {0x38380000DAC83838ULL, 0xECFFDAC80000ECFFULL, 0xF6E40000D0E3F6E4ULL, 0x3838D0E300003838ULL}, {0xECFF0000DAC8ECFFULL, 0x3838DAC800003838ULL, 0x38380000D0E33838ULL, 0xF6E4D0E30000F6E4ULL}, }; -DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUVOffset)= 0x0040400000404000ULL; +DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUVOffset)= 0x0040010000400100ULL; //MMX versions #if HAVE_MMX @@ -176,6 +176,67 @@ void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufI } } +#if HAVE_MMX2 +static void yuv2yuvX_sse3(const int16_t *filter, int filterSize, + const int16_t **src, uint8_t *dest, int dstW, + const uint8_t *dither, int offset) +{ + if(((int)dest) & 15){ + return yuv2yuvX_MMX2(filter, filterSize, src, dest, dstW, dither, offset); + } + if (offset) { + __asm__ volatile("movq (%0), %%xmm3\n\t" + "movdqa %%xmm3, %%xmm4\n\t" + "psrlq $24, %%xmm3\n\t" + "psllq $40, %%xmm4\n\t" + "por %%xmm4, %%xmm3\n\t" + :: "r"(dither) + ); + } else { + __asm__ volatile("movq (%0), %%xmm3\n\t" + :: "r"(dither) + ); + } + __asm__ volatile( + "pxor %%xmm0, %%xmm0\n\t" + "punpcklbw %%xmm0, %%xmm3\n\t" + "psraw $4, %%xmm3\n\t" + "movdqa %%xmm3, %%xmm4\n\t" + "movdqa %%xmm3, %%xmm7\n\t" + "movl %3, %%ecx\n\t" + "mov %0, %%"REG_d" \n\t"\ + "mov (%%"REG_d"), %%"REG_S" \n\t"\ + ".p2align 4 \n\t" /* FIXME Unroll? */\ + "1: \n\t"\ + "movddup 8(%%"REG_d"), %%xmm0 \n\t" /* filterCoeff */\ + "movdqa (%%"REG_S", %%"REG_c", 2), %%xmm2 \n\t" /* srcData */\ + "movdqa 16(%%"REG_S", %%"REG_c", 2), %%xmm5 \n\t" /* srcData */\ + "add $16, %%"REG_d" \n\t"\ + "mov (%%"REG_d"), %%"REG_S" \n\t"\ + "test %%"REG_S", %%"REG_S" \n\t"\ + "pmulhw %%xmm0, %%xmm2 \n\t"\ + "pmulhw %%xmm0, %%xmm5 \n\t"\ + "paddw %%xmm2, %%xmm3 \n\t"\ + "paddw %%xmm5, %%xmm4 \n\t"\ + " jnz 1b \n\t"\ + "psraw $3, %%xmm3 \n\t"\ + "psraw $3, %%xmm4 \n\t"\ + "packuswb %%xmm4, %%xmm3 \n\t" + "movntdq %%xmm3, (%1, %%"REG_c")\n\t" + "add $16, %%"REG_c" \n\t"\ + "cmp %2, %%"REG_c" \n\t"\ + "movdqa %%xmm7, %%xmm3\n\t" + "movdqa %%xmm7, %%xmm4\n\t" + "mov %0, %%"REG_d" \n\t"\ + "mov (%%"REG_d"), %%"REG_S" \n\t"\ + "jb 1b \n\t"\ + :: "g" (filter), + "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset) + : "%"REG_d, "%"REG_S, "%"REG_c + ); +} +#endif + #define SCALE_FUNC(filter_n, from_bpc, to_bpc, opt) \ extern void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( \ SwsContext *c, int16_t *data, \ @@ -187,10 +248,12 @@ extern void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( SCALE_FUNC(filter_n, 8, 15, opt); \ SCALE_FUNC(filter_n, 9, 15, opt); \ SCALE_FUNC(filter_n, 10, 15, opt); \ + SCALE_FUNC(filter_n, 14, 15, opt); \ SCALE_FUNC(filter_n, 16, 15, opt); \ SCALE_FUNC(filter_n, 8, 19, opt); \ SCALE_FUNC(filter_n, 9, 19, opt); \ SCALE_FUNC(filter_n, 10, 19, opt); \ + SCALE_FUNC(filter_n, 14, 19, opt); \ SCALE_FUNC(filter_n, 16, 19, opt) #define SCALE_FUNCS_MMX(opt) \ @@ -253,6 +316,10 @@ void ff_sws_init_swScale_mmx(SwsContext *c) #if HAVE_MMX2 if (cpu_flags & AV_CPU_FLAG_MMX2) sws_init_swScale_MMX2(c); + if (cpu_flags & AV_CPU_FLAG_SSE3){ + if(c->use_mmx_vfilter && !(c->flags & SWS_ACCURATE_RND)) + c->yuv2planeX = yuv2yuvX_sse3; + } #endif #if HAVE_YASM @@ -266,7 +333,10 @@ void ff_sws_init_swScale_mmx(SwsContext *c) } else if (c->srcBpc == 10) { \ hscalefn = c->dstBpc <= 10 ? ff_hscale10to15_ ## filtersize ## _ ## opt2 : \ ff_hscale10to19_ ## filtersize ## _ ## opt1; \ - } else /* c->srcBpc == 16 */ { \ + } else if (c->srcBpc == 14 || ((c->srcFormat==PIX_FMT_PAL8||isAnyRGB(c->srcFormat)) && av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1<15)) { \ + hscalefn = c->dstBpc <= 10 ? ff_hscale14to15_ ## filtersize ## _ ## opt2 : \ + ff_hscale14to19_ ## filtersize ## _ ## opt1; \ + } else { /* c->srcBpc == 16 */ \ hscalefn = c->dstBpc <= 10 ? ff_hscale16to15_ ## filtersize ## _ ## opt2 : \ ff_hscale16to19_ ## filtersize ## _ ## opt1; \ } \ @@ -279,10 +349,10 @@ void ff_sws_init_swScale_mmx(SwsContext *c) } #define ASSIGN_VSCALEX_FUNC(vscalefn, opt1, opt2, opt2chk, do_16_case) \ switch(c->dstBpc){ \ - case 16: do_16_case; break; \ - case 10: if (!isBE(c->dstFormat) && opt2chk) vscalefn = ff_yuv2planeX_10_ ## opt2; break; \ - case 9: if (!isBE(c->dstFormat) && opt2chk) vscalefn = ff_yuv2planeX_9_ ## opt2; break; \ - default: vscalefn = ff_yuv2planeX_8_ ## opt1; break; \ + case 16: /*do_16_case;*/ break; \ + case 10: if (!isBE(c->dstFormat) && opt2chk) /*vscalefn = ff_yuv2planeX_10_ ## opt2;*/ break; \ + case 9: if (!isBE(c->dstFormat) && opt2chk) /*vscalefn = ff_yuv2planeX_9_ ## opt2;*/ break; \ + default: /*vscalefn = ff_yuv2planeX_8_ ## opt1;*/ break; \ } #define ASSIGN_VSCALE_FUNC(vscalefn, opt1, opt2, opt2chk) \ switch(c->dstBpc){ \ diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c index 5e7df5c4a0..c7de145b52 100644 --- a/libswscale/x86/swscale_template.c +++ b/libswscale/x86/swscale_template.c @@ -1,25 +1,26 @@ /* - * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at> + * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at> * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #undef REAL_MOVNTQ #undef MOVNTQ +#undef MOVNTQ2 #undef PREFETCH #if COMPILE_TEMPLATE_MMX2 @@ -30,11 +31,84 @@ #if COMPILE_TEMPLATE_MMX2 #define REAL_MOVNTQ(a,b) "movntq " #a ", " #b " \n\t" +#define MOVNTQ2 "movntq " #else #define REAL_MOVNTQ(a,b) "movq " #a ", " #b " \n\t" +#define MOVNTQ2 "movq " #endif #define MOVNTQ(a,b) REAL_MOVNTQ(a,b) +#if !COMPILE_TEMPLATE_MMX2 +static av_always_inline void +dither_8to16(const uint8_t *srcDither, int rot) +{ + if (rot) { + __asm__ volatile("pxor %%mm0, %%mm0\n\t" + "movq (%0), %%mm3\n\t" + "movq %%mm3, %%mm4\n\t" + "psrlq $24, %%mm3\n\t" + "psllq $40, %%mm4\n\t" + "por %%mm4, %%mm3\n\t" + "movq %%mm3, %%mm4\n\t" + "punpcklbw %%mm0, %%mm3\n\t" + "punpckhbw %%mm0, %%mm4\n\t" + :: "r"(srcDither) + ); + } else { + __asm__ volatile("pxor %%mm0, %%mm0\n\t" + "movq (%0), %%mm3\n\t" + "movq %%mm3, %%mm4\n\t" + "punpcklbw %%mm0, %%mm3\n\t" + "punpckhbw %%mm0, %%mm4\n\t" + :: "r"(srcDither) + ); + } +} +#endif + +static void RENAME(yuv2yuvX)(const int16_t *filter, int filterSize, + const int16_t **src, uint8_t *dest, int dstW, + const uint8_t *dither, int offset) +{ + dither_8to16(dither, offset); + __asm__ volatile(\ + "psraw $4, %%mm3\n\t" + "psraw $4, %%mm4\n\t" + "movq %%mm3, %%mm6\n\t" + "movq %%mm4, %%mm7\n\t" + "movl %3, %%ecx\n\t" + "mov %0, %%"REG_d" \n\t"\ + "mov (%%"REG_d"), %%"REG_S" \n\t"\ + ".p2align 4 \n\t" /* FIXME Unroll? */\ + "1: \n\t"\ + "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\ + "movq (%%"REG_S", %%"REG_c", 2), %%mm2 \n\t" /* srcData */\ + "movq 8(%%"REG_S", %%"REG_c", 2), %%mm5 \n\t" /* srcData */\ + "add $16, %%"REG_d" \n\t"\ + "mov (%%"REG_d"), %%"REG_S" \n\t"\ + "test %%"REG_S", %%"REG_S" \n\t"\ + "pmulhw %%mm0, %%mm2 \n\t"\ + "pmulhw %%mm0, %%mm5 \n\t"\ + "paddw %%mm2, %%mm3 \n\t"\ + "paddw %%mm5, %%mm4 \n\t"\ + " jnz 1b \n\t"\ + "psraw $3, %%mm3 \n\t"\ + "psraw $3, %%mm4 \n\t"\ + "packuswb %%mm4, %%mm3 \n\t" + MOVNTQ2 " %%mm3, (%1, %%"REG_c")\n\t" + "add $8, %%"REG_c" \n\t"\ + "cmp %2, %%"REG_c" \n\t"\ + "movq %%mm6, %%mm3\n\t" + "movq %%mm7, %%mm4\n\t" + "mov %0, %%"REG_d" \n\t"\ + "mov (%%"REG_d"), %%"REG_S" \n\t"\ + "jb 1b \n\t"\ + :: "g" (filter), + "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset) + : "%"REG_d, "%"REG_S, "%"REG_c + ); +} + #define YSCALEYUV2PACKEDX_UV \ __asm__ volatile(\ "xor %%"REG_a", %%"REG_a" \n\t"\ @@ -260,7 +334,7 @@ static void RENAME(yuv2rgb32_X_ar)(SwsContext *c, const int16_t *lumFilter, { x86_reg dummy=0; x86_reg dstW_reg = dstW; - x86_reg uv_off = c->uv_off_byte; + x86_reg uv_off = c->uv_offx2; if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) { YSCALEYUV2PACKEDX_ACCURATE @@ -293,7 +367,7 @@ static void RENAME(yuv2rgb32_X)(SwsContext *c, const int16_t *lumFilter, { x86_reg dummy=0; x86_reg dstW_reg = dstW; - x86_reg uv_off = c->uv_off_byte; + x86_reg uv_off = c->uv_offx2; if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) { YSCALEYUV2PACKEDX @@ -350,7 +424,7 @@ static void RENAME(yuv2rgb565_X_ar)(SwsContext *c, const int16_t *lumFilter, { x86_reg dummy=0; x86_reg dstW_reg = dstW; - x86_reg uv_off = c->uv_off_byte; + x86_reg uv_off = c->uv_offx2; YSCALEYUV2PACKEDX_ACCURATE YSCALEYUV2RGBX @@ -374,7 +448,7 @@ static void RENAME(yuv2rgb565_X)(SwsContext *c, const int16_t *lumFilter, { x86_reg dummy=0; x86_reg dstW_reg = dstW; - x86_reg uv_off = c->uv_off_byte; + x86_reg uv_off = c->uv_offx2; YSCALEYUV2PACKEDX YSCALEYUV2RGBX @@ -427,7 +501,7 @@ static void RENAME(yuv2rgb555_X_ar)(SwsContext *c, const int16_t *lumFilter, { x86_reg dummy=0; x86_reg dstW_reg = dstW; - x86_reg uv_off = c->uv_off_byte; + x86_reg uv_off = c->uv_offx2; YSCALEYUV2PACKEDX_ACCURATE YSCALEYUV2RGBX @@ -451,7 +525,7 @@ static void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter, { x86_reg dummy=0; x86_reg dstW_reg = dstW; - x86_reg uv_off = c->uv_off_byte; + x86_reg uv_off = c->uv_offx2; YSCALEYUV2PACKEDX YSCALEYUV2RGBX @@ -584,7 +658,7 @@ static void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilter, { x86_reg dummy=0; x86_reg dstW_reg = dstW; - x86_reg uv_off = c->uv_off_byte; + x86_reg uv_off = c->uv_offx2; YSCALEYUV2PACKEDX_ACCURATE YSCALEYUV2RGBX @@ -608,7 +682,7 @@ static void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter, { x86_reg dummy=0; x86_reg dstW_reg = dstW; - x86_reg uv_off = c->uv_off_byte; + x86_reg uv_off = c->uv_offx2; YSCALEYUV2PACKEDX YSCALEYUV2RGBX @@ -649,7 +723,7 @@ static void RENAME(yuv2yuyv422_X_ar)(SwsContext *c, const int16_t *lumFilter, { x86_reg dummy=0; x86_reg dstW_reg = dstW; - x86_reg uv_off = c->uv_off_byte; + x86_reg uv_off = c->uv_offx2; YSCALEYUV2PACKEDX_ACCURATE /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ @@ -670,7 +744,7 @@ static void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter, { x86_reg dummy=0; x86_reg dstW_reg = dstW; - x86_reg uv_off = c->uv_off_byte; + x86_reg uv_off = c->uv_offx2; YSCALEYUV2PACKEDX /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ @@ -688,10 +762,10 @@ static void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter, "1: \n\t"\ "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\ - "add "UV_OFF_PX"("#c"), "#index" \n\t" \ + "add "UV_OFFx2"("#c"), "#index" \n\t" \ "movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ "movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ - "sub "UV_OFF_PX"("#c"), "#index" \n\t" \ + "sub "UV_OFFx2"("#c"), "#index" \n\t" \ "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\ "psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\ "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\ @@ -786,8 +860,8 @@ static void RENAME(yuv2rgb32_2)(SwsContext *c, const int16_t *buf[2], : "%r8" ); #else - *(const uint16_t **)(&c->u_temp)=abuf0; - *(const uint16_t **)(&c->v_temp)=abuf1; + c->u_temp=(intptr_t)abuf0; + c->v_temp=(intptr_t)abuf1; __asm__ volatile( "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %4, %%"REG_b" \n\t" @@ -919,10 +993,10 @@ static void RENAME(yuv2rgb565_2)(SwsContext *c, const int16_t *buf[2], "1: \n\t"\ "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\ - "add "UV_OFF_PX"("#c"), "#index" \n\t" \ + "add "UV_OFFx2"("#c"), "#index" \n\t" \ "movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ "movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ - "sub "UV_OFF_PX"("#c"), "#index" \n\t" \ + "sub "UV_OFFx2"("#c"), "#index" \n\t" \ "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\ "psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\ "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\ @@ -974,9 +1048,9 @@ static void RENAME(yuv2yuyv422_2)(SwsContext *c, const int16_t *buf[2], ".p2align 4 \n\t"\ "1: \n\t"\ "movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\ - "add "UV_OFF_PX"("#c"), "#index" \n\t" \ + "add "UV_OFFx2"("#c"), "#index" \n\t" \ "movq (%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\ - "sub "UV_OFF_PX"("#c"), "#index" \n\t" \ + "sub "UV_OFFx2"("#c"), "#index" \n\t" \ "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\ "psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\ "psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\ @@ -1027,10 +1101,10 @@ static void RENAME(yuv2yuyv422_2)(SwsContext *c, const int16_t *buf[2], "1: \n\t"\ "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\ - "add "UV_OFF_PX"("#c"), "#index" \n\t" \ + "add "UV_OFFx2"("#c"), "#index" \n\t" \ "movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ "movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ - "sub "UV_OFF_PX"("#c"), "#index" \n\t" \ + "sub "UV_OFFx2"("#c"), "#index" \n\t" \ "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\ "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\ "psrlw $5, %%mm3 \n\t" /*FIXME might overflow*/\ @@ -1294,9 +1368,9 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0, ".p2align 4 \n\t"\ "1: \n\t"\ "movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\ - "add "UV_OFF_PX"("#c"), "#index" \n\t" \ + "add "UV_OFFx2"("#c"), "#index" \n\t" \ "movq (%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\ - "sub "UV_OFF_PX"("#c"), "#index" \n\t" \ + "sub "UV_OFFx2"("#c"), "#index" \n\t" \ "psraw $7, %%mm3 \n\t" \ "psraw $7, %%mm4 \n\t" \ "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\ @@ -1312,10 +1386,10 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0, "1: \n\t"\ "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\ - "add "UV_OFF_PX"("#c"), "#index" \n\t" \ + "add "UV_OFFx2"("#c"), "#index" \n\t" \ "movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ "movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ - "sub "UV_OFF_PX"("#c"), "#index" \n\t" \ + "sub "UV_OFFx2"("#c"), "#index" \n\t" \ "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\ "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\ "psrlw $8, %%mm3 \n\t" \ @@ -1364,7 +1438,7 @@ static void RENAME(yuv2yuyv422_1)(SwsContext *c, const int16_t *buf0, #if !COMPILE_TEMPLATE_MMX2 //FIXME yuy2* can read up to 7 samples too much -static void RENAME(yuy2ToY)(uint8_t *dst, const uint8_t *src, +static void RENAME(yuy2ToY)(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused) { __asm__ volatile( @@ -1385,7 +1459,7 @@ static void RENAME(yuy2ToY)(uint8_t *dst, const uint8_t *src, } static void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, - const uint8_t *src1, const uint8_t *src2, + const uint8_t *unused1, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused) { __asm__ volatile( @@ -1414,7 +1488,7 @@ static void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, /* This is almost identical to the previous, end exists only because * yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. */ -static void RENAME(uyvyToY)(uint8_t *dst, const uint8_t *src, +static void RENAME(uyvyToY)(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused) { __asm__ volatile( @@ -1434,7 +1508,7 @@ static void RENAME(uyvyToY)(uint8_t *dst, const uint8_t *src, } static void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, - const uint8_t *src1, const uint8_t *src2, + const uint8_t *unused1, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused) { __asm__ volatile( @@ -1488,21 +1562,21 @@ static av_always_inline void RENAME(nvXXtoUV)(uint8_t *dst1, uint8_t *dst2, } static void RENAME(nv12ToUV)(uint8_t *dstU, uint8_t *dstV, - const uint8_t *src1, const uint8_t *src2, + const uint8_t *unused1, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused) { RENAME(nvXXtoUV)(dstU, dstV, src1, width); } static void RENAME(nv21ToUV)(uint8_t *dstU, uint8_t *dstV, - const uint8_t *src1, const uint8_t *src2, + const uint8_t *unused1, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused) { RENAME(nvXXtoUV)(dstV, dstU, src1, width); } #endif /* !COMPILE_TEMPLATE_MMX2 */ -static av_always_inline void RENAME(bgr24ToY_mmx)(uint8_t *dst, const uint8_t *src, +static av_always_inline void RENAME(bgr24ToY_mmx)(int16_t *dst, const uint8_t *src, int width, enum PixelFormat srcFormat) { @@ -1543,32 +1617,31 @@ static av_always_inline void RENAME(bgr24ToY_mmx)(uint8_t *dst, const uint8_t *s "paddd %%mm3, %%mm2 \n\t" "paddd %%mm4, %%mm0 \n\t" "paddd %%mm4, %%mm2 \n\t" - "psrad $15, %%mm0 \n\t" - "psrad $15, %%mm2 \n\t" + "psrad $9, %%mm0 \n\t" + "psrad $9, %%mm2 \n\t" "packssdw %%mm2, %%mm0 \n\t" - "packuswb %%mm0, %%mm0 \n\t" - "movd %%mm0, (%1, %%"REG_a") \n\t" - "add $4, %%"REG_a" \n\t" + "movq %%mm0, (%1, %%"REG_a") \n\t" + "add $8, %%"REG_a" \n\t" " js 1b \n\t" : "+r" (src) - : "r" (dst+width), "g" ((x86_reg)-width) + : "r" (dst+width), "g" ((x86_reg)-2*width) : "%"REG_a ); } -static void RENAME(bgr24ToY)(uint8_t *dst, const uint8_t *src, +static void RENAME(bgr24ToY)(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused) { RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_BGR24); } -static void RENAME(rgb24ToY)(uint8_t *dst, const uint8_t *src, +static void RENAME(rgb24ToY)(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused) { RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_RGB24); } -static av_always_inline void RENAME(bgr24ToUV_mmx)(uint8_t *dstU, uint8_t *dstV, +static av_always_inline void RENAME(bgr24ToUV_mmx)(int16_t *dstU, int16_t *dstV, const uint8_t *src, int width, enum PixelFormat srcFormat) { @@ -1610,34 +1683,32 @@ static av_always_inline void RENAME(bgr24ToUV_mmx)(uint8_t *dstU, uint8_t *dstV, "paddd %%mm3, %%mm2 \n\t" "paddd %%mm3, %%mm1 \n\t" "paddd %%mm3, %%mm4 \n\t" - "psrad $15, %%mm0 \n\t" - "psrad $15, %%mm2 \n\t" - "psrad $15, %%mm1 \n\t" - "psrad $15, %%mm4 \n\t" + "psrad $9, %%mm0 \n\t" + "psrad $9, %%mm2 \n\t" + "psrad $9, %%mm1 \n\t" + "psrad $9, %%mm4 \n\t" "packssdw %%mm1, %%mm0 \n\t" "packssdw %%mm4, %%mm2 \n\t" - "packuswb %%mm0, %%mm0 \n\t" - "packuswb %%mm2, %%mm2 \n\t" - "movd %%mm0, (%1, %%"REG_a") \n\t" - "movd %%mm2, (%2, %%"REG_a") \n\t" - "add $4, %%"REG_a" \n\t" + "movq %%mm0, (%1, %%"REG_a") \n\t" + "movq %%mm2, (%2, %%"REG_a") \n\t" + "add $8, %%"REG_a" \n\t" " js 1b \n\t" : "+r" (src) - : "r" (dstU+width), "r" (dstV+width), "g" ((x86_reg)-width), "r"(ff_bgr24toUV[srcFormat == PIX_FMT_RGB24]) + : "r" (dstU+width), "r" (dstV+width), "g" ((x86_reg)-2*width), "r"(ff_bgr24toUV[srcFormat == PIX_FMT_RGB24]) : "%"REG_a ); } -static void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, - const uint8_t *src1, const uint8_t *src2, +static void RENAME(bgr24ToUV)(int16_t *dstU, int16_t *dstV, + const uint8_t *unused1, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused) { RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_BGR24); assert(src1 == src2); } -static void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV, - const uint8_t *src1, const uint8_t *src2, +static void RENAME(rgb24ToUV)(int16_t *dstU, int16_t *dstV, + const uint8_t *unused1, const uint8_t *src1, const uint8_t *src2, int width, uint32_t *unused) { assert(src1==src2); @@ -1784,10 +1855,9 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c) { enum PixelFormat srcFormat = c->srcFormat, dstFormat = c->dstFormat; - - if (!is16BPS(dstFormat) && !is9_OR_10BPS(dstFormat) && - dstFormat != PIX_FMT_NV12 && dstFormat != PIX_FMT_NV21) { - if (!(c->flags & SWS_BITEXACT)) { + c->use_mmx_vfilter= 0; + if (!is16BPS(dstFormat) && !is9_OR_10BPS(dstFormat) && dstFormat != PIX_FMT_NV12 + && dstFormat != PIX_FMT_NV21 && !(c->flags & SWS_BITEXACT)) { if (c->flags & SWS_ACCURATE_RND) { if (!(c->flags & SWS_FULL_CHR_H_INT)) { switch (c->dstFormat) { @@ -1800,6 +1870,9 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c) } } } else { + int should_dither= isNBPS(c->srcFormat) || is16BPS(c->srcFormat); + c->use_mmx_vfilter= 1; + c->yuv2planeX = RENAME(yuv2yuvX ); if (!(c->flags & SWS_FULL_CHR_H_INT)) { switch (c->dstFormat) { case PIX_FMT_RGB32: c->yuv2packedX = RENAME(yuv2rgb32_X); break; @@ -1811,7 +1884,6 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c) } } } - } if (!(c->flags & SWS_FULL_CHR_H_INT)) { switch (c->dstFormat) { case PIX_FMT_RGB32: diff --git a/libswscale/x86/yuv2rgb_mmx.c b/libswscale/x86/yuv2rgb_mmx.c index 0eaea77485..df0e1a3726 100644 --- a/libswscale/x86/yuv2rgb_mmx.c +++ b/libswscale/x86/yuv2rgb_mmx.c @@ -7,20 +7,20 @@ * 1,4,8bpp support and context / deglobalize stuff * by Michael Niedermayer (michaelni@gmx.at) * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ diff --git a/libswscale/x86/yuv2rgb_template.c b/libswscale/x86/yuv2rgb_template.c index 5d1fa5b309..926e3fb9c4 100644 --- a/libswscale/x86/yuv2rgb_template.c +++ b/libswscale/x86/yuv2rgb_template.c @@ -4,20 +4,20 @@ * Copyright (C) 2001-2007 Michael Niedermayer * (c) 2010 Konstantin Shishkov * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -43,17 +43,14 @@ if (h_size * depth > FFABS(dstStride[0])) \ h_size -= 8; \ \ - if (c->srcFormat == PIX_FMT_YUV422P) { \ - srcStride[1] *= 2; \ - srcStride[2] *= 2; \ - } \ + vshift = c->srcFormat != PIX_FMT_YUV422P; \ \ __asm__ volatile ("pxor %mm4, %mm4\n\t"); \ for (y = 0; y < srcSliceH; y++) { \ uint8_t *image = dst[0] + (y + srcSliceY) * dstStride[0]; \ const uint8_t *py = src[0] + y * srcStride[0]; \ - const uint8_t *pu = src[1] + (y >> 1) * srcStride[1]; \ - const uint8_t *pv = src[2] + (y >> 1) * srcStride[2]; \ + const uint8_t *pu = src[1] + (y >> vshift) * srcStride[1]; \ + const uint8_t *pv = src[2] + (y >> vshift) * srcStride[2]; \ x86_reg index = -h_size / 2; \ #define YUV2RGB_INITIAL_LOAD \ @@ -188,7 +185,7 @@ static inline int RENAME(yuv420_rgb15)(SwsContext *c, const uint8_t *src[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[]) { - int y, h_size; + int y, h_size, vshift; YUV2RGB_LOOP(2) @@ -216,7 +213,7 @@ static inline int RENAME(yuv420_rgb16)(SwsContext *c, const uint8_t *src[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[]) { - int y, h_size; + int y, h_size, vshift; YUV2RGB_LOOP(2) @@ -306,7 +303,7 @@ static inline int RENAME(yuv420_rgb24)(SwsContext *c, const uint8_t *src[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[]) { - int y, h_size; + int y, h_size, vshift; YUV2RGB_LOOP(3) @@ -324,7 +321,7 @@ static inline int RENAME(yuv420_bgr24)(SwsContext *c, const uint8_t *src[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[]) { - int y, h_size; + int y, h_size, vshift; YUV2RGB_LOOP(3) @@ -368,7 +365,7 @@ static inline int RENAME(yuv420_rgb32)(SwsContext *c, const uint8_t *src[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[]) { - int y, h_size; + int y, h_size, vshift; YUV2RGB_LOOP(4) @@ -389,7 +386,7 @@ static inline int RENAME(yuva420_rgb32)(SwsContext *c, const uint8_t *src[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[]) { - int y, h_size; + int y, h_size, vshift; YUV2RGB_LOOP(4) @@ -411,7 +408,7 @@ static inline int RENAME(yuv420_bgr32)(SwsContext *c, const uint8_t *src[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[]) { - int y, h_size; + int y, h_size, vshift; YUV2RGB_LOOP(4) @@ -432,7 +429,7 @@ static inline int RENAME(yuva420_bgr32)(SwsContext *c, const uint8_t *src[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[]) { - int y, h_size; + int y, h_size, vshift; YUV2RGB_LOOP(4) diff --git a/libswscale/yuv2rgb.c b/libswscale/yuv2rgb.c index 39c8b9c6fb..c2c9090a36 100644 --- a/libswscale/yuv2rgb.c +++ b/libswscale/yuv2rgb.c @@ -6,20 +6,20 @@ * 1,4,8bpp support and context / deglobalize stuff * by Michael Niedermayer (michaelni@gmx.at) * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -34,6 +34,7 @@ #include "swscale_internal.h" #include "libavutil/cpu.h" #include "libavutil/bswap.h" +#include "libavutil/pixdesc.h" extern const uint8_t dither_4x4_16[4][8]; extern const uint8_t dither_8x8_32[8][8]; @@ -521,7 +522,8 @@ SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c) if (t) return t; - av_log(c, AV_LOG_WARNING, "No accelerated colorspace conversion found from %s to %s.\n", sws_format_name(c->srcFormat), sws_format_name(c->dstFormat)); + av_log(c, AV_LOG_WARNING, "No accelerated colorspace conversion found from %s to %s.\n", + av_get_pix_fmt_name(c->srcFormat), av_get_pix_fmt_name(c->dstFormat)); switch (c->dstFormat) { case PIX_FMT_BGR48BE: @@ -613,7 +615,7 @@ av_cold int ff_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4], int uint8_t *y_table; uint16_t *y_table16; uint32_t *y_table32; - int i, base, rbase, gbase, bbase, abase, needAlpha; + int i, base, rbase, gbase, bbase, av_uninit(abase), needAlpha; const int yoffs = fullRange ? 384 : 326; int64_t crv = inv_table[0]; |