Diffstat (limited to 'libswscale')
-rw-r--r--  libswscale/Makefile               |   2
-rw-r--r--  libswscale/bfin/internal_bfin.S   |   8
-rw-r--r--  libswscale/bfin/swscale_bfin.c    |   8
-rw-r--r--  libswscale/bfin/yuv2rgb_bfin.c    |  11
-rw-r--r--  libswscale/colorspace-test.c      |  18
-rw-r--r--  libswscale/input.c                | 428
-rw-r--r--  libswscale/options.c              |  15
-rw-r--r--  libswscale/output.c               | 195
-rw-r--r--  libswscale/ppc/swscale_altivec.c  |  10
-rw-r--r--  libswscale/ppc/yuv2rgb_altivec.c  |  26
-rw-r--r--  libswscale/ppc/yuv2rgb_altivec.h  |   8
-rw-r--r--  libswscale/ppc/yuv2yuv_altivec.c  |   8
-rw-r--r--  libswscale/rgb2rgb.c              | 110
-rw-r--r--  libswscale/rgb2rgb.h              |  15
-rw-r--r--  libswscale/rgb2rgb_template.c     |  68
-rw-r--r--  libswscale/sparc/yuv2rgb_vis.c    |   8
-rw-r--r--  libswscale/swscale-test.c         |  13
-rw-r--r--  libswscale/swscale.c              | 155
-rw-r--r--  libswscale/swscale.h              |  18
-rw-r--r--  libswscale/swscale_internal.h     | 169
-rw-r--r--  libswscale/swscale_unscaled.c     | 572
-rw-r--r--  libswscale/utils.c                | 223
-rw-r--r--  libswscale/version.h              |  13
-rw-r--r--  libswscale/x86/Makefile           |   2
-rw-r--r--  libswscale/x86/input.asm          | 122
-rw-r--r--  libswscale/x86/output.asm         |   4
-rw-r--r--  libswscale/x86/rgb2rgb.c          |  11
-rw-r--r--  libswscale/x86/rgb2rgb_template.c | 102
-rw-r--r--  libswscale/x86/scale.asm          |   4
-rw-r--r--  libswscale/x86/swscale.c          | 111
-rw-r--r--  libswscale/x86/swscale_template.c | 119
-rw-r--r--  libswscale/x86/yuv2rgb.c          |  12
-rw-r--r--  libswscale/x86/yuv2rgb_template.c |  35
-rw-r--r--  libswscale/yuv2rgb.c              | 113
34 files changed, 1740 insertions, 996 deletions
diff --git a/libswscale/Makefile b/libswscale/Makefile
index 0799b458be..dd00f7d708 100644
--- a/libswscale/Makefile
+++ b/libswscale/Makefile
@@ -1,3 +1,5 @@
+include $(SUBDIR)../config.mak
+
NAME = swscale
FFLIBS = avutil
diff --git a/libswscale/bfin/internal_bfin.S b/libswscale/bfin/internal_bfin.S
index b007f07f53..eab30aa6ce 100644
--- a/libswscale/bfin/internal_bfin.S
+++ b/libswscale/bfin/internal_bfin.S
@@ -5,20 +5,20 @@
* Blackfin video color space converter operations
* convert I420 YV12 to RGB in various formats
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/libswscale/bfin/swscale_bfin.c b/libswscale/bfin/swscale_bfin.c
index f2fe871fc8..a51b2af56c 100644
--- a/libswscale/bfin/swscale_bfin.c
+++ b/libswscale/bfin/swscale_bfin.c
@@ -3,20 +3,20 @@
*
* Blackfin software video scaler operations
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/libswscale/bfin/yuv2rgb_bfin.c b/libswscale/bfin/yuv2rgb_bfin.c
index 4078a18660..dae7f314e7 100644
--- a/libswscale/bfin/yuv2rgb_bfin.c
+++ b/libswscale/bfin/yuv2rgb_bfin.c
@@ -4,23 +4,24 @@
* Blackfin video color space converter operations
* convert I420 YV12 to RGB in various formats
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
+#include "libavutil/pixdesc.h"
#include <stdint.h>
#include "config.h"
@@ -195,7 +196,7 @@ SwsFunc ff_yuv2rgb_get_func_ptr_bfin(SwsContext *c)
}
av_log(c, AV_LOG_INFO, "BlackFin accelerated color space converter %s\n",
- sws_format_name(c->dstFormat));
+ av_get_pix_fmt_name(c->dstFormat));
return f;
}
diff --git a/libswscale/colorspace-test.c b/libswscale/colorspace-test.c
index 135924c1ab..6d16785b12 100644
--- a/libswscale/colorspace-test.c
+++ b/libswscale/colorspace-test.c
@@ -1,20 +1,20 @@
/*
* Copyright (C) 2002 Michael Niedermayer <michaelni@gmx.at>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -33,7 +33,7 @@
#define FUNC(s, d, n) { s, d, #n, n }
-int main(void)
+int main(int argc, char **argv)
{
int i, funcNum;
uint8_t *srcBuffer = av_malloc(SIZE);
@@ -54,6 +54,7 @@ int main(void)
const char *name;
void (*func)(const uint8_t *src, uint8_t *dst, int src_size);
} func_info[] = {
+ FUNC(2, 2, rgb12to15),
FUNC(2, 2, rgb15to16),
FUNC(2, 3, rgb15to24),
FUNC(2, 4, rgb15to32),
@@ -66,6 +67,7 @@ int main(void)
FUNC(4, 2, rgb32to16),
FUNC(4, 3, rgb32to24),
FUNC(2, 2, rgb16to15),
+ FUNC(2, 2, rgb12tobgr12),
FUNC(2, 2, rgb15tobgr15),
FUNC(2, 2, rgb15tobgr16),
FUNC(2, 3, rgb15tobgr24),
@@ -82,6 +84,12 @@ int main(void)
FUNC(4, 2, rgb32tobgr16),
FUNC(4, 3, rgb32tobgr24),
FUNC(4, 4, shuffle_bytes_2103), /* rgb32tobgr32 */
+ FUNC(6, 6, rgb48tobgr48_nobswap),
+ FUNC(6, 6, rgb48tobgr48_bswap),
+ FUNC(8, 6, rgb64to48_nobswap),
+ FUNC(8, 6, rgb64to48_bswap),
+ FUNC(8, 6, rgb64tobgr48_nobswap),
+ FUNC(8, 6, rgb64tobgr48_bswap),
FUNC(0, 0, NULL)
};
int width;
diff --git a/libswscale/input.c b/libswscale/input.c
index 40ed122427..9234646435 100644
--- a/libswscale/input.c
+++ b/libswscale/input.c
@@ -1,20 +1,20 @@
/*
- * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (C) 2001-2012 Michael Niedermayer <michaelni@gmx.at>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -30,6 +30,7 @@
#include "libavutil/intreadwrite.h"
#include "libavutil/mathematics.h"
#include "libavutil/pixdesc.h"
+#include "libavutil/avassert.h"
#include "config.h"
#include "rgb2rgb.h"
#include "swscale.h"
@@ -51,6 +52,86 @@
#define r ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? b_r : r_b)
#define b ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? r_b : b_r)
+static av_always_inline void
+rgb64ToY_c_template(uint16_t *dst, const uint16_t *src, int width,
+ enum PixelFormat origin)
+{
+ int i;
+ for (i = 0; i < width; i++) {
+ unsigned int r_b = input_pixel(&src[i*4+0]);
+ unsigned int g = input_pixel(&src[i*4+1]);
+ unsigned int b_r = input_pixel(&src[i*4+2]);
+
+ dst[i] = (RY*r + GY*g + BY*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
+ }
+}
+
+static av_always_inline void
+rgb64ToUV_c_template(uint16_t *dstU, uint16_t *dstV,
+ const uint16_t *src1, const uint16_t *src2,
+ int width, enum PixelFormat origin)
+{
+ int i;
+ av_assert1(src1==src2);
+ for (i = 0; i < width; i++) {
+ int r_b = input_pixel(&src1[i*4+0]);
+ int g = input_pixel(&src1[i*4+1]);
+ int b_r = input_pixel(&src1[i*4+2]);
+
+ dstU[i] = (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
+ dstV[i] = (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
+ }
+}
+
+static av_always_inline void
+rgb64ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV,
+ const uint16_t *src1, const uint16_t *src2,
+ int width, enum PixelFormat origin)
+{
+ int i;
+ av_assert1(src1==src2);
+ for (i = 0; i < width; i++) {
+ int r_b = (input_pixel(&src1[8 * i + 0]) + input_pixel(&src1[8 * i + 4]) + 1) >> 1;
+ int g = (input_pixel(&src1[8 * i + 1]) + input_pixel(&src1[8 * i + 5]) + 1) >> 1;
+ int b_r = (input_pixel(&src1[8 * i + 2]) + input_pixel(&src1[8 * i + 6]) + 1) >> 1;
+
+ dstU[i]= (RU*r + GU*g + BU*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
+ dstV[i]= (RV*r + GV*g + BV*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
+ }
+}
+
+#define rgb64funcs(pattern, BE_LE, origin) \
+static void pattern ## 64 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused0, const uint8_t *unused1,\
+ int width, uint32_t *unused) \
+{ \
+ const uint16_t *src = (const uint16_t *) _src; \
+ uint16_t *dst = (uint16_t *) _dst; \
+ rgb64ToY_c_template(dst, src, width, origin); \
+} \
+ \
+static void pattern ## 64 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \
+ const uint8_t *unused0, const uint8_t *_src1, const uint8_t *_src2, \
+ int width, uint32_t *unused) \
+{ \
+ const uint16_t *src1 = (const uint16_t *) _src1, \
+ *src2 = (const uint16_t *) _src2; \
+ uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
+ rgb64ToUV_c_template(dstU, dstV, src1, src2, width, origin); \
+} \
+ \
+static void pattern ## 64 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, \
+ const uint8_t *unused0, const uint8_t *_src1, const uint8_t *_src2, \
+ int width, uint32_t *unused) \
+{ \
+ const uint16_t *src1 = (const uint16_t *) _src1, \
+ *src2 = (const uint16_t *) _src2; \
+ uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
+ rgb64ToUV_half_c_template(dstU, dstV, src1, src2, width, origin); \
+}
+
+rgb64funcs(rgb, LE, PIX_FMT_RGBA64LE)
+rgb64funcs(rgb, BE, PIX_FMT_RGBA64BE)
+
static av_always_inline void rgb48ToY_c_template(uint16_t *dst,
const uint16_t *src, int width,
enum PixelFormat origin)
@@ -73,7 +154,7 @@ static av_always_inline void rgb48ToUV_c_template(uint16_t *dstU,
enum PixelFormat origin)
{
int i;
- assert(src1 == src2);
+ av_assert1(src1 == src2);
for (i = 0; i < width; i++) {
int r_b = input_pixel(&src1[i * 3 + 0]);
int g = input_pixel(&src1[i * 3 + 1]);
@@ -92,7 +173,7 @@ static av_always_inline void rgb48ToUV_half_c_template(uint16_t *dstU,
enum PixelFormat origin)
{
int i;
- assert(src1 == src2);
+ av_assert1(src1 == src2);
for (i = 0; i < width; i++) {
int r_b = (input_pixel(&src1[6 * i + 0]) +
input_pixel(&src1[6 * i + 3]) + 1) >> 1;
@@ -113,6 +194,7 @@ static av_always_inline void rgb48ToUV_half_c_template(uint16_t *dstU,
#define rgb48funcs(pattern, BE_LE, origin) \
static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, \
const uint8_t *_src, \
+ const uint8_t *unused0, const uint8_t *unused1,\
int width, \
uint32_t *unused) \
{ \
@@ -123,6 +205,7 @@ static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, \
\
static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, \
uint8_t *_dstV, \
+ const uint8_t *unused0, \
const uint8_t *_src1, \
const uint8_t *_src2, \
int width, \
@@ -137,6 +220,7 @@ static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, \
\
static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, \
uint8_t *_dstV, \
+ const uint8_t *unused0, \
const uint8_t *_src1, \
const uint8_t *_src2, \
int width, \
@@ -162,7 +246,7 @@ rgb48funcs(bgr, BE, PIX_FMT_BGR48BE)
: (isBE(origin) ? AV_RB16(&src[(i) * 2]) \
: AV_RL16(&src[(i) * 2])))
-static av_always_inline void rgb16_32ToY_c_template(uint8_t *dst,
+static av_always_inline void rgb16_32ToY_c_template(int16_t *dst,
const uint8_t *src,
int width,
enum PixelFormat origin,
@@ -173,7 +257,7 @@ static av_always_inline void rgb16_32ToY_c_template(uint8_t *dst,
int gsh, int bsh, int S)
{
const int ry = RY << rsh, gy = GY << gsh, by = BY << bsh;
- const unsigned rnd = 33u << (S - 1);
+ const unsigned rnd = (32<<((S)-1)) + (1<<(S-7));
int i;
for (i = 0; i < width; i++) {
@@ -182,12 +266,12 @@ static av_always_inline void rgb16_32ToY_c_template(uint8_t *dst,
int g = (px & maskg) >> shg;
int r = (px & maskr) >> shr;
- dst[i] = (ry * r + gy * g + by * b + rnd) >> S;
+ dst[i] = (ry * r + gy * g + by * b + rnd) >> ((S)-6);
}
}
-static av_always_inline void rgb16_32ToUV_c_template(uint8_t *dstU,
- uint8_t *dstV,
+static av_always_inline void rgb16_32ToUV_c_template(int16_t *dstU,
+ int16_t *dstV,
const uint8_t *src,
int width,
enum PixelFormat origin,
@@ -199,7 +283,7 @@ static av_always_inline void rgb16_32ToUV_c_template(uint8_t *dstU,
{
const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
rv = RV << rsh, gv = GV << gsh, bv = BV << bsh;
- const unsigned rnd = 257u << (S - 1);
+ const unsigned rnd = (256u<<((S)-1)) + (1<<(S-7));
int i;
for (i = 0; i < width; i++) {
@@ -208,13 +292,13 @@ static av_always_inline void rgb16_32ToUV_c_template(uint8_t *dstU,
int g = (px & maskg) >> shg;
int r = (px & maskr) >> shr;
- dstU[i] = (ru * r + gu * g + bu * b + rnd) >> S;
- dstV[i] = (rv * r + gv * g + bv * b + rnd) >> S;
+ dstU[i] = (ru * r + gu * g + bu * b + rnd) >> ((S)-6);
+ dstV[i] = (rv * r + gv * g + bv * b + rnd) >> ((S)-6);
}
}
-static av_always_inline void rgb16_32ToUV_half_c_template(uint8_t *dstU,
- uint8_t *dstV,
+static av_always_inline void rgb16_32ToUV_half_c_template(int16_t *dstU,
+ int16_t *dstV,
const uint8_t *src,
int width,
enum PixelFormat origin,
@@ -227,7 +311,7 @@ static av_always_inline void rgb16_32ToUV_half_c_template(uint8_t *dstU,
const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
maskgx = ~(maskr | maskb);
- const unsigned rnd = 257u << S;
+ const unsigned rnd = (256U<<(S)) + (1<<(S-6));
int i;
maskr |= maskr << 1;
@@ -249,8 +333,8 @@ static av_always_inline void rgb16_32ToUV_half_c_template(uint8_t *dstU,
}
r = (rb & maskr) >> shr;
- dstU[i] = (ru * r + gu * g + bu * b + rnd) >> (S + 1);
- dstV[i] = (rv * r + gv * g + bv * b + rnd) >> (S + 1);
+ dstU[i] = (ru * r + gu * g + bu * b + (unsigned)rnd) >> ((S)-6+1);
+ dstV[i] = (rv * r + gv * g + bv * b + (unsigned)rnd) >> ((S)-6+1);
}
}
@@ -258,28 +342,28 @@ static av_always_inline void rgb16_32ToUV_half_c_template(uint8_t *dstU,
#define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \
maskg, maskb, rsh, gsh, bsh, S) \
-static void name ## ToY_c(uint8_t *dst, const uint8_t *src, \
+static void name ## ToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, \
int width, uint32_t *unused) \
{ \
- rgb16_32ToY_c_template(dst, src, width, fmt, shr, shg, shb, shp, \
+ rgb16_32ToY_c_template((int16_t*)dst, src, width, fmt, shr, shg, shb, shp, \
maskr, maskg, maskb, rsh, gsh, bsh, S); \
} \
\
static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
- const uint8_t *src, const uint8_t *dummy, \
+ const uint8_t *unused0, const uint8_t *src, const uint8_t *dummy, \
int width, uint32_t *unused) \
{ \
- rgb16_32ToUV_c_template(dstU, dstV, src, width, fmt, \
+ rgb16_32ToUV_c_template((int16_t*)dstU, (int16_t*)dstV, src, width, fmt, \
shr, shg, shb, shp, \
maskr, maskg, maskb, rsh, gsh, bsh, S); \
} \
\
static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
- const uint8_t *src, \
+ const uint8_t *unused0, const uint8_t *src, \
const uint8_t *dummy, \
int width, uint32_t *unused) \
{ \
- rgb16_32ToUV_half_c_template(dstU, dstV, src, width, fmt, \
+ rgb16_32ToUV_half_c_template((int16_t*)dstU, (int16_t*)dstV, src, width, fmt, \
shr, shg, shb, shp, \
maskr, maskg, maskb, \
rsh, gsh, bsh, S); \
@@ -302,71 +386,124 @@ rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0, 0, 0, 0xF800, 0x07E0, 0x0
rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT + 7)
rgb16_32_wrapper(PIX_FMT_RGB444BE, rgb12be, 0, 0, 0, 0, 0x0F00, 0x00F0, 0x000F, 0, 4, 8, RGB2YUV_SHIFT + 4)
-static void abgrToA_c(uint8_t *dst, const uint8_t *src, int width,
- uint32_t *unused)
+static void gbr24pToUV_half_c(uint8_t *_dstU, uint8_t *_dstV,
+ const uint8_t *gsrc, const uint8_t *bsrc, const uint8_t *rsrc,
+ int width, uint32_t *unused)
{
+ uint16_t *dstU = (uint16_t *)_dstU;
+ uint16_t *dstV = (uint16_t *)_dstV;
int i;
- for (i = 0; i < width; i++)
- dst[i] = src[4 * i];
+ for (i = 0; i < width; i++) {
+ unsigned int g = gsrc[2*i] + gsrc[2*i+1];
+ unsigned int b = bsrc[2*i] + bsrc[2*i+1];
+ unsigned int r = rsrc[2*i] + rsrc[2*i+1];
+
+ dstU[i] = (RU*r + GU*g + BU*b + (0x4001<<(RGB2YUV_SHIFT-6))) >> (RGB2YUV_SHIFT-6+1);
+ dstV[i] = (RV*r + GV*g + BV*b + (0x4001<<(RGB2YUV_SHIFT-6))) >> (RGB2YUV_SHIFT-6+1);
+ }
}
-static void rgbaToA_c(uint8_t *dst, const uint8_t *src, int width,
- uint32_t *unused)
+static void rgba64ToA_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused1,
+ const uint8_t *unused2, int width, uint32_t *unused)
{
+ int16_t *dst = (int16_t *)_dst;
+ const uint16_t *src = (const uint16_t *)_src;
int i;
for (i = 0; i < width; i++)
dst[i] = src[4 * i + 3];
}
-static void palToY_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *pal)
+static void abgrToA_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused)
+{
+ int16_t *dst = (int16_t *)_dst;
+ int i;
+ for (i=0; i<width; i++) {
+ dst[i]= src[4*i]<<6;
+ }
+}
+
+static void rgbaToA_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused)
+{
+ int16_t *dst = (int16_t *)_dst;
+ int i;
+ for (i=0; i<width; i++) {
+ dst[i]= src[4*i+3]<<6;
+ }
+}
+
+static void palToA_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *pal)
+{
+ int16_t *dst = (int16_t *)_dst;
+ int i;
+ for (i=0; i<width; i++) {
+ int d= src[i];
+
+ dst[i]= (pal[d] >> 24)<<6;
+ }
+}
+
+static void palToY_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *pal)
{
+ int16_t *dst = (int16_t *)_dst;
int i;
for (i = 0; i < width; i++) {
int d = src[i];
- dst[i] = pal[d] & 0xFF;
+ dst[i] = (pal[d] & 0xFF)<<6;
}
}
-static void palToUV_c(uint8_t *dstU, uint8_t *dstV,
- const uint8_t *src1, const uint8_t *src2,
+static void palToUV_c(uint8_t *_dstU, uint8_t *_dstV,
+ const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2,
int width, uint32_t *pal)
{
+ uint16_t *dstU = (uint16_t *)_dstU;
+ int16_t *dstV = (int16_t *)_dstV;
int i;
- assert(src1 == src2);
+ av_assert1(src1 == src2);
for (i = 0; i < width; i++) {
int p = pal[src1[i]];
- dstU[i] = p >> 8;
- dstV[i] = p >> 16;
+ dstU[i] = (uint8_t)(p>> 8)<<6;
+ dstV[i] = (uint8_t)(p>>16)<<6;
}
}
-static void monowhite2Y_c(uint8_t *dst, const uint8_t *src,
- int width, uint32_t *unused)
+static void monowhite2Y_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused)
{
+ int16_t *dst = (int16_t *)_dst;
int i, j;
width = (width + 7) >> 3;
for (i = 0; i < width; i++) {
int d = ~src[i];
for (j = 0; j < 8; j++)
- dst[8 * i + j] = ((d >> (7 - j)) & 1) * 255;
+ dst[8*i+j]= ((d>>(7-j))&1) * 16383;
+ }
+ if(width&7){
+ int d= ~src[i];
+ for (j = 0; j < (width&7); j++)
+ dst[8*i+j]= ((d>>(7-j))&1) * 16383;
}
}
-static void monoblack2Y_c(uint8_t *dst, const uint8_t *src,
- int width, uint32_t *unused)
+static void monoblack2Y_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused)
{
+ int16_t *dst = (int16_t *)_dst;
int i, j;
width = (width + 7) >> 3;
for (i = 0; i < width; i++) {
int d = src[i];
for (j = 0; j < 8; j++)
- dst[8 * i + j] = ((d >> (7 - j)) & 1) * 255;
+ dst[8*i+j]= ((d>>(7-j))&1) * 16383;
+ }
+ if(width&7){
+ int d = src[i];
+ for (j = 0; j < (width&7); j++)
+ dst[8*i+j] = ((d>>(7-j))&1) * 16383;
}
}
-static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
+static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width,
uint32_t *unused)
{
int i;
@@ -374,7 +511,7 @@ static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
dst[i] = src[2 * i];
}
-static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
+static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
const uint8_t *src2, int width, uint32_t *unused)
{
int i;
@@ -382,10 +519,10 @@ static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
dstU[i] = src1[4 * i + 1];
dstV[i] = src1[4 * i + 3];
}
- assert(src1 == src2);
+ av_assert1(src1 == src2);
}
-static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, int width,
+static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused1, const uint8_t *unused2, int width,
uint32_t *unused)
{
int i;
@@ -395,7 +532,7 @@ static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, int width,
dst[i] = av_bswap16(src[i]);
}
-static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src1,
+static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0, const uint8_t *_src1,
const uint8_t *_src2, int width, uint32_t *unused)
{
int i;
@@ -410,7 +547,7 @@ static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src1,
/* This is almost identical to the previous, end exists only because
* yuy2ToY/UV)(dst, src + 1, ...) would have 100% unaligned accesses. */
-static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
+static void uyvyToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width,
uint32_t *unused)
{
int i;
@@ -418,7 +555,7 @@ static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
dst[i] = src[2 * i + 1];
}
-static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
+static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
const uint8_t *src2, int width, uint32_t *unused)
{
int i;
@@ -426,7 +563,7 @@ static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
dstU[i] = src1[4 * i + 0];
dstV[i] = src1[4 * i + 2];
}
- assert(src1 == src2);
+ av_assert1(src1 == src2);
}
static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
@@ -440,14 +577,14 @@ static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
}
static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
- const uint8_t *src1, const uint8_t *src2,
+ const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2,
int width, uint32_t *unused)
{
nvXXtoUV_c(dstU, dstV, src1, width);
}
static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
- const uint8_t *src1, const uint8_t *src2,
+ const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2,
int width, uint32_t *unused)
{
nvXXtoUV_c(dstV, dstU, src1, width);
@@ -455,114 +592,127 @@ static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
#define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
-static void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
+static void bgr24ToY_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2,
int width, uint32_t *unused)
{
+ int16_t *dst = (int16_t *)_dst;
int i;
for (i = 0; i < width; i++) {
int b = src[i * 3 + 0];
int g = src[i * 3 + 1];
int r = src[i * 3 + 2];
- dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
+ dst[i] = ((RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6));
}
}
-static void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
+static void bgr24ToUV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0, const uint8_t *src1,
const uint8_t *src2, int width, uint32_t *unused)
{
+ int16_t *dstU = (int16_t *)_dstU;
+ int16_t *dstV = (int16_t *)_dstV;
int i;
for (i = 0; i < width; i++) {
int b = src1[3 * i + 0];
int g = src1[3 * i + 1];
int r = src1[3 * i + 2];
- dstU[i] = (RU * r + GU * g + BU * b + (257 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT;
- dstV[i] = (RV * r + GV * g + BV * b + (257 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT;
+ dstU[i] = (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
+ dstV[i] = (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
}
- assert(src1 == src2);
+ av_assert1(src1 == src2);
}
-static void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
+static void bgr24ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0, const uint8_t *src1,
const uint8_t *src2, int width, uint32_t *unused)
{
+ int16_t *dstU = (int16_t *)_dstU;
+ int16_t *dstV = (int16_t *)_dstV;
int i;
for (i = 0; i < width; i++) {
int b = src1[6 * i + 0] + src1[6 * i + 3];
int g = src1[6 * i + 1] + src1[6 * i + 4];
int r = src1[6 * i + 2] + src1[6 * i + 5];
- dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
- dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
+ dstU[i] = (RU*r + GU*g + BU*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
+ dstV[i] = (RV*r + GV*g + BV*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
}
- assert(src1 == src2);
+ av_assert1(src1 == src2);
}
-static void rgb24ToY_c(uint8_t *dst, const uint8_t *src, int width,
+static void rgb24ToY_c(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width,
uint32_t *unused)
{
+ int16_t *dst = (int16_t *)_dst;
int i;
for (i = 0; i < width; i++) {
int r = src[i * 3 + 0];
int g = src[i * 3 + 1];
int b = src[i * 3 + 2];
- dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
+ dst[i] = ((RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6));
}
}
-static void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
+static void rgb24ToUV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0, const uint8_t *src1,
const uint8_t *src2, int width, uint32_t *unused)
{
+ int16_t *dstU = (int16_t *)_dstU;
+ int16_t *dstV = (int16_t *)_dstV;
int i;
- assert(src1 == src2);
+ av_assert1(src1 == src2);
for (i = 0; i < width; i++) {
int r = src1[3 * i + 0];
int g = src1[3 * i + 1];
int b = src1[3 * i + 2];
- dstU[i] = (RU * r + GU * g + BU * b + (257 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT;
- dstV[i] = (RV * r + GV * g + BV * b + (257 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT;
+ dstU[i] = (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
+ dstV[i] = (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
}
}
-static void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
+static void rgb24ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0, const uint8_t *src1,
const uint8_t *src2, int width, uint32_t *unused)
{
+ int16_t *dstU = (int16_t *)_dstU;
+ int16_t *dstV = (int16_t *)_dstV;
int i;
- assert(src1 == src2);
+ av_assert1(src1 == src2);
for (i = 0; i < width; i++) {
int r = src1[6 * i + 0] + src1[6 * i + 3];
int g = src1[6 * i + 1] + src1[6 * i + 4];
int b = src1[6 * i + 2] + src1[6 * i + 5];
- dstU[i] = (RU * r + GU * g + BU * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
- dstV[i] = (RV * r + GV * g + BV * b + (257 << RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT + 1);
+ dstU[i] = (RU*r + GU*g + BU*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
+ dstV[i] = (RV*r + GV*g + BV*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
}
}
-static void planar_rgb_to_y(uint8_t *dst, const uint8_t *src[4], int width)
+static void planar_rgb_to_y(uint8_t *_dst, const uint8_t *src[4], int width)
{
+ uint16_t *dst = (uint16_t *)_dst;
int i;
for (i = 0; i < width; i++) {
int g = src[0][i];
int b = src[1][i];
int r = src[2][i];
- dst[i] = ((RY * r + GY * g + BY * b + (33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
+ dst[i] = (RY*r + GY*g + BY*b + (0x801<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
}
}
-static void planar_rgb_to_uv(uint8_t *dstU, uint8_t *dstV, const uint8_t *src[4], int width)
+static void planar_rgb_to_uv(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *src[4], int width)
{
+ uint16_t *dstU = (uint16_t *)_dstU;
+ uint16_t *dstV = (uint16_t *)_dstV;
int i;
for (i = 0; i < width; i++) {
int g = src[0][i];
int b = src[1][i];
int r = src[2][i];
- dstU[i] = (RU * r + GU * g + BU * b + (257 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT;
- dstV[i] = (RV * r + GV * g + BV * b + (257 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT;
+ dstU[i] = (RU*r + GU*g + BU*b + (0x4001<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
+ dstV[i] = (RV*r + GV*g + BV*b + (0x4001<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
}
}
@@ -603,6 +753,26 @@ static void planar_rgb10be_to_y(uint8_t *dst, const uint8_t *src[4], int w)
planar_rgb16_to_y(dst, src, w, 10, 1);
}
+static void planar_rgb12le_to_y(uint8_t *dst, const uint8_t *src[4], int w)
+{
+ planar_rgb16_to_y(dst, src, w, 12, 0);
+}
+
+static void planar_rgb12be_to_y(uint8_t *dst, const uint8_t *src[4], int w)
+{
+ planar_rgb16_to_y(dst, src, w, 12, 1);
+}
+
+static void planar_rgb14le_to_y(uint8_t *dst, const uint8_t *src[4], int w)
+{
+ planar_rgb16_to_y(dst, src, w, 14, 0);
+}
+
+static void planar_rgb14be_to_y(uint8_t *dst, const uint8_t *src[4], int w)
+{
+ planar_rgb16_to_y(dst, src, w, 14, 1);
+}
+
static void planar_rgb16le_to_y(uint8_t *dst, const uint8_t *src[4], int w)
{
planar_rgb16_to_y(dst, src, w, 16, 0);
@@ -656,6 +826,30 @@ static void planar_rgb10be_to_uv(uint8_t *dstU, uint8_t *dstV,
planar_rgb16_to_uv(dstU, dstV, src, w, 10, 1);
}
+static void planar_rgb12le_to_uv(uint8_t *dstU, uint8_t *dstV,
+ const uint8_t *src[4], int w)
+{
+ planar_rgb16_to_uv(dstU, dstV, src, w, 12, 0);
+}
+
+static void planar_rgb12be_to_uv(uint8_t *dstU, uint8_t *dstV,
+ const uint8_t *src[4], int w)
+{
+ planar_rgb16_to_uv(dstU, dstV, src, w, 12, 1);
+}
+
+static void planar_rgb14le_to_uv(uint8_t *dstU, uint8_t *dstV,
+ const uint8_t *src[4], int w)
+{
+ planar_rgb16_to_uv(dstU, dstV, src, w, 14, 0);
+}
+
+static void planar_rgb14be_to_uv(uint8_t *dstU, uint8_t *dstV,
+ const uint8_t *src[4], int w)
+{
+ planar_rgb16_to_uv(dstU, dstV, src, w, 14, 1);
+}
+
static void planar_rgb16le_to_uv(uint8_t *dstU, uint8_t *dstV,
const uint8_t *src[4], int w)
{
@@ -699,6 +893,12 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
case PIX_FMT_GBRP10LE:
c->readChrPlanar = planar_rgb10le_to_uv;
break;
+ case PIX_FMT_GBRP12LE:
+ c->readChrPlanar = planar_rgb12le_to_uv;
+ break;
+ case PIX_FMT_GBRP14LE:
+ c->readChrPlanar = planar_rgb14le_to_uv;
+ break;
case PIX_FMT_GBRP16LE:
c->readChrPlanar = planar_rgb16le_to_uv;
break;
@@ -708,6 +908,12 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
case PIX_FMT_GBRP10BE:
c->readChrPlanar = planar_rgb10be_to_uv;
break;
+ case PIX_FMT_GBRP12BE:
+ c->readChrPlanar = planar_rgb12be_to_uv;
+ break;
+ case PIX_FMT_GBRP14BE:
+ c->readChrPlanar = planar_rgb14be_to_uv;
+ break;
case PIX_FMT_GBRP16BE:
c->readChrPlanar = planar_rgb16be_to_uv;
break;
@@ -721,6 +927,12 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
case PIX_FMT_YUV422P10LE:
case PIX_FMT_YUV444P10LE:
case PIX_FMT_YUV420P10LE:
+ case PIX_FMT_YUV422P12LE:
+ case PIX_FMT_YUV444P12LE:
+ case PIX_FMT_YUV420P12LE:
+ case PIX_FMT_YUV422P14LE:
+ case PIX_FMT_YUV444P14LE:
+ case PIX_FMT_YUV420P14LE:
case PIX_FMT_YUV420P16LE:
case PIX_FMT_YUV422P16LE:
case PIX_FMT_YUV444P16LE:
@@ -733,6 +945,12 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
case PIX_FMT_YUV444P10BE:
case PIX_FMT_YUV422P10BE:
case PIX_FMT_YUV420P10BE:
+ case PIX_FMT_YUV444P12BE:
+ case PIX_FMT_YUV422P12BE:
+ case PIX_FMT_YUV420P12BE:
+ case PIX_FMT_YUV444P14BE:
+ case PIX_FMT_YUV422P14BE:
+ case PIX_FMT_YUV420P14BE:
case PIX_FMT_YUV420P16BE:
case PIX_FMT_YUV422P16BE:
case PIX_FMT_YUV444P16BE:
@@ -742,6 +960,12 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
}
if (c->chrSrcHSubSample) {
switch (srcFormat) {
+ case PIX_FMT_RGBA64BE:
+ c->chrToYV12 = rgb64BEToUV_half_c;
+ break;
+ case PIX_FMT_RGBA64LE:
+ c->chrToYV12 = rgb64LEToUV_half_c;
+ break;
case PIX_FMT_RGB48BE:
c->chrToYV12 = rgb48BEToUV_half_c;
break;
@@ -775,6 +999,9 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
case PIX_FMT_BGR555BE:
c->chrToYV12 = bgr15beToUV_half_c;
break;
+ case PIX_FMT_GBR24P :
+ c->chrToYV12 = gbr24pToUV_half_c;
+ break;
case PIX_FMT_BGR444LE:
c->chrToYV12 = bgr12leToUV_half_c;
break;
@@ -811,6 +1038,12 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
}
} else {
switch (srcFormat) {
+ case PIX_FMT_RGBA64BE:
+ c->chrToYV12 = rgb64BEToUV_c;
+ break;
+ case PIX_FMT_RGBA64LE:
+ c->chrToYV12 = rgb64LEToUV_c;
+ break;
case PIX_FMT_RGB48BE:
c->chrToYV12 = rgb48BEToUV_c;
break;
@@ -889,6 +1122,12 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
case PIX_FMT_GBRP10LE:
c->readLumPlanar = planar_rgb10le_to_y;
break;
+ case PIX_FMT_GBRP12LE:
+ c->readLumPlanar = planar_rgb12le_to_y;
+ break;
+ case PIX_FMT_GBRP14LE:
+ c->readLumPlanar = planar_rgb14le_to_y;
+ break;
case PIX_FMT_GBRP16LE:
c->readLumPlanar = planar_rgb16le_to_y;
break;
@@ -898,6 +1137,12 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
case PIX_FMT_GBRP10BE:
c->readLumPlanar = planar_rgb10be_to_y;
break;
+ case PIX_FMT_GBRP12BE:
+ c->readLumPlanar = planar_rgb12be_to_y;
+ break;
+ case PIX_FMT_GBRP14BE:
+ c->readLumPlanar = planar_rgb14be_to_y;
+ break;
case PIX_FMT_GBRP16BE:
c->readLumPlanar = planar_rgb16be_to_y;
break;
@@ -911,6 +1156,12 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
case PIX_FMT_YUV444P10LE:
case PIX_FMT_YUV422P10LE:
case PIX_FMT_YUV420P10LE:
+ case PIX_FMT_YUV444P12LE:
+ case PIX_FMT_YUV422P12LE:
+ case PIX_FMT_YUV420P12LE:
+ case PIX_FMT_YUV444P14LE:
+ case PIX_FMT_YUV422P14LE:
+ case PIX_FMT_YUV420P14LE:
case PIX_FMT_YUV420P16LE:
case PIX_FMT_YUV422P16LE:
case PIX_FMT_YUV444P16LE:
@@ -924,6 +1175,12 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
case PIX_FMT_YUV444P10BE:
case PIX_FMT_YUV422P10BE:
case PIX_FMT_YUV420P10BE:
+ case PIX_FMT_YUV444P12BE:
+ case PIX_FMT_YUV422P12BE:
+ case PIX_FMT_YUV420P12BE:
+ case PIX_FMT_YUV444P14BE:
+ case PIX_FMT_YUV422P14BE:
+ case PIX_FMT_YUV420P14BE:
case PIX_FMT_YUV420P16BE:
case PIX_FMT_YUV422P16BE:
case PIX_FMT_YUV444P16BE:
@@ -1017,9 +1274,17 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
case PIX_FMT_BGR48LE:
c->lumToYV12 = bgr48LEToY_c;
break;
+ case PIX_FMT_RGBA64BE:
+ c->lumToYV12 = rgb64BEToY_c;
+ break;
+ case PIX_FMT_RGBA64LE:
+ c->lumToYV12 = rgb64LEToY_c;
+ break;
}
if (c->alpPixBuf) {
switch (srcFormat) {
+ case PIX_FMT_RGBA64LE:
+ case PIX_FMT_RGBA64BE: c->alpToYV12 = rgba64ToA_c; break;
case PIX_FMT_BGRA:
case PIX_FMT_RGBA:
c->alpToYV12 = rgbaToA_c;
@@ -1031,6 +1296,9 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
case PIX_FMT_Y400A:
c->alpToYV12 = uyvyToY_c;
break;
+ case PIX_FMT_PAL8 :
+ c->alpToYV12 = palToA_c;
+ break;
}
}
}
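
For reference, a minimal standalone sketch of the fixed-point luma conversion that the rgb24ToY_c()/bgr24ToY_c() hunks above switch to: 8-bit RGB becomes a luma sample scaled up by 2^6 (so it fits the new int16_t planes) with round-to-nearest. This is not code from the patch; the RY/GY/BY values and RGB2YUV_SHIFT == 15 are illustrative assumptions.

#include <stdint.h>
#include <stdio.h>

#define SHIFT 15      /* assumed value of RGB2YUV_SHIFT                */
#define RY    8414    /* ~0.299 * 219/255 * 2^15, illustrative         */
#define GY    16519   /* ~0.587 * 219/255 * 2^15, illustrative         */
#define BY    3208    /* ~0.114 * 219/255 * 2^15, illustrative         */

/* 8-bit R,G,B in, luma scaled by 2^6 out:
 * 32 << (SHIFT-1) == 16 << SHIFT adds the limited-range +16 luma offset,
 * 1 << (SHIFT-7) is half of the final shift step, i.e. round to nearest. */
static int16_t rgb_to_y15(uint8_t r, uint8_t g, uint8_t b)
{
    return (RY * r + GY * g + BY * b
            + (32 << (SHIFT - 1)) + (1 << (SHIFT - 7))) >> (SHIFT - 6);
}

int main(void)
{
    /* mid grey: roughly (16 + 219/2) << 6, i.e. about 8060 */
    printf("%d\n", rgb_to_y15(128, 128, 128));
    return 0;
}

The same offset-plus-half-step pattern repeats through the other ToY/ToUV hunks in this file, with the constants adjusted for the chroma offset of 128.
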
diff --git a/libswscale/options.c b/libswscale/options.c
index 7ed5254aa8..c02c084a87 100644
--- a/libswscale/options.c
+++ b/libswscale/options.c
@@ -1,20 +1,20 @@
/*
* Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -66,7 +66,12 @@ static const AVOption options[] = {
{ NULL }
};
-const AVClass sws_context_class = { "SWScaler", sws_context_to_name, options };
+const AVClass sws_context_class = {
+ .class_name = "SWScaler",
+ .item_name = sws_context_to_name,
+ .option = options,
+ .category = AV_CLASS_CATEGORY_SWSCALER,
+};
const AVClass *sws_get_class(void)
{
diff --git a/libswscale/output.c b/libswscale/output.c
index 43d5435b98..8955665f66 100644
--- a/libswscale/output.c
+++ b/libswscale/output.c
@@ -1,20 +1,20 @@
/*
- * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (C) 2001-2012 Michael Niedermayer <michaelni@gmx.at>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -26,6 +26,7 @@
#include "libavutil/attributes.h"
#include "libavutil/avutil.h"
+#include "libavutil/avassert.h"
#include "libavutil/bswap.h"
#include "libavutil/cpu.h"
#include "libavutil/intreadwrite.h"
@@ -36,24 +37,27 @@
#include "swscale.h"
#include "swscale_internal.h"
-DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
+DECLARE_ALIGNED(8, const uint8_t, dither_2x2_4)[][8]={
{ 1, 3, 1, 3, 1, 3, 1, 3, },
{ 2, 0, 2, 0, 2, 0, 2, 0, },
+{ 1, 3, 1, 3, 1, 3, 1, 3, },
};
-DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
+DECLARE_ALIGNED(8, const uint8_t, dither_2x2_8)[][8]={
{ 6, 2, 6, 2, 6, 2, 6, 2, },
{ 0, 4, 0, 4, 0, 4, 0, 4, },
+{ 6, 2, 6, 2, 6, 2, 6, 2, },
};
-DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
+DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[][8]={
{ 8, 4, 11, 7, 8, 4, 11, 7, },
{ 2, 14, 1, 13, 2, 14, 1, 13, },
{ 10, 6, 9, 5, 10, 6, 9, 5, },
{ 0, 12, 3, 15, 0, 12, 3, 15, },
+{ 8, 4, 11, 7, 8, 4, 11, 7, },
};
-DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
+DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[][8]={
{ 17, 9, 23, 15, 16, 8, 22, 14, },
{ 5, 29, 3, 27, 4, 28, 2, 26, },
{ 21, 13, 19, 11, 20, 12, 18, 10, },
@@ -62,9 +66,10 @@ DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
{ 4, 28, 2, 26, 5, 29, 3, 27, },
{ 20, 12, 18, 10, 21, 13, 19, 11, },
{ 1, 25, 7, 31, 0, 24, 6, 30, },
+{ 17, 9, 23, 15, 16, 8, 22, 14, },
};
-DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
+DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[][8]={
{ 0, 55, 14, 68, 3, 58, 17, 72, },
{ 37, 18, 50, 32, 40, 22, 54, 35, },
{ 9, 64, 5, 59, 13, 67, 8, 63, },
@@ -73,10 +78,11 @@ DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
{ 39, 21, 52, 34, 38, 19, 51, 33, },
{ 11, 66, 7, 62, 10, 65, 6, 60, },
{ 48, 30, 43, 25, 47, 29, 42, 24, },
+{ 0, 55, 14, 68, 3, 58, 17, 72, },
};
#if 1
-DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
+DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[][8]={
{117, 62, 158, 103, 113, 58, 155, 100, },
{ 34, 199, 21, 186, 31, 196, 17, 182, },
{144, 89, 131, 76, 141, 86, 127, 72, },
@@ -85,10 +91,11 @@ DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
{ 28, 193, 14, 179, 38, 203, 24, 189, },
{138, 83, 124, 69, 148, 93, 134, 79, },
{ 7, 172, 48, 213, 3, 168, 45, 210, },
+{117, 62, 158, 103, 113, 58, 155, 100, },
};
#elif 1
// tries to correct a gamma of 1.5
-DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
+DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[][8]={
{ 0, 143, 18, 200, 2, 156, 25, 215, },
{ 78, 28, 125, 64, 89, 36, 138, 74, },
{ 10, 180, 3, 161, 16, 195, 8, 175, },
@@ -97,10 +104,11 @@ DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
{ 85, 33, 134, 71, 81, 30, 130, 67, },
{ 14, 190, 6, 171, 12, 185, 5, 166, },
{117, 57, 101, 44, 113, 54, 97, 41, },
+{ 0, 143, 18, 200, 2, 156, 25, 215, },
};
#elif 1
// tries to correct a gamma of 2.0
-DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
+DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[][8]={
{ 0, 124, 8, 193, 0, 140, 12, 213, },
{ 55, 14, 104, 42, 66, 19, 119, 52, },
{ 3, 168, 1, 145, 6, 187, 3, 162, },
@@ -109,10 +117,11 @@ DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
{ 62, 17, 114, 48, 58, 16, 109, 45, },
{ 5, 181, 2, 157, 4, 175, 1, 151, },
{ 95, 36, 78, 26, 90, 34, 74, 24, },
+{ 0, 124, 8, 193, 0, 140, 12, 213, },
};
#else
// tries to correct a gamma of 2.5
-DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
+DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[][8]={
{ 0, 107, 3, 187, 0, 125, 6, 212, },
{ 39, 7, 86, 28, 49, 11, 102, 36, },
{ 1, 158, 0, 131, 3, 180, 1, 151, },
@@ -121,6 +130,7 @@ DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
{ 45, 9, 96, 33, 42, 8, 91, 30, },
{ 2, 172, 1, 144, 2, 165, 0, 137, },
{ 77, 23, 60, 15, 72, 21, 56, 14, },
+{ 0, 107, 3, 187, 0, 125, 6, 212, },
};
#endif
@@ -136,7 +146,8 @@ yuv2plane1_16_c_template(const int32_t *src, uint16_t *dest, int dstW,
int big_endian, int output_bits)
{
int i;
- int shift = 19 - output_bits;
+ int shift = 3;
+ av_assert0(output_bits == 16);
for (i = 0; i < dstW; i++) {
int val = src[i] + (1 << (shift - 1));
@@ -150,10 +161,11 @@ yuv2planeX_16_c_template(const int16_t *filter, int filterSize,
int big_endian, int output_bits)
{
int i;
- int shift = 15 + 16 - output_bits;
+ int shift = 15;
+ av_assert0(output_bits == 16);
for (i = 0; i < dstW; i++) {
- int val = 1 << (30-output_bits);
+ int val = 1 << (shift - 1);
int j;
/* range of val is [0,0x7FFFFFFF], so 31 bits, but with lanczos/spline
@@ -200,7 +212,7 @@ yuv2planeX_10_c_template(const int16_t *filter, int filterSize,
int shift = 11 + 16 - output_bits;
for (i = 0; i < dstW; i++) {
- int val = 1 << (26-output_bits);
+ int val = 1 << (shift - 1);
int j;
for (j = 0; j < filterSize; j++)
@@ -393,14 +405,14 @@ yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0,
for (i = 0; i < dstW; i += 8) {
int acc = 0;
- accumulate_bit(acc, (buf0[i + 0] >> 7) + d128[0]);
- accumulate_bit(acc, (buf0[i + 1] >> 7) + d128[1]);
- accumulate_bit(acc, (buf0[i + 2] >> 7) + d128[2]);
- accumulate_bit(acc, (buf0[i + 3] >> 7) + d128[3]);
- accumulate_bit(acc, (buf0[i + 4] >> 7) + d128[4]);
- accumulate_bit(acc, (buf0[i + 5] >> 7) + d128[5]);
- accumulate_bit(acc, (buf0[i + 6] >> 7) + d128[6]);
- accumulate_bit(acc, (buf0[i + 7] >> 7) + d128[7]);
+ accumulate_bit(acc, ((buf0[i + 0] + 64) >> 7) + d128[0]);
+ accumulate_bit(acc, ((buf0[i + 1] + 64) >> 7) + d128[1]);
+ accumulate_bit(acc, ((buf0[i + 2] + 64) >> 7) + d128[2]);
+ accumulate_bit(acc, ((buf0[i + 3] + 64) >> 7) + d128[3]);
+ accumulate_bit(acc, ((buf0[i + 4] + 64) >> 7) + d128[4]);
+ accumulate_bit(acc, ((buf0[i + 5] + 64) >> 7) + d128[5]);
+ accumulate_bit(acc, ((buf0[i + 6] + 64) >> 7) + d128[6]);
+ accumulate_bit(acc, ((buf0[i + 7] + 64) >> 7) + d128[7]);
output_pixel(*dest++, acc);
}
@@ -516,10 +528,12 @@ yuv2422_2_c_template(SwsContext *c, const int16_t *buf[2],
int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
- Y1 = av_clip_uint8(Y1);
- Y2 = av_clip_uint8(Y2);
- U = av_clip_uint8(U);
- V = av_clip_uint8(V);
+ if ((Y1 | Y2 | U | V) & 0x100) {
+ Y1 = av_clip_uint8(Y1);
+ Y2 = av_clip_uint8(Y2);
+ U = av_clip_uint8(U);
+ V = av_clip_uint8(V);
+ }
output_pixels(i * 4, Y1, U, Y2, V);
}
@@ -536,10 +550,17 @@ yuv2422_1_c_template(SwsContext *c, const int16_t *buf0,
if (uvalpha < 2048) {
for (i = 0; i < ((dstW + 1) >> 1); i++) {
- int Y1 = buf0[i * 2] >> 7;
- int Y2 = buf0[i * 2 + 1] >> 7;
- int U = ubuf0[i] >> 7;
- int V = vbuf0[i] >> 7;
+ int Y1 = (buf0[i * 2 ]+64) >> 7;
+ int Y2 = (buf0[i * 2 + 1]+64) >> 7;
+ int U = (ubuf0[i] +64) >> 7;
+ int V = (vbuf0[i] +64) >> 7;
+
+ if ((Y1 | Y2 | U | V) & 0x100) {
+ Y1 = av_clip_uint8(Y1);
+ Y2 = av_clip_uint8(Y2);
+ U = av_clip_uint8(U);
+ V = av_clip_uint8(V);
+ }
Y1 = av_clip_uint8(Y1);
Y2 = av_clip_uint8(Y2);
@@ -551,10 +572,17 @@ yuv2422_1_c_template(SwsContext *c, const int16_t *buf0,
} else {
const int16_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1];
for (i = 0; i < ((dstW + 1) >> 1); i++) {
- int Y1 = buf0[i * 2] >> 7;
- int Y2 = buf0[i * 2 + 1] >> 7;
- int U = (ubuf0[i] + ubuf1[i]) >> 8;
- int V = (vbuf0[i] + vbuf1[i]) >> 8;
+ int Y1 = (buf0[i * 2 ] + 64) >> 7;
+ int Y2 = (buf0[i * 2 + 1] + 64) >> 7;
+ int U = (ubuf0[i] + ubuf1[i]+128) >> 8;
+ int V = (vbuf0[i] + vbuf1[i]+128) >> 8;
+
+ if ((Y1 | Y2 | U | V) & 0x100) {
+ Y1 = av_clip_uint8(Y1);
+ Y2 = av_clip_uint8(Y2);
+ U = av_clip_uint8(U);
+ V = av_clip_uint8(V);
+ }
Y1 = av_clip_uint8(Y1);
Y2 = av_clip_uint8(Y2);
@@ -812,7 +840,7 @@ YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48le, PIX_FMT_BGR48LE)
* correct RGB values into the destination buffer.
*/
static av_always_inline void
-yuv2rgb_write(uint8_t *_dest, int i, unsigned Y1, unsigned Y2,
+yuv2rgb_write(uint8_t *_dest, int i, int Y1, int Y2,
unsigned A1, unsigned A2,
const void *_r, const void *_g, const void *_b, int y,
enum PixelFormat target, int hasAlpha)
@@ -848,6 +876,7 @@ yuv2rgb_write(uint8_t *_dest, int i, unsigned Y1, unsigned Y2,
#define r_b ((target == PIX_FMT_RGB24) ? r : b)
#define b_r ((target == PIX_FMT_RGB24) ? b : r)
+
dest[i * 6 + 0] = r_b[Y1];
dest[i * 6 + 1] = g[Y1];
dest[i * 6 + 2] = b_r[Y1];
@@ -953,12 +982,6 @@ yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
Y2 >>= 19;
U >>= 19;
V >>= 19;
- if ((Y1 | Y2 | U | V) & 0x100) {
- Y1 = av_clip_uint8(Y1);
- Y2 = av_clip_uint8(Y2);
- U = av_clip_uint8(U);
- V = av_clip_uint8(V);
- }
if (hasAlpha) {
A1 = 1 << 18;
A2 = 1 << 18;
@@ -974,10 +997,9 @@ yuv2rgb_X_c_template(SwsContext *c, const int16_t *lumFilter,
}
}
- /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/
- r = c->table_rV[V];
- g = (c->table_gU[U] + c->table_gV[V]);
- b = c->table_bU[U];
+ r = c->table_rV[V + YUVRGB_TABLE_HEADROOM];
+ g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] + c->table_gV[V + YUVRGB_TABLE_HEADROOM]);
+ b = c->table_bU[U + YUVRGB_TABLE_HEADROOM];
yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
r, g, b, y, target, hasAlpha);
@@ -1006,16 +1028,9 @@ yuv2rgb_2_c_template(SwsContext *c, const int16_t *buf[2],
int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
int A1, A2;
- const void *r, *g, *b;
-
- Y1 = av_clip_uint8(Y1);
- Y2 = av_clip_uint8(Y2);
- U = av_clip_uint8(U);
- V = av_clip_uint8(V);
-
- r = c->table_rV[V];
- g = (c->table_gU[U] + c->table_gV[V]);
- b = c->table_bU[U];
+ const void *r = c->table_rV[V + YUVRGB_TABLE_HEADROOM],
+ *g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] + c->table_gV[V + YUVRGB_TABLE_HEADROOM]),
+ *b = c->table_bU[U + YUVRGB_TABLE_HEADROOM];
if (hasAlpha) {
A1 = (abuf0[i * 2 ] * yalpha1 + abuf1[i * 2 ] * yalpha) >> 19;
@@ -1041,25 +1056,18 @@ yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0,
if (uvalpha < 2048) {
for (i = 0; i < ((dstW + 1) >> 1); i++) {
- int Y1 = buf0[i * 2] >> 7;
- int Y2 = buf0[i * 2 + 1] >> 7;
- int U = ubuf0[i] >> 7;
- int V = vbuf0[i] >> 7;
+ int Y1 = (buf0[i * 2 ] + 64) >> 7;
+ int Y2 = (buf0[i * 2 + 1] + 64) >> 7;
+ int U = (ubuf0[i] + 64) >> 7;
+ int V = (vbuf0[i] + 64) >> 7;
int A1, A2;
- const void *r, *g, *b;
-
- Y1 = av_clip_uint8(Y1);
- Y2 = av_clip_uint8(Y2);
- U = av_clip_uint8(U);
- V = av_clip_uint8(V);
-
- r = c->table_rV[V];
- g = (c->table_gU[U] + c->table_gV[V]);
- b = c->table_bU[U];
+ const void *r = c->table_rV[V + YUVRGB_TABLE_HEADROOM],
+ *g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] + c->table_gV[V + YUVRGB_TABLE_HEADROOM]),
+ *b = c->table_bU[U + YUVRGB_TABLE_HEADROOM];
if (hasAlpha) {
- A1 = abuf0[i * 2 ] >> 7;
- A2 = abuf0[i * 2 + 1] >> 7;
+ A1 = abuf0[i * 2 ] * 255 + 16384 >> 15;
+ A2 = abuf0[i * 2 + 1] * 255 + 16384 >> 15;
A1 = av_clip_uint8(A1);
A2 = av_clip_uint8(A2);
}
@@ -1070,25 +1078,18 @@ yuv2rgb_1_c_template(SwsContext *c, const int16_t *buf0,
} else {
const int16_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1];
for (i = 0; i < ((dstW + 1) >> 1); i++) {
- int Y1 = buf0[i * 2] >> 7;
- int Y2 = buf0[i * 2 + 1] >> 7;
- int U = (ubuf0[i] + ubuf1[i]) >> 8;
- int V = (vbuf0[i] + vbuf1[i]) >> 8;
+ int Y1 = (buf0[i * 2 ] + 64) >> 7;
+ int Y2 = (buf0[i * 2 + 1] + 64) >> 7;
+ int U = (ubuf0[i] + ubuf1[i] + 128) >> 8;
+ int V = (vbuf0[i] + vbuf1[i] + 128) >> 8;
int A1, A2;
- const void *r, *g, *b;
-
- Y1 = av_clip_uint8(Y1);
- Y2 = av_clip_uint8(Y2);
- U = av_clip_uint8(U);
- V = av_clip_uint8(V);
-
- r = c->table_rV[V];
- g = (c->table_gU[U] + c->table_gV[V]);
- b = c->table_bU[U];
+ const void *r = c->table_rV[V + YUVRGB_TABLE_HEADROOM],
+ *g = (c->table_gU[U + YUVRGB_TABLE_HEADROOM] + c->table_gV[V + YUVRGB_TABLE_HEADROOM]),
+ *b = c->table_bU[U + YUVRGB_TABLE_HEADROOM];
if (hasAlpha) {
- A1 = abuf0[i * 2 ] >> 7;
- A2 = abuf0[i * 2 + 1] >> 7;
+ A1 = (abuf0[i * 2 ] + 64) >> 7;
+ A2 = (abuf0[i * 2 + 1] + 64) >> 7;
A1 = av_clip_uint8(A1);
A2 = av_clip_uint8(A2);
}
@@ -1164,9 +1165,9 @@ yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
for (i = 0; i < dstW; i++) {
int j;
- int Y = 0;
- int U = -128 << 19;
- int V = -128 << 19;
+ int Y = 1<<9;
+ int U = (1<<9)-(128 << 19);
+ int V = (1<<9)-(128 << 19);
int R, G, B, A;
for (j = 0; j < lumFilterSize; j++) {
@@ -1180,7 +1181,7 @@ yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
U >>= 10;
V >>= 10;
if (hasAlpha) {
- A = 1 << 21;
+ A = 1 << 18;
for (j = 0; j < lumFilterSize; j++) {
A += alpSrc[j][i] * lumFilter[j];
}
@@ -1223,7 +1224,6 @@ yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
dest[1] = B >> 22;
dest[2] = G >> 22;
dest[3] = R >> 22;
- dest += 4;
break;
case PIX_FMT_BGR24:
dest[0] = B >> 22;
@@ -1354,7 +1354,10 @@ av_cold void ff_sws_init_output_funcs(SwsContext *c,
*yuv2packedX = yuv2bgr24_full_X_c;
break;
}
+ if(!*yuv2packedX)
+ goto YUV_PACKED;
} else {
+ YUV_PACKED:
switch (dstFormat) {
case PIX_FMT_RGB48LE:
*yuv2packed1 = yuv2rgb48le_1_c;
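
A similarly minimal sketch (again not code from the patch) of the rounding change repeated through the output.c hunks above: internal samples are brought back to 8 bits with (x + 64) >> 7, adding half of the divisor before the shift so the conversion rounds to nearest instead of truncating, and clipping is only taken when a value overflows the 8-bit range.

#include <stdint.h>
#include <stdio.h>

/* Old behaviour in the hunks above: plain truncating shift. */
static int down_trunc(int x) { return x >> 7; }

/* New behaviour: add half of the divisor (1 << 6 == 64) before
 * shifting by 7, i.e. round to nearest. */
static int down_round(int x) { return (x + 64) >> 7; }

int main(void)
{
    int v = 16255;               /* sample worth ~126.99 in 8-bit terms */
    printf("trunc=%d round=%d\n", down_trunc(v), down_round(v));
    /* prints trunc=126 round=127 */
    return 0;
}
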
diff --git a/libswscale/ppc/swscale_altivec.c b/libswscale/ppc/swscale_altivec.c
index 0e66ec1f7b..c621866eed 100644
--- a/libswscale/ppc/swscale_altivec.c
+++ b/libswscale/ppc/swscale_altivec.c
@@ -4,20 +4,20 @@
* Copyright (C) 2004 Romain Dolbeau <romain@dolbeau.org>
* based on the equivalent C code in swscale.c
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -318,7 +318,7 @@ av_cold void ff_sws_init_swScale_altivec(SwsContext *c)
if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC))
return;
- if (c->srcBpc == 8 && c->dstBpc <= 10) {
+ if (c->srcBpc == 8 && c->dstBpc <= 14) {
c->hyScale = c->hcScale = hScale_altivec_real;
}
if (!is16BPS(dstFormat) && !is9_OR_10BPS(dstFormat) &&
diff --git a/libswscale/ppc/yuv2rgb_altivec.c b/libswscale/ppc/yuv2rgb_altivec.c
index 380c76f4d1..ade134d779 100644
--- a/libswscale/ppc/yuv2rgb_altivec.c
+++ b/libswscale/ppc/yuv2rgb_altivec.c
@@ -3,20 +3,20 @@
*
* copyright (C) 2004 Marc Hoffman <marc.hoffman@analog.com>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -97,6 +97,7 @@
#include "libswscale/swscale_internal.h"
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
+#include "libavutil/pixdesc.h"
#include "yuv2rgb_altivec.h"
#undef PROFILE_THE_BEAST
@@ -317,12 +318,7 @@ static int altivec_ ## name(SwsContext *c, const unsigned char **in, \
const ubyte *ui = in[1]; \
const ubyte *vi = in[2]; \
\
- vector unsigned char *oute = \
- (vector unsigned char *) \
- (oplanes[0] + srcSliceY * outstrides[0]); \
- vector unsigned char *outo = \
- (vector unsigned char *) \
- (oplanes[0] + srcSliceY * outstrides[0] + outstrides[0]); \
+ vector unsigned char *oute, *outo; \
\
/* loop moves y{1, 2}i by w */ \
instrides_scl[0] = instrides[0] * 2 - w; \
@@ -332,6 +328,9 @@ static int altivec_ ## name(SwsContext *c, const unsigned char **in, \
instrides_scl[2] = instrides[2] - w / 2; \
\
for (i = 0; i < h / 2; i++) { \
+ oute = (vector unsigned char *)(oplanes[0] + outstrides[0] * \
+ (srcSliceY + i * 2)); \
+ outo = oute + (outstrides[0] >> 4); \
vec_dstst(outo, (0x02000002 | (((w * 3 + 32) / 32) << 16)), 0); \
vec_dstst(oute, (0x02000002 | (((w * 3 + 32) / 32) << 16)), 1); \
\
@@ -429,9 +428,6 @@ static int altivec_ ## name(SwsContext *c, const unsigned char **in, \
vi += 8; \
} \
\
- outo += (outstrides[0]) >> 4; \
- oute += (outstrides[0]) >> 4; \
- \
ui += instrides_scl[1]; \
vi += instrides_scl[2]; \
y1i += instrides_scl[0]; \
@@ -736,7 +732,7 @@ static av_always_inline void ff_yuv2packedX_altivec(SwsContext *c,
if (!printed_error_message) {
av_log(c, AV_LOG_ERROR,
"altivec_yuv2packedX doesn't support %s output\n",
- sws_format_name(c->dstFormat));
+ av_get_pix_fmt_name(c->dstFormat));
printed_error_message = 1;
}
return;
@@ -824,7 +820,7 @@ static av_always_inline void ff_yuv2packedX_altivec(SwsContext *c,
/* Unreachable, I think. */
av_log(c, AV_LOG_ERROR,
"altivec_yuv2packedX doesn't support %s output\n",
- sws_format_name(c->dstFormat));
+ av_get_pix_fmt_name(c->dstFormat));
return;
}
diff --git a/libswscale/ppc/yuv2rgb_altivec.h b/libswscale/ppc/yuv2rgb_altivec.h
index 2c5e7ed876..aa52a4743e 100644
--- a/libswscale/ppc/yuv2rgb_altivec.h
+++ b/libswscale/ppc/yuv2rgb_altivec.h
@@ -4,20 +4,20 @@
* Copyright (C) 2004 Romain Dolbeau <romain@dolbeau.org>
* based on the equivalent C code in swscale.c
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/libswscale/ppc/yuv2yuv_altivec.c b/libswscale/ppc/yuv2yuv_altivec.c
index 45d766bd02..af78782b54 100644
--- a/libswscale/ppc/yuv2yuv_altivec.c
+++ b/libswscale/ppc/yuv2yuv_altivec.c
@@ -4,20 +4,20 @@
* Copyright (C) 2004 Romain Dolbeau <romain@dolbeau.org>
* based on the equivalent C code in swscale.c
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/libswscale/rgb2rgb.c b/libswscale/rgb2rgb.c
index 26ef64879c..321e5ff1b8 100644
--- a/libswscale/rgb2rgb.c
+++ b/libswscale/rgb2rgb.c
@@ -6,20 +6,20 @@
* Written by Nick Kurshev.
* palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -181,13 +181,13 @@ void rgb16tobgr32(const uint8_t *src, uint8_t *dst, int src_size)
register uint16_t bgr = *s++;
#if HAVE_BIGENDIAN
*d++ = 255;
- *d++ = (bgr & 0x1F) << 3;
- *d++ = (bgr & 0x7E0) >> 3;
- *d++ = (bgr & 0xF800) >> 8;
+ *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
+ *d++ = ((bgr&0x07E0)>>3) | ((bgr&0x07E0)>> 9);
+ *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
#else
- *d++ = (bgr & 0xF800) >> 8;
- *d++ = (bgr & 0x7E0) >> 3;
- *d++ = (bgr & 0x1F) << 3;
+ *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
+ *d++ = ((bgr&0x07E0)>>3) | ((bgr&0x07E0)>> 9);
+ *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
*d++ = 255;
#endif
}
@@ -220,9 +220,9 @@ void rgb16to24(const uint8_t *src, uint8_t *dst, int src_size)
while (s < end) {
register uint16_t bgr = *s++;
- *d++ = (bgr & 0xF800) >> 8;
- *d++ = (bgr & 0x7E0) >> 3;
- *d++ = (bgr & 0x1F) << 3;
+ *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
+ *d++ = ((bgr&0x07E0)>>3) | ((bgr&0x07E0)>> 9);
+ *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
}
}
@@ -256,13 +256,13 @@ void rgb15tobgr32(const uint8_t *src, uint8_t *dst, int src_size)
register uint16_t bgr = *s++;
#if HAVE_BIGENDIAN
*d++ = 255;
- *d++ = (bgr & 0x1F) << 3;
- *d++ = (bgr & 0x3E0) >> 2;
- *d++ = (bgr & 0x7C00) >> 7;
+ *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
+ *d++ = ((bgr&0x03E0)>>2) | ((bgr&0x03E0)>> 7);
+ *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
#else
- *d++ = (bgr & 0x7C00) >> 7;
- *d++ = (bgr & 0x3E0) >> 2;
- *d++ = (bgr & 0x1F) << 3;
+ *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
+ *d++ = ((bgr&0x03E0)>>2) | ((bgr&0x03E0)>> 7);
+ *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
*d++ = 255;
#endif
}
@@ -276,9 +276,9 @@ void rgb15to24(const uint8_t *src, uint8_t *dst, int src_size)
while (s < end) {
register uint16_t bgr = *s++;
- *d++ = (bgr & 0x7C00) >> 7;
- *d++ = (bgr & 0x3E0) >> 2;
- *d++ = (bgr & 0x1F) << 3;
+ *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
+ *d++ = ((bgr&0x03E0)>>2) | ((bgr&0x03E0)>> 7);
+ *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
}
}
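
The expansion hunks above replace plain left shifts with left-bit replication, so a full-scale 5- or 6-bit channel widens to a full-scale 8-bit value instead of 0xF8/0xFC. A minimal standalone sketch of the same trick (the helper names are illustrative, not part of libswscale):

    #include <stdint.h>
    #include <stdio.h>

    /* Widen a 5-bit channel (0..31) to 8 bits by repeating its top bits
     * into the freed low-order positions, as the converters above now do. */
    static uint8_t expand5(uint8_t v)
    {
        return (uint8_t)((v << 3) | (v >> 2));
    }

    /* Same for a 6-bit channel (0..63). */
    static uint8_t expand6(uint8_t v)
    {
        return (uint8_t)((v << 2) | (v >> 4));
    }

    int main(void)
    {
        printf("%02X %02X\n", 0x1F << 3, expand5(0x1F)); /* F8 FF: white stays white */
        printf("%02X %02X\n", 0x3F << 2, expand6(0x3F)); /* FC FF */
        return 0;
    }
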
@@ -315,18 +315,6 @@ void rgb12tobgr12(const uint8_t *src, uint8_t *dst, int src_size)
}
}
-void bgr8torgb8(const uint8_t *src, uint8_t *dst, int src_size)
-{
- int i, num_pixels = src_size;
-
- for (i = 0; i < num_pixels; i++) {
- register uint8_t rgb = src[i];
- unsigned r = (rgb & 0x07);
- unsigned g = (rgb & 0x38) >> 3;
- unsigned b = (rgb & 0xC0) >> 6;
- dst[i] = ((b << 1) & 0x07) | ((g & 0x07) << 3) | ((r & 0x03) << 6);
- }
-}
#define DEFINE_SHUFFLE_BYTES(a, b, c, d) \
void shuffle_bytes_ ## a ## b ## c ## d(const uint8_t *src, \
@@ -346,3 +334,57 @@ DEFINE_SHUFFLE_BYTES(0, 3, 2, 1)
DEFINE_SHUFFLE_BYTES(1, 2, 3, 0)
DEFINE_SHUFFLE_BYTES(3, 0, 1, 2)
DEFINE_SHUFFLE_BYTES(3, 2, 1, 0)
+
+#define DEFINE_RGB48TOBGR48(need_bswap, swap) \
+void rgb48tobgr48_ ## need_bswap(const uint8_t *src, \
+ uint8_t *dst, int src_size) \
+{ \
+ uint16_t *d = (uint16_t *)dst; \
+ uint16_t *s = (uint16_t *)src; \
+ int i, num_pixels = src_size >> 1; \
+ \
+ for (i = 0; i < num_pixels; i += 3) { \
+ d[i ] = swap ? av_bswap16(s[i + 2]) : s[i + 2]; \
+ d[i + 1] = swap ? av_bswap16(s[i + 1]) : s[i + 1]; \
+ d[i + 2] = swap ? av_bswap16(s[i ]) : s[i ]; \
+ } \
+}
+
+DEFINE_RGB48TOBGR48(nobswap, 0)
+DEFINE_RGB48TOBGR48(bswap, 1)
+
+#define DEFINE_RGB64TOBGR48(need_bswap, swap) \
+void rgb64tobgr48_ ## need_bswap(const uint8_t *src, \
+ uint8_t *dst, int src_size) \
+{ \
+ uint16_t *d = (uint16_t *)dst; \
+ uint16_t *s = (uint16_t *)src; \
+ int i, num_pixels = src_size >> 3; \
+ \
+ for (i = 0; i < num_pixels; i++) { \
+ d[3 * i ] = swap ? av_bswap16(s[4 * i + 2]) : s[4 * i + 2]; \
+ d[3 * i + 1] = swap ? av_bswap16(s[4 * i + 1]) : s[4 * i + 1]; \
+ d[3 * i + 2] = swap ? av_bswap16(s[4 * i ]) : s[4 * i ]; \
+ } \
+}
+
+DEFINE_RGB64TOBGR48(nobswap, 0)
+DEFINE_RGB64TOBGR48(bswap, 1)
+
+#define DEFINE_RGB64TO48(need_bswap, swap) \
+void rgb64to48_ ## need_bswap(const uint8_t *src, \
+ uint8_t *dst, int src_size) \
+{ \
+ uint16_t *d = (uint16_t *)dst; \
+ uint16_t *s = (uint16_t *)src; \
+ int i, num_pixels = src_size >> 3; \
+ \
+ for (i = 0; i < num_pixels; i++) { \
+ d[3 * i ] = swap ? av_bswap16(s[4 * i ]) : s[4 * i ]; \
+ d[3 * i + 1] = swap ? av_bswap16(s[4 * i + 1]) : s[4 * i + 1]; \
+ d[3 * i + 2] = swap ? av_bswap16(s[4 * i + 2]) : s[4 * i + 2]; \
+ } \
+}
+
+DEFINE_RGB64TO48(nobswap, 0)
+DEFINE_RGB64TO48(bswap, 1)
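
The three macro families above generate plain C fallbacks for repacking 48- and 64-bit packed RGB; rgb64to48_* simply drops the 16-bit alpha word of every pixel, with src_size counted in bytes. A hedged usage sketch (buffer contents are illustrative only):

    #include <stdint.h>
    #include "libswscale/rgb2rgb.h"

    static void rgba64_to_rgb48_demo(void)
    {
        /* Two native-endian RGBA64 pixels: opaque red and opaque green. */
        uint16_t src[8] = { 65535, 0, 0, 65535,
                            0, 65535, 0, 65535 };
        uint16_t dst[6];

        /* num_pixels = src_size >> 3, so pass the byte size of src. */
        rgb64to48_nobswap((const uint8_t *)src, (uint8_t *)dst,
                          (int)sizeof(src));
        /* dst now holds { 65535, 0, 0,  0, 65535, 0 } */
    }
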
diff --git a/libswscale/rgb2rgb.h b/libswscale/rgb2rgb.h
index 42f468fe21..e37f0fbbbe 100644
--- a/libswscale/rgb2rgb.h
+++ b/libswscale/rgb2rgb.h
@@ -6,20 +6,20 @@
* Written by Nick Kurshev.
* YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -52,6 +52,12 @@ extern void (*rgb32tobgr15)(const uint8_t *src, uint8_t *dst, int src_size);
extern void (*shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, int src_size);
+void rgb64tobgr48_nobswap(const uint8_t *src, uint8_t *dst, int src_size);
+void rgb64tobgr48_bswap(const uint8_t *src, uint8_t *dst, int src_size);
+void rgb48tobgr48_nobswap(const uint8_t *src, uint8_t *dst, int src_size);
+void rgb48tobgr48_bswap(const uint8_t *src, uint8_t *dst, int src_size);
+void rgb64to48_nobswap(const uint8_t *src, uint8_t *dst, int src_size);
+void rgb64to48_bswap(const uint8_t *src, uint8_t *dst, int src_size);
void rgb24to32(const uint8_t *src, uint8_t *dst, int src_size);
void rgb32to24(const uint8_t *src, uint8_t *dst, int src_size);
void rgb16tobgr32(const uint8_t *src, uint8_t *dst, int src_size);
@@ -64,7 +70,6 @@ void rgb15tobgr16(const uint8_t *src, uint8_t *dst, int src_size);
void rgb15tobgr15(const uint8_t *src, uint8_t *dst, int src_size);
void rgb12tobgr12(const uint8_t *src, uint8_t *dst, int src_size);
void rgb12to15(const uint8_t *src, uint8_t *dst, int src_size);
-void bgr8torgb8(const uint8_t *src, uint8_t *dst, int src_size);
void shuffle_bytes_0321(const uint8_t *src, uint8_t *dst, int src_size);
void shuffle_bytes_1230(const uint8_t *src, uint8_t *dst, int src_size);
diff --git a/libswscale/rgb2rgb_template.c b/libswscale/rgb2rgb_template.c
index d1a43e01cb..c05cdc8549 100644
--- a/libswscale/rgb2rgb_template.c
+++ b/libswscale/rgb2rgb_template.c
@@ -7,20 +7,20 @@
* palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
* lot of big-endian byte order fixes by Alex Beregszaszi
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -238,27 +238,6 @@ static inline void rgb24to15_c(const uint8_t *src, uint8_t *dst, int src_size)
}
}
-/*
- * I use less accurate approximation here by simply left-shifting the input
- * value and filling the low order bits with zeroes. This method improves PNG
- * compression but this scheme cannot reproduce white exactly, since it does
- * not generate an all-ones maximum value; the net effect is to darken the
- * image slightly.
- *
- * The better method should be "left bit replication":
- *
- * 4 3 2 1 0
- * ---------
- * 1 1 0 1 1
- *
- * 7 6 5 4 3 2 1 0
- * ----------------
- * 1 1 0 1 1 1 1 0
- * |=======| |===|
- * | leftmost bits repeated to fill open bits
- * |
- * original bits
- */
static inline void rgb15tobgr24_c(const uint8_t *src, uint8_t *dst,
int src_size)
{
@@ -268,9 +247,9 @@ static inline void rgb15tobgr24_c(const uint8_t *src, uint8_t *dst,
while (s < end) {
register uint16_t bgr = *s++;
- *d++ = (bgr & 0x1F) << 3;
- *d++ = (bgr & 0x3E0) >> 2;
- *d++ = (bgr & 0x7C00) >> 7;
+ *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
+ *d++ = ((bgr&0x03E0)>>2) | ((bgr&0x03E0)>> 7);
+ *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
}
}
@@ -283,9 +262,9 @@ static inline void rgb16tobgr24_c(const uint8_t *src, uint8_t *dst,
while (s < end) {
register uint16_t bgr = *s++;
- *d++ = (bgr & 0x1F) << 3;
- *d++ = (bgr & 0x7E0) >> 3;
- *d++ = (bgr & 0xF800) >> 8;
+ *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
+ *d++ = ((bgr&0x07E0)>>3) | ((bgr&0x07E0)>> 9);
+ *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
}
}
@@ -299,13 +278,13 @@ static inline void rgb15to32_c(const uint8_t *src, uint8_t *dst, int src_size)
register uint16_t bgr = *s++;
#if HAVE_BIGENDIAN
*d++ = 255;
- *d++ = (bgr & 0x7C00) >> 7;
- *d++ = (bgr & 0x3E0) >> 2;
- *d++ = (bgr & 0x1F) << 3;
+ *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
+ *d++ = ((bgr&0x03E0)>>2) | ((bgr&0x03E0)>> 7);
+ *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
#else
- *d++ = (bgr & 0x1F) << 3;
- *d++ = (bgr & 0x3E0) >> 2;
- *d++ = (bgr & 0x7C00) >> 7;
+ *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
+ *d++ = ((bgr&0x03E0)>>2) | ((bgr&0x03E0)>> 7);
+ *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
*d++ = 255;
#endif
}
@@ -321,13 +300,13 @@ static inline void rgb16to32_c(const uint8_t *src, uint8_t *dst, int src_size)
register uint16_t bgr = *s++;
#if HAVE_BIGENDIAN
*d++ = 255;
- *d++ = (bgr & 0xF800) >> 8;
- *d++ = (bgr & 0x7E0) >> 3;
- *d++ = (bgr & 0x1F) << 3;
+ *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
+ *d++ = ((bgr&0x07E0)>>3) | ((bgr&0x07E0)>> 9);
+ *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
#else
- *d++ = (bgr & 0x1F) << 3;
- *d++ = (bgr & 0x7E0) >> 3;
- *d++ = (bgr & 0xF800) >> 8;
+ *d++ = ((bgr&0x001F)<<3) | ((bgr&0x001F)>> 2);
+ *d++ = ((bgr&0x07E0)>>3) | ((bgr&0x07E0)>> 9);
+ *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
*d++ = 255;
#endif
}
@@ -665,6 +644,9 @@ void rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
ydst += lumStride;
src += srcStride;
+ if (y+1 == height)
+ break;
+
for (i = 0; i < chromWidth; i++) {
unsigned int b = src[6 * i + 0];
unsigned int g = src[6 * i + 1];
diff --git a/libswscale/sparc/yuv2rgb_vis.c b/libswscale/sparc/yuv2rgb_vis.c
index d7102a3a45..bb9ab22dbe 100644
--- a/libswscale/sparc/yuv2rgb_vis.c
+++ b/libswscale/sparc/yuv2rgb_vis.c
@@ -2,20 +2,20 @@
* VIS optimized software YUV to RGB converter
* Copyright (c) 2007 Denes Balatoni <dbalatoni@programozo.hu>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/libswscale/swscale-test.c b/libswscale/swscale-test.c
index 3497dffbe0..ef6c55ce02 100644
--- a/libswscale/swscale-test.c
+++ b/libswscale/swscale-test.c
@@ -1,20 +1,20 @@
/*
- * Copyright (C) 2003 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (C) 2003-2011 Michael Niedermayer <michaelni@gmx.at>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -103,6 +103,7 @@ static int doTest(uint8_t *ref[4], int refStride[4], int w, int h,
av_image_fill_linesizes(srcStride, srcFormat, srcW);
for (p = 0; p < 4; p++) {
+ srcStride[p] = FFALIGN(srcStride[p], 16);
if (srcStride[p])
src[p] = av_mallocz(srcStride[p] * srcH + 16);
if (srcStride[p] && !src[p]) {
@@ -136,6 +137,7 @@ static int doTest(uint8_t *ref[4], int refStride[4], int w, int h,
* allocated with av_malloc). */
/* An extra 16 bytes is being allocated because some scalers may write
* out of bounds. */
+ dstStride[i] = FFALIGN(dstStride[i], 16);
if (dstStride[i])
dst[i] = av_mallocz(dstStride[i] * dstH + 16);
if (dstStride[i] && !dst[i]) {
@@ -175,6 +177,7 @@ static int doTest(uint8_t *ref[4], int refStride[4], int w, int h,
ssdA = r->ssdA;
} else {
for (i = 0; i < 4; i++) {
+ refStride[i] = FFALIGN(refStride[i], 16);
if (refStride[i])
out[i] = av_mallocz(refStride[i] * h);
if (refStride[i] && !out[i]) {
diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 9da250e1d1..ae79eb66e8 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -1,20 +1,20 @@
/*
- * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -24,6 +24,7 @@
#include <stdio.h>
#include <string.h>
+#include "libavutil/avassert.h"
#include "libavutil/avutil.h"
#include "libavutil/bswap.h"
#include "libavutil/cpu.h"
@@ -71,6 +72,9 @@ static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW,
int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
int sh = bits - 4;
+ if((isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8) && av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1<15)
+ sh= 9;
+
for (i = 0; i < dstW; i++) {
int j;
int srcPos = filterPos[i];
@@ -92,6 +96,9 @@ static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW,
const uint16_t *src = (const uint16_t *) _src;
int sh = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
+ if(sh<15)
+ sh= isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8 ? 13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
+
for (i = 0; i < dstW; i++) {
int j;
int srcPos = filterPos[i];
@@ -208,7 +215,7 @@ static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
int i;
int32_t *dst = (int32_t *) _dst;
for (i = 0; i < width; i++)
- dst[i] = (dst[i] * 14071 + (33561947 << 4)) >> 14;
+ dst[i] = (dst[i]*(14071/4) + (33561947<<4)/4)>>12;
}
static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
@@ -222,6 +229,8 @@ static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
dst[i] = (src[xx] << 7) + (src[xx + 1] - src[xx]) * xalpha;
xpos += xInc;
}
+ for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
+ dst[i] = src[srcW-1]*128;
}
// *** horizontal scale Y line to temp buffer
@@ -234,13 +243,13 @@ static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
uint8_t *formatConvBuffer,
uint32_t *pal, int isAlpha)
{
- void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) =
+ void (*toYV12)(uint8_t *, const uint8_t *, const uint8_t *, const uint8_t *, int, uint32_t *) =
isAlpha ? c->alpToYV12 : c->lumToYV12;
void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
const uint8_t *src = src_in[isAlpha ? 3 : 0];
if (toYV12) {
- toYV12(formatConvBuffer, src, srcW, pal);
+ toYV12(formatConvBuffer, src, src_in[1], src_in[2], srcW, pal);
src = formatConvBuffer;
} else if (c->readLumPlanar && !isAlpha) {
c->readLumPlanar(formatConvBuffer, src_in, srcW);
@@ -271,6 +280,10 @@ static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
dst2[i] = (src2[xx] * (xalpha ^ 127) + src2[xx + 1] * xalpha);
xpos += xInc;
}
+ for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) {
+ dst1[i] = src1[srcW-1]*128;
+ dst2[i] = src2[srcW-1]*128;
+ }
}
static av_always_inline void hcscale(SwsContext *c, int16_t *dst1,
@@ -285,13 +298,13 @@ static av_always_inline void hcscale(SwsContext *c, int16_t *dst1,
const uint8_t *src1 = src_in[1], *src2 = src_in[2];
if (c->chrToYV12) {
uint8_t *buf2 = formatConvBuffer +
- FFALIGN(srcW * FFALIGN(c->srcBpc, 8) >> 3, 16);
- c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
- src1 = formatConvBuffer;
- src2 = buf2;
+ FFALIGN(srcW*2+78, 16);
+ c->chrToYV12(formatConvBuffer, buf2, src_in[0], src1, src2, srcW, pal);
+ src1= formatConvBuffer;
+ src2= buf2;
} else if (c->readChrPlanar) {
uint8_t *buf2 = formatConvBuffer +
- FFALIGN(srcW * FFALIGN(c->srcBpc, 8) >> 3, 16);
+ FFALIGN(srcW*2+78, 16);
c->readChrPlanar(formatConvBuffer, buf2, src_in, srcW);
src1 = formatConvBuffer;
src2 = buf2;
@@ -392,8 +405,8 @@ static int swScale(SwsContext *c, const uint8_t *src[],
DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
- if (dstStride[0] % 8 != 0 || dstStride[1] % 8 != 0 ||
- dstStride[2] % 8 != 0 || dstStride[3] % 8 != 0) {
+ if (dstStride[0]%16 !=0 || dstStride[1]%16 !=0 ||
+ dstStride[2]%16 !=0 || dstStride[3]%16 != 0) {
static int warnedAlready = 0; // FIXME maybe move this into the context
if (flags & SWS_PRINT_INFO && !warnedAlready) {
av_log(c, AV_LOG_WARNING,
@@ -403,6 +416,18 @@ static int swScale(SwsContext *c, const uint8_t *src[],
}
}
+ if ((int)dst[0]%16 || (int)dst[1]%16 || (int)dst[2]%16 || (int)src[0]%16 || (int)src[1]%16 || (int)src[2]%16
+ || dstStride[0]%16 || dstStride[1]%16 || dstStride[2]%16 || dstStride[3]%16
+ || srcStride[0]%16 || srcStride[1]%16 || srcStride[2]%16 || srcStride[3]%16
+ ) {
+ static int warnedAlready=0;
+ int cpu_flags = av_get_cpu_flags();
+ if (HAVE_MMX2 && (cpu_flags & AV_CPU_FLAG_SSE2) && !warnedAlready){
+ av_log(c, AV_LOG_WARNING, "Warning: data is not aligned! This can lead to a speedloss\n");
+ warnedAlready=1;
+ }
+ }
+
/* Note the user might start scaling the picture in the middle so this
* will not get executed. This is not really intended but works
* currently, so people might do it. */
@@ -427,6 +452,7 @@ static int swScale(SwsContext *c, const uint8_t *src[],
dst[2] + dstStride[2] * chrDstY,
(CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3] + dstStride[3] * dstY : NULL,
};
+ int use_mmx_vfilter= c->use_mmx_vfilter;
// First line needed as input
const int firstLumSrcY = FFMAX(1 - vLumFilterSize, vLumFilterPos[dstY]);
@@ -531,98 +557,74 @@ static int swScale(SwsContext *c, const uint8_t *src[],
* this array's tail */
ff_sws_init_output_funcs(c, &yuv2plane1, &yuv2planeX, &yuv2nv12cX,
&yuv2packed1, &yuv2packed2, &yuv2packedX);
+ use_mmx_vfilter= 0;
}
{
- const int16_t **lumSrcPtr = (const int16_t **)lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
- const int16_t **chrUSrcPtr = (const int16_t **)chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
- const int16_t **chrVSrcPtr = (const int16_t **)chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
+ const int16_t **lumSrcPtr = (const int16_t **)(void*) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
+ const int16_t **chrUSrcPtr = (const int16_t **)(void*) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
+ const int16_t **chrVSrcPtr = (const int16_t **)(void*) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
const int16_t **alpSrcPtr = (CONFIG_SWSCALE_ALPHA && alpPixBuf) ?
- (const int16_t **)alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
-
- if (firstLumSrcY < 0 || firstLumSrcY + vLumFilterSize > c->srcH) {
- const int16_t **tmpY = (const int16_t **)lumPixBuf +
- 2 * vLumBufSize;
- int neg = -firstLumSrcY, i;
- int end = FFMIN(c->srcH - firstLumSrcY, vLumFilterSize);
- for (i = 0; i < neg; i++)
- tmpY[i] = lumSrcPtr[neg];
- for (; i < end; i++)
- tmpY[i] = lumSrcPtr[i];
- for (; i < vLumFilterSize; i++)
- tmpY[i] = tmpY[i - 1];
- lumSrcPtr = tmpY;
-
- if (alpSrcPtr) {
- const int16_t **tmpA = (const int16_t **)alpPixBuf +
- 2 * vLumBufSize;
- for (i = 0; i < neg; i++)
- tmpA[i] = alpSrcPtr[neg];
- for (; i < end; i++)
- tmpA[i] = alpSrcPtr[i];
- for (; i < vLumFilterSize; i++)
- tmpA[i] = tmpA[i - 1];
- alpSrcPtr = tmpA;
- }
- }
- if (firstChrSrcY < 0 ||
- firstChrSrcY + vChrFilterSize > c->chrSrcH) {
- const int16_t **tmpU = (const int16_t **)chrUPixBuf + 2 * vChrBufSize,
- **tmpV = (const int16_t **)chrVPixBuf + 2 * vChrBufSize;
- int neg = -firstChrSrcY, i;
- int end = FFMIN(c->chrSrcH - firstChrSrcY, vChrFilterSize);
- for (i = 0; i < neg; i++) {
- tmpU[i] = chrUSrcPtr[neg];
- tmpV[i] = chrVSrcPtr[neg];
- }
- for (; i < end; i++) {
- tmpU[i] = chrUSrcPtr[i];
- tmpV[i] = chrVSrcPtr[i];
- }
- for (; i < vChrFilterSize; i++) {
- tmpU[i] = tmpU[i - 1];
- tmpV[i] = tmpV[i - 1];
- }
- chrUSrcPtr = tmpU;
- chrVSrcPtr = tmpV;
- }
+ (const int16_t **)(void*) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
+ int16_t *vLumFilter = c->vLumFilter;
+ int16_t *vChrFilter = c->vChrFilter;
if (isPlanarYUV(dstFormat) ||
(isGray(dstFormat) && !isALPHA(dstFormat))) { // YV12 like
const int chrSkipMask = (1 << c->chrDstVSubSample) - 1;
+ vLumFilter += dstY * vLumFilterSize;
+ vChrFilter += chrDstY * vChrFilterSize;
+
+// av_assert0(use_mmx_vfilter != (
+// yuv2planeX == yuv2planeX_10BE_c
+// || yuv2planeX == yuv2planeX_10LE_c
+// || yuv2planeX == yuv2planeX_9BE_c
+// || yuv2planeX == yuv2planeX_9LE_c
+// || yuv2planeX == yuv2planeX_16BE_c
+// || yuv2planeX == yuv2planeX_16LE_c
+// || yuv2planeX == yuv2planeX_8_c) || !ARCH_X86);
+
+ if(use_mmx_vfilter){
+ vLumFilter= c->lumMmxFilter;
+ vChrFilter= c->chrMmxFilter;
+ }
+
if (vLumFilterSize == 1) {
yuv2plane1(lumSrcPtr[0], dest[0], dstW, c->lumDither8, 0);
} else {
- yuv2planeX(vLumFilter + dstY * vLumFilterSize,
- vLumFilterSize, lumSrcPtr, dest[0],
+ yuv2planeX(vLumFilter, vLumFilterSize,
+ lumSrcPtr, dest[0],
dstW, c->lumDither8, 0);
}
if (!((dstY & chrSkipMask) || isGray(dstFormat))) {
if (yuv2nv12cX) {
- yuv2nv12cX(c, vChrFilter + chrDstY * vChrFilterSize,
+ yuv2nv12cX(c, vChrFilter,
vChrFilterSize, chrUSrcPtr, chrVSrcPtr,
dest[1], chrDstW);
} else if (vChrFilterSize == 1) {
yuv2plane1(chrUSrcPtr[0], dest[1], chrDstW, c->chrDither8, 0);
yuv2plane1(chrVSrcPtr[0], dest[2], chrDstW, c->chrDither8, 3);
} else {
- yuv2planeX(vChrFilter + chrDstY * vChrFilterSize,
+ yuv2planeX(vChrFilter,
vChrFilterSize, chrUSrcPtr, dest[1],
chrDstW, c->chrDither8, 0);
- yuv2planeX(vChrFilter + chrDstY * vChrFilterSize,
+ yuv2planeX(vChrFilter,
vChrFilterSize, chrVSrcPtr, dest[2],
- chrDstW, c->chrDither8, 3);
+ chrDstW, c->chrDither8, use_mmx_vfilter ? (c->uv_offx2 >> 1) : 3);
}
}
if (CONFIG_SWSCALE_ALPHA && alpPixBuf) {
+ if(use_mmx_vfilter){
+ vLumFilter= c->alpMmxFilter;
+ }
if (vLumFilterSize == 1) {
yuv2plane1(alpSrcPtr[0], dest[3], dstW,
c->lumDither8, 0);
} else {
- yuv2planeX(vLumFilter + dstY * vLumFilterSize,
+ yuv2planeX(vLumFilter,
vLumFilterSize, alpSrcPtr, dest[3],
dstW, c->lumDither8, 0);
}
@@ -687,8 +689,9 @@ static av_cold void sws_init_swScale_c(SwsContext *c)
ff_sws_init_input_funcs(c);
+
if (c->srcBpc == 8) {
- if (c->dstBpc <= 10) {
+ if (c->dstBpc <= 14) {
c->hyScale = c->hcScale = hScale8To15_c;
if (c->flags & SWS_FAST_BILINEAR) {
c->hyscale_fast = hyscale_fast_c;
@@ -698,12 +701,12 @@ static av_cold void sws_init_swScale_c(SwsContext *c)
c->hyScale = c->hcScale = hScale8To19_c;
}
} else {
- c->hyScale = c->hcScale = c->dstBpc > 10 ? hScale16To19_c
+ c->hyScale = c->hcScale = c->dstBpc > 14 ? hScale16To19_c
: hScale16To15_c;
}
if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
- if (c->dstBpc <= 10) {
+ if (c->dstBpc <= 14) {
if (c->srcRange) {
c->lumConvertRange = lumRangeFromJpeg_c;
c->chrConvertRange = chrRangeFromJpeg_c;
diff --git a/libswscale/swscale.h b/libswscale/swscale.h
index c3efd48b1a..da29d47e3a 100644
--- a/libswscale/swscale.h
+++ b/libswscale/swscale.h
@@ -1,20 +1,20 @@
/*
- * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -215,7 +215,13 @@ int sws_scale(struct SwsContext *c, const uint8_t *const srcSlice[],
uint8_t *const dst[], const int dstStride[]);
/**
- * @param inv_table the yuv2rgb coefficients, normally ff_yuv2rgb_coeffs[x]
+ * @param dstRange flag indicating the white-black range of the output (1=jpeg / 0=mpeg)
+ * @param srcRange flag indicating the white-black range of the input (1=jpeg / 0=mpeg)
+ * @param table the yuv2rgb coefficients describing the output yuv space, normally ff_yuv2rgb_coeffs[x]
+ * @param inv_table the yuv2rgb coefficients describing the input yuv space, normally ff_yuv2rgb_coeffs[x]
+ * @param brightness 16.16 fixed point brightness correction
+ * @param contrast 16.16 fixed point contrast correction
+ * @param saturation 16.16 fixed point saturation correction
* @return -1 if not supported
*/
int sws_setColorspaceDetails(struct SwsContext *c, const int inv_table[4],
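
The extra @param lines above document the colour-space tuning knobs. A hedged usage sketch follows, assuming the stock prototype (inv_table/srcRange, table/dstRange, then brightness, contrast and saturation in 16.16 fixed point) and the public helper sws_getCoefficients():

    #include "libswscale/swscale.h"

    static void tune_colorspace(struct SwsContext *sws)
    {
        const int *bt601 = sws_getCoefficients(SWS_CS_ITU601);

        /* Full-range (JPEG) input, limited-range (MPEG) output, BT.601
         * matrices on both sides; 1 << 16 is the neutral 16.16 value for
         * contrast and saturation, 0 the neutral brightness offset.     */
        sws_setColorspaceDetails(sws,
                                 bt601, 1,               /* inv_table, srcRange */
                                 bt601, 0,               /* table,     dstRange */
                                 0,                      /* brightness          */
                                 (int)(1.1 * (1 << 16)), /* mild contrast boost */
                                 1 << 16);               /* saturation unchanged*/
    }
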
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index 2a7d2dbd1d..5a584f04ca 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -1,20 +1,20 @@
/*
- * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -34,10 +34,14 @@
#define STR(s) AV_TOSTRING(s) // AV_STRINGIFY is too long
+#define YUVRGB_TABLE_HEADROOM 128
+
#define FAST_BGR2YV12 // use 7-bit instead of 15-bit coefficients
#define MAX_FILTER_SIZE 256
+#define DITHER1XBPP
+
#if HAVE_BIGENDIAN
#define ALT32_CORR (-1)
#else
@@ -315,10 +319,10 @@ typedef struct SwsContext {
int dstY; ///< Last destination vertical line output from last slice.
int flags; ///< Flags passed by the user to select scaler algorithm, optimizations, subsampling, etc...
void *yuvTable; // pointer to the yuv->rgb table start so it can be freed()
- uint8_t *table_rV[256];
- uint8_t *table_gU[256];
- int table_gV[256];
- uint8_t *table_bU[256];
+ uint8_t *table_rV[256 + 2*YUVRGB_TABLE_HEADROOM];
+ uint8_t *table_gU[256 + 2*YUVRGB_TABLE_HEADROOM];
+ int table_gV[256 + 2*YUVRGB_TABLE_HEADROOM];
+ uint8_t *table_bU[256 + 2*YUVRGB_TABLE_HEADROOM];
//Colorspace stuff
int contrast, brightness, saturation; // for sws_getColorspaceDetails
@@ -326,6 +330,8 @@ typedef struct SwsContext {
int dstColorspaceTable[4];
int srcRange; ///< 0 = MPG YUV range, 1 = JPG YUV range (source image).
int dstRange; ///< 0 = MPG YUV range, 1 = JPG YUV range (destination image).
+ int src0Alpha;
+ int dst0Alpha;
int yuv2rgb_y_offset;
int yuv2rgb_y_coeff;
int yuv2rgb_v2r_coeff;
@@ -382,8 +388,8 @@ typedef struct SwsContext {
// alignment of these values is not necessary, but merely here
// to maintain the same offset across x86-32 and x86-64. Once we
// use proper offset macros in the asm, they can be removed.
- DECLARE_ALIGNED(8, ptrdiff_t, uv_off_px); ///< offset (in pixels) between u and v planes
- DECLARE_ALIGNED(8, ptrdiff_t, uv_off_byte); ///< offset (in bytes) between u and v planes
+ DECLARE_ALIGNED(8, ptrdiff_t, uv_off); ///< offset (in pixels) between u and v planes
+ DECLARE_ALIGNED(8, ptrdiff_t, uv_offx2); ///< offset (in bytes) between u and v planes
DECLARE_ALIGNED(8, uint16_t, dither16)[8];
DECLARE_ALIGNED(8, uint32_t, dither32)[8];
@@ -417,6 +423,7 @@ typedef struct SwsContext {
#if HAVE_VIS
DECLARE_ALIGNED(8, uint64_t, sparc_coeffs)[10];
#endif
+ int use_mmx_vfilter;
/* function pointers for swScale() */
yuv2planar1_fn yuv2plane1;
@@ -427,14 +434,14 @@ typedef struct SwsContext {
yuv2packedX_fn yuv2packedX;
/// Unscaled conversion of luma plane to YV12 for horizontal scaler.
- void (*lumToYV12)(uint8_t *dst, const uint8_t *src,
+ void (*lumToYV12)(uint8_t *dst, const uint8_t *src, const uint8_t *src2, const uint8_t *src3,
int width, uint32_t *pal);
/// Unscaled conversion of alpha plane to YV12 for horizontal scaler.
- void (*alpToYV12)(uint8_t *dst, const uint8_t *src,
+ void (*alpToYV12)(uint8_t *dst, const uint8_t *src, const uint8_t *src2, const uint8_t *src3,
int width, uint32_t *pal);
/// Unscaled conversion of chroma planes to YV12 for horizontal scaler.
void (*chrToYV12)(uint8_t *dstU, uint8_t *dstV,
- const uint8_t *src1, const uint8_t *src2,
+ const uint8_t *src1, const uint8_t *src2, const uint8_t *src3,
int width, uint32_t *pal);
/**
@@ -539,14 +546,22 @@ SwsFunc ff_yuv2rgb_init_altivec(SwsContext *c);
SwsFunc ff_yuv2rgb_get_func_ptr_bfin(SwsContext *c);
void ff_bfin_get_unscaled_swscale(SwsContext *c);
+#if FF_API_SWS_FORMAT_NAME
+/**
+ * @deprecated Use av_get_pix_fmt_name() instead.
+ */
+attribute_deprecated
const char *sws_format_name(enum PixelFormat format);
+#endif
#define is16BPS(x) \
(av_pix_fmt_descriptors[x].comp[0].depth_minus1 == 15)
#define is9_OR_10BPS(x) \
- (av_pix_fmt_descriptors[x].comp[0].depth_minus1 == 8 || \
- av_pix_fmt_descriptors[x].comp[0].depth_minus1 == 9)
+ (av_pix_fmt_descriptors[x].comp[0].depth_minus1 >= 8 && \
+ av_pix_fmt_descriptors[x].comp[0].depth_minus1 <= 13)
+
+#define isNBPS(x) is9_OR_10BPS(x)
#define isBE(x) \
(av_pix_fmt_descriptors[x].flags & PIX_FMT_BE)
@@ -561,7 +576,6 @@ const char *sws_format_name(enum PixelFormat format);
#define isRGB(x) \
(av_pix_fmt_descriptors[x].flags & PIX_FMT_RGB)
-
#if 0 // FIXME
#define isGray(x) \
(!(av_pix_fmt_descriptors[x].flags & PIX_FMT_PAL) && \
@@ -574,55 +588,95 @@ const char *sws_format_name(enum PixelFormat format);
(x) == PIX_FMT_GRAY16LE)
#endif
-#define isRGBinInt(x) \
- ((x) == PIX_FMT_RGB48BE || \
- (x) == PIX_FMT_RGB48LE || \
- (x) == PIX_FMT_RGB32 || \
- (x) == PIX_FMT_RGB32_1 || \
- (x) == PIX_FMT_RGB24 || \
- (x) == PIX_FMT_RGB565BE || \
- (x) == PIX_FMT_RGB565LE || \
- (x) == PIX_FMT_RGB555BE || \
- (x) == PIX_FMT_RGB555LE || \
- (x) == PIX_FMT_RGB444BE || \
- (x) == PIX_FMT_RGB444LE || \
- (x) == PIX_FMT_RGB8 || \
- (x) == PIX_FMT_RGB4 || \
- (x) == PIX_FMT_RGB4_BYTE || \
- (x) == PIX_FMT_MONOBLACK || \
- (x) == PIX_FMT_MONOWHITE)
-
-#define isBGRinInt(x) \
- ((x) == PIX_FMT_BGR48BE || \
- (x) == PIX_FMT_BGR48LE || \
- (x) == PIX_FMT_BGR32 || \
- (x) == PIX_FMT_BGR32_1 || \
- (x) == PIX_FMT_BGR24 || \
- (x) == PIX_FMT_BGR565BE || \
- (x) == PIX_FMT_BGR565LE || \
- (x) == PIX_FMT_BGR555BE || \
- (x) == PIX_FMT_BGR555LE || \
- (x) == PIX_FMT_BGR444BE || \
- (x) == PIX_FMT_BGR444LE || \
- (x) == PIX_FMT_BGR8 || \
- (x) == PIX_FMT_BGR4 || \
- (x) == PIX_FMT_BGR4_BYTE || \
- (x) == PIX_FMT_MONOBLACK || \
- (x) == PIX_FMT_MONOWHITE)
-
-#define isAnyRGB(x) \
- (isRGBinInt(x) || \
- isBGRinInt(x))
+#define isRGBinInt(x) \
+ ( \
+ (x)==PIX_FMT_RGB48BE || \
+ (x)==PIX_FMT_RGB48LE || \
+ (x)==PIX_FMT_RGBA64BE || \
+ (x)==PIX_FMT_RGBA64LE || \
+ (x)==PIX_FMT_RGB32 || \
+ (x)==PIX_FMT_RGB32_1 || \
+ (x)==PIX_FMT_RGB24 || \
+ (x)==PIX_FMT_RGB565BE || \
+ (x)==PIX_FMT_RGB565LE || \
+ (x)==PIX_FMT_RGB555BE || \
+ (x)==PIX_FMT_RGB555LE || \
+ (x)==PIX_FMT_RGB444BE || \
+ (x)==PIX_FMT_RGB444LE || \
+ (x)==PIX_FMT_RGB8 || \
+ (x)==PIX_FMT_RGB4 || \
+ (x)==PIX_FMT_RGB4_BYTE || \
+ (x)==PIX_FMT_MONOBLACK || \
+ (x)==PIX_FMT_MONOWHITE \
+ )
+#define isBGRinInt(x) \
+ ( \
+ (x)==PIX_FMT_BGR48BE || \
+ (x)==PIX_FMT_BGR48LE || \
+ (x)==PIX_FMT_BGRA64BE || \
+ (x)==PIX_FMT_BGRA64LE || \
+ (x)==PIX_FMT_BGR32 || \
+ (x)==PIX_FMT_BGR32_1 || \
+ (x)==PIX_FMT_BGR24 || \
+ (x)==PIX_FMT_BGR565BE || \
+ (x)==PIX_FMT_BGR565LE || \
+ (x)==PIX_FMT_BGR555BE || \
+ (x)==PIX_FMT_BGR555LE || \
+ (x)==PIX_FMT_BGR444BE || \
+ (x)==PIX_FMT_BGR444LE || \
+ (x)==PIX_FMT_BGR8 || \
+ (x)==PIX_FMT_BGR4 || \
+ (x)==PIX_FMT_BGR4_BYTE|| \
+ (x)==PIX_FMT_MONOBLACK|| \
+ (x)==PIX_FMT_MONOWHITE \
+ )
+
+#define isRGBinBytes(x) ( \
+ (x)==PIX_FMT_RGB48BE \
+ || (x)==PIX_FMT_RGB48LE \
+ || (x)==PIX_FMT_RGBA64BE \
+ || (x)==PIX_FMT_RGBA64LE \
+ || (x)==PIX_FMT_RGBA \
+ || (x)==PIX_FMT_ARGB \
+ || (x)==PIX_FMT_RGB24 \
+ )
+#define isBGRinBytes(x) ( \
+ (x)==PIX_FMT_BGR48BE \
+ || (x)==PIX_FMT_BGR48LE \
+ || (x)==PIX_FMT_BGRA64BE \
+ || (x)==PIX_FMT_BGRA64LE \
+ || (x)==PIX_FMT_BGRA \
+ || (x)==PIX_FMT_ABGR \
+ || (x)==PIX_FMT_BGR24 \
+ )
+
+#define isAnyRGB(x) \
+ ( \
+ isRGBinInt(x) || \
+ isBGRinInt(x) || \
+ (x)==PIX_FMT_GBR24P \
+ )
#define isALPHA(x) \
(av_pix_fmt_descriptors[x].nb_components == 2 || \
av_pix_fmt_descriptors[x].nb_components == 4)
+#if 1
+#define isPacked(x) ( \
+ (x)==PIX_FMT_PAL8 \
+ || (x)==PIX_FMT_YUYV422 \
+ || (x)==PIX_FMT_UYVY422 \
+ || (x)==PIX_FMT_Y400A \
+ || isRGBinInt(x) \
+ || isBGRinInt(x) \
+ )
+#else
#define isPacked(x) \
((av_pix_fmt_descriptors[x].nb_components >= 2 && \
!(av_pix_fmt_descriptors[x].flags & PIX_FMT_PLANAR)) || \
(x) == PIX_FMT_PAL8)
+#endif
#define isPlanar(x) \
(av_pix_fmt_descriptors[x].nb_components >= 2 && \
(av_pix_fmt_descriptors[x].flags & PIX_FMT_PLANAR))
@@ -641,6 +695,9 @@ const char *sws_format_name(enum PixelFormat format);
extern const uint64_t ff_dither4[2];
extern const uint64_t ff_dither8[2];
+extern const uint8_t dithers[8][8][8];
+extern const uint16_t dither_scale[15][16];
+
extern const AVClass sws_context_class;
diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c
index 595edf8ae1..9180f2eb5c 100644
--- a/libswscale/swscale_unscaled.c
+++ b/libswscale/swscale_unscaled.c
@@ -1,20 +1,20 @@
/*
- * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -23,7 +23,6 @@
#include <math.h>
#include <stdio.h>
#include "config.h"
-#include <assert.h>
#include "swscale.h"
#include "swscale_internal.h"
#include "rgb2rgb.h"
@@ -33,48 +32,7 @@
#include "libavutil/mathematics.h"
#include "libavutil/bswap.h"
#include "libavutil/pixdesc.h"
-
-DECLARE_ALIGNED(8, const uint8_t, dither_8x8_1)[8][8] = {
- { 0, 1, 0, 1, 0, 1, 0, 1,},
- { 1, 0, 1, 0, 1, 0, 1, 0,},
- { 0, 1, 0, 1, 0, 1, 0, 1,},
- { 1, 0, 1, 0, 1, 0, 1, 0,},
- { 0, 1, 0, 1, 0, 1, 0, 1,},
- { 1, 0, 1, 0, 1, 0, 1, 0,},
- { 0, 1, 0, 1, 0, 1, 0, 1,},
- { 1, 0, 1, 0, 1, 0, 1, 0,},
-};
-DECLARE_ALIGNED(8, const uint8_t, dither_8x8_3)[8][8] = {
- { 1, 2, 1, 2, 1, 2, 1, 2,},
- { 3, 0, 3, 0, 3, 0, 3, 0,},
- { 1, 2, 1, 2, 1, 2, 1, 2,},
- { 3, 0, 3, 0, 3, 0, 3, 0,},
- { 1, 2, 1, 2, 1, 2, 1, 2,},
- { 3, 0, 3, 0, 3, 0, 3, 0,},
- { 1, 2, 1, 2, 1, 2, 1, 2,},
- { 3, 0, 3, 0, 3, 0, 3, 0,},
-};
-DECLARE_ALIGNED(8, const uint8_t, dither_8x8_64)[8][8] = {
- { 18, 34, 30, 46, 17, 33, 29, 45,},
- { 50, 2, 62, 14, 49, 1, 61, 13,},
- { 26, 42, 22, 38, 25, 41, 21, 37,},
- { 58, 10, 54, 6, 57, 9, 53, 5,},
- { 16, 32, 28, 44, 19, 35, 31, 47,},
- { 48, 0, 60, 12, 51, 3, 63, 15,},
- { 24, 40, 20, 36, 27, 43, 23, 39,},
- { 56, 8, 52, 4, 59, 11, 55, 7,},
-};
-extern const uint8_t dither_8x8_128[8][8];
-DECLARE_ALIGNED(8, const uint8_t, dither_8x8_256)[8][8] = {
- { 72, 136, 120, 184, 68, 132, 116, 180,},
- { 200, 8, 248, 56, 196, 4, 244, 52,},
- { 104, 168, 88, 152, 100, 164, 84, 148,},
- { 232, 40, 216, 24, 228, 36, 212, 20,},
- { 64, 128, 102, 176, 76, 140, 124, 188,},
- { 192, 0, 240, 48, 204, 12, 252, 60,},
- { 96, 160, 80, 144, 108, 172, 92, 156,},
- { 224, 32, 208, 16, 236, 44, 220, 28,},
-};
+#include "libavutil/avassert.h"
#define RGB2YUV_SHIFT 15
#define BY ( (int) (0.114 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
@@ -87,6 +45,102 @@ DECLARE_ALIGNED(8, const uint8_t, dither_8x8_256)[8][8] = {
#define RV ( (int) (0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
#define RU (-(int) (0.169 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5))
+DECLARE_ALIGNED(8, const uint8_t, dithers)[8][8][8]={
+{
+ { 0, 1, 0, 1, 0, 1, 0, 1,},
+ { 1, 0, 1, 0, 1, 0, 1, 0,},
+ { 0, 1, 0, 1, 0, 1, 0, 1,},
+ { 1, 0, 1, 0, 1, 0, 1, 0,},
+ { 0, 1, 0, 1, 0, 1, 0, 1,},
+ { 1, 0, 1, 0, 1, 0, 1, 0,},
+ { 0, 1, 0, 1, 0, 1, 0, 1,},
+ { 1, 0, 1, 0, 1, 0, 1, 0,},
+},{
+ { 1, 2, 1, 2, 1, 2, 1, 2,},
+ { 3, 0, 3, 0, 3, 0, 3, 0,},
+ { 1, 2, 1, 2, 1, 2, 1, 2,},
+ { 3, 0, 3, 0, 3, 0, 3, 0,},
+ { 1, 2, 1, 2, 1, 2, 1, 2,},
+ { 3, 0, 3, 0, 3, 0, 3, 0,},
+ { 1, 2, 1, 2, 1, 2, 1, 2,},
+ { 3, 0, 3, 0, 3, 0, 3, 0,},
+},{
+ { 2, 4, 3, 5, 2, 4, 3, 5,},
+ { 6, 0, 7, 1, 6, 0, 7, 1,},
+ { 3, 5, 2, 4, 3, 5, 2, 4,},
+ { 7, 1, 6, 0, 7, 1, 6, 0,},
+ { 2, 4, 3, 5, 2, 4, 3, 5,},
+ { 6, 0, 7, 1, 6, 0, 7, 1,},
+ { 3, 5, 2, 4, 3, 5, 2, 4,},
+ { 7, 1, 6, 0, 7, 1, 6, 0,},
+},{
+ { 4, 8, 7, 11, 4, 8, 7, 11,},
+ { 12, 0, 15, 3, 12, 0, 15, 3,},
+ { 6, 10, 5, 9, 6, 10, 5, 9,},
+ { 14, 2, 13, 1, 14, 2, 13, 1,},
+ { 4, 8, 7, 11, 4, 8, 7, 11,},
+ { 12, 0, 15, 3, 12, 0, 15, 3,},
+ { 6, 10, 5, 9, 6, 10, 5, 9,},
+ { 14, 2, 13, 1, 14, 2, 13, 1,},
+},{
+ { 9, 17, 15, 23, 8, 16, 14, 22,},
+ { 25, 1, 31, 7, 24, 0, 30, 6,},
+ { 13, 21, 11, 19, 12, 20, 10, 18,},
+ { 29, 5, 27, 3, 28, 4, 26, 2,},
+ { 8, 16, 14, 22, 9, 17, 15, 23,},
+ { 24, 0, 30, 6, 25, 1, 31, 7,},
+ { 12, 20, 10, 18, 13, 21, 11, 19,},
+ { 28, 4, 26, 2, 29, 5, 27, 3,},
+},{
+ { 18, 34, 30, 46, 17, 33, 29, 45,},
+ { 50, 2, 62, 14, 49, 1, 61, 13,},
+ { 26, 42, 22, 38, 25, 41, 21, 37,},
+ { 58, 10, 54, 6, 57, 9, 53, 5,},
+ { 16, 32, 28, 44, 19, 35, 31, 47,},
+ { 48, 0, 60, 12, 51, 3, 63, 15,},
+ { 24, 40, 20, 36, 27, 43, 23, 39,},
+ { 56, 8, 52, 4, 59, 11, 55, 7,},
+},{
+ { 18, 34, 30, 46, 17, 33, 29, 45,},
+ { 50, 2, 62, 14, 49, 1, 61, 13,},
+ { 26, 42, 22, 38, 25, 41, 21, 37,},
+ { 58, 10, 54, 6, 57, 9, 53, 5,},
+ { 16, 32, 28, 44, 19, 35, 31, 47,},
+ { 48, 0, 60, 12, 51, 3, 63, 15,},
+ { 24, 40, 20, 36, 27, 43, 23, 39,},
+ { 56, 8, 52, 4, 59, 11, 55, 7,},
+},{
+ { 36, 68, 60, 92, 34, 66, 58, 90,},
+ { 100, 4,124, 28, 98, 2,122, 26,},
+ { 52, 84, 44, 76, 50, 82, 42, 74,},
+ { 116, 20,108, 12,114, 18,106, 10,},
+ { 32, 64, 56, 88, 38, 70, 62, 94,},
+ { 96, 0,120, 24,102, 6,126, 30,},
+ { 48, 80, 40, 72, 54, 86, 46, 78,},
+ { 112, 16,104, 8,118, 22,110, 14,},
+}};
+
+static const uint8_t flat64[8]={64,64,64,64,64,64,64,64};
+
+const uint16_t dither_scale[15][16]={
+{ 2, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,},
+{ 2, 3, 7, 7, 13, 13, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,},
+{ 3, 3, 4, 15, 15, 29, 57, 57, 57, 113, 113, 113, 113, 113, 113, 113,},
+{ 3, 4, 4, 5, 31, 31, 61, 121, 241, 241, 241, 241, 481, 481, 481, 481,},
+{ 3, 4, 5, 5, 6, 63, 63, 125, 249, 497, 993, 993, 993, 993, 993, 1985,},
+{ 3, 5, 6, 6, 6, 7, 127, 127, 253, 505, 1009, 2017, 4033, 4033, 4033, 4033,},
+{ 3, 5, 6, 7, 7, 7, 8, 255, 255, 509, 1017, 2033, 4065, 8129,16257,16257,},
+{ 3, 5, 6, 8, 8, 8, 8, 9, 511, 511, 1021, 2041, 4081, 8161,16321,32641,},
+{ 3, 5, 7, 8, 9, 9, 9, 9, 10, 1023, 1023, 2045, 4089, 8177,16353,32705,},
+{ 3, 5, 7, 8, 10, 10, 10, 10, 10, 11, 2047, 2047, 4093, 8185,16369,32737,},
+{ 3, 5, 7, 8, 10, 11, 11, 11, 11, 11, 12, 4095, 4095, 8189,16377,32753,},
+{ 3, 5, 7, 9, 10, 12, 12, 12, 12, 12, 12, 13, 8191, 8191,16381,32761,},
+{ 3, 5, 7, 9, 10, 12, 13, 13, 13, 13, 13, 13, 14,16383,16383,32765,},
+{ 3, 5, 7, 9, 10, 12, 14, 14, 14, 14, 14, 14, 14, 15,32767,32767,},
+{ 3, 5, 7, 9, 11, 12, 14, 15, 15, 15, 15, 15, 15, 15, 16,65535,},
+};
+
+
static void fillPlane(uint8_t *plane, int stride, int width, int height, int y,
uint8_t val)
{
@@ -98,6 +152,20 @@ static void fillPlane(uint8_t *plane, int stride, int width, int height, int y,
}
}
+static void fillPlane16(uint8_t *plane, int stride, int width, int height, int y,
+ int alpha, int bits)
+{
+ int i, j;
+ uint8_t *ptr = plane + stride * y;
+ int v = alpha ? -1 : (1<<bits);
+ for (i = 0; i < height; i++) {
+ for (j = 0; j < width; j++) {
+ AV_WN16(ptr+2*j, v);
+ }
+ ptr += stride;
+ }
+}
+
static void copyPlane(const uint8_t *src, int srcStride,
int srcSliceY, int srcSliceH, int width,
uint8_t *dst, int dstStride)
@@ -310,7 +378,7 @@ static int palToRgbWrapper(SwsContext *c, const uint8_t *src[], int srcStride[],
uint8_t *dstPtr = dst[0] + dstStride[0] * srcSliceY;
const uint8_t *srcPtr = src[0];
- if (srcFormat == PIX_FMT_Y400A) {
+ if (srcFormat == PIX_FMT_GRAY8A) {
switch (dstFormat) {
case PIX_FMT_RGB32 : conv = gray8aToPacked32; break;
case PIX_FMT_BGR32 : conv = gray8aToPacked32; break;
@@ -332,7 +400,7 @@ static int palToRgbWrapper(SwsContext *c, const uint8_t *src[], int srcStride[],
if (!conv)
av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
- sws_format_name(srcFormat), sws_format_name(dstFormat));
+ av_get_pix_fmt_name(srcFormat), av_get_pix_fmt_name(dstFormat));
else {
for (i = 0; i < srcSliceH; i++) {
conv(srcPtr, dstPtr, c->srcW, (uint8_t *) c->pal_rgb);
@@ -453,6 +521,20 @@ static int planarRgbToRgbWrapper(SwsContext *c, const uint8_t *src[],
|| (x) == PIX_FMT_ABGR \
)
+#define isRGBA64(x) ( \
+ (x) == PIX_FMT_RGBA64LE \
+ || (x) == PIX_FMT_RGBA64BE \
+ || (x) == PIX_FMT_BGRA64LE \
+ || (x) == PIX_FMT_BGRA64BE \
+ )
+
+#define isRGB48(x) ( \
+ (x) == PIX_FMT_RGB48LE \
+ || (x) == PIX_FMT_RGB48BE \
+ || (x) == PIX_FMT_BGR48LE \
+ || (x) == PIX_FMT_BGR48BE \
+ )
+
/* {RGB,BGR}{15,16,24,32,32_1} -> {RGB,BGR}{15,16,24,32} */
typedef void (* rgbConvFn) (const uint8_t *, uint8_t *, int);
static rgbConvFn findRgbConvFn(SwsContext *c)
@@ -467,10 +549,6 @@ static rgbConvFn findRgbConvFn(SwsContext *c)
(((bpp + 7) >> 3) == 2 && \
(!(av_pix_fmt_descriptors[fmt].flags & PIX_FMT_BE) != !HAVE_BIGENDIAN))
- /* if this is non-native rgb444/555/565, don't handle it here. */
- if (IS_NOT_NE(srcId, srcFormat) || IS_NOT_NE(dstId, dstFormat))
- return NULL;
-
#define CONV_IS(src, dst) (srcFormat == PIX_FMT_##src && dstFormat == PIX_FMT_##dst)
if (isRGBA32(srcFormat) && isRGBA32(dstFormat)) {
@@ -486,6 +564,32 @@ static rgbConvFn findRgbConvFn(SwsContext *c)
|| CONV_IS(RGBA, BGRA)) conv = shuffle_bytes_2103;
else if (CONV_IS(BGRA, ABGR)
|| CONV_IS(RGBA, ARGB)) conv = shuffle_bytes_3012;
+ } else if (isRGB48(srcFormat) && isRGB48(dstFormat)) {
+ if (CONV_IS(RGB48LE, BGR48LE)
+ || CONV_IS(BGR48LE, RGB48LE)
+ || CONV_IS(RGB48BE, BGR48BE)
+ || CONV_IS(BGR48BE, RGB48BE)) conv = rgb48tobgr48_nobswap;
+ else if (CONV_IS(RGB48LE, BGR48BE)
+ || CONV_IS(BGR48LE, RGB48BE)
+ || CONV_IS(RGB48BE, BGR48LE)
+ || CONV_IS(BGR48BE, RGB48LE)) conv = rgb48tobgr48_bswap;
+ } else if (isRGBA64(srcFormat) && isRGB48(dstFormat)) {
+ if (CONV_IS(RGBA64LE, BGR48LE)
+ || CONV_IS(BGRA64LE, RGB48LE)
+ || CONV_IS(RGBA64BE, BGR48BE)
+ || CONV_IS(BGRA64BE, RGB48BE)) conv = rgb64tobgr48_nobswap;
+ else if (CONV_IS(RGBA64LE, BGR48BE)
+ || CONV_IS(BGRA64LE, RGB48BE)
+ || CONV_IS(RGBA64BE, BGR48LE)
+ || CONV_IS(BGRA64BE, RGB48LE)) conv = rgb64tobgr48_bswap;
+ else if (CONV_IS(RGBA64LE, RGB48LE)
+ || CONV_IS(BGRA64LE, BGR48LE)
+ || CONV_IS(RGBA64BE, RGB48BE)
+ || CONV_IS(BGRA64BE, BGR48BE)) conv = rgb64to48_nobswap;
+ else if (CONV_IS(RGBA64LE, RGB48BE)
+ || CONV_IS(BGRA64LE, BGR48BE)
+ || CONV_IS(RGBA64BE, RGB48LE)
+ || CONV_IS(BGRA64BE, BGR48LE)) conv = rgb64to48_bswap;
} else
/* BGR -> BGR */
if ((isBGRinInt(srcFormat) && isBGRinInt(dstFormat)) ||
@@ -544,10 +648,13 @@ static int rgbToRgbWrapper(SwsContext *c, const uint8_t *src[], int srcStride[],
if (!conv) {
av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
- sws_format_name(srcFormat), sws_format_name(dstFormat));
+ av_get_pix_fmt_name(srcFormat), av_get_pix_fmt_name(dstFormat));
} else {
const uint8_t *srcPtr = src[0];
uint8_t *dstPtr = dst[0];
+ int src_bswap = IS_NOT_NE(c->srcFormatBpp, srcFormat);
+ int dst_bswap = IS_NOT_NE(c->dstFormatBpp, dstFormat);
+
if ((srcFormat == PIX_FMT_RGB32_1 || srcFormat == PIX_FMT_BGR32_1) &&
!isRGBA32(dstFormat))
srcPtr += ALT32_CORR;
@@ -557,15 +664,23 @@ static int rgbToRgbWrapper(SwsContext *c, const uint8_t *src[], int srcStride[],
dstPtr += ALT32_CORR;
if (dstStride[0] * srcBpp == srcStride[0] * dstBpp && srcStride[0] > 0 &&
- !(srcStride[0] % srcBpp))
+ !(srcStride[0] % srcBpp) && !dst_bswap && !src_bswap)
conv(srcPtr, dstPtr + dstStride[0] * srcSliceY,
srcSliceH * srcStride[0]);
else {
- int i;
+ int i, j;
dstPtr += dstStride[0] * srcSliceY;
for (i = 0; i < srcSliceH; i++) {
- conv(srcPtr, dstPtr, c->srcW * srcBpp);
+ if(src_bswap) {
+ for(j=0; j<c->srcW; j++)
+ ((uint16_t*)c->formatConvBuffer)[j] = av_bswap16(((uint16_t*)srcPtr)[j]);
+ conv(c->formatConvBuffer, dstPtr, c->srcW * srcBpp);
+ }else
+ conv(srcPtr, dstPtr, c->srcW * srcBpp);
+ if(dst_bswap)
+ for(j=0; j<c->srcW; j++)
+ ((uint16_t*)dstPtr)[j] = av_bswap16(((uint16_t*)dstPtr)[j]);
srcPtr += srcStride[0];
dstPtr += dstStride[0];
}
@@ -623,7 +738,7 @@ static int packedCopyWrapper(SwsContext *c, const uint8_t *src[],
while (length + c->srcW <= FFABS(dstStride[0]) &&
length + c->srcW <= FFABS(srcStride[0]))
length += c->srcW;
- assert(length != 0);
+ av_assert1(length != 0);
for (i = 0; i < srcSliceH; i++) {
memcpy(dstPtr, srcPtr, length);
@@ -634,25 +749,25 @@ static int packedCopyWrapper(SwsContext *c, const uint8_t *src[],
return srcSliceH;
}
-#define clip9(x) av_clip_uintp2(x, 9)
-#define clip10(x) av_clip_uintp2(x, 10)
-#define DITHER_COPY(dst, dstStride, wfunc, src, srcStride, rfunc, dithers, shift, clip) \
- for (i = 0; i < height; i++) { \
- const uint8_t *dither = dithers[i & 7]; \
- for (j = 0; j < length - 7; j += 8) { \
- wfunc(&dst[j + 0], clip((rfunc(&src[j + 0]) + dither[0]) >> shift)); \
- wfunc(&dst[j + 1], clip((rfunc(&src[j + 1]) + dither[1]) >> shift)); \
- wfunc(&dst[j + 2], clip((rfunc(&src[j + 2]) + dither[2]) >> shift)); \
- wfunc(&dst[j + 3], clip((rfunc(&src[j + 3]) + dither[3]) >> shift)); \
- wfunc(&dst[j + 4], clip((rfunc(&src[j + 4]) + dither[4]) >> shift)); \
- wfunc(&dst[j + 5], clip((rfunc(&src[j + 5]) + dither[5]) >> shift)); \
- wfunc(&dst[j + 6], clip((rfunc(&src[j + 6]) + dither[6]) >> shift)); \
- wfunc(&dst[j + 7], clip((rfunc(&src[j + 7]) + dither[7]) >> shift)); \
- } \
- for (; j < length; j++) \
- wfunc(&dst[j], (rfunc(&src[j]) + dither[j & 7]) >> shift); \
- dst += dstStride; \
- src += srcStride; \
+#define DITHER_COPY(dst, dstStride, src, srcStride, bswap, dbswap)\
+ uint16_t scale= dither_scale[dst_depth-1][src_depth-1];\
+ int shift= src_depth-dst_depth + dither_scale[src_depth-2][dst_depth-1];\
+ for (i = 0; i < height; i++) {\
+ const uint8_t *dither= dithers[src_depth-9][i&7];\
+ for (j = 0; j < length-7; j+=8){\
+ dst[j+0] = dbswap((bswap(src[j+0]) + dither[0])*scale>>shift);\
+ dst[j+1] = dbswap((bswap(src[j+1]) + dither[1])*scale>>shift);\
+ dst[j+2] = dbswap((bswap(src[j+2]) + dither[2])*scale>>shift);\
+ dst[j+3] = dbswap((bswap(src[j+3]) + dither[3])*scale>>shift);\
+ dst[j+4] = dbswap((bswap(src[j+4]) + dither[4])*scale>>shift);\
+ dst[j+5] = dbswap((bswap(src[j+5]) + dither[5])*scale>>shift);\
+ dst[j+6] = dbswap((bswap(src[j+6]) + dither[6])*scale>>shift);\
+ dst[j+7] = dbswap((bswap(src[j+7]) + dither[7])*scale>>shift);\
+ }\
+ for (; j < length; j++)\
+ dst[j] = dbswap((bswap(src[j]) + dither[j&7])*scale>>shift);\
+ dst += dstStride;\
+ src += srcStride;\
}
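The rewritten macro replaces the old shift-plus-clip down-conversion: the (scale, shift) pair comes from the dither_scale[] table, the per-row dither bytes from dithers[src_depth - 9], and the bswap/dbswap arguments slot in byte swaps for non-native-endian planes. One sample through the macro reduces to the expression below (a sketch; dither_scale and dithers live elsewhere in swscale and are assumed to be chosen so that the source maximum maps to the destination maximum, which is why the old clip calls are gone):

    /* reference model of one sample through the new DITHER_COPY (sketch) */
    static inline uint16_t dither_down_sample(uint16_t v, uint8_t dither,
                                              uint16_t scale, unsigned shift)
    {
        /* v is already in native byte order here; the macro's bswap/dbswap
         * arguments handle the endianness conversion around this step */
        return (uint16_t)(((v + dither) * (uint32_t)scale) >> shift);
    }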
static int planarCopyWrapper(SwsContext *c, const uint8_t *src[],
@@ -666,162 +781,126 @@ static int planarCopyWrapper(SwsContext *c, const uint8_t *src[],
int height = (plane == 0 || plane == 3) ? srcSliceH: -((-srcSliceH) >> c->chrDstVSubSample);
const uint8_t *srcPtr = src[plane];
uint8_t *dstPtr = dst[plane] + dstStride[plane] * y;
+ int shiftonly= plane==1 || plane==2 || (!c->srcRange && plane==0);
if (!dst[plane])
continue;
// ignore palette for GRAY8
if (plane == 1 && !dst[2]) continue;
if (!src[plane] || (plane == 1 && !src[2])) {
- if (is16BPS(c->dstFormat))
- length *= 2;
- fillPlane(dst[plane], dstStride[plane], length, height, y,
- (plane == 3) ? 255 : 128);
+ if (is16BPS(c->dstFormat) || isNBPS(c->dstFormat)) {
+ fillPlane16(dst[plane], dstStride[plane], length, height, y,
+ plane == 3, av_pix_fmt_descriptors[c->dstFormat].comp[plane].depth_minus1);
+ } else {
+ fillPlane(dst[plane], dstStride[plane], length, height, y,
+ (plane == 3) ? 255 : 128);
+ }
} else {
- if (is9_OR_10BPS(c->srcFormat)) {
+ if(isNBPS(c->srcFormat) || isNBPS(c->dstFormat)
+ || (is16BPS(c->srcFormat) != is16BPS(c->dstFormat))
+ ) {
const int src_depth = av_pix_fmt_descriptors[c->srcFormat].comp[plane].depth_minus1 + 1;
const int dst_depth = av_pix_fmt_descriptors[c->dstFormat].comp[plane].depth_minus1 + 1;
const uint16_t *srcPtr2 = (const uint16_t *) srcPtr;
+ uint16_t *dstPtr2 = (uint16_t*)dstPtr;
- if (is16BPS(c->dstFormat)) {
- uint16_t *dstPtr2 = (uint16_t *) dstPtr;
-#define COPY9_OR_10TO16(rfunc, wfunc) \
- for (i = 0; i < height; i++) { \
- for (j = 0; j < length; j++) { \
- int srcpx = rfunc(&srcPtr2[j]); \
- wfunc(&dstPtr2[j], (srcpx << (16 - src_depth)) | (srcpx >> (2 * src_depth - 16))); \
- } \
- dstPtr2 += dstStride[plane] / 2; \
- srcPtr2 += srcStride[plane] / 2; \
+ if (dst_depth == 8) {
+ if(isBE(c->srcFormat) == HAVE_BIGENDIAN){
+ DITHER_COPY(dstPtr, dstStride[plane], srcPtr2, srcStride[plane]/2, , )
+ } else {
+ DITHER_COPY(dstPtr, dstStride[plane], srcPtr2, srcStride[plane]/2, av_bswap16, )
}
- if (isBE(c->dstFormat)) {
- if (isBE(c->srcFormat)) {
- COPY9_OR_10TO16(AV_RB16, AV_WB16);
- } else {
- COPY9_OR_10TO16(AV_RL16, AV_WB16);
+ } else if (src_depth == 8) {
+ for (i = 0; i < height; i++) {
+ #define COPY816(w)\
+ if(shiftonly){\
+ for (j = 0; j < length; j++)\
+ w(&dstPtr2[j], srcPtr[j]<<(dst_depth-8));\
+ }else{\
+ for (j = 0; j < length; j++)\
+ w(&dstPtr2[j], (srcPtr[j]<<(dst_depth-8)) |\
+ (srcPtr[j]>>(2*8-dst_depth)));\
}
- } else {
- if (isBE(c->srcFormat)) {
- COPY9_OR_10TO16(AV_RB16, AV_WL16);
+ if(isBE(c->dstFormat)){
+ COPY816(AV_WB16)
} else {
- COPY9_OR_10TO16(AV_RL16, AV_WL16);
+ COPY816(AV_WL16)
}
+ dstPtr2 += dstStride[plane]/2;
+ srcPtr += srcStride[plane];
}
- } else if (is9_OR_10BPS(c->dstFormat)) {
- uint16_t *dstPtr2 = (uint16_t *) dstPtr;
-#define COPY9_OR_10TO9_OR_10(loop) \
- for (i = 0; i < height; i++) { \
- for (j = 0; j < length; j++) { \
- loop; \
- } \
- dstPtr2 += dstStride[plane] / 2; \
- srcPtr2 += srcStride[plane] / 2; \
- }
-#define COPY9_OR_10TO9_OR_10_2(rfunc, wfunc) \
- if (dst_depth > src_depth) { \
- COPY9_OR_10TO9_OR_10(int srcpx = rfunc(&srcPtr2[j]); \
- wfunc(&dstPtr2[j], (srcpx << 1) | (srcpx >> 9))); \
- } else if (dst_depth < src_depth) { \
- DITHER_COPY(dstPtr2, dstStride[plane] / 2, wfunc, \
- srcPtr2, srcStride[plane] / 2, rfunc, \
- dither_8x8_1, 1, clip9); \
- } else { \
- COPY9_OR_10TO9_OR_10(wfunc(&dstPtr2[j], rfunc(&srcPtr2[j]))); \
- }
- if (isBE(c->dstFormat)) {
- if (isBE(c->srcFormat)) {
- COPY9_OR_10TO9_OR_10_2(AV_RB16, AV_WB16);
- } else {
- COPY9_OR_10TO9_OR_10_2(AV_RL16, AV_WB16);
+ } else if (src_depth <= dst_depth) {
+ int orig_length = length;
+ for (i = 0; i < height; i++) {
+ if(isBE(c->srcFormat) == HAVE_BIGENDIAN &&
+ isBE(c->dstFormat) == HAVE_BIGENDIAN &&
+ shiftonly) {
+ unsigned shift = dst_depth - src_depth;
+ length = orig_length;
+#if HAVE_FAST_64BIT
+#define FAST_COPY_UP(shift) \
+ for (j = 0; j < length - 3; j += 4) { \
+ uint64_t v = AV_RN64A(srcPtr2 + j); \
+ AV_WN64A(dstPtr2 + j, v << shift); \
+ } \
+ length &= 3;
+#else
+#define FAST_COPY_UP(shift) \
+ for (j = 0; j < length - 1; j += 2) { \
+ uint32_t v = AV_RN32A(srcPtr2 + j); \
+ AV_WN32A(dstPtr2 + j, v << shift); \
+ } \
+ length &= 1;
+#endif
+ switch (shift)
+ {
+ case 6: FAST_COPY_UP(6); break;
+ case 7: FAST_COPY_UP(7); break;
+ }
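The FAST_COPY_UP fast path shifts several packed samples at once: with HAVE_FAST_64BIT, four 16-bit samples are loaded as one aligned 64-bit word (two as a 32-bit word otherwise) and shifted left together. This is safe for the shifts dispatched here (6 and 7) because a 10-bit sample shifted by 6, or a 9-bit sample shifted by 7, still fits inside its own 16-bit lane, so no bits spill into the neighbouring sample. A standalone check of that claim (illustrative only, not part of the patch):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t w = 0x03FF03FF03FF03FFULL;   /* four maximal 10-bit samples */
        uint64_t s = w << 6;                  /* the whole-word shift used above */
        int i;
        for (i = 0; i < 4; i++)
            assert(((s >> (16 * i)) & 0xFFFF) == (0x03FFu << 6));
        return 0;
    }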
}
- } else {
- if (isBE(c->srcFormat)) {
- COPY9_OR_10TO9_OR_10_2(AV_RB16, AV_WL16);
+#define COPY_UP(r,w) \
+ if(shiftonly){\
+ for (j = 0; j < length; j++){ \
+ unsigned int v= r(&srcPtr2[j]);\
+ w(&dstPtr2[j], v<<(dst_depth-src_depth));\
+ }\
+ }else{\
+ for (j = 0; j < length; j++){ \
+ unsigned int v= r(&srcPtr2[j]);\
+ w(&dstPtr2[j], (v<<(dst_depth-src_depth)) | \
+ (v>>(2*src_depth-dst_depth)));\
+ }\
+ }
+ if(isBE(c->srcFormat)){
+ if(isBE(c->dstFormat)){
+ COPY_UP(AV_RB16, AV_WB16)
+ } else {
+ COPY_UP(AV_RB16, AV_WL16)
+ }
} else {
- COPY9_OR_10TO9_OR_10_2(AV_RL16, AV_WL16);
+ if(isBE(c->dstFormat)){
+ COPY_UP(AV_RL16, AV_WB16)
+ } else {
+ COPY_UP(AV_RL16, AV_WL16)
+ }
}
+ dstPtr2 += dstStride[plane]/2;
+ srcPtr2 += srcStride[plane]/2;
}
} else {
-#define W8(a, b) { *(a) = (b); }
-#define COPY9_OR_10TO8(rfunc) \
- if (src_depth == 9) { \
- DITHER_COPY(dstPtr, dstStride[plane], W8, \
- srcPtr2, srcStride[plane] / 2, rfunc, \
- dither_8x8_1, 1, av_clip_uint8); \
- } else { \
- DITHER_COPY(dstPtr, dstStride[plane], W8, \
- srcPtr2, srcStride[plane] / 2, rfunc, \
- dither_8x8_3, 2, av_clip_uint8); \
- }
- if (isBE(c->srcFormat)) {
- COPY9_OR_10TO8(AV_RB16);
- } else {
- COPY9_OR_10TO8(AV_RL16);
- }
- }
- } else if (is9_OR_10BPS(c->dstFormat)) {
- const int dst_depth = av_pix_fmt_descriptors[c->dstFormat].comp[plane].depth_minus1 + 1;
- uint16_t *dstPtr2 = (uint16_t *) dstPtr;
-
- if (is16BPS(c->srcFormat)) {
- const uint16_t *srcPtr2 = (const uint16_t *) srcPtr;
-#define COPY16TO9_OR_10(rfunc, wfunc) \
- if (dst_depth == 9) { \
- DITHER_COPY(dstPtr2, dstStride[plane] / 2, wfunc, \
- srcPtr2, srcStride[plane] / 2, rfunc, \
- dither_8x8_128, 7, clip9); \
- } else { \
- DITHER_COPY(dstPtr2, dstStride[plane] / 2, wfunc, \
- srcPtr2, srcStride[plane] / 2, rfunc, \
- dither_8x8_64, 6, clip10); \
- }
- if (isBE(c->dstFormat)) {
- if (isBE(c->srcFormat)) {
- COPY16TO9_OR_10(AV_RB16, AV_WB16);
+ if(isBE(c->srcFormat) == HAVE_BIGENDIAN){
+ if(isBE(c->dstFormat) == HAVE_BIGENDIAN){
+ DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, , )
} else {
- COPY16TO9_OR_10(AV_RL16, AV_WB16);
+ DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, , av_bswap16)
}
- } else {
- if (isBE(c->srcFormat)) {
- COPY16TO9_OR_10(AV_RB16, AV_WL16);
+ }else{
+ if(isBE(c->dstFormat) == HAVE_BIGENDIAN){
+ DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, av_bswap16, )
} else {
- COPY16TO9_OR_10(AV_RL16, AV_WL16);
+ DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, av_bswap16, av_bswap16)
}
}
- } else /* 8bit */ {
-#define COPY8TO9_OR_10(wfunc) \
- for (i = 0; i < height; i++) { \
- for (j = 0; j < length; j++) { \
- const int srcpx = srcPtr[j]; \
- wfunc(&dstPtr2[j], (srcpx << (dst_depth - 8)) | (srcpx >> (16 - dst_depth))); \
- } \
- dstPtr2 += dstStride[plane] / 2; \
- srcPtr += srcStride[plane]; \
- }
- if (isBE(c->dstFormat)) {
- COPY8TO9_OR_10(AV_WB16);
- } else {
- COPY8TO9_OR_10(AV_WL16);
- }
- }
- } else if (is16BPS(c->srcFormat) && !is16BPS(c->dstFormat)) {
- const uint16_t *srcPtr2 = (const uint16_t *) srcPtr;
-#define COPY16TO8(rfunc) \
- DITHER_COPY(dstPtr, dstStride[plane], W8, \
- srcPtr2, srcStride[plane] / 2, rfunc, \
- dither_8x8_256, 8, av_clip_uint8);
- if (isBE(c->srcFormat)) {
- COPY16TO8(AV_RB16);
- } else {
- COPY16TO8(AV_RL16);
- }
- } else if (!is16BPS(c->srcFormat) && is16BPS(c->dstFormat)) {
- for (i = 0; i < height; i++) {
- for (j = 0; j < length; j++) {
- dstPtr[ j << 1 ] = srcPtr[j];
- dstPtr[(j << 1) + 1] = srcPtr[j];
- }
- srcPtr += srcStride[plane];
- dstPtr += dstStride[plane];
}
} else if (is16BPS(c->srcFormat) && is16BPS(c->dstFormat) &&
isBE(c->srcFormat) != isBE(c->dstFormat)) {
@@ -899,28 +978,32 @@ void ff_get_unscaled_swscale(SwsContext *c)
&& (!needsDither || (c->flags&(SWS_FAST_BILINEAR|SWS_POINT))))
c->swScale= rgbToRgbWrapper;
- if (isPlanarRGB(srcFormat) && isPackedRGB(dstFormat))
+#define isByteRGB(f) (\
+ f == PIX_FMT_RGB32 ||\
+ f == PIX_FMT_RGB32_1 ||\
+ f == PIX_FMT_RGB24 ||\
+ f == PIX_FMT_BGR32 ||\
+ f == PIX_FMT_BGR32_1 ||\
+ f == PIX_FMT_BGR24)
+
+ if (isAnyRGB(srcFormat) && isPlanar(srcFormat) && isByteRGB(dstFormat))
c->swScale = planarRgbToRgbWrapper;
/* bswap 16 bits per pixel/component packed formats */
if (IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, PIX_FMT_BGR444) ||
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, PIX_FMT_BGR48) ||
+ IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, PIX_FMT_BGRA64) ||
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, PIX_FMT_BGR555) ||
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, PIX_FMT_BGR565) ||
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, PIX_FMT_GRAY16) ||
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, PIX_FMT_RGB444) ||
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, PIX_FMT_RGB48) ||
+ IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, PIX_FMT_RGBA64) ||
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, PIX_FMT_RGB555) ||
IS_DIFFERENT_ENDIANESS(srcFormat, dstFormat, PIX_FMT_RGB565))
c->swScale = packed_16bpc_bswap;
- if ((usePal(srcFormat) && (
- dstFormat == PIX_FMT_RGB32 ||
- dstFormat == PIX_FMT_RGB32_1 ||
- dstFormat == PIX_FMT_RGB24 ||
- dstFormat == PIX_FMT_BGR32 ||
- dstFormat == PIX_FMT_BGR32_1 ||
- dstFormat == PIX_FMT_BGR24)))
+ if (usePal(srcFormat) && isByteRGB(dstFormat))
c->swScale = palToRgbWrapper;
if (srcFormat == PIX_FMT_YUV422P) {
@@ -951,13 +1034,14 @@ void ff_get_unscaled_swscale(SwsContext *c)
if (srcFormat == PIX_FMT_UYVY422 && dstFormat == PIX_FMT_YUV422P)
c->swScale = uyvyToYuv422Wrapper;
+#define isPlanarGray(x) (isGray(x) && (x) != PIX_FMT_GRAY8A)
/* simple copy */
if ( srcFormat == dstFormat ||
(srcFormat == PIX_FMT_YUVA420P && dstFormat == PIX_FMT_YUV420P) ||
(srcFormat == PIX_FMT_YUV420P && dstFormat == PIX_FMT_YUVA420P) ||
- (isPlanarYUV(srcFormat) && isGray(dstFormat)) ||
- (isPlanarYUV(dstFormat) && isGray(srcFormat)) ||
- (isGray(dstFormat) && isGray(srcFormat)) ||
+ (isPlanarYUV(srcFormat) && isPlanarGray(dstFormat)) ||
+ (isPlanarYUV(dstFormat) && isPlanarGray(srcFormat)) ||
+ (isPlanarGray(dstFormat) && isPlanarGray(srcFormat)) ||
(isPlanarYUV(srcFormat) && isPlanarYUV(dstFormat) &&
c->chrDstHSubSample == c->chrSrcHSubSample &&
c->chrDstVSubSample == c->chrSrcVSubSample &&
@@ -988,7 +1072,7 @@ static void reset_ptr(const uint8_t *src[], int format)
}
}
-static int check_image_pointers(uint8_t *data[4], enum PixelFormat pix_fmt,
+static int check_image_pointers(const uint8_t * const data[4], enum PixelFormat pix_fmt,
const int linesizes[4])
{
const AVPixFmtDescriptor *desc = &av_pix_fmt_descriptors[pix_fmt];
@@ -1013,9 +1097,10 @@ int attribute_align_arg sws_scale(struct SwsContext *c,
int srcSliceH, uint8_t *const dst[],
const int dstStride[])
{
- int i;
+ int i, ret;
const uint8_t *src2[4] = { srcSlice[0], srcSlice[1], srcSlice[2], srcSlice[3] };
uint8_t *dst2[4] = { dst[0], dst[1], dst[2], dst[3] };
+ uint8_t *rgb0_tmp = NULL;
// do not mess up sliceDir if we have a "trailing" 0-size slice
if (srcSliceH == 0)
@@ -1025,7 +1110,7 @@ int attribute_align_arg sws_scale(struct SwsContext *c,
av_log(c, AV_LOG_ERROR, "bad src image pointers\n");
return 0;
}
- if (!check_image_pointers(dst, c->dstFormat, dstStride)) {
+ if (!check_image_pointers((const uint8_t* const*)dst, c->dstFormat, dstStride)) {
av_log(c, AV_LOG_ERROR, "bad dst image pointers\n");
return 0;
}
@@ -1040,9 +1125,10 @@ int attribute_align_arg sws_scale(struct SwsContext *c,
if (usePal(c->srcFormat)) {
for (i = 0; i < 256; i++) {
- int p, r, g, b, y, u, v;
+ int p, r, g, b, y, u, v, a = 0xff;
if (c->srcFormat == PIX_FMT_PAL8) {
p = ((const uint32_t *)(srcSlice[1]))[i];
+ a = (p >> 24) & 0xFF;
r = (p >> 16) & 0xFF;
g = (p >> 8) & 0xFF;
b = p & 0xFF;
@@ -1058,11 +1144,10 @@ int attribute_align_arg sws_scale(struct SwsContext *c,
r = ( i >> 3 ) * 255;
g = ((i >> 1) & 3) * 85;
b = ( i & 1) * 255;
- } else if (c->srcFormat == PIX_FMT_GRAY8 ||
- c->srcFormat == PIX_FMT_Y400A) {
+ } else if (c->srcFormat == PIX_FMT_GRAY8 || c->srcFormat == PIX_FMT_GRAY8A) {
r = g = b = i;
} else {
- assert(c->srcFormat == PIX_FMT_BGR4_BYTE);
+ av_assert1(c->srcFormat == PIX_FMT_BGR4_BYTE);
b = ( i >> 3 ) * 255;
g = ((i >> 1) & 3) * 85;
r = ( i & 1) * 255;
@@ -1070,37 +1155,51 @@ int attribute_align_arg sws_scale(struct SwsContext *c,
y = av_clip_uint8((RY * r + GY * g + BY * b + ( 33 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
u = av_clip_uint8((RU * r + GU * g + BU * b + (257 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
v = av_clip_uint8((RV * r + GV * g + BV * b + (257 << (RGB2YUV_SHIFT - 1))) >> RGB2YUV_SHIFT);
- c->pal_yuv[i] = y + (u << 8) + (v << 16);
+ c->pal_yuv[i]= y + (u<<8) + (v<<16) + (a<<24);
switch (c->dstFormat) {
case PIX_FMT_BGR32:
#if !HAVE_BIGENDIAN
case PIX_FMT_RGB24:
#endif
- c->pal_rgb[i] = r + (g << 8) + (b << 16);
+ c->pal_rgb[i]= r + (g<<8) + (b<<16) + (a<<24);
break;
case PIX_FMT_BGR32_1:
#if HAVE_BIGENDIAN
case PIX_FMT_BGR24:
#endif
- c->pal_rgb[i] = (r + (g << 8) + (b << 16)) << 8;
+ c->pal_rgb[i]= a + (r<<8) + (g<<16) + (b<<24);
break;
case PIX_FMT_RGB32_1:
#if HAVE_BIGENDIAN
case PIX_FMT_RGB24:
#endif
- c->pal_rgb[i] = (b + (g << 8) + (r << 16)) << 8;
+ c->pal_rgb[i]= a + (b<<8) + (g<<16) + (r<<24);
break;
case PIX_FMT_RGB32:
#if !HAVE_BIGENDIAN
case PIX_FMT_BGR24:
#endif
default:
- c->pal_rgb[i] = b + (g << 8) + (r << 16);
+ c->pal_rgb[i]= b + (g<<8) + (r<<16) + (a<<24);
}
}
}
+ if (c->src0Alpha && !c->dst0Alpha && isALPHA(c->dstFormat)) {
+ uint8_t *base;
+ int x,y;
+ rgb0_tmp = av_malloc(FFABS(srcStride[0]) * srcSliceH + 32);
+ base = srcStride[0] < 0 ? rgb0_tmp - srcStride[0] * (srcSliceH-1) : rgb0_tmp;
+ for (y=0; y<srcSliceH; y++){
+ memcpy(base + srcStride[0]*y, src2[0] + srcStride[0]*y, 4*c->srcW);
+ for (x=c->src0Alpha-1; x<4*c->srcW; x+=4) {
+ base[ srcStride[0]*y + x] = 0xFF;
+ }
+ }
+ src2[0] = base;
+ }
+
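When the source uses one of the "0"-filler layouts and the destination carries real alpha, the slice is first copied into a scratch buffer and the filler byte (src0Alpha is its 1-based position within the 4-byte pixel) is forced to 0xFF, so the downstream converters see fully opaque input. The allocation above is used unchecked; a defensive variant would look like this (a sketch, not part of the patch — upstream may handle the failure differently):

    rgb0_tmp = av_malloc(FFABS(srcStride[0]) * srcSliceH + 32);
    if (!rgb0_tmp) {
        av_log(c, AV_LOG_ERROR, "failed to allocate scratch buffer\n");
        return 0;   /* sws_scale() reports other argument errors this way too */
    }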
// copy strides, so they can safely be modified
if (c->sliceDir == 1) {
// slices go from top to bottom
@@ -1110,13 +1209,13 @@ int attribute_align_arg sws_scale(struct SwsContext *c,
dstStride[3] };
reset_ptr(src2, c->srcFormat);
- reset_ptr((const uint8_t **) dst2, c->dstFormat);
+ reset_ptr((void*)dst2, c->dstFormat);
/* reset slice direction at end of frame */
if (srcSliceY + srcSliceH == c->srcH)
c->sliceDir = 0;
- return c->swScale(c, src2, srcStride2, srcSliceY, srcSliceH, dst2,
+ ret = c->swScale(c, src2, srcStride2, srcSliceY, srcSliceH, dst2,
dstStride2);
} else {
// slices go from bottom to top => we flip the image internally
@@ -1136,15 +1235,18 @@ int attribute_align_arg sws_scale(struct SwsContext *c,
dst2[3] += ( c->dstH - 1) * dstStride[3];
reset_ptr(src2, c->srcFormat);
- reset_ptr((const uint8_t **) dst2, c->dstFormat);
+ reset_ptr((void*)dst2, c->dstFormat);
/* reset slice direction at end of frame */
if (!srcSliceY)
c->sliceDir = 0;
- return c->swScale(c, src2, srcStride2, c->srcH-srcSliceY-srcSliceH,
+ ret = c->swScale(c, src2, srcStride2, c->srcH-srcSliceY-srcSliceH,
srcSliceH, dst2, dstStride2);
}
+
+ av_free(rgb0_tmp);
+ return ret;
}
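sws_scale() now propagates the return value of the per-context swScale callback and frees the scratch buffer on both slice directions. A minimal caller exercising the new "0"-alpha source handling might look as follows (hypothetical buffers w, h, src_data/src_linesize and dst_data/dst_linesize; not taken from the patch):

    struct SwsContext *sws = sws_getContext(w, h, PIX_FMT_RGB0,
                                            w, h, PIX_FMT_YUVA420P,
                                            SWS_BILINEAR, NULL, NULL, NULL);
    if (sws) {
        /* src0Alpha is set via handle_0alpha(), so the padding byte of each
         * RGB0 pixel is forced to 0xFF before the actual conversion runs */
        sws_scale(sws, (const uint8_t * const *)src_data, src_linesize,
                  0, h, dst_data, dst_linesize);
        sws_freeContext(sws);
    }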
/* Convert the palette to the same packed 32-bit format as the palette */
diff --git a/libswscale/utils.c b/libswscale/utils.c
index f890b5cee1..1e3b718232 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -1,27 +1,27 @@
/*
* Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "config.h"
#define _SVID_SOURCE // needed for MAP_ANONYMOUS
-#include <assert.h>
+#define _DARWIN_C_SOURCE // needed for MAP_ANON
#include <inttypes.h>
#include <math.h>
#include <stdio.h>
@@ -38,6 +38,7 @@
#endif
#include "libavutil/attributes.h"
+#include "libavutil/avassert.h"
#include "libavutil/avutil.h"
#include "libavutil/bswap.h"
#include "libavutil/cpu.h"
@@ -52,18 +53,19 @@
unsigned swscale_version(void)
{
+ av_assert0(LIBSWSCALE_VERSION_MICRO >= 100);
return LIBSWSCALE_VERSION_INT;
}
const char *swscale_configuration(void)
{
- return LIBAV_CONFIGURATION;
+ return FFMPEG_CONFIGURATION;
}
const char *swscale_license(void)
{
#define LICENSE_PREFIX "libswscale license: "
- return LICENSE_PREFIX LIBAV_LICENSE + sizeof(LICENSE_PREFIX) - 1;
+ return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1;
}
#define RET 0xC3 // near return opcode for x86
@@ -102,13 +104,21 @@ static const FormatEntry format_entries[PIX_FMT_NB] = {
[PIX_FMT_RGBA] = { 1, 1 },
[PIX_FMT_ABGR] = { 1, 1 },
[PIX_FMT_BGRA] = { 1, 1 },
+ [PIX_FMT_0RGB] = { 1, 1 },
+ [PIX_FMT_RGB0] = { 1, 1 },
+ [PIX_FMT_0BGR] = { 1, 1 },
+ [PIX_FMT_BGR0] = { 1, 1 },
[PIX_FMT_GRAY16BE] = { 1, 1 },
[PIX_FMT_GRAY16LE] = { 1, 1 },
[PIX_FMT_YUV440P] = { 1, 1 },
[PIX_FMT_YUVJ440P] = { 1, 1 },
[PIX_FMT_YUVA420P] = { 1, 1 },
+ [PIX_FMT_YUVA422P] = { 1, 1 },
+ [PIX_FMT_YUVA444P] = { 1, 1 },
[PIX_FMT_RGB48BE] = { 1, 1 },
[PIX_FMT_RGB48LE] = { 1, 1 },
+ [PIX_FMT_RGBA64BE] = { 1, 0 },
+ [PIX_FMT_RGBA64LE] = { 1, 0 },
[PIX_FMT_RGB565BE] = { 1, 1 },
[PIX_FMT_RGB565LE] = { 1, 1 },
[PIX_FMT_RGB555BE] = { 1, 1 },
@@ -130,23 +140,41 @@ static const FormatEntry format_entries[PIX_FMT_NB] = {
[PIX_FMT_Y400A] = { 1, 0 },
[PIX_FMT_BGR48BE] = { 1, 1 },
[PIX_FMT_BGR48LE] = { 1, 1 },
+ [PIX_FMT_BGRA64BE] = { 0, 0 },
+ [PIX_FMT_BGRA64LE] = { 0, 0 },
[PIX_FMT_YUV420P9BE] = { 1, 1 },
[PIX_FMT_YUV420P9LE] = { 1, 1 },
[PIX_FMT_YUV420P10BE] = { 1, 1 },
[PIX_FMT_YUV420P10LE] = { 1, 1 },
+ [PIX_FMT_YUV420P12BE] = { 1, 1 },
+ [PIX_FMT_YUV420P12LE] = { 1, 1 },
+ [PIX_FMT_YUV420P14BE] = { 1, 1 },
+ [PIX_FMT_YUV420P14LE] = { 1, 1 },
[PIX_FMT_YUV422P9BE] = { 1, 1 },
[PIX_FMT_YUV422P9LE] = { 1, 1 },
[PIX_FMT_YUV422P10BE] = { 1, 1 },
[PIX_FMT_YUV422P10LE] = { 1, 1 },
+ [PIX_FMT_YUV422P12BE] = { 1, 1 },
+ [PIX_FMT_YUV422P12LE] = { 1, 1 },
+ [PIX_FMT_YUV422P14BE] = { 1, 1 },
+ [PIX_FMT_YUV422P14LE] = { 1, 1 },
[PIX_FMT_YUV444P9BE] = { 1, 1 },
[PIX_FMT_YUV444P9LE] = { 1, 1 },
[PIX_FMT_YUV444P10BE] = { 1, 1 },
[PIX_FMT_YUV444P10LE] = { 1, 1 },
+ [PIX_FMT_YUV444P12BE] = { 1, 1 },
+ [PIX_FMT_YUV444P12LE] = { 1, 1 },
+ [PIX_FMT_YUV444P14BE] = { 1, 1 },
+ [PIX_FMT_YUV444P14LE] = { 1, 1 },
[PIX_FMT_GBRP] = { 1, 0 },
[PIX_FMT_GBRP9LE] = { 1, 0 },
[PIX_FMT_GBRP9BE] = { 1, 0 },
[PIX_FMT_GBRP10LE] = { 1, 0 },
[PIX_FMT_GBRP10BE] = { 1, 0 },
+ [PIX_FMT_GBRP12LE] = { 1, 0 },
+ [PIX_FMT_GBRP12BE] = { 1, 0 },
+ [PIX_FMT_GBRP14LE] = { 1, 0 },
+ [PIX_FMT_GBRP14BE] = { 1, 0 },
[PIX_FMT_GBRP16LE] = { 1, 0 },
[PIX_FMT_GBRP16BE] = { 1, 0 },
};
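This table is what backs the two public query functions declared right below, so the newly added 12- and 14-bit planar entries (and the 0RGB/RGB0 family) become visible to callers immediately. For example:

    /* sketch: probing one of the newly listed formats */
    if (sws_isSupportedInput(PIX_FMT_YUV422P12LE) &&
        sws_isSupportedOutput(PIX_FMT_YUV422P12LE)) {
        /* safe to use this format on either side of a context */
    }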
@@ -165,13 +193,12 @@ int sws_isSupportedOutput(enum PixelFormat pix_fmt)
extern const int32_t ff_yuv2rgb_coeffs[8][4];
+#if FF_API_SWS_FORMAT_NAME
const char *sws_format_name(enum PixelFormat format)
{
- if ((unsigned)format < PIX_FMT_NB && av_pix_fmt_descriptors[format].name)
- return av_pix_fmt_descriptors[format].name;
- else
- return "Unknown format";
+ return av_get_pix_fmt_name(format);
}
+#endif
static double getSplineCoeff(double a, double b, double c, double d,
double dist)
@@ -190,7 +217,7 @@ static int initFilter(int16_t **outFilter, int32_t **filterPos,
int *outFilterSize, int xInc, int srcW, int dstW,
int filterAlign, int one, int flags, int cpu_flags,
SwsVector *srcFilter, SwsVector *dstFilter,
- double param[2], int is_horizontal)
+ double param[2])
{
int i;
int filterSize;
@@ -218,7 +245,7 @@ static int initFilter(int16_t **outFilter, int32_t **filterPos,
}
} else if (flags & SWS_POINT) { // lame looking point sampling mode
int i;
- int xDstInSrc;
+ int64_t xDstInSrc;
filterSize = 1;
FF_ALLOC_OR_GOTO(NULL, filter,
dstW * sizeof(*filter) * filterSize, fail);
@@ -234,7 +261,7 @@ static int initFilter(int16_t **outFilter, int32_t **filterPos,
} else if ((xInc <= (1 << 16) && (flags & SWS_AREA)) ||
(flags & SWS_FAST_BILINEAR)) { // bilinear upscale
int i;
- int xDstInSrc;
+ int64_t xDstInSrc;
filterSize = 2;
FF_ALLOC_OR_GOTO(NULL, filter,
dstW * sizeof(*filter) * filterSize, fail);
@@ -247,8 +274,7 @@ static int initFilter(int16_t **outFilter, int32_t **filterPos,
(*filterPos)[i] = xx;
// bilinear upscale / linear interpolate / area averaging
for (j = 0; j < filterSize; j++) {
- int64_t coeff = fone - FFABS((xx << 16) - xDstInSrc) *
- (fone >> 16);
+ int64_t coeff= fone - FFABS(((int64_t)xx<<16) - xDstInSrc)*(fone>>16);
if (coeff < 0)
coeff = 0;
filter[i * filterSize + j] = coeff;
@@ -277,8 +303,7 @@ static int initFilter(int16_t **outFilter, int32_t **filterPos,
else if (flags & SWS_BILINEAR)
sizeFactor = 2;
else {
- sizeFactor = 0; // GCC warning killer
- assert(0);
+ av_assert0(0);
}
if (xInc <= 1 << 16)
@@ -377,8 +402,7 @@ static int initFilter(int16_t **outFilter, int32_t **filterPos,
double p = -2.196152422706632;
coeff = getSplineCoeff(1.0, 0.0, p, -p - 1.0, floatd) * fone;
} else {
- coeff = 0.0; // GCC warning killer
- assert(0);
+ av_assert0(0);
}
filter[i * filterSize + j] = coeff;
@@ -391,13 +415,13 @@ static int initFilter(int16_t **outFilter, int32_t **filterPos,
/* apply src & dst Filter to filter -> filter2
* av_free(filter);
*/
- assert(filterSize > 0);
+ av_assert0(filterSize > 0);
filter2Size = filterSize;
if (srcFilter)
filter2Size += srcFilter->length - 1;
if (dstFilter)
filter2Size += dstFilter->length - 1;
- assert(filter2Size > 0);
+ av_assert0(filter2Size > 0);
FF_ALLOCZ_OR_GOTO(NULL, filter2, filter2Size * dstW * sizeof(*filter2), fail);
for (i = 0; i < dstW; i++) {
@@ -479,9 +503,9 @@ static int initFilter(int16_t **outFilter, int32_t **filterPos,
filterAlign = 1;
}
- assert(minFilterSize > 0);
+ av_assert0(minFilterSize > 0);
filterSize = (minFilterSize + (filterAlign - 1)) & (~(filterAlign - 1));
- assert(filterSize > 0);
+ av_assert0(filterSize > 0);
filter = av_malloc(filterSize * dstW * sizeof(*filter));
if (filterSize >= MAX_FILTER_SIZE * 16 /
((flags & SWS_ACCURATE_RND) ? APCK_SIZE : 16) || !filter)
@@ -509,29 +533,27 @@ static int initFilter(int16_t **outFilter, int32_t **filterPos,
// FIXME try to align filterPos if possible
// fix borders
- if (is_horizontal) {
- for (i = 0; i < dstW; i++) {
- int j;
- if ((*filterPos)[i] < 0) {
- // move filter coefficients left to compensate for filterPos
- for (j = 1; j < filterSize; j++) {
- int left = FFMAX(j + (*filterPos)[i], 0);
- filter[i * filterSize + left] += filter[i * filterSize + j];
- filter[i * filterSize + j] = 0;
- }
- (*filterPos)[i] = 0;
+ for (i = 0; i < dstW; i++) {
+ int j;
+ if ((*filterPos)[i] < 0) {
+ // move filter coefficients left to compensate for filterPos
+ for (j = 1; j < filterSize; j++) {
+ int left = FFMAX(j + (*filterPos)[i], 0);
+ filter[i * filterSize + left] += filter[i * filterSize + j];
+ filter[i * filterSize + j] = 0;
}
+ (*filterPos)[i]= 0;
+ }
- if ((*filterPos)[i] + filterSize > srcW) {
- int shift = (*filterPos)[i] + filterSize - srcW;
- // move filter coefficients right to compensate for filterPos
- for (j = filterSize - 2; j >= 0; j--) {
- int right = FFMIN(j + shift, filterSize - 1);
- filter[i * filterSize + right] += filter[i * filterSize + j];
- filter[i * filterSize + j] = 0;
- }
- (*filterPos)[i] = srcW - filterSize;
+ if ((*filterPos)[i] + filterSize > srcW) {
+ int shift = (*filterPos)[i] + filterSize - srcW;
+ // move filter coefficients right to compensate for filterPos
+ for (j = filterSize - 2; j >= 0; j--) {
+ int right = FFMIN(j + shift, filterSize - 1);
+ filter[i * filterSize + right] += filter[i * filterSize + j];
+ filter[i * filterSize + j] = 0;
}
+ (*filterPos)[i]= srcW - filterSize;
}
}
@@ -780,7 +802,7 @@ int sws_getColorspaceDetails(struct SwsContext *c, int **inv_table,
int *srcRange, int **table, int *dstRange,
int *brightness, int *contrast, int *saturation)
{
- if (isYUV(c->dstFormat) || isGray(c->dstFormat))
+ if (!c || isYUV(c->dstFormat) || isGray(c->dstFormat))
return -1;
*inv_table = c->srcColorspaceTable;
@@ -814,6 +836,17 @@ static int handle_jpeg(enum PixelFormat *format)
}
}
+static int handle_0alpha(enum PixelFormat *format)
+{
+ switch (*format) {
+ case PIX_FMT_0BGR : *format = PIX_FMT_ABGR ; return 1;
+ case PIX_FMT_BGR0 : *format = PIX_FMT_BGRA ; return 4;
+ case PIX_FMT_0RGB : *format = PIX_FMT_ARGB ; return 1;
+ case PIX_FMT_RGB0 : *format = PIX_FMT_RGBA ; return 4;
+ default: return 0;
+ }
+}
+
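handle_0alpha() rewrites the "0"-filler formats to their alpha-carrying twins and returns the 1-based byte position of that filler/alpha channel (1 for the alpha-first layouts, 4 for the alpha-last ones); sws_scale() later stores the value as src0Alpha to know which byte per pixel to force to 0xFF. An illustrative use (not from the patch):

    static void demo_handle_0alpha(void)
    {
        enum PixelFormat f1 = PIX_FMT_RGB0, f2 = PIX_FMT_0BGR;
        int a1 = handle_0alpha(&f1);   /* f1 -> PIX_FMT_RGBA, a1 == 4 */
        int a2 = handle_0alpha(&f2);   /* f2 -> PIX_FMT_ABGR, a2 == 1 */
        /* the byte forced opaque later is pixel[a - 1] of each 4-byte pixel */
        (void)a1; (void)a2;
    }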
SwsContext *sws_alloc_context(void)
{
SwsContext *c = av_mallocz(sizeof(SwsContext));
@@ -827,7 +860,7 @@ SwsContext *sws_alloc_context(void)
av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
SwsFilter *dstFilter)
{
- int i;
+ int i, j;
int usesVFilter, usesHFilter;
int unscaled;
SwsFilter dummyFilter = { NULL, NULL, NULL, NULL };
@@ -835,8 +868,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
int srcH = c->srcH;
int dstW = c->dstW;
int dstH = c->dstH;
- int dst_stride = FFALIGN(dstW * sizeof(int16_t) + 16, 16);
- int dst_stride_px = dst_stride >> 1;
+ int dst_stride = FFALIGN(dstW * sizeof(int16_t) + 66, 16);
int flags, cpu_flags;
enum PixelFormat srcFormat = c->srcFormat;
enum PixelFormat dstFormat = c->dstFormat;
@@ -849,14 +881,25 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
unscaled = (srcW == dstW && srcH == dstH);
+ handle_jpeg(&srcFormat);
+ handle_jpeg(&dstFormat);
+ handle_0alpha(&srcFormat);
+ handle_0alpha(&dstFormat);
+
+ if(srcFormat!=c->srcFormat || dstFormat!=c->dstFormat){
+ av_log(c, AV_LOG_WARNING, "deprecated pixel format used, make sure you did set range correctly\n");
+ c->srcFormat= srcFormat;
+ c->dstFormat= dstFormat;
+ }
+
if (!sws_isSupportedInput(srcFormat)) {
av_log(c, AV_LOG_ERROR, "%s is not supported as input pixel format\n",
- sws_format_name(srcFormat));
+ av_get_pix_fmt_name(srcFormat));
return AVERROR(EINVAL);
}
if (!sws_isSupportedOutput(dstFormat)) {
av_log(c, AV_LOG_ERROR, "%s is not supported as output pixel format\n",
- sws_format_name(dstFormat));
+ av_get_pix_fmt_name(dstFormat));
return AVERROR(EINVAL);
}
@@ -872,8 +915,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
SWS_SPLINE |
SWS_BICUBLIN);
if (!i || (i & (i - 1))) {
- av_log(c, AV_LOG_ERROR,
- "Exactly one scaler algorithm must be chosen\n");
+ av_log(c, AV_LOG_ERROR, "Exactly one scaler algorithm must be chosen, got %X\n", i);
return AVERROR(EINVAL);
}
/* sanity check */
@@ -908,6 +950,14 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
getSubSampleFactors(&c->chrSrcHSubSample, &c->chrSrcVSubSample, srcFormat);
getSubSampleFactors(&c->chrDstHSubSample, &c->chrDstVSubSample, dstFormat);
+
+ if (isAnyRGB(dstFormat) && !(flags&SWS_FULL_CHR_H_INT)) {
+ if (dstW&1) {
+ av_log(c, AV_LOG_DEBUG, "Forcing full internal H chroma due to odd output size\n");
+ flags |= SWS_FULL_CHR_H_INT;
+ c->flags = flags;
+ }
+ }
/* reuse chroma for 2 pixels RGB/BGR unless user wants full
* chroma interpolation */
if (flags & SWS_FULL_CHR_H_INT &&
@@ -918,9 +968,9 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
dstFormat != PIX_FMT_ABGR &&
dstFormat != PIX_FMT_RGB24 &&
dstFormat != PIX_FMT_BGR24) {
- av_log(c, AV_LOG_ERROR,
+ av_log(c, AV_LOG_WARNING,
"full chroma interpolation for destination format '%s' not yet implemented\n",
- sws_format_name(dstFormat));
+ av_get_pix_fmt_name(dstFormat));
flags &= ~SWS_FULL_CHR_H_INT;
c->flags = flags;
}
@@ -948,6 +998,8 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
c->chrDstW = -((-dstW) >> c->chrDstHSubSample);
c->chrDstH = -((-dstH) >> c->chrDstVSubSample);
+ FF_ALLOC_OR_GOTO(c, c->formatConvBuffer, FFALIGN(srcW*2+78, 16) * 2, fail);
+
/* unscaled special cases */
if (unscaled && !usesHFilter && !usesVFilter &&
(c->srcRange == c->dstRange || isAnyRGB(dstFormat))) {
@@ -957,7 +1009,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
if (flags & SWS_PRINT_INFO)
av_log(c, AV_LOG_INFO,
"using unscaled %s -> %s special converter\n",
- sws_format_name(srcFormat), sws_format_name(dstFormat));
+ av_get_pix_fmt_name(srcFormat), av_get_pix_fmt_name(dstFormat));
return 0;
}
}
@@ -968,13 +1020,12 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
c->dstBpc = 1 + av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1;
if (c->dstBpc < 8)
c->dstBpc = 8;
+ if (isAnyRGB(srcFormat) || srcFormat == PIX_FMT_PAL8)
+ c->srcBpc = 16;
if (c->dstBpc == 16)
dst_stride <<= 1;
- FF_ALLOC_OR_GOTO(c, c->formatConvBuffer,
- (FFALIGN(srcW, 16) * 2 * FFALIGN(c->srcBpc, 8) >> 3) + 16,
- fail);
if (HAVE_MMXEXT && HAVE_INLINE_ASM && cpu_flags & AV_CPU_FLAG_MMXEXT &&
- c->srcBpc == 8 && c->dstBpc <= 10) {
+ c->srcBpc == 8 && c->dstBpc <= 14) {
c->canMMX2BeUsed = (dstW >= srcW && (dstW & 31) == 0 &&
(srcW & 15) == 0) ? 1 : 0;
if (!c->canMMX2BeUsed && dstW >= srcW && (srcW & 15) == 0
@@ -983,8 +1034,8 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
av_log(c, AV_LOG_INFO,
"output width is not a multiple of 32 -> no MMX2 scaler\n");
}
- if (usesHFilter)
- c->canMMX2BeUsed = 0;
+ if (usesHFilter || isNBPS(c->srcFormat) || is16BPS(c->srcFormat) || isAnyRGB(c->srcFormat))
+ c->canMMX2BeUsed=0;
} else
c->canMMX2BeUsed = 0;
@@ -1004,7 +1055,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
c->chrXInc += 20;
}
// we don't use the x86 asm scaler if MMX is available
- else if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) {
+ else if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX && c->dstBpc <= 14) {
c->lumXInc = ((int64_t)(srcW - 2) << 16) / (dstW - 2) - 20;
c->chrXInc = ((int64_t)(c->chrSrcW - 2) << 16) / (c->chrDstW - 2) - 20;
}
@@ -1031,17 +1082,25 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
c->chrMmx2FilterCode = av_malloc(c->chrMmx2FilterCodeSize);
#endif
+#ifdef MAP_ANONYMOUS
+ if (c->lumMmx2FilterCode == MAP_FAILED || c->chrMmx2FilterCode == MAP_FAILED)
+#else
if (!c->lumMmx2FilterCode || !c->chrMmx2FilterCode)
+#endif
+ {
+ av_log(c, AV_LOG_ERROR, "Failed to allocate MMX2FilterCode\n");
return AVERROR(ENOMEM);
+ }
+
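The check needs two forms because the two allocators report failure differently: when MAP_ANONYMOUS is available, the executable filter-code buffers come from mmap(), which returns MAP_FAILED rather than NULL on error, while the av_malloc() fallback returns NULL. A minimal sketch of the distinction (size and flags here are illustrative only):

    /* sketch, not from the patch */
    #include <sys/mman.h>
    void *buf = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (buf == MAP_FAILED)        /* a NULL comparison would miss this */
        buf = NULL;               /* treat as an allocation failure */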
FF_ALLOCZ_OR_GOTO(c, c->hLumFilter, (dstW / 8 + 8) * sizeof(int16_t), fail);
FF_ALLOCZ_OR_GOTO(c, c->hChrFilter, (c->chrDstW / 4 + 8) * sizeof(int16_t), fail);
FF_ALLOCZ_OR_GOTO(c, c->hLumFilterPos, (dstW / 2 / 8 + 8) * sizeof(int32_t), fail);
FF_ALLOCZ_OR_GOTO(c, c->hChrFilterPos, (c->chrDstW / 2 / 4 + 8) * sizeof(int32_t), fail);
- initMMX2HScaler(dstW, c->lumXInc, c->lumMmx2FilterCode,
- c->hLumFilter, c->hLumFilterPos, 8);
+ initMMX2HScaler( dstW, c->lumXInc, c->lumMmx2FilterCode,
+ c->hLumFilter, (uint32_t*)c->hLumFilterPos, 8);
initMMX2HScaler(c->chrDstW, c->chrXInc, c->chrMmx2FilterCode,
- c->hChrFilter, c->hChrFilterPos, 4);
+ c->hChrFilter, (uint32_t*)c->hChrFilterPos, 4);
#ifdef MAP_ANONYMOUS
mprotect(c->lumMmx2FilterCode, c->lumMmx2FilterCodeSize, PROT_EXEC | PROT_READ);
@@ -1060,14 +1119,14 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
srcW, dstW, filterAlign, 1 << 14,
(flags & SWS_BICUBLIN) ? (flags | SWS_BICUBIC) : flags,
cpu_flags, srcFilter->lumH, dstFilter->lumH,
- c->param, 1) < 0)
+ c->param) < 0)
goto fail;
if (initFilter(&c->hChrFilter, &c->hChrFilterPos,
&c->hChrFilterSize, c->chrXInc,
c->chrSrcW, c->chrDstW, filterAlign, 1 << 14,
(flags & SWS_BICUBLIN) ? (flags | SWS_BILINEAR) : flags,
cpu_flags, srcFilter->chrH, dstFilter->chrH,
- c->param, 1) < 0)
+ c->param) < 0)
goto fail;
}
} // initialize horizontal stuff
@@ -1083,14 +1142,14 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
c->lumYInc, srcH, dstH, filterAlign, (1 << 12),
(flags & SWS_BICUBLIN) ? (flags | SWS_BICUBIC) : flags,
cpu_flags, srcFilter->lumV, dstFilter->lumV,
- c->param, 0) < 0)
+ c->param) < 0)
goto fail;
if (initFilter(&c->vChrFilter, &c->vChrFilterPos, &c->vChrFilterSize,
c->chrYInc, c->chrSrcH, c->chrDstH,
filterAlign, (1 << 12),
(flags & SWS_BICUBLIN) ? (flags | SWS_BILINEAR) : flags,
cpu_flags, srcFilter->chrV, dstFilter->chrV,
- c->param, 0) < 0)
+ c->param) < 0)
goto fail;
#if HAVE_ALTIVEC
@@ -1147,9 +1206,9 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
dst_stride + 16, fail);
c->lumPixBuf[i] = c->lumPixBuf[i + c->vLumBufSize];
}
- // 64 / (c->dstBpc & ~7) is the same as 16 / sizeof(scaling_intermediate)
- c->uv_off_px = dst_stride_px + 64 / (c->dstBpc & ~7);
- c->uv_off_byte = dst_stride + 16;
+ // 64 / (c->dstBpc & ~7) is the same as 16 / sizeof(scaling_intermediate)
+ c->uv_off = (dst_stride>>1) + 64 / (c->dstBpc &~ 7);
+ c->uv_offx2 = dst_stride + 16;
for (i = 0; i < c->vChrBufSize; i++) {
FF_ALLOC_OR_GOTO(c, c->chrUPixBuf[i + c->vChrBufSize],
dst_stride * 2 + 32, fail);
@@ -1166,9 +1225,15 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
// try to avoid drawing green stuff between the right end and the stride end
for (i = 0; i < c->vChrBufSize; i++)
- memset(c->chrUPixBuf[i], 64, dst_stride * 2 + 1);
+ if(av_pix_fmt_descriptors[c->dstFormat].comp[0].depth_minus1 == 15){
+ av_assert0(c->dstBpc > 14);
+ for(j=0; j<dst_stride/2+1; j++)
+ ((int32_t*)(c->chrUPixBuf[i]))[j] = 1<<18;
+ } else
+ for(j=0; j<dst_stride+1; j++)
+ ((int16_t*)(c->chrUPixBuf[i]))[j] = 1<<14;
- assert(c->chrDstH <= dstH);
+ av_assert0(c->chrDstH <= dstH);
if (flags & SWS_PRINT_INFO) {
if (flags & SWS_FAST_BILINEAR)
@@ -1197,7 +1262,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
av_log(c, AV_LOG_INFO, "ehh flags invalid?! ");
av_log(c, AV_LOG_INFO, "from %s to %s%s ",
- sws_format_name(srcFormat),
+ av_get_pix_fmt_name(srcFormat),
#ifdef DITHER1XBPP
dstFormat == PIX_FMT_BGR555 || dstFormat == PIX_FMT_BGR565 ||
dstFormat == PIX_FMT_RGB444BE || dstFormat == PIX_FMT_RGB444LE ||
@@ -1206,7 +1271,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
#else
"",
#endif
- sws_format_name(dstFormat));
+ av_get_pix_fmt_name(dstFormat));
if (HAVE_MMXEXT && cpu_flags & AV_CPU_FLAG_MMXEXT)
av_log(c, AV_LOG_INFO, "using MMX2\n");
@@ -1253,6 +1318,8 @@ SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat,
c->dstH = dstH;
c->srcRange = handle_jpeg(&srcFormat);
c->dstRange = handle_jpeg(&dstFormat);
+ c->src0Alpha = handle_0alpha(&srcFormat);
+ c->dst0Alpha = handle_0alpha(&dstFormat);
c->srcFormat = srcFormat;
c->dstFormat = dstFormat;
@@ -1641,7 +1708,7 @@ void sws_freeContext(SwsContext *c)
#endif /* HAVE_MMX */
av_freep(&c->yuvTable);
- av_free(c->formatConvBuffer);
+ av_freep(&c->formatConvBuffer);
av_free(c);
}
@@ -1680,10 +1747,12 @@ struct SwsContext *sws_getCachedContext(struct SwsContext *context, int srcW,
context->srcW = srcW;
context->srcH = srcH;
context->srcRange = handle_jpeg(&srcFormat);
+ context->src0Alpha = handle_0alpha(&srcFormat);
context->srcFormat = srcFormat;
context->dstW = dstW;
context->dstH = dstH;
context->dstRange = handle_jpeg(&dstFormat);
+ context->dst0Alpha = handle_0alpha(&dstFormat);
context->dstFormat = dstFormat;
context->flags = flags;
context->param[0] = param[0];
diff --git a/libswscale/version.h b/libswscale/version.h
index acbdf6b012..37dcc96572 100644
--- a/libswscale/version.h
+++ b/libswscale/version.h
@@ -1,18 +1,18 @@
/*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -28,7 +28,7 @@
#define LIBSWSCALE_VERSION_MAJOR 2
#define LIBSWSCALE_VERSION_MINOR 1
-#define LIBSWSCALE_VERSION_MICRO 1
+#define LIBSWSCALE_VERSION_MICRO 101
#define LIBSWSCALE_VERSION_INT AV_VERSION_INT(LIBSWSCALE_VERSION_MAJOR, \
LIBSWSCALE_VERSION_MINOR, \
@@ -52,5 +52,8 @@
#ifndef FF_API_SWS_CPU_CAPS
#define FF_API_SWS_CPU_CAPS (LIBSWSCALE_VERSION_MAJOR < 3)
#endif
+#ifndef FF_API_SWS_FORMAT_NAME
+#define FF_API_SWS_FORMAT_NAME (LIBSWSCALE_VERSION_MAJOR < 3)
+#endif
#endif /* SWSCALE_VERSION_H */
diff --git a/libswscale/x86/Makefile b/libswscale/x86/Makefile
index 5416d48a4c..7d219b458b 100644
--- a/libswscale/x86/Makefile
+++ b/libswscale/x86/Makefile
@@ -1,3 +1,5 @@
+$(SUBDIR)x86/swscale_mmx.o: CFLAGS += $(NOREDZONE_FLAGS)
+
OBJS-$(CONFIG_XMM_CLOBBER_TEST) += x86/w64xmmtest.o
MMX-OBJS += x86/rgb2rgb.o \
diff --git a/libswscale/x86/input.asm b/libswscale/x86/input.asm
index 66d88458bb..c4174ee65e 100644
--- a/libswscale/x86/input.asm
+++ b/libswscale/x86/input.asm
@@ -36,8 +36,8 @@ SECTION_RODATA
%define GV 0xD0E3
%define BV 0xF6E4
-rgb_Yrnd: times 4 dd 0x84000 ; 16.5 << 15
-rgb_UVrnd: times 4 dd 0x404000 ; 128.5 << 15
+rgb_Yrnd: times 4 dd 0x80100 ; (16 << 15) + (1 << 8)
+rgb_UVrnd: times 4 dd 0x400100 ; (128 << 15) + (1 << 8)
bgr_Ycoeff_12x4: times 2 dw BY, GY, 0, BY
bgr_Ycoeff_3x56: times 2 dw RY, 0, GY, RY
rgb_Ycoeff_12x4: times 2 dw RY, GY, 0, RY
@@ -83,7 +83,7 @@ SECTION .text
; %1 = nr. of XMM registers
; %2 = rgb or bgr
%macro RGB24_TO_Y_FN 2-3
-cglobal %2 %+ 24ToY, 3, 3, %1, dst, src, w
+cglobal %2 %+ 24ToY, 6, 6, %1, dst, src, u1, u2, w, u3
%if mmsize == 8
mova m5, [%2_Ycoeff_12x4]
mova m6, [%2_Ycoeff_3x56]
@@ -115,6 +115,7 @@ cglobal %2 %+ 24ToY, 3, 3, %1, dst, src, w
%if ARCH_X86_64
movsxd wq, wd
%endif
+ add wq, wq
add dstq, wq
neg wq
%if notcpuflag(ssse3)
@@ -158,12 +159,11 @@ cglobal %2 %+ 24ToY, 3, 3, %1, dst, src, w
paddd m2, m3 ; (dword) { Bx*BY + Gx*GY + Rx*RY }[4-7]
paddd m0, m4 ; += rgb_Yrnd, i.e. (dword) { Y[0-3] }
paddd m2, m4 ; += rgb_Yrnd, i.e. (dword) { Y[4-7] }
- psrad m0, 15
- psrad m2, 15
+ psrad m0, 9
+ psrad m2, 9
packssdw m0, m2 ; (word) { Y[0-7] }
- packuswb m0, m0 ; (byte) { Y[0-7] }
- movh [dstq+wq], m0
- add wq, mmsize / 2
+ mova [dstq+wq], m0
+ add wq, mmsize
jl .loop
REP_RET
%endif ; (ARCH_X86_64 && %0 == 3) || mmsize == 8
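With this hunk the RGB24-to-Y converter stops packing bytes: the shift drops from 15 to 9, the results stay as 16-bit words (mova, with the width counter doubled via add wq, wq), and the rounding constant folds in half of the new output LSB, since 0x80100 == (16 << 15) + (1 << 8). In scalar form one output sample is (a sketch, not from the patch; ry/gy/by stand for the RY/GY/BY fixed-point coefficients defined at the top of the file):

    static inline int16_t rgb_to_y16(int r, int g, int b, int ry, int gy, int by)
    {
        const int rnd = (16 << 15) + (1 << 8);   /* rgb_Yrnd == 0x80100 */
        return (int16_t)((ry * r + gy * g + by * b + rnd) >> 9);
    }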
@@ -172,7 +172,7 @@ cglobal %2 %+ 24ToY, 3, 3, %1, dst, src, w
; %1 = nr. of XMM registers
; %2 = rgb or bgr
%macro RGB24_TO_UV_FN 2-3
-cglobal %2 %+ 24ToUV, 3, 4, %1, dstU, dstV, src, w
+cglobal %2 %+ 24ToUV, 7, 7, %1, dstU, dstV, u1, src, u2, w, u3
%if ARCH_X86_64
mova m8, [%2_Ucoeff_12x4]
mova m9, [%2_Ucoeff_3x56]
@@ -203,10 +203,11 @@ cglobal %2 %+ 24ToUV, 3, 4, %1, dstU, dstV, src, w
%endif ; x86-32/64
%endif ; cpuflag(ssse3)
%if ARCH_X86_64
- movsxd wq, dword r4m
+ movsxd wq, dword r5m
%else ; x86-32
- mov wq, r4m
+ mov wq, r5m
%endif
+ add wq, wq
add dstUq, wq
add dstVq, wq
neg wq
@@ -264,23 +265,20 @@ cglobal %2 %+ 24ToUV, 3, 4, %1, dstU, dstV, src, w
paddd m2, m6 ; += rgb_UVrnd, i.e. (dword) { V[0-3] }
paddd m1, m6 ; += rgb_UVrnd, i.e. (dword) { U[4-7] }
paddd m4, m6 ; += rgb_UVrnd, i.e. (dword) { V[4-7] }
- psrad m0, 15
- psrad m2, 15
- psrad m1, 15
- psrad m4, 15
+ psrad m0, 9
+ psrad m2, 9
+ psrad m1, 9
+ psrad m4, 9
packssdw m0, m1 ; (word) { U[0-7] }
packssdw m2, m4 ; (word) { V[0-7] }
%if mmsize == 8
- packuswb m0, m0 ; (byte) { U[0-3] }
- packuswb m2, m2 ; (byte) { V[0-3] }
- movh [dstUq+wq], m0
- movh [dstVq+wq], m2
+ mova [dstUq+wq], m0
+ mova [dstVq+wq], m2
%else ; mmsize == 16
- packuswb m0, m2 ; (byte) { U[0-7], V[0-7] }
- movh [dstUq+wq], m0
- movhps [dstVq+wq], m0
+ mova [dstUq+wq], m0
+ mova [dstVq+wq], m2
%endif ; mmsize == 8/16
- add wq, mmsize / 2
+ add wq, mmsize
jl .loop
REP_RET
%endif ; ARCH_X86_64 && %0 == 3
@@ -306,13 +304,15 @@ RGB24_FUNCS 10, 12
INIT_XMM ssse3
RGB24_FUNCS 11, 13
+%if HAVE_AVX
INIT_XMM avx
RGB24_FUNCS 11, 13
+%endif
; %1 = nr. of XMM registers
; %2-5 = rgba, bgra, argb or abgr (in individual characters)
%macro RGB32_TO_Y_FN 5-6
-cglobal %2%3%4%5 %+ ToY, 3, 3, %1, dst, src, w
+cglobal %2%3%4%5 %+ ToY, 6, 6, %1, dst, src, u1, u2, w, u3
mova m5, [rgba_Ycoeff_%2%4]
mova m6, [rgba_Ycoeff_%3%5]
%if %0 == 6
@@ -323,6 +323,7 @@ cglobal %2%3%4%5 %+ ToY, 3, 3, %1, dst, src, w
movsxd wq, wd
%endif
lea srcq, [srcq+wq*4]
+ add wq, wq
add dstq, wq
neg wq
mova m4, [rgb_Yrnd]
@@ -330,8 +331,8 @@ cglobal %2%3%4%5 %+ ToY, 3, 3, %1, dst, src, w
psrlw m7, 8 ; (word) { 0x00ff } x4
.loop:
; FIXME check alignment and use mova
- movu m0, [srcq+wq*4+0] ; (byte) { Bx, Gx, Rx, xx }[0-3]
- movu m2, [srcq+wq*4+mmsize] ; (byte) { Bx, Gx, Rx, xx }[4-7]
+ movu m0, [srcq+wq*2+0] ; (byte) { Bx, Gx, Rx, xx }[0-3]
+ movu m2, [srcq+wq*2+mmsize] ; (byte) { Bx, Gx, Rx, xx }[4-7]
DEINTB 1, 0, 3, 2, 7 ; (word) { Gx, xx (m0/m2) or Bx, Rx (m1/m3) }[0-3]/[4-7]
pmaddwd m1, m5 ; (dword) { Bx*BY + Rx*RY }[0-3]
pmaddwd m0, m6 ; (dword) { Gx*GY }[0-3]
@@ -341,12 +342,11 @@ cglobal %2%3%4%5 %+ ToY, 3, 3, %1, dst, src, w
paddd m2, m4 ; += rgb_Yrnd
paddd m0, m1 ; (dword) { Y[0-3] }
paddd m2, m3 ; (dword) { Y[4-7] }
- psrad m0, 15
- psrad m2, 15
+ psrad m0, 9
+ psrad m2, 9
packssdw m0, m2 ; (word) { Y[0-7] }
- packuswb m0, m0 ; (byte) { Y[0-7] }
- movh [dstq+wq], m0
- add wq, mmsize / 2
+ mova [dstq+wq], m0
+ add wq, mmsize
jl .loop
REP_RET
%endif ; %0 == 3
@@ -355,7 +355,7 @@ cglobal %2%3%4%5 %+ ToY, 3, 3, %1, dst, src, w
; %1 = nr. of XMM registers
; %2-5 = rgba, bgra, argb or abgr (in individual characters)
%macro RGB32_TO_UV_FN 5-6
-cglobal %2%3%4%5 %+ ToUV, 3, 4, %1, dstU, dstV, src, w
+cglobal %2%3%4%5 %+ ToUV, 7, 7, %1, dstU, dstV, u1, src, u2, w, u3
%if ARCH_X86_64
mova m8, [rgba_Ucoeff_%2%4]
mova m9, [rgba_Ucoeff_%3%5]
@@ -376,21 +376,22 @@ cglobal %2%3%4%5 %+ ToUV, 3, 4, %1, dstU, dstV, src, w
%else ; ARCH_X86_64 && %0 == 6
.body:
%if ARCH_X86_64
- movsxd wq, dword r4m
+ movsxd wq, dword r5m
%else ; x86-32
- mov wq, r4m
+ mov wq, r5m
%endif
+ add wq, wq
add dstUq, wq
add dstVq, wq
- lea srcq, [srcq+wq*4]
+ lea srcq, [srcq+wq*2]
neg wq
pcmpeqb m7, m7
psrlw m7, 8 ; (word) { 0x00ff } x4
mova m6, [rgb_UVrnd]
.loop:
; FIXME check alignment and use mova
- movu m0, [srcq+wq*4+0] ; (byte) { Bx, Gx, Rx, xx }[0-3]
- movu m4, [srcq+wq*4+mmsize] ; (byte) { Bx, Gx, Rx, xx }[4-7]
+ movu m0, [srcq+wq*2+0] ; (byte) { Bx, Gx, Rx, xx }[0-3]
+ movu m4, [srcq+wq*2+mmsize] ; (byte) { Bx, Gx, Rx, xx }[4-7]
DEINTB 1, 0, 5, 4, 7 ; (word) { Gx, xx (m0/m4) or Bx, Rx (m1/m5) }[0-3]/[4-7]
pmaddwd m3, m1, coeffV1 ; (dword) { Bx*BV + Rx*RV }[0-3]
pmaddwd m2, m0, coeffV2 ; (dword) { Gx*GV }[0-3]
@@ -406,25 +407,22 @@ cglobal %2%3%4%5 %+ ToUV, 3, 4, %1, dstU, dstV, src, w
pmaddwd m4, coeffU2 ; (dword) { Gx*GU }[4-7]
paddd m3, m6 ; += rgb_UVrnd
paddd m5, m6 ; += rgb_UVrnd
- psrad m0, 15
+ psrad m0, 9
paddd m1, m3 ; (dword) { V[4-7] }
paddd m4, m5 ; (dword) { U[4-7] }
- psrad m2, 15
- psrad m4, 15
- psrad m1, 15
+ psrad m2, 9
+ psrad m4, 9
+ psrad m1, 9
packssdw m0, m4 ; (word) { U[0-7] }
packssdw m2, m1 ; (word) { V[0-7] }
%if mmsize == 8
- packuswb m0, m0 ; (byte) { U[0-7] }
- packuswb m2, m2 ; (byte) { V[0-7] }
- movh [dstUq+wq], m0
- movh [dstVq+wq], m2
+ mova [dstUq+wq], m0
+ mova [dstVq+wq], m2
%else ; mmsize == 16
- packuswb m0, m2 ; (byte) { U[0-7], V[0-7] }
- movh [dstUq+wq], m0
- movhps [dstVq+wq], m0
+ mova [dstUq+wq], m0
+ mova [dstVq+wq], m2
%endif ; mmsize == 8/16
- add wq, mmsize / 2
+ add wq, mmsize
jl .loop
REP_RET
%endif ; ARCH_X86_64 && %0 == 3
@@ -452,8 +450,10 @@ RGB32_FUNCS 0, 0
INIT_XMM sse2
RGB32_FUNCS 8, 12
+%if HAVE_AVX
INIT_XMM avx
RGB32_FUNCS 8, 12
+%endif
;-----------------------------------------------------------------------------
; YUYV/UYVY/NV12/NV21 packed pixel shuffling.
@@ -490,7 +490,7 @@ RGB32_FUNCS 8, 12
; will be the same (i.e. YUYV+AVX), and thus we don't need to
; split the loop in an aligned and unaligned case
%macro YUYV_TO_Y_FN 2-3
-cglobal %2ToY, 3, 3, %1, dst, src, w
+cglobal %2ToY, 5, 5, %1, dst, unused0, unused1, src, w
%if ARCH_X86_64
movsxd wq, wd
%endif
@@ -560,11 +560,11 @@ cglobal %2ToY, 3, 3, %1, dst, src, w
; will be the same (i.e. UYVY+AVX), and thus we don't need to
; split the loop in an aligned and unaligned case
%macro YUYV_TO_UV_FN 2-3
-cglobal %2ToUV, 3, 4, %1, dstU, dstV, src, w
+cglobal %2ToUV, 4, 5, %1, dstU, dstV, unused, src, w
%if ARCH_X86_64
- movsxd wq, dword r4m
+ movsxd wq, dword r5m
%else ; x86-32
- mov wq, r4m
+ mov wq, r5m
%endif
add dstUq, wq
add dstVq, wq
@@ -594,8 +594,8 @@ cglobal %2ToUV, 3, 4, %1, dstU, dstV, src, w
.loop_%1:
mov%1 m0, [srcq+wq*2] ; (byte) { U0, V0, U1, V1, ... }
mov%1 m1, [srcq+wq*2+mmsize] ; (byte) { U8, V8, U9, V9, ... }
- pand m2, m0, m4 ; (word) { U0, U1, ..., U7 }
- pand m3, m1, m4 ; (word) { U8, U9, ..., U15 }
+ pand m2, m0, m5 ; (word) { U0, U1, ..., U7 }
+ pand m3, m1, m5 ; (word) { U8, U9, ..., U15 }
psrlw m0, 8 ; (word) { V0, V1, ..., V7 }
psrlw m1, 8 ; (word) { V8, V9, ..., V15 }
packuswb m2, m3 ; (byte) { U0, ..., U15 }
@@ -615,11 +615,11 @@ cglobal %2ToUV, 3, 4, %1, dstU, dstV, src, w
; %1 = nr. of XMM registers
; %2 = nv12 or nv21
%macro NVXX_TO_UV_FN 2
-cglobal %2ToUV, 3, 4, %1, dstU, dstV, src, w
+cglobal %2ToUV, 4, 5, %1, dstU, dstV, unused, src, w
%if ARCH_X86_64
- movsxd wq, dword r4m
+ movsxd wq, dword r5m
%else ; x86-32
- mov wq, r4m
+ mov wq, r5m
%endif
add dstUq, wq
add dstVq, wq
@@ -627,8 +627,8 @@ cglobal %2ToUV, 3, 4, %1, dstU, dstV, src, w
test srcq, 15
%endif
lea srcq, [srcq+wq*2]
- pcmpeqb m4, m4 ; (byte) { 0xff } x 16
- psrlw m4, 8 ; (word) { 0x00ff } x 8
+ pcmpeqb m5, m5 ; (byte) { 0xff } x 16
+ psrlw m5, 8 ; (word) { 0x00ff } x 8
%if mmsize == 16
jnz .loop_u_start
neg wq
@@ -660,6 +660,7 @@ YUYV_TO_UV_FN 3, uyvy
NVXX_TO_UV_FN 5, nv12
NVXX_TO_UV_FN 5, nv21
+%if HAVE_AVX
INIT_XMM avx
; in theory, we could write a yuy2-to-y using vpand (i.e. AVX), but
; that's not faster in practice
@@ -667,3 +668,4 @@ YUYV_TO_UV_FN 3, yuyv
YUYV_TO_UV_FN 3, uyvy, 1
NVXX_TO_UV_FN 5, nv12
NVXX_TO_UV_FN 5, nv21
+%endif
diff --git a/libswscale/x86/output.asm b/libswscale/x86/output.asm
index 9b0b01253a..01a946f7b3 100644
--- a/libswscale/x86/output.asm
+++ b/libswscale/x86/output.asm
@@ -267,10 +267,12 @@ yuv2planeX_fn 9, 7, 5
yuv2planeX_fn 10, 7, 5
yuv2planeX_fn 16, 8, 5
+%if HAVE_AVX
INIT_XMM avx
yuv2planeX_fn 8, 10, 7
yuv2planeX_fn 9, 7, 5
yuv2planeX_fn 10, 7, 5
+%endif
; %1=output-bpc, %2=alignment (u/a)
%macro yuv2plane1_mainloop 2
@@ -405,8 +407,10 @@ yuv2plane1_fn 16, 6, 3
INIT_XMM sse4
yuv2plane1_fn 16, 5, 3
+%if HAVE_AVX
INIT_XMM avx
yuv2plane1_fn 8, 5, 5
yuv2plane1_fn 9, 5, 3
yuv2plane1_fn 10, 5, 3
yuv2plane1_fn 16, 5, 3
+%endif
diff --git a/libswscale/x86/rgb2rgb.c b/libswscale/x86/rgb2rgb.c
index f201281fac..24b284eec7 100644
--- a/libswscale/x86/rgb2rgb.c
+++ b/libswscale/x86/rgb2rgb.c
@@ -6,20 +6,20 @@
* Written by Nick Kurshev.
* palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -71,6 +71,9 @@ DECLARE_ASM_CONST(8, uint64_t, blue_16mask) = 0x0000001f0000001fULL;
DECLARE_ASM_CONST(8, uint64_t, red_15mask) = 0x00007c0000007c00ULL;
DECLARE_ASM_CONST(8, uint64_t, green_15mask) = 0x000003e0000003e0ULL;
DECLARE_ASM_CONST(8, uint64_t, blue_15mask) = 0x0000001f0000001fULL;
+DECLARE_ASM_CONST(8, uint64_t, mul15_mid) = 0x4200420042004200ULL;
+DECLARE_ASM_CONST(8, uint64_t, mul15_hi) = 0x0210021002100210ULL;
+DECLARE_ASM_CONST(8, uint64_t, mul16_mid) = 0x2080208020802080ULL;
#define RGB2YUV_SHIFT 8
#define BY ((int)( 0.098*(1<<RGB2YUV_SHIFT)+0.5))
diff --git a/libswscale/x86/rgb2rgb_template.c b/libswscale/x86/rgb2rgb_template.c
index 7a641e1814..7e5ffdf8d1 100644
--- a/libswscale/x86/rgb2rgb_template.c
+++ b/libswscale/x86/rgb2rgb_template.c
@@ -7,20 +7,20 @@
* palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
* lot of big-endian byte order fixes by Alex Beregszaszi
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -713,27 +713,6 @@ static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, int src_s
}
}
-/*
- I use less accurate approximation here by simply left-shifting the input
- value and filling the low order bits with zeroes. This method improves PNG
- compression but this scheme cannot reproduce white exactly, since it does
- not generate an all-ones maximum value; the net effect is to darken the
- image slightly.
-
- The better method should be "left bit replication":
-
- 4 3 2 1 0
- ---------
- 1 1 0 1 1
-
- 7 6 5 4 3  2 1 0
- ----------------
- 1 1 0 1 1  1 1 0
- |=======|  |===|
-     |      leftmost bits repeated to fill open bits
-     |
-     original bits
-*/
static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int src_size)
{
const uint16_t *end;
@@ -752,9 +731,10 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr
"pand %2, %%mm0 \n\t"
"pand %3, %%mm1 \n\t"
"pand %4, %%mm2 \n\t"
- "psllq $3, %%mm0 \n\t"
- "psrlq $2, %%mm1 \n\t"
- "psrlq $7, %%mm2 \n\t"
+ "psllq $5, %%mm0 \n\t"
+ "pmulhw "MANGLE(mul15_mid)", %%mm0 \n\t"
+ "pmulhw "MANGLE(mul15_mid)", %%mm1 \n\t"
+ "pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t"
"movq %%mm0, %%mm3 \n\t"
"movq %%mm1, %%mm4 \n\t"
"movq %%mm2, %%mm5 \n\t"
@@ -782,9 +762,10 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr
"pand %2, %%mm0 \n\t"
"pand %3, %%mm1 \n\t"
"pand %4, %%mm2 \n\t"
- "psllq $3, %%mm0 \n\t"
- "psrlq $2, %%mm1 \n\t"
- "psrlq $7, %%mm2 \n\t"
+ "psllq $5, %%mm0 \n\t"
+ "pmulhw "MANGLE(mul15_mid)", %%mm0 \n\t"
+ "pmulhw "MANGLE(mul15_mid)", %%mm1 \n\t"
+ "pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t"
"movq %%mm0, %%mm3 \n\t"
"movq %%mm1, %%mm4 \n\t"
"movq %%mm2, %%mm5 \n\t"
@@ -830,9 +811,9 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr
while (s < end) {
register uint16_t bgr;
bgr = *s++;
- *d++ = (bgr&0x1F)<<3;
- *d++ = (bgr&0x3E0)>>2;
- *d++ = (bgr&0x7C00)>>7;
+ *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
+ *d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7);
+ *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
}
}
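Both the scalar tail above and the new MMX constants implement "left bit replication": the top bits of a 5- or 6-bit field are repeated into the low bits of the 8-bit result, so 0x1F expands to 0xFF and white stays white (the old plain shifts could only reach 0xF8). In the SIMD paths this is done with one pmulhw per component: once a 5-bit value v sits at bit 5, ((v << 5) * 0x4200) >> 16 equals (v << 3) | (v >> 2), and the other constants play the same role for the other field positions (all operands stay below 0x8000, so the signed pmulhw behaves like an unsigned multiply here). A standalone check of those equivalences (illustrative only, not part of the patch):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        unsigned v;
        for (v = 0; v < 32; v++) {                          /* 5-bit fields */
            unsigned ref = (v << 3) | (v >> 2);
            assert((((v <<  5) * 0x4200u) >> 16) == ref);   /* mul15_mid */
            assert((((v << 10) * 0x0210u) >> 16) == ref);   /* mul15_hi  */
        }
        for (v = 0; v < 64; v++) {                          /* 6-bit green */
            unsigned ref = (v << 2) | (v >> 4);
            assert((((v <<  5) * 0x2080u) >> 16) == ref);   /* mul16_mid */
        }
        return 0;
    }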
@@ -854,9 +835,11 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
"pand %2, %%mm0 \n\t"
"pand %3, %%mm1 \n\t"
"pand %4, %%mm2 \n\t"
- "psllq $3, %%mm0 \n\t"
- "psrlq $3, %%mm1 \n\t"
- "psrlq $8, %%mm2 \n\t"
+ "psllq $5, %%mm0 \n\t"
+ "psrlq $1, %%mm2 \n\t"
+ "pmulhw "MANGLE(mul15_mid)", %%mm0 \n\t"
+ "pmulhw "MANGLE(mul16_mid)", %%mm1 \n\t"
+ "pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t"
"movq %%mm0, %%mm3 \n\t"
"movq %%mm1, %%mm4 \n\t"
"movq %%mm2, %%mm5 \n\t"
@@ -884,9 +867,11 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
"pand %2, %%mm0 \n\t"
"pand %3, %%mm1 \n\t"
"pand %4, %%mm2 \n\t"
- "psllq $3, %%mm0 \n\t"
- "psrlq $3, %%mm1 \n\t"
- "psrlq $8, %%mm2 \n\t"
+ "psllq $5, %%mm0 \n\t"
+ "psrlq $1, %%mm2 \n\t"
+ "pmulhw "MANGLE(mul15_mid)", %%mm0 \n\t"
+ "pmulhw "MANGLE(mul16_mid)", %%mm1 \n\t"
+ "pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t"
"movq %%mm0, %%mm3 \n\t"
"movq %%mm1, %%mm4 \n\t"
"movq %%mm2, %%mm5 \n\t"
@@ -931,9 +916,9 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
while (s < end) {
register uint16_t bgr;
bgr = *s++;
- *d++ = (bgr&0x1F)<<3;
- *d++ = (bgr&0x7E0)>>3;
- *d++ = (bgr&0xF800)>>8;
+ *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
+ *d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9);
+ *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
}
}
@@ -976,11 +961,12 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_s
"pand %2, %%mm0 \n\t"
"pand %3, %%mm1 \n\t"
"pand %4, %%mm2 \n\t"
- "psllq $3, %%mm0 \n\t"
- "psrlq $2, %%mm1 \n\t"
- "psrlq $7, %%mm2 \n\t"
+ "psllq $5, %%mm0 \n\t"
+ "pmulhw %5, %%mm0 \n\t"
+ "pmulhw %5, %%mm1 \n\t"
+ "pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t"
PACK_RGB32
- ::"r"(d),"r"(s),"m"(mask15b),"m"(mask15g),"m"(mask15r)
+ ::"r"(d),"r"(s),"m"(mask15b),"m"(mask15g),"m"(mask15r) ,"m"(mul15_mid)
:"memory");
d += 16;
s += 4;
@@ -990,9 +976,9 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_s
while (s < end) {
register uint16_t bgr;
bgr = *s++;
- *d++ = (bgr&0x1F)<<3;
- *d++ = (bgr&0x3E0)>>2;
- *d++ = (bgr&0x7C00)>>7;
+ *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
+ *d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7);
+ *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
*d++ = 255;
}
}
@@ -1017,11 +1003,13 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, int src_s
"pand %2, %%mm0 \n\t"
"pand %3, %%mm1 \n\t"
"pand %4, %%mm2 \n\t"
- "psllq $3, %%mm0 \n\t"
- "psrlq $3, %%mm1 \n\t"
- "psrlq $8, %%mm2 \n\t"
+ "psllq $5, %%mm0 \n\t"
+ "psrlq $1, %%mm2 \n\t"
+ "pmulhw %5, %%mm0 \n\t"
+ "pmulhw "MANGLE(mul16_mid)", %%mm1 \n\t"
+ "pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t"
PACK_RGB32
- ::"r"(d),"r"(s),"m"(mask16b),"m"(mask16g),"m"(mask16r)
+ ::"r"(d),"r"(s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mul15_mid)
:"memory");
d += 16;
s += 4;
@@ -1031,9 +1019,9 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, int src_s
while (s < end) {
register uint16_t bgr;
bgr = *s++;
- *d++ = (bgr&0x1F)<<3;
- *d++ = (bgr&0x7E0)>>3;
- *d++ = (bgr&0xF800)>>8;
+ *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
+ *d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9);
+ *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
*d++ = 255;
}
}
diff --git a/libswscale/x86/scale.asm b/libswscale/x86/scale.asm
index d56e253afa..6282ab2c69 100644
--- a/libswscale/x86/scale.asm
+++ b/libswscale/x86/scale.asm
@@ -408,11 +408,15 @@ SCALE_FUNC %1, %2, X, X8, 7, %3
SCALE_FUNCS 8, 15, %1
SCALE_FUNCS 9, 15, %2
SCALE_FUNCS 10, 15, %2
+SCALE_FUNCS 12, 15, %2
+SCALE_FUNCS 14, 15, %2
SCALE_FUNCS 16, 15, %3
%endif ; !sse4
SCALE_FUNCS 8, 19, %1
SCALE_FUNCS 9, 19, %2
SCALE_FUNCS 10, 19, %2
+SCALE_FUNCS 12, 19, %2
+SCALE_FUNCS 14, 19, %2
SCALE_FUNCS 16, 19, %3
%endmacro
diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c
index 9683c0cedd..67cda51ca5 100644
--- a/libswscale/x86/swscale.c
+++ b/libswscale/x86/swscale.c
@@ -1,20 +1,20 @@
/*
- * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -23,6 +23,7 @@
#include "libswscale/swscale.h"
#include "libswscale/swscale_internal.h"
#include "libavutil/attributes.h"
+#include "libavutil/avassert.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/x86/asm.h"
#include "libavutil/cpu.h"
@@ -69,6 +70,7 @@ DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YOffset) = 0x1010101010101010ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_w1111) = 0x0001000100010001ULL;
+
//MMX versions
#if HAVE_MMX
#undef RENAME
@@ -116,9 +118,9 @@ void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufI
c->greenDither= ff_dither4[dstY&1];
c->redDither= ff_dither8[(dstY+1)&1];
if (dstY < dstH - 2) {
- const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
- const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
- const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
+ const int16_t **lumSrcPtr= (const int16_t **)(void*) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
+ const int16_t **chrUSrcPtr= (const int16_t **)(void*) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
+ const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **)(void*) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
int i;
if (firstLumSrcY < 0 || firstLumSrcY + vLumFilterSize > c->srcH) {
@@ -202,6 +204,67 @@ void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufI
}
}
+#if HAVE_MMX2
+static void yuv2yuvX_sse3(const int16_t *filter, int filterSize,
+ const int16_t **src, uint8_t *dest, int dstW,
+ const uint8_t *dither, int offset)
+{
+ if(((int)dest) & 15){
+ return yuv2yuvX_MMX2(filter, filterSize, src, dest, dstW, dither, offset);
+ }
+ if (offset) {
+ __asm__ volatile("movq (%0), %%xmm3\n\t"
+ "movdqa %%xmm3, %%xmm4\n\t"
+ "psrlq $24, %%xmm3\n\t"
+ "psllq $40, %%xmm4\n\t"
+ "por %%xmm4, %%xmm3\n\t"
+ :: "r"(dither)
+ );
+ } else {
+ __asm__ volatile("movq (%0), %%xmm3\n\t"
+ :: "r"(dither)
+ );
+ }
+ __asm__ volatile(
+ "pxor %%xmm0, %%xmm0\n\t"
+ "punpcklbw %%xmm0, %%xmm3\n\t"
+ "psraw $4, %%xmm3\n\t"
+ "movdqa %%xmm3, %%xmm4\n\t"
+ "movdqa %%xmm3, %%xmm7\n\t"
+ "movl %3, %%ecx\n\t"
+ "mov %0, %%"REG_d" \n\t"\
+ "mov (%%"REG_d"), %%"REG_S" \n\t"\
+ ".p2align 4 \n\t" /* FIXME Unroll? */\
+ "1: \n\t"\
+ "movddup 8(%%"REG_d"), %%xmm0 \n\t" /* filterCoeff */\
+ "movdqa (%%"REG_S", %%"REG_c", 2), %%xmm2 \n\t" /* srcData */\
+ "movdqa 16(%%"REG_S", %%"REG_c", 2), %%xmm5 \n\t" /* srcData */\
+ "add $16, %%"REG_d" \n\t"\
+ "mov (%%"REG_d"), %%"REG_S" \n\t"\
+ "test %%"REG_S", %%"REG_S" \n\t"\
+ "pmulhw %%xmm0, %%xmm2 \n\t"\
+ "pmulhw %%xmm0, %%xmm5 \n\t"\
+ "paddw %%xmm2, %%xmm3 \n\t"\
+ "paddw %%xmm5, %%xmm4 \n\t"\
+ " jnz 1b \n\t"\
+ "psraw $3, %%xmm3 \n\t"\
+ "psraw $3, %%xmm4 \n\t"\
+ "packuswb %%xmm4, %%xmm3 \n\t"
+ "movntdq %%xmm3, (%1, %%"REG_c")\n\t"
+ "add $16, %%"REG_c" \n\t"\
+ "cmp %2, %%"REG_c" \n\t"\
+ "movdqa %%xmm7, %%xmm3\n\t"
+ "movdqa %%xmm7, %%xmm4\n\t"
+ "mov %0, %%"REG_d" \n\t"\
+ "mov (%%"REG_d"), %%"REG_S" \n\t"\
+ "jb 1b \n\t"\
+ :: "g" (filter),
+ "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset)
+ : "%"REG_d, "%"REG_S, "%"REG_c
+ );
+}
+#endif
+
#endif /* HAVE_INLINE_ASM */
#define SCALE_FUNC(filter_n, from_bpc, to_bpc, opt) \
@@ -215,10 +278,14 @@ extern void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt(
SCALE_FUNC(filter_n, 8, 15, opt); \
SCALE_FUNC(filter_n, 9, 15, opt); \
SCALE_FUNC(filter_n, 10, 15, opt); \
+ SCALE_FUNC(filter_n, 12, 15, opt); \
+ SCALE_FUNC(filter_n, 14, 15, opt); \
SCALE_FUNC(filter_n, 16, 15, opt); \
SCALE_FUNC(filter_n, 8, 19, opt); \
SCALE_FUNC(filter_n, 9, 19, opt); \
SCALE_FUNC(filter_n, 10, 19, opt); \
+ SCALE_FUNC(filter_n, 12, 19, opt); \
+ SCALE_FUNC(filter_n, 14, 19, opt); \
SCALE_FUNC(filter_n, 16, 19, opt)
#define SCALE_FUNCS_MMX(opt) \
@@ -311,22 +378,33 @@ av_cold void ff_sws_init_swScale_mmx(SwsContext *c)
#if HAVE_MMXEXT
if (cpu_flags & AV_CPU_FLAG_MMXEXT)
sws_init_swScale_MMX2(c);
+ if (cpu_flags & AV_CPU_FLAG_SSE3){
+ if(c->use_mmx_vfilter && !(c->flags & SWS_ACCURATE_RND))
+ c->yuv2planeX = yuv2yuvX_sse3;
+ }
#endif
#endif /* HAVE_INLINE_ASM */
#if HAVE_YASM
#define ASSIGN_SCALE_FUNC2(hscalefn, filtersize, opt1, opt2) do { \
if (c->srcBpc == 8) { \
- hscalefn = c->dstBpc <= 10 ? ff_hscale8to15_ ## filtersize ## _ ## opt2 : \
+ hscalefn = c->dstBpc <= 14 ? ff_hscale8to15_ ## filtersize ## _ ## opt2 : \
ff_hscale8to19_ ## filtersize ## _ ## opt1; \
} else if (c->srcBpc == 9) { \
- hscalefn = c->dstBpc <= 10 ? ff_hscale9to15_ ## filtersize ## _ ## opt2 : \
+ hscalefn = c->dstBpc <= 14 ? ff_hscale9to15_ ## filtersize ## _ ## opt2 : \
ff_hscale9to19_ ## filtersize ## _ ## opt1; \
} else if (c->srcBpc == 10) { \
- hscalefn = c->dstBpc <= 10 ? ff_hscale10to15_ ## filtersize ## _ ## opt2 : \
+ hscalefn = c->dstBpc <= 14 ? ff_hscale10to15_ ## filtersize ## _ ## opt2 : \
ff_hscale10to19_ ## filtersize ## _ ## opt1; \
- } else /* c->srcBpc == 16 */ { \
- hscalefn = c->dstBpc <= 10 ? ff_hscale16to15_ ## filtersize ## _ ## opt2 : \
+ } else if (c->srcBpc == 12) { \
+ hscalefn = c->dstBpc <= 14 ? ff_hscale12to15_ ## filtersize ## _ ## opt2 : \
+ ff_hscale12to19_ ## filtersize ## _ ## opt1; \
+ } else if (c->srcBpc == 14 || ((c->srcFormat==PIX_FMT_PAL8||isAnyRGB(c->srcFormat)) && av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1<15)) { \
+ hscalefn = c->dstBpc <= 14 ? ff_hscale14to15_ ## filtersize ## _ ## opt2 : \
+ ff_hscale14to19_ ## filtersize ## _ ## opt1; \
+ } else { /* c->srcBpc == 16 */ \
+ av_assert0(c->srcBpc == 16);\
+ hscalefn = c->dstBpc <= 14 ? ff_hscale16to15_ ## filtersize ## _ ## opt2 : \
ff_hscale16to19_ ## filtersize ## _ ## opt1; \
} \
} while (0)
@@ -341,14 +419,15 @@ switch(c->dstBpc){ \
case 16: do_16_case; break; \
case 10: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2planeX_10_ ## opt; break; \
case 9: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2planeX_9_ ## opt; break; \
- default: if (condition_8bit) vscalefn = ff_yuv2planeX_8_ ## opt; break; \
+ default: if (condition_8bit) /*vscalefn = ff_yuv2planeX_8_ ## opt;*/ break; \
}
#define ASSIGN_VSCALE_FUNC(vscalefn, opt1, opt2, opt2chk) \
switch(c->dstBpc){ \
case 16: if (!isBE(c->dstFormat)) vscalefn = ff_yuv2plane1_16_ ## opt1; break; \
case 10: if (!isBE(c->dstFormat) && opt2chk) vscalefn = ff_yuv2plane1_10_ ## opt2; break; \
case 9: if (!isBE(c->dstFormat) && opt2chk) vscalefn = ff_yuv2plane1_9_ ## opt2; break; \
- default: vscalefn = ff_yuv2plane1_8_ ## opt1; break; \
+ case 8: vscalefn = ff_yuv2plane1_8_ ## opt1; break; \
+ default: av_assert0(c->dstBpc>8); \
}
#define case_rgb(x, X, opt) \
case PIX_FMT_ ## X: \
@@ -462,7 +541,7 @@ switch(c->dstBpc){ \
c->yuv2plane1 = ff_yuv2plane1_16_sse4;
}
- if (cpu_flags & AV_CPU_FLAG_AVX) {
+ if (HAVE_AVX && cpu_flags & AV_CPU_FLAG_AVX) {
ASSIGN_VSCALEX_FUNC(c->yuv2planeX, avx, ,
HAVE_ALIGNED_STACK || ARCH_X86_64);
ASSIGN_VSCALE_FUNC(c->yuv2plane1, avx, avx, 1);
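
The extra SCALE_FUNCS/SCALE_FUNC entries and the extended ASSIGN_SCALE_FUNC2 above only add 12- and 14-bit sources to an existing naming scheme: ff_hscale<in>to<out> runs the horizontal FIR and produces either a 15-bit or a 19-bit intermediate, chosen from the destination depth (the cutoff moves from dstBpc <= 10 to dstBpc <= 14). A hedged C reference of the 8-bit-to-15-bit case, modelled on libswscale's generic C scaler rather than on the assembly itself, so the exact shift and clip constants are illustrative:

    #include <stdint.h>

    #define MIN(a, b) ((a) < (b) ? (a) : (b))

    /* Horizontal scale: for each output sample run a small FIR over the
     * source line and clip into a 15-bit intermediate.  filterPos[i] is the
     * first source sample used for output i; filter stores filterSize
     * coefficients per output sample. */
    static void hscale8to15_ref(int16_t *dst, int dstW, const uint8_t *src,
                                const int16_t *filter,
                                const int32_t *filterPos, int filterSize)
    {
        for (int i = 0; i < dstW; i++) {
            int val = 0;
            for (int j = 0; j < filterSize; j++)
                val += src[filterPos[i] + j] * filter[filterSize * i + j];
            dst[i] = MIN(val >> 7, (1 << 15) - 1);
        }
    }

The ...to19 variants follow the same pattern but keep more significant bits for deeper destinations, which is what the dstBpc test in the macro selects between.
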
diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index e9816cf0a6..370a0ebe1b 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -1,25 +1,26 @@
/*
- * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (C) 2001-2011 Michael Niedermayer <michaelni@gmx.at>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#undef REAL_MOVNTQ
#undef MOVNTQ
+#undef MOVNTQ2
#undef PREFETCH
#if COMPILE_TEMPLATE_MMXEXT
@@ -30,11 +31,84 @@
#if COMPILE_TEMPLATE_MMXEXT
#define REAL_MOVNTQ(a,b) "movntq " #a ", " #b " \n\t"
+#define MOVNTQ2 "movntq "
#else
#define REAL_MOVNTQ(a,b) "movq " #a ", " #b " \n\t"
+#define MOVNTQ2 "movq "
#endif
#define MOVNTQ(a,b) REAL_MOVNTQ(a,b)
+#if !COMPILE_TEMPLATE_MMXEXT
+static av_always_inline void
+dither_8to16(const uint8_t *srcDither, int rot)
+{
+ if (rot) {
+ __asm__ volatile("pxor %%mm0, %%mm0\n\t"
+ "movq (%0), %%mm3\n\t"
+ "movq %%mm3, %%mm4\n\t"
+ "psrlq $24, %%mm3\n\t"
+ "psllq $40, %%mm4\n\t"
+ "por %%mm4, %%mm3\n\t"
+ "movq %%mm3, %%mm4\n\t"
+ "punpcklbw %%mm0, %%mm3\n\t"
+ "punpckhbw %%mm0, %%mm4\n\t"
+ :: "r"(srcDither)
+ );
+ } else {
+ __asm__ volatile("pxor %%mm0, %%mm0\n\t"
+ "movq (%0), %%mm3\n\t"
+ "movq %%mm3, %%mm4\n\t"
+ "punpcklbw %%mm0, %%mm3\n\t"
+ "punpckhbw %%mm0, %%mm4\n\t"
+ :: "r"(srcDither)
+ );
+ }
+}
+#endif
+
+static void RENAME(yuv2yuvX)(const int16_t *filter, int filterSize,
+ const int16_t **src, uint8_t *dest, int dstW,
+ const uint8_t *dither, int offset)
+{
+ dither_8to16(dither, offset);
+ __asm__ volatile(\
+ "psraw $4, %%mm3\n\t"
+ "psraw $4, %%mm4\n\t"
+ "movq %%mm3, %%mm6\n\t"
+ "movq %%mm4, %%mm7\n\t"
+ "movl %3, %%ecx\n\t"
+ "mov %0, %%"REG_d" \n\t"\
+ "mov (%%"REG_d"), %%"REG_S" \n\t"\
+ ".p2align 4 \n\t" /* FIXME Unroll? */\
+ "1: \n\t"\
+ "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\
+ "movq (%%"REG_S", %%"REG_c", 2), %%mm2 \n\t" /* srcData */\
+ "movq 8(%%"REG_S", %%"REG_c", 2), %%mm5 \n\t" /* srcData */\
+ "add $16, %%"REG_d" \n\t"\
+ "mov (%%"REG_d"), %%"REG_S" \n\t"\
+ "test %%"REG_S", %%"REG_S" \n\t"\
+ "pmulhw %%mm0, %%mm2 \n\t"\
+ "pmulhw %%mm0, %%mm5 \n\t"\
+ "paddw %%mm2, %%mm3 \n\t"\
+ "paddw %%mm5, %%mm4 \n\t"\
+ " jnz 1b \n\t"\
+ "psraw $3, %%mm3 \n\t"\
+ "psraw $3, %%mm4 \n\t"\
+ "packuswb %%mm4, %%mm3 \n\t"
+ MOVNTQ2 " %%mm3, (%1, %%"REG_c")\n\t"
+ "add $8, %%"REG_c" \n\t"\
+ "cmp %2, %%"REG_c" \n\t"\
+ "movq %%mm6, %%mm3\n\t"
+ "movq %%mm7, %%mm4\n\t"
+ "mov %0, %%"REG_d" \n\t"\
+ "mov (%%"REG_d"), %%"REG_S" \n\t"\
+ "jb 1b \n\t"\
+ :: "g" (filter),
+ "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset)
+ : "%"REG_d, "%"REG_S, "%"REG_c
+ );
+}
+
#define YSCALEYUV2PACKEDX_UV \
__asm__ volatile(\
"xor %%"REG_a", %%"REG_a" \n\t"\
@@ -260,7 +334,7 @@ static void RENAME(yuv2rgb32_X_ar)(SwsContext *c, const int16_t *lumFilter,
{
x86_reg dummy=0;
x86_reg dstW_reg = dstW;
- x86_reg uv_off = c->uv_off_byte;
+ x86_reg uv_off = c->uv_offx2;
if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
YSCALEYUV2PACKEDX_ACCURATE
@@ -293,7 +367,7 @@ static void RENAME(yuv2rgb32_X)(SwsContext *c, const int16_t *lumFilter,
{
x86_reg dummy=0;
x86_reg dstW_reg = dstW;
- x86_reg uv_off = c->uv_off_byte;
+ x86_reg uv_off = c->uv_offx2;
if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
YSCALEYUV2PACKEDX
@@ -350,7 +424,7 @@ static void RENAME(yuv2rgb565_X_ar)(SwsContext *c, const int16_t *lumFilter,
{
x86_reg dummy=0;
x86_reg dstW_reg = dstW;
- x86_reg uv_off = c->uv_off_byte;
+ x86_reg uv_off = c->uv_offx2;
YSCALEYUV2PACKEDX_ACCURATE
YSCALEYUV2RGBX
@@ -374,7 +448,7 @@ static void RENAME(yuv2rgb565_X)(SwsContext *c, const int16_t *lumFilter,
{
x86_reg dummy=0;
x86_reg dstW_reg = dstW;
- x86_reg uv_off = c->uv_off_byte;
+ x86_reg uv_off = c->uv_offx2;
YSCALEYUV2PACKEDX
YSCALEYUV2RGBX
@@ -427,7 +501,7 @@ static void RENAME(yuv2rgb555_X_ar)(SwsContext *c, const int16_t *lumFilter,
{
x86_reg dummy=0;
x86_reg dstW_reg = dstW;
- x86_reg uv_off = c->uv_off_byte;
+ x86_reg uv_off = c->uv_offx2;
YSCALEYUV2PACKEDX_ACCURATE
YSCALEYUV2RGBX
@@ -451,7 +525,7 @@ static void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter,
{
x86_reg dummy=0;
x86_reg dstW_reg = dstW;
- x86_reg uv_off = c->uv_off_byte;
+ x86_reg uv_off = c->uv_offx2;
YSCALEYUV2PACKEDX
YSCALEYUV2RGBX
@@ -584,7 +658,7 @@ static void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilter,
{
x86_reg dummy=0;
x86_reg dstW_reg = dstW;
- x86_reg uv_off = c->uv_off_byte;
+ x86_reg uv_off = c->uv_offx2;
YSCALEYUV2PACKEDX_ACCURATE
YSCALEYUV2RGBX
@@ -608,7 +682,7 @@ static void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter,
{
x86_reg dummy=0;
x86_reg dstW_reg = dstW;
- x86_reg uv_off = c->uv_off_byte;
+ x86_reg uv_off = c->uv_offx2;
YSCALEYUV2PACKEDX
YSCALEYUV2RGBX
@@ -649,7 +723,7 @@ static void RENAME(yuv2yuyv422_X_ar)(SwsContext *c, const int16_t *lumFilter,
{
x86_reg dummy=0;
x86_reg dstW_reg = dstW;
- x86_reg uv_off = c->uv_off_byte;
+ x86_reg uv_off = c->uv_offx2;
YSCALEYUV2PACKEDX_ACCURATE
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
@@ -670,7 +744,7 @@ static void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter,
{
x86_reg dummy=0;
x86_reg dstW_reg = dstW;
- x86_reg uv_off = c->uv_off_byte;
+ x86_reg uv_off = c->uv_offx2;
YSCALEYUV2PACKEDX
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
@@ -786,8 +860,8 @@ static void RENAME(yuv2rgb32_2)(SwsContext *c, const int16_t *buf[2],
: "%r8"
);
#else
- *(const uint16_t **)(&c->u_temp)=abuf0;
- *(const uint16_t **)(&c->v_temp)=abuf1;
+ c->u_temp=(intptr_t)abuf0;
+ c->v_temp=(intptr_t)abuf1;
__asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t"
@@ -1559,9 +1633,9 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c)
{
enum PixelFormat dstFormat = c->dstFormat;
- if (!is16BPS(dstFormat) && !is9_OR_10BPS(dstFormat) &&
- dstFormat != PIX_FMT_NV12 && dstFormat != PIX_FMT_NV21) {
- if (!(c->flags & SWS_BITEXACT)) {
+ c->use_mmx_vfilter= 0;
+ if (!is16BPS(dstFormat) && !is9_OR_10BPS(dstFormat) && dstFormat != PIX_FMT_NV12
+ && dstFormat != PIX_FMT_NV21 && !(c->flags & SWS_BITEXACT)) {
if (c->flags & SWS_ACCURATE_RND) {
if (!(c->flags & SWS_FULL_CHR_H_INT)) {
switch (c->dstFormat) {
@@ -1574,6 +1648,8 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c)
}
}
} else {
+ c->use_mmx_vfilter= 1;
+ c->yuv2planeX = RENAME(yuv2yuvX );
if (!(c->flags & SWS_FULL_CHR_H_INT)) {
switch (c->dstFormat) {
case PIX_FMT_RGB32: c->yuv2packedX = RENAME(yuv2rgb32_X); break;
@@ -1585,7 +1661,6 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c)
}
}
}
- }
if (!(c->flags & SWS_FULL_CHR_H_INT)) {
switch (c->dstFormat) {
case PIX_FMT_RGB32:
@@ -1614,7 +1689,7 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c)
}
}
- if (c->srcBpc == 8 && c->dstBpc <= 10) {
+ if (c->srcBpc == 8 && c->dstBpc <= 14) {
// Use the new MMX scaler if the MMX2 one can't be used (it is faster than the x86 ASM one).
#if COMPILE_TEMPLATE_MMXEXT
if (c->flags & SWS_FAST_BILINEAR && c->canMMX2BeUsed)
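
The new RENAME(yuv2yuvX) above is the MMX/MMXEXT vertical filter installed when use_mmx_vfilter is set: the 8-bit dither line is widened to words and seeds the accumulators, each input line is folded in with pmulhw (the high 16 bits of a signed 16x16 product), and the sum is shifted down and packed with unsigned saturation. A rough scalar model of that arithmetic follows; it deliberately ignores the MMX-specific filter layout that updateMMXDitherTables builds (interleaved source pointers and coefficients) and simplifies the dither rotation, so treat it as a sketch of the math, not of the real entry point:

    #include <stdint.h>

    static void yuv2yuvX_model(const int16_t *coeff, int filterSize,
                               const int16_t **src, uint8_t *dest, int dstW,
                               const uint8_t *dither, int offset)
    {
        for (int i = 0; i < dstW; i++) {
            /* seed with the ordered-dither value, reduced the way the asm's
             * psraw $4 does after widening the dither bytes to words */
            int acc = dither[(i + offset) & 7] >> 4;
            for (int j = 0; j < filterSize; j++)
                acc += (src[j][i] * coeff[j]) >> 16;  /* pmulhw-style product */
            acc >>= 3;                                /* psraw $3 */
            dest[i] = acc < 0 ? 0 : acc > 255 ? 255 : acc; /* packuswb clamp */
        }
    }

The SSE3 version added in x86/swscale.c earlier in this diff runs the same loop 16 pixels at a time and falls back to this MMX2 variant when the destination pointer is not 16-byte aligned.
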
diff --git a/libswscale/x86/yuv2rgb.c b/libswscale/x86/yuv2rgb.c
index 93755493ab..9445d08e84 100644
--- a/libswscale/x86/yuv2rgb.c
+++ b/libswscale/x86/yuv2rgb.c
@@ -7,20 +7,20 @@
* 1,4,8bpp support and context / deglobalize stuff
* by Michael Niedermayer (michaelni@gmx.at)
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -74,10 +74,6 @@ av_cold SwsFunc ff_yuv2rgb_init_mmx(SwsContext *c)
#if HAVE_INLINE_ASM
int cpu_flags = av_get_cpu_flags();
- if (c->srcFormat != PIX_FMT_YUV420P &&
- c->srcFormat != PIX_FMT_YUVA420P)
- return NULL;
-
#if HAVE_MMXEXT
if (cpu_flags & AV_CPU_FLAG_MMXEXT) {
switch (c->dstFormat) {
diff --git a/libswscale/x86/yuv2rgb_template.c b/libswscale/x86/yuv2rgb_template.c
index a71fd13862..79e48fd7f0 100644
--- a/libswscale/x86/yuv2rgb_template.c
+++ b/libswscale/x86/yuv2rgb_template.c
@@ -4,20 +4,20 @@
* Copyright (C) 2001-2007 Michael Niedermayer
* (c) 2010 Konstantin Shishkov
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -43,17 +43,14 @@
if (h_size * depth > FFABS(dstStride[0])) \
h_size -= 8; \
\
- if (c->srcFormat == PIX_FMT_YUV422P) { \
- srcStride[1] *= 2; \
- srcStride[2] *= 2; \
- } \
+ vshift = c->srcFormat != PIX_FMT_YUV422P; \
\
__asm__ volatile ("pxor %mm4, %mm4\n\t"); \
for (y = 0; y < srcSliceH; y++) { \
uint8_t *image = dst[0] + (y + srcSliceY) * dstStride[0]; \
const uint8_t *py = src[0] + y * srcStride[0]; \
- const uint8_t *pu = src[1] + (y >> 1) * srcStride[1]; \
- const uint8_t *pv = src[2] + (y >> 1) * srcStride[2]; \
+ const uint8_t *pu = src[1] + (y >> vshift) * srcStride[1]; \
+ const uint8_t *pv = src[2] + (y >> vshift) * srcStride[2]; \
x86_reg index = -h_size / 2; \
#define YUV2RGB_INITIAL_LOAD \
@@ -141,6 +138,7 @@
: "+r" (index), "+r" (image) \
: "r" (pu - index), "r" (pv - index), "r"(&c->redDither), \
"r" (py - 2*index) \
+ : "memory" \
); \
} \
@@ -148,6 +146,7 @@
: "+r" (index), "+r" (image) \
: "r" (pu - index), "r" (pv - index), "r"(&c->redDither), \
"r" (py - 2*index), "r" (pa - 2*index) \
+ : "memory" \
); \
} \
@@ -188,7 +187,7 @@ static inline int RENAME(yuv420_rgb15)(SwsContext *c, const uint8_t *src[],
int srcSliceY, int srcSliceH,
uint8_t *dst[], int dstStride[])
{
- int y, h_size;
+ int y, h_size, vshift;
YUV2RGB_LOOP(2)
@@ -216,7 +215,7 @@ static inline int RENAME(yuv420_rgb16)(SwsContext *c, const uint8_t *src[],
int srcSliceY, int srcSliceH,
uint8_t *dst[], int dstStride[])
{
- int y, h_size;
+ int y, h_size, vshift;
YUV2RGB_LOOP(2)
@@ -306,7 +305,7 @@ static inline int RENAME(yuv420_rgb24)(SwsContext *c, const uint8_t *src[],
int srcSliceY, int srcSliceH,
uint8_t *dst[], int dstStride[])
{
- int y, h_size;
+ int y, h_size, vshift;
YUV2RGB_LOOP(3)
@@ -324,7 +323,7 @@ static inline int RENAME(yuv420_bgr24)(SwsContext *c, const uint8_t *src[],
int srcSliceY, int srcSliceH,
uint8_t *dst[], int dstStride[])
{
- int y, h_size;
+ int y, h_size, vshift;
YUV2RGB_LOOP(3)
@@ -368,7 +367,7 @@ static inline int RENAME(yuv420_rgb32)(SwsContext *c, const uint8_t *src[],
int srcSliceY, int srcSliceH,
uint8_t *dst[], int dstStride[])
{
- int y, h_size;
+ int y, h_size, vshift;
YUV2RGB_LOOP(4)
@@ -389,7 +388,7 @@ static inline int RENAME(yuva420_rgb32)(SwsContext *c, const uint8_t *src[],
int srcSliceY, int srcSliceH,
uint8_t *dst[], int dstStride[])
{
- int y, h_size;
+ int y, h_size, vshift;
YUV2RGB_LOOP(4)
@@ -411,7 +410,7 @@ static inline int RENAME(yuv420_bgr32)(SwsContext *c, const uint8_t *src[],
int srcSliceY, int srcSliceH,
uint8_t *dst[], int dstStride[])
{
- int y, h_size;
+ int y, h_size, vshift;
YUV2RGB_LOOP(4)
@@ -432,7 +431,7 @@ static inline int RENAME(yuva420_bgr32)(SwsContext *c, const uint8_t *src[],
int srcSliceY, int srcSliceH,
uint8_t *dst[], int dstStride[])
{
- int y, h_size;
+ int y, h_size, vshift;
YUV2RGB_LOOP(4)
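
The vshift change above replaces the old trick of doubling srcStride[1]/srcStride[2] for 4:2:2 input: chroma rows are now addressed as y >> vshift, with vshift = 1 for vertically subsampled 4:2:0 and vshift = 0 for 4:2:2. A small sketch of that addressing (the helper name is illustrative, not from the source):

    #include <stdint.h>

    /* Pick the chroma row that pairs with luma row y.  4:2:0 stores half as
     * many chroma rows as luma rows (vshift = 1); 4:2:2 stores one chroma
     * row per luma row (vshift = 0). */
    static const uint8_t *chroma_row(const uint8_t *plane, int stride,
                                     int y, int vshift)
    {
        return plane + (y >> vshift) * stride;
    }

With the shift folded into the row index, the same inner loop handles both layouts and the stride no longer has to be patched before entering the loop, which is also why ff_yuv2rgb_init_mmx no longer rejects non-4:2:0 sources earlier in this diff.
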
diff --git a/libswscale/yuv2rgb.c b/libswscale/yuv2rgb.c
index 1c44a2f544..67cf19ec03 100644
--- a/libswscale/yuv2rgb.c
+++ b/libswscale/yuv2rgb.c
@@ -6,27 +6,26 @@
* 1,4,8bpp support and context / deglobalize stuff
* by Michael Niedermayer (michaelni@gmx.at)
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <stdio.h>
#include <stdlib.h>
#include <inttypes.h>
-#include <assert.h>
#include "libavutil/cpu.h"
#include "libavutil/bswap.h"
@@ -34,7 +33,10 @@
#include "rgb2rgb.h"
#include "swscale.h"
#include "swscale_internal.h"
+#include "libavutil/pixdesc.h"
+extern const uint8_t dither_2x2_4[2][8];
+extern const uint8_t dither_2x2_8[2][8];
extern const uint8_t dither_4x4_16[4][8];
extern const uint8_t dither_8x8_32[8][8];
extern const uint8_t dither_8x8_73[8][8];
@@ -61,9 +63,9 @@ const int *sws_getCoefficients(int colorspace)
#define LOADCHROMA(i) \
U = pu[i]; \
V = pv[i]; \
- r = (void *)c->table_rV[V]; \
- g = (void *)(c->table_gU[U] + c->table_gV[V]); \
- b = (void *)c->table_bU[U];
+ r = (void *)c->table_rV[V+YUVRGB_TABLE_HEADROOM]; \
+ g = (void *)(c->table_gU[U+YUVRGB_TABLE_HEADROOM] + c->table_gV[V+YUVRGB_TABLE_HEADROOM]); \
+ b = (void *)c->table_bU[U+YUVRGB_TABLE_HEADROOM];
#define PUTRGB(dst, src, i) \
Y = src[2 * i]; \
@@ -355,24 +357,65 @@ ENDYUV2RGBLINE(24)
PUTBGR24(dst_1, py_1, 1);
ENDYUV2RGBFUNC()
-// This is exactly the same code as yuv2rgb_c_32 except for the types of
-// r, g, b, dst_1, dst_2
-YUV2RGBFUNC(yuv2rgb_c_16, uint16_t, 0)
+YUV2RGBFUNC(yuv2rgb_c_16_ordered_dither, uint16_t, 0)
+ const uint8_t *d16 = dither_2x2_8[y & 1];
+ const uint8_t *e16 = dither_2x2_4[y & 1];
+ const uint8_t *f16 = dither_2x2_8[(y & 1)^1];
+
+#define PUTRGB16(dst, src, i, o) \
+ Y = src[2 * i]; \
+ dst[2 * i] = r[Y + d16[0 + o]] + \
+ g[Y + e16[0 + o]] + \
+ b[Y + f16[0 + o]]; \
+ Y = src[2 * i + 1]; \
+ dst[2 * i + 1] = r[Y + d16[1 + o]] + \
+ g[Y + e16[1 + o]] + \
+ b[Y + f16[1 + o]];
LOADCHROMA(0);
- PUTRGB(dst_1, py_1, 0);
- PUTRGB(dst_2, py_2, 0);
+ PUTRGB16(dst_1, py_1, 0, 0);
+ PUTRGB16(dst_2, py_2, 0, 0 + 8);
LOADCHROMA(1);
- PUTRGB(dst_2, py_2, 1);
- PUTRGB(dst_1, py_1, 1);
+ PUTRGB16(dst_2, py_2, 1, 2 + 8);
+ PUTRGB16(dst_1, py_1, 1, 2);
LOADCHROMA(2);
- PUTRGB(dst_1, py_1, 2);
- PUTRGB(dst_2, py_2, 2);
+ PUTRGB16(dst_1, py_1, 2, 4);
+ PUTRGB16(dst_2, py_2, 2, 4 + 8);
LOADCHROMA(3);
- PUTRGB(dst_2, py_2, 3);
- PUTRGB(dst_1, py_1, 3);
+ PUTRGB16(dst_2, py_2, 3, 6 + 8);
+ PUTRGB16(dst_1, py_1, 3, 6);
+CLOSEYUV2RGBFUNC(8)
+
+YUV2RGBFUNC(yuv2rgb_c_15_ordered_dither, uint16_t, 0)
+ const uint8_t *d16 = dither_2x2_8[y & 1];
+ const uint8_t *e16 = dither_2x2_8[(y & 1)^1];
+
+#define PUTRGB15(dst, src, i, o) \
+ Y = src[2 * i]; \
+ dst[2 * i] = r[Y + d16[0 + o]] + \
+ g[Y + d16[1 + o]] + \
+ b[Y + e16[0 + o]]; \
+ Y = src[2 * i + 1]; \
+ dst[2 * i + 1] = r[Y + d16[1 + o]] + \
+ g[Y + d16[0 + o]] + \
+ b[Y + e16[1 + o]];
+ LOADCHROMA(0);
+ PUTRGB15(dst_1, py_1, 0, 0);
+ PUTRGB15(dst_2, py_2, 0, 0 + 8);
+
+ LOADCHROMA(1);
+ PUTRGB15(dst_2, py_2, 1, 2 + 8);
+ PUTRGB15(dst_1, py_1, 1, 2);
+
+ LOADCHROMA(2);
+ PUTRGB15(dst_1, py_1, 2, 4);
+ PUTRGB15(dst_2, py_2, 2, 4 + 8);
+
+ LOADCHROMA(3);
+ PUTRGB15(dst_2, py_2, 3, 6 + 8);
+ PUTRGB15(dst_1, py_1, 3, 6);
CLOSEYUV2RGBFUNC(8)
// r, g, b, dst_1, dst_2
@@ -505,7 +548,7 @@ CLOSEYUV2RGBFUNC(8)
YUV2RGBFUNC(yuv2rgb_c_1_ordered_dither, uint8_t, 0)
const uint8_t *d128 = dither_8x8_220[y & 7];
char out_1 = 0, out_2 = 0;
- g = c->table_gU[128] + c->table_gV[128];
+ g = c->table_gU[128 + YUVRGB_TABLE_HEADROOM] + c->table_gV[128 + YUVRGB_TABLE_HEADROOM];
#define PUTRGB1(out, src, i, o) \
Y = src[2 * i]; \
@@ -547,7 +590,7 @@ SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c)
av_log(c, AV_LOG_WARNING,
"No accelerated colorspace conversion found from %s to %s.\n",
- sws_format_name(c->srcFormat), sws_format_name(c->dstFormat));
+ av_get_pix_fmt_name(c->srcFormat), av_get_pix_fmt_name(c->dstFormat));
switch (c->dstFormat) {
case PIX_FMT_BGR48BE:
@@ -558,23 +601,21 @@ SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c)
return yuv2rgb_c_48;
case PIX_FMT_ARGB:
case PIX_FMT_ABGR:
- if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P)
+ if (CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat))
return yuva2argb_c;
case PIX_FMT_RGBA:
case PIX_FMT_BGRA:
- if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P)
- return yuva2rgba_c;
- else
- return yuv2rgb_c_32;
+ return (CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat)) ? yuva2rgba_c : yuv2rgb_c_32;
case PIX_FMT_RGB24:
return yuv2rgb_c_24_rgb;
case PIX_FMT_BGR24:
return yuv2rgb_c_24_bgr;
case PIX_FMT_RGB565:
case PIX_FMT_BGR565:
+ return yuv2rgb_c_16_ordered_dither;
case PIX_FMT_RGB555:
case PIX_FMT_BGR555:
- return yuv2rgb_c_16;
+ return yuv2rgb_c_15_ordered_dither;
case PIX_FMT_RGB444:
case PIX_FMT_BGR444:
return yuv2rgb_c_12_ordered_dither;
@@ -589,36 +630,32 @@ SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c)
return yuv2rgb_c_4b_ordered_dither;
case PIX_FMT_MONOBLACK:
return yuv2rgb_c_1_ordered_dither;
- default:
- assert(0);
}
return NULL;
}
-static void fill_table(uint8_t *table[256], const int elemsize,
+static void fill_table(uint8_t* table[256 + 2*YUVRGB_TABLE_HEADROOM], const int elemsize,
const int inc, void *y_tab)
{
int i;
- int64_t cb = 0;
uint8_t *y_table = y_tab;
y_table -= elemsize * (inc >> 9);
- for (i = 0; i < 256; i++) {
+ for (i = 0; i < 256 + 2*YUVRGB_TABLE_HEADROOM; i++) {
+ int64_t cb = av_clip(i-YUVRGB_TABLE_HEADROOM, 0, 255)*inc;
table[i] = y_table + elemsize * (cb >> 16);
- cb += inc;
}
}
-static void fill_gv_table(int table[256], const int elemsize, const int inc)
+static void fill_gv_table(int table[256 + 2*YUVRGB_TABLE_HEADROOM], const int elemsize, const int inc)
{
int i;
- int64_t cb = 0;
int off = -(inc >> 9);
- for (i = 0; i < 256; i++) {
+ for (i = 0; i < 256 + 2*YUVRGB_TABLE_HEADROOM; i++) {
+ int64_t cb = av_clip(i-YUVRGB_TABLE_HEADROOM, 0, 255)*inc;
table[i] = elemsize * (off + (cb >> 16));
- cb += inc;
}
}
@@ -661,7 +698,7 @@ av_cold int ff_yuv2rgb_c_init_tables(SwsContext *c, const int inv_table[4],
uint8_t *y_table;
uint16_t *y_table16;
uint32_t *y_table32;
- int i, base, rbase, gbase, bbase, abase, needAlpha;
+ int i, base, rbase, gbase, bbase, av_uninit(abase), needAlpha;
const int yoffs = fullRange ? 384 : 326;
int64_t crv = inv_table[0];