diff options
author | Diego Biurrun <diego@biurrun.de> | 2014-05-30 03:44:12 -0700 |
---|---|---|
committer | Diego Biurrun <diego@biurrun.de> | 2014-06-02 08:41:47 -0700 |
commit | 880e2aa23645ed9871c66ee1cbd00f93c72d2d73 (patch) | |
tree | b1ef322579f20b7e7b43473989486c3dd709539b /libswscale | |
parent | b88cc5cca111132b42c2ee99662bfefe7652e3da (diff) | |
download | ffmpeg-880e2aa23645ed9871c66ee1cbd00f93c72d2d73.tar.gz |
Remove all Blackfin architecture optimizations
Blackfin is a painful platform to work with, no test machines are available
and the range of multimedia applications is dubious. Thus it only represents
a maintenance burden.
Diffstat (limited to 'libswscale')
-rw-r--r-- | libswscale/bfin/Makefile | 3 | ||||
-rw-r--r-- | libswscale/bfin/internal_bfin.S | 599 | ||||
-rw-r--r-- | libswscale/bfin/swscale_bfin.c | 84 | ||||
-rw-r--r-- | libswscale/bfin/yuv2rgb_bfin.c | 197 | ||||
-rw-r--r-- | libswscale/swscale.h | 2 | ||||
-rw-r--r-- | libswscale/swscale_internal.h | 16 | ||||
-rw-r--r-- | libswscale/swscale_unscaled.c | 2 | ||||
-rw-r--r-- | libswscale/version.h | 3 | ||||
-rw-r--r-- | libswscale/yuv2rgb.c | 2 |
9 files changed, 5 insertions, 903 deletions
diff --git a/libswscale/bfin/Makefile b/libswscale/bfin/Makefile deleted file mode 100644 index 5f34550427..0000000000 --- a/libswscale/bfin/Makefile +++ /dev/null @@ -1,3 +0,0 @@ -OBJS += bfin/internal_bfin.o \ - bfin/swscale_bfin.o \ - bfin/yuv2rgb_bfin.o \ diff --git a/libswscale/bfin/internal_bfin.S b/libswscale/bfin/internal_bfin.S deleted file mode 100644 index dca8448a3f..0000000000 --- a/libswscale/bfin/internal_bfin.S +++ /dev/null @@ -1,599 +0,0 @@ -/* - * Copyright (C) 2007 Marc Hoffman <marc.hoffman@analog.com> - * April 20, 2007 - * - * Blackfin video color space converter operations - * convert I420 YV12 to RGB in various formats - * - * This file is part of Libav. - * - * Libav is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * Libav is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - - -/* -YUV420 to RGB565 conversion. This routine takes a YUV 420 planar macroblock -and converts it to RGB565. R:5 bits, G:6 bits, B:5 bits.. packed into shorts. - - -The following calculation is used for the conversion: - - r = clipz((y - oy) * cy + crv * (v - 128)) - g = clipz((y - oy) * cy + cgv * (v - 128) + cgu * (u - 128)) - b = clipz((y - oy) * cy + cbu * (u - 128)) - -y, u, v are prescaled by a factor of 4 i.e. left-shifted to gain precision. - - -New factorization to eliminate the truncation error which was -occurring due to the byteop3p. - - -1) Use the bytop16m to subtract quad bytes we use this in U8 this - then so the offsets need to be renormalized to 8bits. - -2) Scale operands up by a factor of 4 not 8 because Blackfin - multiplies include a shift. - -3) Compute into the accumulators cy * yx0, cy * yx1. - -4) Compute each of the linear equations: - r = clipz((y - oy) * cy + crv * (v - 128)) - - g = clipz((y - oy) * cy + cgv * (v - 128) + cgu * (u - 128)) - - b = clipz((y - oy) * cy + cbu * (u - 128)) - - Reuse of the accumulators requires that we actually multiply - twice once with addition and the second time with a subtraction. - - Because of this we need to compute the equations in the order R B - then G saving the writes for B in the case of 24/32 bit color - formats. - - API: yuv2rgb_kind (uint8_t *Y, uint8_t *U, uint8_t *V, int *out, - int dW, uint32_t *coeffs); - - A B - --- --- - i2 = cb i3 = cr - i1 = coeff i0 = y - -Where coeffs have the following layout in memory. - -uint32_t oy, oc, zero, cy, crv, rmask, cbu, bmask, cgu, cgv; - -coeffs is a pointer to oy. - -The {rgb} masks are only utilized by the 565 packing algorithm. Note the data -replication is used to simplify the internal algorithms for the dual Mac -architecture of BlackFin. - -All routines are exported with _ff_bfin_ as a symbol prefix. - -Rough performance gain compared against -O3: - -2779809/1484290 187.28% - -which translates to ~33c/pel to ~57c/pel for the reference vs 17.5 -c/pel for the optimized implementations. Not sure why there is such a -huge variation on the reference codes on Blackfin I guess it must have -to do with the memory system. -*/ - -#include "libavutil/bfin/asm.h" - -#define MEM mL1 - - -.text - -#define COEFF_LEN 11*4 -#define COEFF_REL_CY_OFF 4*4 - -#define ARG_OUT 20 -#define ARG_W 24 -#define ARG_COEFF 28 - -DEFUN(yuv2rgb565_line,MEM, - (uint8_t *Y, uint8_t *U, uint8_t *V, int *out, int dW, uint32_t *coeffs)): - link 0; - [--sp] = (r7:4); - p1 = [fp+ARG_OUT]; - r3 = [fp+ARG_W]; - - i0 = r0; - i2 = r1; - i3 = r2; - - r0 = [fp+ARG_COEFF]; - i1 = r0; - b1 = i1; - l1 = COEFF_LEN; - m0 = COEFF_REL_CY_OFF; - p0 = r3; - - r0 = [i0++]; // 2Y - r1.l = w[i2++]; // 2u - r1.h = w[i3++]; // 2v - p0 = p0>>2; - - lsetup (.L0565, .L1565) lc0 = p0; - - /* - uint32_t oy,oc,zero,cy,crv,rmask,cbu,bmask,cgu,cgv - r0 -- used to load 4ys - r1 -- used to load 2us,2vs - r4 -- y3,y2 - r5 -- y1,y0 - r6 -- u1,u0 - r7 -- v1,v0 - */ - r2=[i1++]; // oy -.L0565: - /* - rrrrrrrr gggggggg bbbbbbbb - 5432109876543210 - bbbbb >>3 - gggggggg <<3 - rrrrrrrr <<8 - rrrrrggggggbbbbb - */ - (r4,r5) = byteop16m (r1:0, r3:2) || r3=[i1++]; // oc - (r7,r6) = byteop16m (r1:0, r3:2) (r); - r5 = r5 << 2 (v); // y1,y0 - r4 = r4 << 2 (v); // y3,y2 - r6 = r6 << 2 (v) || r0=[i1++]; // u1,u0, r0=zero - r7 = r7 << 2 (v) || r1=[i1++]; // v1,v0 r1=cy - /* Y' = y*cy */ - a1 = r1.h*r5.h, a0 = r1.l*r5.l || r1=[i1++]; // crv - - /* R = Y+ crv*(Cr-128) */ - r2.h = (a1 += r1.h*r7.l), r2.l = (a0 += r1.l*r7.l); - a1 -= r1.h*r7.l, a0 -= r1.l*r7.l || r5=[i1++]; // rmask - r2 = byteop3p(r3:2, r1:0)(LO) || r1=[i1++]; // cbu - r2 = r2 >> 3 (v); - r3 = r2 & r5; - - /* B = Y+ cbu*(Cb-128) */ - r2.h = (a1 += r1.h*r6.l), r2.l = (a0 += r1.l*r6.l); - a1 -= r1.h*r6.l, a0 -= r1.l*r6.l || r5=[i1++]; // bmask - r2 = byteop3p(r3:2, r1:0)(LO) || r1=[i1++]; // cgu - r2 = r2 << 8 (v); - r2 = r2 & r5; - r3 = r3 | r2; - - /* G = Y+ cgu*(Cb-128)+cgv*(Cr-128) */ - a1 += r1.h*r6.l, a0 += r1.l*r6.l || r1=[i1++]; // cgv - r2.h = (a1 += r1.h*r7.l), r2.l = (a0 += r1.l*r7.l); - r2 = byteop3p(r3:2, r1:0)(LO) || r5=[i1++m0]; // gmask - r2 = r2 << 3 (v); - r2 = r2 & r5; - r3 = r3 | r2; - [p1++]=r3 || r1=[i1++]; // cy - - /* Y' = y*cy */ - - a1 = r1.h*r4.h, a0 = r1.l*r4.l || r1=[i1++]; // crv - - /* R = Y+ crv*(Cr-128) */ - r2.h = (a1 += r1.h*r7.h), r2.l = (a0 += r1.l*r7.h); - a1 -= r1.h*r7.h, a0 -= r1.l*r7.h || r5=[i1++]; // rmask - r2 = byteop3p(r3:2, r1:0)(LO) || r1=[i1++]; // cbu - r2 = r2 >> 3 (v); - r3 = r2 & r5; - - /* B = Y+ cbu*(Cb-128) */ - r2.h = (a1 += r1.h*r6.h), r2.l = (a0 += r1.l*r6.h); - a1 -= r1.h*r6.h, a0 -= r1.l*r6.h || r5=[i1++]; // bmask - r2 = byteop3p(r3:2, r1:0)(LO) || r1=[i1++]; // cgu - r2 = r2 << 8 (v); - r2 = r2 & r5; - r3 = r3 | r2; - - /* G = Y+ cgu*(Cb-128)+cgv*(Cr-128) */ - a1 += r1.h*r6.h, a0 += r1.l*r6.h || r1=[i1++]; // cgv - r2.h = (a1 += r1.h*r7.h), r2.l = (a0 += r1.l*r7.h) || r5=[i1++]; // gmask - r2 = byteop3p(r3:2, r1:0)(LO) || r0 = [i0++]; // 2Y - r2 = r2 << 3 (v) || r1.l = w[i2++]; // 2u - r2 = r2 & r5; - r3 = r3 | r2; - [p1++]=r3 || r1.h = w[i3++]; // 2v -.L1565: r2=[i1++]; // oy - - l1 = 0; - - (r7:4) = [sp++]; - unlink; - rts; -DEFUN_END(yuv2rgb565_line) - -DEFUN(yuv2rgb555_line,MEM, - (uint8_t *Y, uint8_t *U, uint8_t *V, int *out, int dW, uint32_t *coeffs)): - link 0; - [--sp] = (r7:4); - p1 = [fp+ARG_OUT]; - r3 = [fp+ARG_W]; - - i0 = r0; - i2 = r1; - i3 = r2; - - r0 = [fp+ARG_COEFF]; - i1 = r0; - b1 = i1; - l1 = COEFF_LEN; - m0 = COEFF_REL_CY_OFF; - p0 = r3; - - r0 = [i0++]; // 2Y - r1.l = w[i2++]; // 2u - r1.h = w[i3++]; // 2v - p0 = p0>>2; - - lsetup (.L0555, .L1555) lc0 = p0; - - /* - uint32_t oy,oc,zero,cy,crv,rmask,cbu,bmask,cgu,cgv - r0 -- used to load 4ys - r1 -- used to load 2us,2vs - r4 -- y3,y2 - r5 -- y1,y0 - r6 -- u1,u0 - r7 -- v1,v0 - */ - r2=[i1++]; // oy -.L0555: - /* - rrrrrrrr gggggggg bbbbbbbb - 5432109876543210 - bbbbb >>3 - gggggggg <<2 - rrrrrrrr <<7 - xrrrrrgggggbbbbb - */ - - (r4,r5) = byteop16m (r1:0, r3:2) || r3=[i1++]; // oc - (r7,r6) = byteop16m (r1:0, r3:2) (r); - r5 = r5 << 2 (v); // y1,y0 - r4 = r4 << 2 (v); // y3,y2 - r6 = r6 << 2 (v) || r0=[i1++]; // u1,u0, r0=zero - r7 = r7 << 2 (v) || r1=[i1++]; // v1,v0 r1=cy - /* Y' = y*cy */ - a1 = r1.h*r5.h, a0 = r1.l*r5.l || r1=[i1++]; // crv - - /* R = Y+ crv*(Cr-128) */ - r2.h = (a1 += r1.h*r7.l), r2.l = (a0 += r1.l*r7.l); - a1 -= r1.h*r7.l, a0 -= r1.l*r7.l || r5=[i1++]; // rmask - r2 = byteop3p(r3:2, r1:0)(LO) || r1=[i1++]; // cbu - r2 = r2 >> 3 (v); - r3 = r2 & r5; - - /* B = Y+ cbu*(Cb-128) */ - r2.h = (a1 += r1.h*r6.l), r2.l = (a0 += r1.l*r6.l); - a1 -= r1.h*r6.l, a0 -= r1.l*r6.l || r5=[i1++]; // bmask - r2 = byteop3p(r3:2, r1:0)(LO) || r1=[i1++]; // cgu - r2 = r2 << 7 (v); - r2 = r2 & r5; - r3 = r3 | r2; - - /* G = Y+ cgu*(Cb-128)+cgv*(Cr-128) */ - a1 += r1.h*r6.l, a0 += r1.l*r6.l || r1=[i1++]; // cgv - r2.h = (a1 += r1.h*r7.l), r2.l = (a0 += r1.l*r7.l); - r2 = byteop3p(r3:2, r1:0)(LO) || r5=[i1++m0]; // gmask - r2 = r2 << 2 (v); - r2 = r2 & r5; - r3 = r3 | r2; - [p1++]=r3 || r1=[i1++]; // cy - - /* Y' = y*cy */ - - a1 = r1.h*r4.h, a0 = r1.l*r4.l || r1=[i1++]; // crv - - /* R = Y+ crv*(Cr-128) */ - r2.h = (a1 += r1.h*r7.h), r2.l = (a0 += r1.l*r7.h); - a1 -= r1.h*r7.h, a0 -= r1.l*r7.h || r5=[i1++]; // rmask - r2 = byteop3p(r3:2, r1:0)(LO) || r1=[i1++]; // cbu - r2 = r2 >> 3 (v); - r3 = r2 & r5; - - /* B = Y+ cbu*(Cb-128) */ - r2.h = (a1 += r1.h*r6.h), r2.l = (a0 += r1.l*r6.h); - a1 -= r1.h*r6.h, a0 -= r1.l*r6.h || r5=[i1++]; // bmask - r2 = byteop3p(r3:2, r1:0)(LO) || r1=[i1++]; // cgu - r2 = r2 << 7 (v); - r2 = r2 & r5; - r3 = r3 | r2; - - /* G = Y+ cgu*(Cb-128)+cgv*(Cr-128) */ - a1 += r1.h*r6.h, a0 += r1.l*r6.h || r1=[i1++]; // cgv - r2.h = (a1 += r1.h*r7.h), r2.l = (a0 += r1.l*r7.h) || r5=[i1++]; // gmask - r2 = byteop3p(r3:2, r1:0)(LO) || r0=[i0++]; // 4Y - r2 = r2 << 2 (v) || r1.l=w[i2++]; // 2u - r2 = r2 & r5; - r3 = r3 | r2; - [p1++]=r3 || r1.h=w[i3++]; // 2v - -.L1555: r2=[i1++]; // oy - - l1 = 0; - - (r7:4) = [sp++]; - unlink; - rts; -DEFUN_END(yuv2rgb555_line) - -DEFUN(yuv2rgb24_line,MEM, - (uint8_t *Y, uint8_t *U, uint8_t *V, int *out, int dW, uint32_t *coeffs)): - link 0; - [--sp] = (r7:4); - p1 = [fp+ARG_OUT]; - r3 = [fp+ARG_W]; - p2 = p1; - p2 += 3; - - i0 = r0; - i2 = r1; - i3 = r2; - - r0 = [fp+ARG_COEFF]; // coeff buffer - i1 = r0; - b1 = i1; - l1 = COEFF_LEN; - m0 = COEFF_REL_CY_OFF; - p0 = r3; - - r0 = [i0++]; // 2Y - r1.l = w[i2++]; // 2u - r1.h = w[i3++]; // 2v - p0 = p0>>2; - - lsetup (.L0888, .L1888) lc0 = p0; - - /* - uint32_t oy,oc,zero,cy,crv,rmask,cbu,bmask,cgu,cgv - r0 -- used to load 4ys - r1 -- used to load 2us,2vs - r4 -- y3,y2 - r5 -- y1,y0 - r6 -- u1,u0 - r7 -- v1,v0 - */ - r2=[i1++]; // oy -.L0888: - (r4,r5) = byteop16m (r1:0, r3:2) || r3=[i1++]; // oc - (r7,r6) = byteop16m (r1:0, r3:2) (r); - r5 = r5 << 2 (v); // y1,y0 - r4 = r4 << 2 (v); // y3,y2 - r6 = r6 << 2 (v) || r0=[i1++]; // u1,u0, r0=zero - r7 = r7 << 2 (v) || r1=[i1++]; // v1,v0 r1=cy - - /* Y' = y*cy */ - a1 = r1.h*r5.h, a0 = r1.l*r5.l || r1=[i1++]; // crv - - /* R = Y+ crv*(Cr-128) */ - r2.h = (a1 += r1.h*r7.l), r2.l = (a0 += r1.l*r7.l); - a1 -= r1.h*r7.l, a0 -= r1.l*r7.l || r5=[i1++]; // rmask - r2 = byteop3p(r3:2, r1:0)(LO) || r1=[i1++]; // cbu - r2=r2>>16 || B[p1++]=r2; - B[p2++]=r2; - - /* B = Y+ cbu*(Cb-128) */ - r2.h = (a1 += r1.h*r6.l), r2.l = (a0 += r1.l*r6.l); - a1 -= r1.h*r6.l, a0 -= r1.l*r6.l || r5=[i1++]; // bmask - r3 = byteop3p(r3:2, r1:0)(LO) || r1=[i1++]; // cgu - - /* G = Y+ cgu*(Cb-128)+cgv*(Cr-128) */ - a1 += r1.h*r6.l, a0 += r1.l*r6.l || r1=[i1++]; // cgv - r2.h = (a1 += r1.h*r7.l), r2.l = (a0 += r1.l*r7.l); - r2 = byteop3p(r3:2, r1:0)(LO) || r5=[i1++m0]; // gmask, oy,cy,zero - - r2=r2>>16 || B[p1++]=r2; - B[p2++]=r2; - - r3=r3>>16 || B[p1++]=r3; - B[p2++]=r3 || r1=[i1++]; // cy - - p1+=3; - p2+=3; - /* Y' = y*cy */ - a1 = r1.h*r4.h, a0 = r1.l*r4.l || r1=[i1++]; // crv - - /* R = Y+ crv*(Cr-128) */ - r2.h = (a1 += r1.h*r7.h), r2.l = (a0 += r1.l*r7.h); - a1 -= r1.h*r7.h, a0 -= r1.l*r7.h || r5=[i1++]; // rmask - r2 = byteop3p(r3:2, r1:0)(LO) || r1=[i1++]; // cbu - r2=r2>>16 || B[p1++]=r2; - B[p2++]=r2; - - /* B = Y+ cbu*(Cb-128) */ - r2.h = (a1 += r1.h*r6.h), r2.l = (a0 += r1.l*r6.h); - a1 -= r1.h*r6.h, a0 -= r1.l*r6.h || r5=[i1++]; // bmask - r3 = byteop3p(r3:2, r1:0)(LO) || r1=[i1++]; // cgu - - /* G = Y+ cgu*(Cb-128)+cgv*(Cr-128) */ - a1 += r1.h*r6.h, a0 += r1.l*r6.h || r1=[i1++]; // cgv - r2.h = (a1 += r1.h*r7.h), r2.l = (a0 += r1.l*r7.h); - r2 = byteop3p(r3:2, r1:0)(LO) || r5=[i1++]; // gmask - r2=r2>>16 || B[p1++]=r2 || r0 = [i0++]; // 4y - B[p2++]=r2 || r1.l = w[i2++]; // 2u - r3=r3>>16 || B[p1++]=r3 || r1.h = w[i3++]; // 2v - B[p2++]=r3 || r2=[i1++]; // oy - - p1+=3; -.L1888: p2+=3; - - l1 = 0; - - (r7:4) = [sp++]; - unlink; - rts; -DEFUN_END(yuv2rgb24_line) - - - -#define ARG_vdst 20 -#define ARG_width 24 -#define ARG_height 28 -#define ARG_lumStride 32 -#define ARG_chromStride 36 -#define ARG_srcStride 40 - -DEFUN(uyvytoyv12, mL3, (const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, - int width, int height, - int lumStride, int chromStride, int srcStride)): - link 0; - [--sp] = (r7:4,p5:4); - - p0 = r1; // Y top even - - i2 = r2; // *u - r2 = [fp + ARG_vdst]; - i3 = r2; // *v - - r1 = [fp + ARG_srcStride]; - r2 = r0 + r1; - i0 = r0; // uyvy_T even - i1 = r2; // uyvy_B odd - - p2 = [fp + ARG_lumStride]; - p1 = p0 + p2; // Y bot odd - - p5 = [fp + ARG_width]; - p4 = [fp + ARG_height]; - r0 = p5; - p4 = p4 >> 1; - p5 = p5 >> 2; - - r2 = r0 << 1; - r1 = r1 << 1; - r1 = r1 - r2; // srcStride + (srcStride - 2*width) - r1 += -8; // i0,i1 is pre read need to correct - m0 = r1; - - r2 = [fp + ARG_chromStride]; - r0 = r0 >> 1; - r2 = r2 - r0; - m1 = r2; - - /* I0,I1 - src input line pointers - * p0,p1 - luma output line pointers - * I2 - dstU - * I3 - dstV - */ - - lsetup (0f, 1f) lc1 = p4; // H/2 -0: r0 = [i0++] || r2 = [i1++]; - r1 = [i0++] || r3 = [i1++]; - r4 = byteop1p(r1:0, r3:2); - r5 = byteop1p(r1:0, r3:2) (r); - lsetup (2f, 3f) lc0 = p5; // W/4 -2: r0 = r0 >> 8(v); - r1 = r1 >> 8(v); - r2 = r2 >> 8(v); - r3 = r3 >> 8(v); - r0 = bytepack(r0, r1); - r2 = bytepack(r2, r3) || [p0++] = r0; // yyyy - r6 = pack(r5.l, r4.l) || [p1++] = r2; // yyyy - r7 = pack(r5.h, r4.h) || r0 = [i0++] || r2 = [i1++]; - r6 = bytepack(r6, r7) || r1 = [i0++] || r3 = [i1++]; - r4 = byteop1p(r1:0, r3:2) || w[i2++] = r6.l; // uu -3: r5 = byteop1p(r1:0, r3:2) (r) || w[i3++] = r6.h; // vv - - i0 += m0; - i1 += m0; - i2 += m1; - i3 += m1; - p0 = p0 + p2; -1: p1 = p1 + p2; - - (r7:4,p5:4) = [sp++]; - unlink; - rts; -DEFUN_END(uyvytoyv12) - -DEFUN(yuyvtoyv12, mL3, (const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, - int width, int height, - int lumStride, int chromStride, int srcStride)): - link 0; - [--sp] = (r7:4,p5:4); - - p0 = r1; // Y top even - - i2 = r2; // *u - r2 = [fp + ARG_vdst]; - i3 = r2; // *v - - r1 = [fp + ARG_srcStride]; - r2 = r0 + r1; - - i0 = r0; // uyvy_T even - i1 = r2; // uyvy_B odd - - p2 = [fp + ARG_lumStride]; - p1 = p0 + p2; // Y bot odd - - p5 = [fp + ARG_width]; - p4 = [fp + ARG_height]; - r0 = p5; - p4 = p4 >> 1; - p5 = p5 >> 2; - - r2 = r0 << 1; - r1 = r1 << 1; - r1 = r1 - r2; // srcStride + (srcStride - 2*width) - r1 += -8; // i0,i1 is pre read need to correct - m0 = r1; - - r2 = [fp + ARG_chromStride]; - r0 = r0 >> 1; - r2 = r2 - r0; - m1 = r2; - - /* I0,I1 - src input line pointers - * p0,p1 - luma output line pointers - * I2 - dstU - * I3 - dstV - */ - - lsetup (0f, 1f) lc1 = p4; // H/2 -0: r0 = [i0++] || r2 = [i1++]; - r1 = [i0++] || r3 = [i1++]; - r4 = bytepack(r0, r1); - r5 = bytepack(r2, r3); - lsetup (2f, 3f) lc0 = p5; // W/4 -2: r0 = r0 >> 8(v) || [p0++] = r4; // yyyy-even - r1 = r1 >> 8(v) || [p1++] = r5; // yyyy-odd - r2 = r2 >> 8(v); - r3 = r3 >> 8(v); - r4 = byteop1p(r1:0, r3:2); - r5 = byteop1p(r1:0, r3:2) (r); - r6 = pack(r5.l, r4.l); - r7 = pack(r5.h, r4.h) || r0 = [i0++] || r2 = [i1++]; - r6 = bytepack(r6, r7) || r1 = [i0++] || r3 = [i1++]; - r4 = bytepack(r0, r1) || w[i2++] = r6.l; // uu -3: r5 = bytepack(r2, r3) || w[i3++] = r6.h; // vv - - i0 += m0; - i1 += m0; - i2 += m1; - i3 += m1; - p0 = p0 + p2; -1: p1 = p1 + p2; - - (r7:4,p5:4) = [sp++]; - unlink; - rts; -DEFUN_END(yuyvtoyv12) diff --git a/libswscale/bfin/swscale_bfin.c b/libswscale/bfin/swscale_bfin.c deleted file mode 100644 index 97270b8756..0000000000 --- a/libswscale/bfin/swscale_bfin.c +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Copyright (C) 2007 Marc Hoffman <marc.hoffman@analog.com> - * - * Blackfin software video scaler operations - * - * This file is part of Libav. - * - * Libav is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * Libav is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include <stdint.h> - -#include "config.h" -#include "libavutil/attributes.h" -#include "libavutil/bfin/attributes.h" -#include "libswscale/swscale_internal.h" - -int ff_bfin_uyvytoyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, - uint8_t *vdst, int width, int height, - int lumStride, int chromStride, - int srcStride) attribute_l1_text; - -int ff_bfin_yuyvtoyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, - uint8_t *vdst, int width, int height, - int lumStride, int chromStride, - int srcStride) attribute_l1_text; - -static int uyvytoyv12_unscaled(SwsContext *c, const uint8_t *src[], - int srcStride[], int srcSliceY, int srcSliceH, - uint8_t *dst[], int dstStride[]) -{ - uint8_t *dsty = dst[0] + dstStride[0] * srcSliceY; - uint8_t *dstu = dst[1] + dstStride[1] * srcSliceY / 2; - uint8_t *dstv = dst[2] + dstStride[2] * srcSliceY / 2; - const uint8_t *ip = src[0] + srcStride[0] * srcSliceY; - int w = dstStride[0]; - - ff_bfin_uyvytoyv12(ip, dsty, dstu, dstv, w, srcSliceH, - dstStride[0], dstStride[1], srcStride[0]); - - return srcSliceH; -} - -static int yuyvtoyv12_unscaled(SwsContext *c, const uint8_t *src[], - int srcStride[], int srcSliceY, int srcSliceH, - uint8_t *dst[], int dstStride[]) -{ - uint8_t *dsty = dst[0] + dstStride[0] * srcSliceY; - uint8_t *dstu = dst[1] + dstStride[1] * srcSliceY / 2; - uint8_t *dstv = dst[2] + dstStride[2] * srcSliceY / 2; - const uint8_t *ip = src[0] + srcStride[0] * srcSliceY; - int w = dstStride[0]; - - ff_bfin_yuyvtoyv12(ip, dsty, dstu, dstv, w, srcSliceH, - dstStride[0], dstStride[1], srcStride[0]); - - return srcSliceH; -} - -av_cold void ff_get_unscaled_swscale_bfin(SwsContext *c) -{ - if (c->dstFormat == AV_PIX_FMT_YUV420P && c->srcFormat == AV_PIX_FMT_UYVY422) { - av_log(NULL, AV_LOG_VERBOSE, - "selecting Blackfin optimized uyvytoyv12_unscaled\n"); - c->swscale = uyvytoyv12_unscaled; - } - if (c->dstFormat == AV_PIX_FMT_YUV420P && c->srcFormat == AV_PIX_FMT_YUYV422) { - av_log(NULL, AV_LOG_VERBOSE, - "selecting Blackfin optimized yuyvtoyv12_unscaled\n"); - c->swscale = yuyvtoyv12_unscaled; - } -} diff --git a/libswscale/bfin/yuv2rgb_bfin.c b/libswscale/bfin/yuv2rgb_bfin.c deleted file mode 100644 index 295dc286e4..0000000000 --- a/libswscale/bfin/yuv2rgb_bfin.c +++ /dev/null @@ -1,197 +0,0 @@ -/* - * Copyright (C) 2007 Marc Hoffman <marc.hoffman@analog.com> - * - * Blackfin video color space converter operations - * convert I420 YV12 to RGB in various formats - * - * This file is part of Libav. - * - * Libav is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * Libav is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include <stdint.h> - -#include "config.h" -#include "libavutil/attributes.h" -#include "libavutil/bfin/attributes.h" -#include "libswscale/swscale_internal.h" - -void ff_bfin_yuv2rgb555_line(const uint8_t *Y, const uint8_t *U, - const uint8_t *V, uint8_t *out, - int w, uint32_t *coeffs) attribute_l1_text; - -void ff_bfin_yuv2rgb565_line(const uint8_t *Y, const uint8_t *U, - const uint8_t *V, uint8_t *out, - int w, uint32_t *coeffs) attribute_l1_text; - -void ff_bfin_yuv2rgb24_line(const uint8_t *Y, const uint8_t *U, - const uint8_t *V, uint8_t *out, - int w, uint32_t *coeffs) attribute_l1_text; - -typedef void (*ltransform)(const uint8_t *Y, const uint8_t *U, const uint8_t *V, - uint8_t *out, int w, uint32_t *coeffs); - -static void bfin_prepare_coefficients(SwsContext *c, int rgb, int masks) -{ - int oy; - oy = c->yOffset & 0xffff; - oy = oy >> 3; // keep everything U8.0 for offset calculation - - c->oc = 128 * 0x01010101U; - c->oy = oy * 0x01010101U; - - /* copy 64bit vector coeffs down to 32bit vector coeffs */ - c->cy = c->yCoeff; - c->zero = 0; - - if (rgb) { - c->crv = c->vrCoeff; - c->cbu = c->ubCoeff; - c->cgu = c->ugCoeff; - c->cgv = c->vgCoeff; - } else { - c->crv = c->ubCoeff; - c->cbu = c->vrCoeff; - c->cgu = c->vgCoeff; - c->cgv = c->ugCoeff; - } - - if (masks == 555) { - c->rmask = 0x001f * 0x00010001U; - c->gmask = 0x03e0 * 0x00010001U; - c->bmask = 0x7c00 * 0x00010001U; - } else if (masks == 565) { - c->rmask = 0x001f * 0x00010001U; - c->gmask = 0x07e0 * 0x00010001U; - c->bmask = 0xf800 * 0x00010001U; - } -} - -static int core_yuv420_rgb(SwsContext *c, const uint8_t **in, int *instrides, - int srcSliceY, int srcSliceH, uint8_t **oplanes, - int *outstrides, ltransform lcscf, - int rgb, int masks) -{ - const uint8_t *py, *pu, *pv; - uint8_t *op; - int w = instrides[0]; - int h2 = srcSliceH >> 1; - int i; - - bfin_prepare_coefficients(c, rgb, masks); - - py = in[0]; - pu = in[1 + (1 ^ rgb)]; - pv = in[1 + (0 ^ rgb)]; - - op = oplanes[0] + srcSliceY * outstrides[0]; - - for (i = 0; i < h2; i++) { - lcscf(py, pu, pv, op, w, &c->oy); - - py += instrides[0]; - op += outstrides[0]; - - lcscf(py, pu, pv, op, w, &c->oy); - - py += instrides[0]; - pu += instrides[1]; - pv += instrides[2]; - op += outstrides[0]; - } - - return srcSliceH; -} - -static int bfin_yuv420_rgb555(SwsContext *c, const uint8_t **in, int *instrides, - int srcSliceY, int srcSliceH, - uint8_t **oplanes, int *outstrides) -{ - return core_yuv420_rgb(c, in, instrides, srcSliceY, srcSliceH, oplanes, - outstrides, ff_bfin_yuv2rgb555_line, 1, 555); -} - -static int bfin_yuv420_bgr555(SwsContext *c, const uint8_t **in, int *instrides, - int srcSliceY, int srcSliceH, - uint8_t **oplanes, int *outstrides) -{ - return core_yuv420_rgb(c, in, instrides, srcSliceY, srcSliceH, oplanes, - outstrides, ff_bfin_yuv2rgb555_line, 0, 555); -} - -static int bfin_yuv420_rgb24(SwsContext *c, const uint8_t **in, int *instrides, - int srcSliceY, int srcSliceH, - uint8_t **oplanes, int *outstrides) -{ - return core_yuv420_rgb(c, in, instrides, srcSliceY, srcSliceH, oplanes, - outstrides, ff_bfin_yuv2rgb24_line, 1, 888); -} - -static int bfin_yuv420_bgr24(SwsContext *c, const uint8_t **in, int *instrides, - int srcSliceY, int srcSliceH, - uint8_t **oplanes, int *outstrides) -{ - return core_yuv420_rgb(c, in, instrides, srcSliceY, srcSliceH, oplanes, - outstrides, ff_bfin_yuv2rgb24_line, 0, 888); -} - -static int bfin_yuv420_rgb565(SwsContext *c, const uint8_t **in, int *instrides, - int srcSliceY, int srcSliceH, - uint8_t **oplanes, int *outstrides) -{ - return core_yuv420_rgb(c, in, instrides, srcSliceY, srcSliceH, oplanes, - outstrides, ff_bfin_yuv2rgb565_line, 1, 565); -} - -static int bfin_yuv420_bgr565(SwsContext *c, const uint8_t **in, int *instrides, - int srcSliceY, int srcSliceH, - uint8_t **oplanes, int *outstrides) -{ - return core_yuv420_rgb(c, in, instrides, srcSliceY, srcSliceH, oplanes, - outstrides, ff_bfin_yuv2rgb565_line, 0, 565); -} - -av_cold SwsFunc ff_yuv2rgb_init_bfin(SwsContext *c) -{ - SwsFunc f; - - switch (c->dstFormat) { - case AV_PIX_FMT_RGB555: - f = bfin_yuv420_rgb555; - break; - case AV_PIX_FMT_BGR555: - f = bfin_yuv420_bgr555; - break; - case AV_PIX_FMT_RGB565: - f = bfin_yuv420_rgb565; - break; - case AV_PIX_FMT_BGR565: - f = bfin_yuv420_bgr565; - break; - case AV_PIX_FMT_RGB24: - f = bfin_yuv420_rgb24; - break; - case AV_PIX_FMT_BGR24: - f = bfin_yuv420_bgr24; - break; - default: - return 0; - } - - av_log(c, AV_LOG_INFO, "BlackFin accelerated color space converter %s\n", - sws_format_name(c->dstFormat)); - - return f; -} diff --git a/libswscale/swscale.h b/libswscale/swscale.h index 8fe27dfece..715f559590 100644 --- a/libswscale/swscale.h +++ b/libswscale/swscale.h @@ -92,7 +92,9 @@ const char *swscale_license(void); #define SWS_CPU_CAPS_MMX2 0x20000000 #define SWS_CPU_CAPS_3DNOW 0x40000000 #define SWS_CPU_CAPS_ALTIVEC 0x10000000 +#if FF_API_ARCH_BFIN #define SWS_CPU_CAPS_BFIN 0x01000000 +#endif #define SWS_CPU_CAPS_SSE2 0x02000000 #endif diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h index bd57b20d6f..7ecf222e7f 100644 --- a/libswscale/swscale_internal.h +++ b/libswscale/swscale_internal.h @@ -436,20 +436,6 @@ typedef struct SwsContext { vector signed short *vYCoeffsBank, *vCCoeffsBank; #endif -#if ARCH_BFIN - DECLARE_ALIGNED(4, uint32_t, oy); - DECLARE_ALIGNED(4, uint32_t, oc); - DECLARE_ALIGNED(4, uint32_t, zero); - DECLARE_ALIGNED(4, uint32_t, cy); - DECLARE_ALIGNED(4, uint32_t, crv); - DECLARE_ALIGNED(4, uint32_t, rmask); - DECLARE_ALIGNED(4, uint32_t, cbu); - DECLARE_ALIGNED(4, uint32_t, bmask); - DECLARE_ALIGNED(4, uint32_t, cgu); - DECLARE_ALIGNED(4, uint32_t, cgv); - DECLARE_ALIGNED(4, uint32_t, gmask); -#endif - /* function pointers for swscale() */ yuv2planar1_fn yuv2plane1; yuv2planarX_fn yuv2planeX; @@ -568,7 +554,6 @@ void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufI SwsFunc ff_yuv2rgb_init_x86(SwsContext *c); SwsFunc ff_yuv2rgb_init_ppc(SwsContext *c); -SwsFunc ff_yuv2rgb_init_bfin(SwsContext *c); const char *sws_format_name(enum AVPixelFormat format); @@ -733,7 +718,6 @@ extern const AVClass sws_context_class; * source and destination formats, bit depths, flags, etc. */ void ff_get_unscaled_swscale(SwsContext *c); -void ff_get_unscaled_swscale_bfin(SwsContext *c); void ff_get_unscaled_swscale_ppc(SwsContext *c); /** diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c index da1bde13b0..02850eac17 100644 --- a/libswscale/swscale_unscaled.c +++ b/libswscale/swscale_unscaled.c @@ -1117,8 +1117,6 @@ void ff_get_unscaled_swscale(SwsContext *c) c->swscale = planarCopyWrapper; } - if (ARCH_BFIN) - ff_get_unscaled_swscale_bfin(c); if (ARCH_PPC) ff_get_unscaled_swscale_ppc(c); } diff --git a/libswscale/version.h b/libswscale/version.h index 0f32c7ab03..d402c68ae1 100644 --- a/libswscale/version.h +++ b/libswscale/version.h @@ -52,5 +52,8 @@ #ifndef FF_API_SWS_CPU_CAPS #define FF_API_SWS_CPU_CAPS (LIBSWSCALE_VERSION_MAJOR < 3) #endif +#ifndef FF_API_ARCH_BFIN +#define FF_API_ARCH_BFIN (LIBSWSCALE_VERSION_MAJOR < 3) +#endif #endif /* SWSCALE_VERSION_H */ diff --git a/libswscale/yuv2rgb.c b/libswscale/yuv2rgb.c index c8497dc238..480fbe3999 100644 --- a/libswscale/yuv2rgb.c +++ b/libswscale/yuv2rgb.c @@ -560,8 +560,6 @@ SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c) { SwsFunc t = NULL; - if (ARCH_BFIN) - t = ff_yuv2rgb_init_bfin(c); if (ARCH_PPC) t = ff_yuv2rgb_init_ppc(c); if (ARCH_X86) |