summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--libpostproc/postprocess_internal.h10
-rw-r--r--libpostproc/postprocess_template.c81
2 files changed, 46 insertions, 45 deletions
diff --git a/libpostproc/postprocess_internal.h b/libpostproc/postprocess_internal.h
index 1ebd974286..c1a306dd32 100644
--- a/libpostproc/postprocess_internal.h
+++ b/libpostproc/postprocess_internal.h
@@ -143,8 +143,11 @@ typedef struct PPContext{
DECLARE_ALIGNED(8, uint64_t, pQPb);
DECLARE_ALIGNED(8, uint64_t, pQPb2);
- DECLARE_ALIGNED(8, uint64_t, mmxDcOffset)[64];
- DECLARE_ALIGNED(8, uint64_t, mmxDcThreshold)[64];
+ DECLARE_ALIGNED(32, uint64_t, pQPb_block)[4];
+ DECLARE_ALIGNED(32, uint64_t, pQPb2_block)[4];
+
+ DECLARE_ALIGNED(32, uint64_t, mmxDcOffset)[64];
+ DECLARE_ALIGNED(32, uint64_t, mmxDcThreshold)[64];
QP_STORE_T *stdQPTable; ///< used to fix MPEG2 style qscale
QP_STORE_T *nonBQPTable;
@@ -153,6 +156,9 @@ typedef struct PPContext{
int QP;
int nonBQP;
+ DECLARE_ALIGNED(32, int, QP_block)[4];
+ DECLARE_ALIGNED(32, int, nonBQP_block)[4];
+
int frameNum;
int cpuCaps;
diff --git a/libpostproc/postprocess_template.c b/libpostproc/postprocess_template.c
index e153b13408..b7296c4da1 100644
--- a/libpostproc/postprocess_template.c
+++ b/libpostproc/postprocess_template.c
@@ -3479,7 +3479,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
#endif
const int8_t *QPptr= &QPs[(y>>qpVShift)*QPStride];
int8_t *nonBQPptr= &c.nonBQPTable[(y>>qpVShift)*FFABS(QPStride)];
- int QP=0;
+ int QP=0, nonBQP=0;
/* can we mess with a 8x16 block from srcBlock/dstBlock downwards and 1 line upwards
if not than use a temporary buffer */
if(y+15 >= height){
@@ -3512,6 +3512,29 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
int endx = FFMIN(width, x+32);
uint8_t *dstBlockStart = dstBlock;
const uint8_t *srcBlockStart = srcBlock;
+ int qp_index = 0;
+ for(qp_index=0; qp_index < (endx-startx)/BLOCK_SIZE; qp_index++){
+ QP = QPptr[(x+qp_index*BLOCK_SIZE)>>qpHShift];
+ nonBQP = nonBQPptr[(x+qp_index*BLOCK_SIZE)>>qpHShift];
+ if(!isColor){
+ QP= (QP* QPCorrecture + 256*128)>>16;
+ nonBQP= (nonBQP* QPCorrecture + 256*128)>>16;
+ yHistogram[(srcBlock+qp_index*8)[srcStride*12 + 4]]++;
+ }
+ c.QP_block[qp_index] = QP;
+ c.nonBQP_block[qp_index] = nonBQP;
+#if TEMPLATE_PP_MMX
+ __asm__ volatile(
+ "movd %1, %%mm7 \n\t"
+ "packuswb %%mm7, %%mm7 \n\t" // 0, 0, 0, QP, 0, 0, 0, QP
+ "packuswb %%mm7, %%mm7 \n\t" // 0,QP, 0, QP, 0,QP, 0, QP
+ "packuswb %%mm7, %%mm7 \n\t" // QP,..., QP
+ "movq %%mm7, %0 \n\t"
+ : "=m" (c.pQPb_block[qp_index])
+ : "r" (QP)
+ );
+#endif
+ }
for(; x < endx; x+=BLOCK_SIZE){
RENAME(prefetchnta)(srcBlock + (((x>>2)&6) + copyAhead)*srcStride + 32);
RENAME(prefetchnta)(srcBlock + (((x>>2)&6) + copyAhead+1)*srcStride + 32);
@@ -3543,27 +3566,15 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
dstBlock = dstBlockStart;
srcBlock = srcBlockStart;
- for(x = startx; x < endx; x+=BLOCK_SIZE){
+ for(x = startx, qp_index = 0; x < endx; x+=BLOCK_SIZE, qp_index++){
const int stride= dstStride;
- QP = QPptr[x>>qpHShift];
- c.nonBQP = nonBQPptr[x>>qpHShift];
- if(!isColor){
- QP= (QP* QPCorrecture + 256*128)>>16;
- c.nonBQP= (c.nonBQP* QPCorrecture + 256*128)>>16;
- yHistogram[srcBlock[srcStride*12 + 4]]++;
- }
- c.QP= QP;
-#if TEMPLATE_PP_MMX
- __asm__ volatile(
- "movd %1, %%mm7 \n\t"
- "packuswb %%mm7, %%mm7 \n\t" // 0, 0, 0, QP, 0, 0, 0, QP
- "packuswb %%mm7, %%mm7 \n\t" // 0,QP, 0, QP, 0,QP, 0, QP
- "packuswb %%mm7, %%mm7 \n\t" // QP,..., QP
- "movq %%mm7, %0 \n\t"
- : "=m" (c.pQPb)
- : "r" (QP)
- );
-#endif
+ // Temporary: copy this block's per-block values into the scalar fields so existing code keeps working.
+ // Eventually QP, nonBQP, etc. will be arrays themselves and this copy will be unnecessary.
+ c.QP = c.QP_block[qp_index];
+ c.nonBQP = c.nonBQP_block[qp_index];
+ c.pQPb = c.pQPb_block[qp_index];
+ c.pQPb2 = c.pQPb2_block[qp_index];
+
/* only deblock if we have 2 blocks */
if(y + 8 < height){
if(mode & V_X1_FILTER)
@@ -3587,30 +3598,14 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
dstBlock = dstBlockStart;
srcBlock = srcBlockStart;
- for(x = startx; x < endx; x+=BLOCK_SIZE){
+ for(x = startx, qp_index=0; x < endx; x+=BLOCK_SIZE, qp_index++){
const int stride= dstStride;
av_unused uint8_t *tmpXchg;
-
- if(isColor){
- QP= QPptr[x>>qpHShift];
- c.nonBQP= nonBQPptr[x>>qpHShift];
- }else{
- QP= QPptr[x>>4];
- QP= (QP* QPCorrecture + 256*128)>>16;
- c.nonBQP= nonBQPptr[x>>4];
- c.nonBQP= (c.nonBQP* QPCorrecture + 256*128)>>16;
- }
- c.QP= QP;
+ c.QP = c.QP_block[qp_index];
+ c.nonBQP = c.nonBQP_block[qp_index];
+ c.pQPb = c.pQPb_block[qp_index];
+ c.pQPb2 = c.pQPb2_block[qp_index];
#if TEMPLATE_PP_MMX
- __asm__ volatile(
- "movd %1, %%mm7 \n\t"
- "packuswb %%mm7, %%mm7 \n\t" // 0, 0, 0, QP, 0, 0, 0, QP
- "packuswb %%mm7, %%mm7 \n\t" // 0,QP, 0, QP, 0,QP, 0, QP
- "packuswb %%mm7, %%mm7 \n\t" // QP,..., QP
- "movq %%mm7, %0 \n\t"
- : "=m" (c.pQPb)
- : "r" (QP)
- );
RENAME(transpose1)(tempBlock1, tempBlock2, dstBlock, dstStride);
#endif
/* check if we have a previous block to deblock it with dstBlock */
@@ -3632,7 +3627,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
#else
if(mode & H_X1_FILTER)
- horizX1Filter(dstBlock-4, stride, QP);
+ horizX1Filter(dstBlock-4, stride, c.QP);
else if(mode & H_DEBLOCK){
#if TEMPLATE_PP_ALTIVEC
DECLARE_ALIGNED(16, unsigned char, tempBlock)[272];