diff options
Diffstat (limited to 'libavcodec/hevcpred_template.c')
-rw-r--r-- | libavcodec/hevcpred_template.c | 291 |
1 files changed, 156 insertions, 135 deletions
diff --git a/libavcodec/hevcpred_template.c b/libavcodec/hevcpred_template.c index 53b9c590b6..6b763b3a73 100644 --- a/libavcodec/hevcpred_template.c +++ b/libavcodec/hevcpred_template.c @@ -3,28 +3,27 @@ * * Copyright (C) 2012 - 2013 Guillaume Martres * - * This file is part of Libav. + * This file is part of FFmpeg. * - * Libav is free software; you can redistribute it and/or + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * Libav is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with Libav; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #include "libavutil/pixdesc.h" -#include "hevc.h" - #include "bit_depth_template.c" +#include "hevcpred.h" #define POS(x, y) src[(x) + stride * (y)] @@ -38,10 +37,9 @@ static av_always_inline void FUNC(intra_pred)(HEVCContext *s, int x0, int y0, #define MVF_PU(x, y) \ MVF(PU(x0 + ((x) << hshift)), PU(y0 + ((y) << vshift))) #define IS_INTRA(x, y) \ - MVF_PU(x, y).is_intra + (MVF_PU(x, y).pred_flag == PF_INTRA) #define MIN_TB_ADDR_ZS(x, y) \ - s->pps->min_tb_addr_zs[(y) * s->sps->min_tb_width + (x)] - + s->pps->min_tb_addr_zs[(y) * (s->sps->tb_mask+2) + (x)] #define EXTEND(ptr, val, len) \ do { \ pixel4 pix = PIXEL_SPLAT_X4(val); \ @@ -49,36 +47,43 @@ do { \ AV_WN4P(ptr + i, pix); \ } while (0) +#define EXTEND_RIGHT_CIP(ptr, start, length) \ + for (i = start; i < (start) + (length); i += 4) \ + if (!IS_INTRA(i, -1)) \ + AV_WN4P(&ptr[i], a); \ + else \ + a = PIXEL_SPLAT_X4(ptr[i+3]) #define EXTEND_LEFT_CIP(ptr, start, length) \ - for (i = (start); i > (start) - (length); i--) \ + for (i = start; i > (start) - (length); i--) \ if (!IS_INTRA(i - 1, -1)) \ ptr[i - 1] = ptr[i] -#define EXTEND_RIGHT_CIP(ptr, start, length) \ - for (i = (start); i < (start) + (length); i++) \ - if (!IS_INTRA(i, -1)) \ - ptr[i] = ptr[i - 1] -#define EXTEND_UP_CIP(ptr, start, length) \ - for (i = (start); i > (start) - (length); i--) \ - if (!IS_INTRA(-1, i - 1)) \ - ptr[i - 1] = ptr[i] -#define EXTEND_UP_CIP_0(ptr, start, length) \ - for (i = (start); i > (start) - (length); i--) \ - ptr[i - 1] = ptr[i] -#define EXTEND_DOWN_CIP(ptr, start, length) \ - for (i = (start); i < (start) + (length); i++) \ - if (!IS_INTRA(-1, i)) \ - ptr[i] = ptr[i - 1] - HEVCLocalContext *lc = &s->HEVClc; +#define EXTEND_UP_CIP(ptr, start, length) \ + for (i = (start); i > (start) - (length); i -= 4) \ + if (!IS_INTRA(-1, i - 3)) \ + AV_WN4P(&ptr[i - 3], a); \ + else \ + a = PIXEL_SPLAT_X4(ptr[i - 3]) +#define EXTEND_DOWN_CIP(ptr, start, length) \ + for (i = start; i < (start) + (length); i += 4) \ + if (!IS_INTRA(-1, i)) \ + AV_WN4P(&ptr[i], a); \ + else \ + a = PIXEL_SPLAT_X4(ptr[i + 3]) + + HEVCLocalContext *lc = s->HEVClc; int i; int hshift = s->sps->hshift[c_idx]; int vshift = s->sps->vshift[c_idx]; int size = (1 << log2_size); - int size_in_luma = size << hshift; - int size_in_tbs = size_in_luma >> s->sps->log2_min_tb_size; + int size_in_luma_h = size << hshift; + int size_in_tbs_h = size_in_luma_h >> s->sps->log2_min_tb_size; + int size_in_luma_v = size << vshift; + int size_in_tbs_v = size_in_luma_v >> s->sps->log2_min_tb_size; int x = x0 >> hshift; int y = y0 >> vshift; - int x_tb = x0 >> s->sps->log2_min_tb_size; - int y_tb = y0 >> s->sps->log2_min_tb_size; + int x_tb = (x0 >> s->sps->log2_min_tb_size) & s->sps->tb_mask; + int y_tb = (y0 >> s->sps->log2_min_tb_size) & s->sps->tb_mask; + int cur_tb_addr = MIN_TB_ADDR_ZS(x_tb, y_tb); ptrdiff_t stride = s->frame->linesize[c_idx] / sizeof(pixel); @@ -86,87 +91,77 @@ do { \ int min_pu_width = s->sps->min_pu_width; - enum IntraPredMode mode = c_idx ? lc->pu.intra_pred_mode_c : - lc->tu.cur_intra_pred_mode; - - pixel left_array[2 * MAX_TB_SIZE + 1]; - pixel filtered_left_array[2 * MAX_TB_SIZE + 1]; - pixel top_array[2 * MAX_TB_SIZE + 1]; - pixel filtered_top_array[2 * MAX_TB_SIZE + 1]; - - pixel *left = left_array + 1; - pixel *top = top_array + 1; - pixel *filtered_left = filtered_left_array + 1; - pixel *filtered_top = filtered_top_array + 1; - - int cand_bottom_left = lc->na.cand_bottom_left && cur_tb_addr > MIN_TB_ADDR_ZS(x_tb - 1, y_tb + size_in_tbs); + enum IntraPredMode mode = c_idx ? lc->tu.intra_pred_mode_c : + lc->tu.intra_pred_mode; + pixel4 a; + pixel left_array[2 * MAX_TB_SIZE + 1]; + pixel filtered_left_array[2 * MAX_TB_SIZE + 1]; + pixel top_array[2 * MAX_TB_SIZE + 1]; + pixel filtered_top_array[2 * MAX_TB_SIZE + 1]; + + pixel *left = left_array + 1; + pixel *top = top_array + 1; + pixel *filtered_left = filtered_left_array + 1; + pixel *filtered_top = filtered_top_array + 1; + int cand_bottom_left = lc->na.cand_bottom_left && cur_tb_addr > MIN_TB_ADDR_ZS( x_tb - 1, (y_tb + size_in_tbs_v) & s->sps->tb_mask); int cand_left = lc->na.cand_left; int cand_up_left = lc->na.cand_up_left; int cand_up = lc->na.cand_up; - int cand_up_right = lc->na.cand_up_right && cur_tb_addr > MIN_TB_ADDR_ZS(x_tb + size_in_tbs, y_tb - 1); + int cand_up_right = lc->na.cand_up_right && cur_tb_addr > MIN_TB_ADDR_ZS((x_tb + size_in_tbs_h) & s->sps->tb_mask, y_tb - 1); - int bottom_left_size = (FFMIN(y0 + 2 * size_in_luma, s->sps->height) - - (y0 + size_in_luma)) >> vshift; - int top_right_size = (FFMIN(x0 + 2 * size_in_luma, s->sps->width) - - (x0 + size_in_luma)) >> hshift; + int bottom_left_size = (FFMIN(y0 + 2 * size_in_luma_v, s->sps->height) - + (y0 + size_in_luma_v)) >> vshift; + int top_right_size = (FFMIN(x0 + 2 * size_in_luma_h, s->sps->width) - + (x0 + size_in_luma_h)) >> hshift; if (s->pps->constrained_intra_pred_flag == 1) { - int size_in_luma_pu = PU(size_in_luma); - int on_pu_edge_x = !(x0 & ((1 << s->sps->log2_min_pu_size) - 1)); - int on_pu_edge_y = !(y0 & ((1 << s->sps->log2_min_pu_size) - 1)); - if (!size_in_luma_pu) - size_in_luma_pu++; + int size_in_luma_pu_v = PU(size_in_luma_v); + int size_in_luma_pu_h = PU(size_in_luma_h); + int on_pu_edge_x = !av_mod_uintp2(x0, s->sps->log2_min_pu_size); + int on_pu_edge_y = !av_mod_uintp2(y0, s->sps->log2_min_pu_size); + if (!size_in_luma_pu_h) + size_in_luma_pu_h++; if (cand_bottom_left == 1 && on_pu_edge_x) { int x_left_pu = PU(x0 - 1); - int y_bottom_pu = PU(y0 + size_in_luma); - int max = FFMIN(size_in_luma_pu, s->sps->min_pu_height - y_bottom_pu); + int y_bottom_pu = PU(y0 + size_in_luma_v); + int max = FFMIN(size_in_luma_pu_v, s->sps->min_pu_height - y_bottom_pu); cand_bottom_left = 0; - for (i = 0; i < max; i++) - cand_bottom_left |= MVF(x_left_pu, y_bottom_pu + i).is_intra; + for (i = 0; i < max; i += 2) + cand_bottom_left |= (MVF(x_left_pu, y_bottom_pu + i).pred_flag == PF_INTRA); } if (cand_left == 1 && on_pu_edge_x) { int x_left_pu = PU(x0 - 1); int y_left_pu = PU(y0); - int max = FFMIN(size_in_luma_pu, s->sps->min_pu_height - y_left_pu); + int max = FFMIN(size_in_luma_pu_v, s->sps->min_pu_height - y_left_pu); cand_left = 0; - for (i = 0; i < max; i++) - cand_left |= MVF(x_left_pu, y_left_pu + i).is_intra; + for (i = 0; i < max; i += 2) + cand_left |= (MVF(x_left_pu, y_left_pu + i).pred_flag == PF_INTRA); } if (cand_up_left == 1) { int x_left_pu = PU(x0 - 1); int y_top_pu = PU(y0 - 1); - cand_up_left = MVF(x_left_pu, y_top_pu).is_intra; + cand_up_left = MVF(x_left_pu, y_top_pu).pred_flag == PF_INTRA; } if (cand_up == 1 && on_pu_edge_y) { int x_top_pu = PU(x0); int y_top_pu = PU(y0 - 1); - int max = FFMIN(size_in_luma_pu, s->sps->min_pu_width - x_top_pu); + int max = FFMIN(size_in_luma_pu_h, s->sps->min_pu_width - x_top_pu); cand_up = 0; - for (i = 0; i < max; i++) - cand_up |= MVF(x_top_pu + i, y_top_pu).is_intra; + for (i = 0; i < max; i += 2) + cand_up |= (MVF(x_top_pu + i, y_top_pu).pred_flag == PF_INTRA); } if (cand_up_right == 1 && on_pu_edge_y) { int y_top_pu = PU(y0 - 1); - int x_right_pu = PU(x0 + size_in_luma); - int max = FFMIN(size_in_luma_pu, s->sps->min_pu_width - x_right_pu); + int x_right_pu = PU(x0 + size_in_luma_h); + int max = FFMIN(size_in_luma_pu_h, s->sps->min_pu_width - x_right_pu); cand_up_right = 0; - for (i = 0; i < max; i++) - cand_up_right |= MVF(x_right_pu + i, y_top_pu).is_intra; + for (i = 0; i < max; i += 2) + cand_up_right |= (MVF(x_right_pu + i, y_top_pu).pred_flag == PF_INTRA); } - for (i = 0; i < 2 * MAX_TB_SIZE; i++) { - left[i] = 128; - top[i] = 128; - } - } - if (cand_bottom_left) { - for (i = size; i < size + bottom_left_size; i++) - left[i] = POS(-1, i); - EXTEND(left + size + bottom_left_size, POS(-1, size + bottom_left_size - 1), - size - bottom_left_size); + memset(left, 128, 2 * MAX_TB_SIZE*sizeof(pixel)); + memset(top , 128, 2 * MAX_TB_SIZE*sizeof(pixel)); + top[-1] = 128; } - if (cand_left) - for (i = size - 1; i >= 0; i--) - left[i] = POS(-1, i); if (cand_up_left) { left[-1] = POS(-1, -1); top[-1] = left[-1]; @@ -178,6 +173,15 @@ do { \ EXTEND(top + size + top_right_size, POS(size + top_right_size - 1, -1), size - top_right_size); } + if (cand_left) + for (i = 0; i < size; i++) + left[i] = POS(-1, i); + if (cand_bottom_left) { + for (i = size; i < size + bottom_left_size; i++) + left[i] = POS(-1, i); + EXTEND(left + size + bottom_left_size, POS(-1, size + bottom_left_size - 1), + size - bottom_left_size); + } if (s->pps->constrained_intra_pred_flag == 1) { if (cand_bottom_left || cand_left || cand_up_left || cand_up || cand_up_right) { @@ -203,7 +207,6 @@ do { \ j++; EXTEND_LEFT_CIP(top, j, j + 1); left[-1] = top[-1]; - j = 0; } } else { j = 0; @@ -217,24 +220,30 @@ do { \ top[-1] = top[0]; } left[-1] = top[-1]; - j = 0; } + left[-1] = top[-1]; if (cand_bottom_left || cand_left) { - EXTEND_DOWN_CIP(left, j, size_max_y - j); + a = PIXEL_SPLAT_X4(left[-1]); + EXTEND_DOWN_CIP(left, 0, size_max_y); } if (!cand_left) EXTEND(left, left[-1], size); if (!cand_bottom_left) EXTEND(left + size, left[size - 1], size); if (x0 != 0 && y0 != 0) { + a = PIXEL_SPLAT_X4(left[size_max_y - 1]); EXTEND_UP_CIP(left, size_max_y - 1, size_max_y); + if (!IS_INTRA(-1, - 1)) + left[-1] = left[0]; } else if (x0 == 0) { - EXTEND_UP_CIP_0(left, size_max_y - 1, size_max_y); + EXTEND(left, 0, size_max_y); } else { - EXTEND_UP_CIP(left, size_max_y - 1, size_max_y - 1); + a = PIXEL_SPLAT_X4(left[size_max_y - 1]); + EXTEND_UP_CIP(left, size_max_y - 1, size_max_y); } top[-1] = left[-1]; if (y0 != 0) { + a = PIXEL_SPLAT_X4(left[-1]); EXTEND_RIGHT_CIP(top, 0, size_max_x); } } @@ -278,40 +287,42 @@ do { \ top[-1] = left[-1]; // Filtering process - if (c_idx == 0 && mode != INTRA_DC && size != 4) { - int intra_hor_ver_dist_thresh[] = { 7, 1, 0 }; - int min_dist_vert_hor = FFMIN(FFABS((int)mode - 26), - FFABS((int)mode - 10)); - if (min_dist_vert_hor > intra_hor_ver_dist_thresh[log2_size - 3]) { - int threshold = 1 << (BIT_DEPTH - 5); - if (s->sps->sps_strong_intra_smoothing_enable_flag && - log2_size == 5 && - FFABS(top[-1] + top[63] - 2 * top[31]) < threshold && - FFABS(left[-1] + left[63] - 2 * left[31]) < threshold) { - // We can't just overwrite values in top because it could be - // a pointer into src - filtered_top[-1] = top[-1]; - filtered_top[63] = top[63]; - for (i = 0; i < 63; i++) - filtered_top[i] = ((64 - (i + 1)) * top[-1] + - (i + 1) * top[63] + 32) >> 6; - for (i = 0; i < 63; i++) - left[i] = ((64 - (i + 1)) * left[-1] + - (i + 1) * left[63] + 32) >> 6; - top = filtered_top; - } else { - filtered_left[2 * size - 1] = left[2 * size - 1]; - filtered_top[2 * size - 1] = top[2 * size - 1]; - for (i = 2 * size - 2; i >= 0; i--) - filtered_left[i] = (left[i + 1] + 2 * left[i] + - left[i - 1] + 2) >> 2; - filtered_top[-1] = - filtered_left[-1] = (left[0] + 2 * left[-1] + top[0] + 2) >> 2; - for (i = 2 * size - 2; i >= 0; i--) - filtered_top[i] = (top[i + 1] + 2 * top[i] + - top[i - 1] + 2) >> 2; - left = filtered_left; - top = filtered_top; + if (!s->sps->intra_smoothing_disabled_flag && (c_idx == 0 || s->sps->chroma_format_idc == 3)) { + if (mode != INTRA_DC && size != 4){ + int intra_hor_ver_dist_thresh[] = { 7, 1, 0 }; + int min_dist_vert_hor = FFMIN(FFABS((int)(mode - 26U)), + FFABS((int)(mode - 10U))); + if (min_dist_vert_hor > intra_hor_ver_dist_thresh[log2_size - 3]) { + int threshold = 1 << (BIT_DEPTH - 5); + if (s->sps->sps_strong_intra_smoothing_enable_flag && c_idx == 0 && + log2_size == 5 && + FFABS(top[-1] + top[63] - 2 * top[31]) < threshold && + FFABS(left[-1] + left[63] - 2 * left[31]) < threshold) { + // We can't just overwrite values in top because it could be + // a pointer into src + filtered_top[-1] = top[-1]; + filtered_top[63] = top[63]; + for (i = 0; i < 63; i++) + filtered_top[i] = ((64 - (i + 1)) * top[-1] + + (i + 1) * top[63] + 32) >> 6; + for (i = 0; i < 63; i++) + left[i] = ((64 - (i + 1)) * left[-1] + + (i + 1) * left[63] + 32) >> 6; + top = filtered_top; + } else { + filtered_left[2 * size - 1] = left[2 * size - 1]; + filtered_top[2 * size - 1] = top[2 * size - 1]; + for (i = 2 * size - 2; i >= 0; i--) + filtered_left[i] = (left[i + 1] + 2 * left[i] + + left[i - 1] + 2) >> 2; + filtered_top[-1] = + filtered_left[-1] = (left[0] + 2 * left[-1] + top[0] + 2) >> 2; + for (i = 2 * size - 2; i >= 0; i--) + filtered_top[i] = (top[i + 1] + 2 * top[i] + + top[i - 1] + 2) >> 2; + left = filtered_left; + top = filtered_top; + } } } } @@ -394,8 +405,8 @@ static void FUNC(pred_dc)(uint8_t *_src, const uint8_t *_top, a = PIXEL_SPLAT_X4(dc); for (i = 0; i < size; i++) - for (j = 0; j < size / 4; j++) - AV_WN4PA(&POS(j * 4, i), a); + for (j = 0; j < size; j+=4) + AV_WN4P(&POS(j, i), a); if (c_idx == 0 && size < 32) { POS(0, 0) = (left[0] + 2 * dc + top[0] + 2) >> 2; @@ -427,7 +438,7 @@ static av_always_inline void FUNC(pred_angular)(uint8_t *_src, }; int angle = intra_pred_angle[mode - 2]; - pixel ref_array[3 * MAX_TB_SIZE + 1]; + pixel ref_array[3 * MAX_TB_SIZE + 4]; pixel *ref_tmp = ref_array + size; const pixel *ref; int last = (size * angle) >> 5; @@ -435,8 +446,8 @@ static av_always_inline void FUNC(pred_angular)(uint8_t *_src, if (mode >= 18) { ref = top - 1; if (angle < 0 && last < -1) { - for (x = 0; x <= size; x++) - ref_tmp[x] = top[x - 1]; + for (x = 0; x <= size; x += 4) + AV_WN4P(&ref_tmp[x], AV_RN4P(&top[x - 1])); for (x = last; x <= -1; x++) ref_tmp[x] = left[-1 + ((x * inv_angle[mode - 11] + 128) >> 8)]; ref = ref_tmp; @@ -446,13 +457,19 @@ static av_always_inline void FUNC(pred_angular)(uint8_t *_src, int idx = ((y + 1) * angle) >> 5; int fact = ((y + 1) * angle) & 31; if (fact) { - for (x = 0; x < size; x++) { - POS(x, y) = ((32 - fact) * ref[x + idx + 1] + - fact * ref[x + idx + 2] + 16) >> 5; + for (x = 0; x < size; x += 4) { + POS(x , y) = ((32 - fact) * ref[x + idx + 1] + + fact * ref[x + idx + 2] + 16) >> 5; + POS(x + 1, y) = ((32 - fact) * ref[x + 1 + idx + 1] + + fact * ref[x + 1 + idx + 2] + 16) >> 5; + POS(x + 2, y) = ((32 - fact) * ref[x + 2 + idx + 1] + + fact * ref[x + 2 + idx + 2] + 16) >> 5; + POS(x + 3, y) = ((32 - fact) * ref[x + 3 + idx + 1] + + fact * ref[x + 3 + idx + 2] + 16) >> 5; } } else { - for (x = 0; x < size; x++) - POS(x, y) = ref[x + idx + 1]; + for (x = 0; x < size; x += 4) + AV_WN4P(&POS(x, y), AV_RN4P(&ref[x + idx + 1])); } } if (mode == 26 && c_idx == 0 && size < 32) { @@ -462,8 +479,8 @@ static av_always_inline void FUNC(pred_angular)(uint8_t *_src, } else { ref = left - 1; if (angle < 0 && last < -1) { - for (x = 0; x <= size; x++) - ref_tmp[x] = left[x - 1]; + for (x = 0; x <= size; x += 4) + AV_WN4P(&ref_tmp[x], AV_RN4P(&left[x - 1])); for (x = last; x <= -1; x++) ref_tmp[x] = top[-1 + ((x * inv_angle[mode - 11] + 128) >> 8)]; ref = ref_tmp; @@ -483,8 +500,12 @@ static av_always_inline void FUNC(pred_angular)(uint8_t *_src, } } if (mode == 10 && c_idx == 0 && size < 32) { - for (x = 0; x < size; x++) - POS(x, 0) = av_clip_pixel(left[0] + ((top[x] - top[-1]) >> 1)); + for (x = 0; x < size; x += 4) { + POS(x, 0) = av_clip_pixel(left[0] + ((top[x ] - top[-1]) >> 1)); + POS(x + 1, 0) = av_clip_pixel(left[0] + ((top[x + 1] - top[-1]) >> 1)); + POS(x + 2, 0) = av_clip_pixel(left[0] + ((top[x + 2] - top[-1]) >> 1)); + POS(x + 3, 0) = av_clip_pixel(left[0] + ((top[x + 3] - top[-1]) >> 1)); + } } } } |