-rw-r--r--  vp9/encoder/vp9_mcomp.c          | 227
-rw-r--r--  vp9/encoder/vp9_mcomp.h          |   2
-rw-r--r--  vp9/encoder/vp9_speed_features.c |   4
-rw-r--r--  vp9/encoder/vp9_speed_features.h |   2
4 files changed, 191 insertions, 44 deletions
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index 89c37d98e..258b45994 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -286,7 +286,71 @@ static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
   bestmv->row *= 8; \
   bestmv->col *= 8;

-int vp9_find_best_sub_pixel_tree_pruned(const MACROBLOCK *x,
+#if CONFIG_VP9_HIGHBITDEPTH
+#define SETUP_CENTER_ERROR \
+  if (second_pred != NULL) { \
+    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { \
+      DECLARE_ALIGNED_ARRAY(16, uint16_t, comp_pred16, 64 * 64); \
+      vp9_high_comp_avg_pred(comp_pred16, second_pred, w, h, y + offset, \
+                             y_stride); \
+      besterr = vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, z, src_stride, \
+                        sse1); \
+    } else { \
+      DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64); \
+      vp9_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride); \
+      besterr = vfp->vf(comp_pred, w, z, src_stride, sse1); \
+    } \
+  } else { \
+    besterr = vfp->vf(y + offset, y_stride, z, src_stride, sse1); \
+  } \
+  *distortion = besterr; \
+  besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
+
+#else
+
+#define SETUP_CENTER_ERROR \
+  if (second_pred != NULL) { \
+    DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64); \
+    vp9_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride); \
+    besterr = vfp->vf(comp_pred, w, z, src_stride, sse1); \
+  } else { \
+    besterr = vfp->vf(y + offset, y_stride, z, src_stride, sse1); \
+  } \
+  *distortion = besterr; \
+  besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+
+
+
+static INLINE int divide_and_round(const int n, const int d) {
+  return ((n < 0) ^ (d < 0)) ? ((n - d / 2) / d) : ((n + d / 2) / d);
+}
+
+static INLINE int is_sad_list_wellbehaved(int *sad_list) {
+  return sad_list[0] >= sad_list[1] &&
+         sad_list[0] >= sad_list[2] &&
+         sad_list[0] >= sad_list[3] &&
+         sad_list[0] >= sad_list[4];
+}
+
+// Returns surface minima estimate at given precision in 1/2^n bits.
+// Assume a model for the cost surface: S = A(x - x0)^2 + B(y - y0)^2 + C
+// For a given set of costs S0, S1, S2, S3, S4 at points
+// (y, x) = (0, 0), (0, -1), (1, 0), (0, 1) and (-1, 0) respectively,
+// the solution for the location of the minima (x0, y0) is given by:
+// x0 = 1/2 (S1 - S3)/(S1 + S3 - 2*S0),
+// y0 = 1/2 (S4 - S2)/(S4 + S2 - 2*S0).
+// The code below is an integerized version of that.
+static void get_cost_surf_min(int *sad_list, int *ir, int *ic,
+                              int bits) {
+  *ic = divide_and_round((sad_list[1] - sad_list[3]) * (1 << (bits - 1)),
+                         (sad_list[1] - 2 * sad_list[0] + sad_list[3]));
+  *ir = divide_and_round((sad_list[4] - sad_list[2]) * (1 << (bits - 1)),
+                         (sad_list[4] - 2 * sad_list[0] + sad_list[2]));
+}
+
+int vp9_find_best_sub_pixel_surface_fit(const MACROBLOCK *x,
                                         MV *bestmv, const MV *ref_mv,
                                         int allow_hp,
                                         int error_per_bit,
@@ -300,30 +364,127 @@ int vp9_find_best_sub_pixel_tree_pruned(const MACROBLOCK *x,
                                         const uint8_t *second_pred,
                                         int w, int h) {
   SETUP_SUBPEL_SEARCH;
-  if (second_pred != NULL) {
-#if CONFIG_VP9_HIGHBITDEPTH
-    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
-      DECLARE_ALIGNED_ARRAY(16, uint16_t, comp_pred16, 64 * 64);
-      vp9_high_comp_avg_pred(comp_pred16, second_pred, w, h, y + offset,
-                             y_stride);
-      besterr = vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, z, src_stride,
-                        sse1);
-    } else {
-      DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64);
-      vp9_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
-      besterr = vfp->vf(comp_pred, w, z, src_stride, sse1);
+  SETUP_CENTER_ERROR;
+  (void) halfiters;
+  (void) quarteriters;
+  (void) eighthiters;
+  (void) whichdir;
+  (void) allow_hp;
+  (void) forced_stop;
+  (void) hstep;
+
+  if (sad_list &&
+      sad_list[0] != INT_MAX && sad_list[1] != INT_MAX &&
+      sad_list[2] != INT_MAX && sad_list[3] != INT_MAX &&
+      sad_list[4] != INT_MAX &&
+      is_sad_list_wellbehaved(sad_list)) {
+    int ir, ic;
+    unsigned int minpt;
+    get_cost_surf_min(sad_list, &ir, &ic, 3);
+    if (ir != 0 || ic != 0) {
+      CHECK_BETTER(minpt, tr + ir, tc + ic);
+    }
+  }
+
+  bestmv->row = br;
+  bestmv->col = bc;
+
+  if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
+      (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
+    return INT_MAX;
+
+  return besterr;
+}
+
+int vp9_find_best_sub_pixel_tree_pruned_more(const MACROBLOCK *x,
+                                             MV *bestmv, const MV *ref_mv,
+                                             int allow_hp,
+                                             int error_per_bit,
+                                             const vp9_variance_fn_ptr_t *vfp,
+                                             int forced_stop,
+                                             int iters_per_step,
+                                             int *sad_list,
+                                             int *mvjcost, int *mvcost[2],
+                                             int *distortion,
+                                             unsigned int *sse1,
+                                             const uint8_t *second_pred,
+                                             int w, int h) {
+  SETUP_SUBPEL_SEARCH;
+  SETUP_CENTER_ERROR;
+  if (sad_list &&
+      sad_list[0] != INT_MAX && sad_list[1] != INT_MAX &&
+      sad_list[2] != INT_MAX && sad_list[3] != INT_MAX &&
+      sad_list[4] != INT_MAX &&
+      is_sad_list_wellbehaved(sad_list)) {
+    unsigned int minpt;
+    int ir, ic;
+    get_cost_surf_min(sad_list, &ir, &ic, 1);
+    if (ir != 0 || ic != 0) {
+      CHECK_BETTER(minpt, tr + ir * hstep, tc + ic * hstep);
     }
-#else
-    DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64);
-    vp9_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
-    besterr = vfp->vf(comp_pred, w, z, src_stride, sse1);
-#endif  // CONFIG_VP9_HIGHBITDEPTH
   } else {
-    besterr = vfp->vf(y + offset, y_stride, z, src_stride, sse1);
+    FIRST_LEVEL_CHECKS;
+    if (halfiters > 1) {
+      SECOND_LEVEL_CHECKS;
+    }
   }
-  *distortion = besterr;
-  besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
+  tr = br;
+  tc = bc;
+
+  // Each subsequent iteration checks at least one point in common with
+  // the last iteration could be 2 ( if diag selected) 1/4 pel
+
+  // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
+  if (forced_stop != 2) {
+    hstep >>= 1;
+    FIRST_LEVEL_CHECKS;
+    if (quarteriters > 1) {
+      SECOND_LEVEL_CHECKS;
+    }
+    tr = br;
+    tc = bc;
+  }
+
+  if (allow_hp && vp9_use_mv_hp(ref_mv) && forced_stop == 0) {
+    hstep >>= 1;
+    FIRST_LEVEL_CHECKS;
+    if (eighthiters > 1) {
+      SECOND_LEVEL_CHECKS;
+    }
+    tr = br;
+    tc = bc;
+  }
+  // These lines insure static analysis doesn't warn that
+  // tr and tc aren't used after the above point.
+  (void) tr;
+  (void) tc;
+
+  bestmv->row = br;
+  bestmv->col = bc;
+
+  if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) ||
+      (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3)))
+    return INT_MAX;
+
+  return besterr;
+}
+
+int vp9_find_best_sub_pixel_tree_pruned(const MACROBLOCK *x,
+                                        MV *bestmv, const MV *ref_mv,
+                                        int allow_hp,
+                                        int error_per_bit,
+                                        const vp9_variance_fn_ptr_t *vfp,
+                                        int forced_stop,
+                                        int iters_per_step,
+                                        int *sad_list,
+                                        int *mvjcost, int *mvcost[2],
+                                        int *distortion,
+                                        unsigned int *sse1,
+                                        const uint8_t *second_pred,
+                                        int w, int h) {
+  SETUP_SUBPEL_SEARCH;
+  SETUP_CENTER_ERROR;
   if (sad_list &&
       sad_list[0] != INT_MAX && sad_list[1] != INT_MAX &&
       sad_list[2] != INT_MAX && sad_list[3] != INT_MAX &&
@@ -415,29 +576,7 @@ int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x,
                                 const uint8_t *second_pred,
                                 int w, int h) {
   SETUP_SUBPEL_SEARCH;
-  if (second_pred != NULL) {
-#if CONFIG_VP9_HIGHBITDEPTH
-    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
-      DECLARE_ALIGNED_ARRAY(16, uint16_t, comp_pred16, 64 * 64);
-      vp9_high_comp_avg_pred(comp_pred16, second_pred, w, h, y + offset,
-                             y_stride);
-      besterr = vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, z, src_stride,
-                        sse1);
-    } else {
-      DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64);
-      vp9_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
-      besterr = vfp->vf(comp_pred, w, z, src_stride, sse1);
-    }
-#else
-    DECLARE_ALIGNED_ARRAY(16, uint8_t, comp_pred, 64 * 64);
-    vp9_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
-    besterr = vfp->vf(comp_pred, w, z, src_stride, sse1);
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-  } else {
-    besterr = vfp->vf(y + offset, y_stride, z, src_stride, sse1);
-  }
-  *distortion = besterr;
-  besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
+  SETUP_CENTER_ERROR;
   (void) sad_list;  // to silence compiler warning

   // Each subsequent iteration checks at least one point in
diff --git a/vp9/encoder/vp9_mcomp.h b/vp9/encoder/vp9_mcomp.h
index eb3f3ebab..3156cb21e 100644
--- a/vp9/encoder/vp9_mcomp.h
+++ b/vp9/encoder/vp9_mcomp.h
@@ -108,6 +108,8 @@ typedef int (fractional_mv_step_fp) (
 extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree;
 extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree_pruned;
+extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree_pruned_more;
+extern fractional_mv_step_fp vp9_find_best_sub_pixel_surface_fit;

 typedef int (*vp9_full_search_fn_t)(const MACROBLOCK *x,
                                     const MV *ref_mv, int sad_per_bit,
diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c
index 92e31497e..9368ac1f7 100644
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -421,6 +421,10 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
     cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_tree;
   } else if (sf->mv.subpel_search_method == SUBPEL_TREE_PRUNED) {
     cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_tree_pruned;
+  } else if (sf->mv.subpel_search_method == SUBPEL_TREE_PRUNED_MORE) {
+    cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_tree_pruned_more;
+  } else if (sf->mv.subpel_search_method == SUBPEL_SURFACE_FIT) {
+    cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_surface_fit;
   }

   cpi->mb.optimize = sf->optimize_coefficients == 1 && oxcf->pass != 1;
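The surface fit introduced in vp9_mcomp.c above replaces the iterative half/quarter/eighth-pel refinement with a closed-form estimate: the five SADs returned by the full-pel stage are modeled as S = A(x - x0)^2 + B(y - y0)^2 + C, and the minimum (x0, y0) is solved for directly. The standalone sketch below lifts divide_and_round() and get_cost_surf_min() from the patch and runs them on made-up SAD values (the numbers are illustrative only); bits = 3 requests the answer in 1/8-pel units, as vp9_find_best_sub_pixel_surface_fit does.

#include <stdio.h>

/* Copied from the patch: rounds n/d to the nearest integer, with ties
 * rounded away from zero, handling mixed signs. */
static int divide_and_round(const int n, const int d) {
  return ((n < 0) ^ (d < 0)) ? ((n - d / 2) / d) : ((n + d / 2) / d);
}

/* Copied from the patch: fits S = A(x - x0)^2 + B(y - y0)^2 + C through
 * the five costs and returns the estimated minimum in 1/2^bits pel units.
 * sad_list holds costs at (y, x) = (0,0), (0,-1), (1,0), (0,1), (-1,0). */
static void get_cost_surf_min(int *sad_list, int *ir, int *ic, int bits) {
  *ic = divide_and_round((sad_list[1] - sad_list[3]) * (1 << (bits - 1)),
                         (sad_list[1] - 2 * sad_list[0] + sad_list[3]));
  *ir = divide_and_round((sad_list[4] - sad_list[2]) * (1 << (bits - 1)),
                         (sad_list[4] - 2 * sad_list[0] + sad_list[2]));
}

int main(void) {
  /* Hypothetical SADs: cheapest at the center, more expensive to the
   * left (S1 = 620) than to the right (S3 = 540), so the true minimum
   * sits a fraction of a pel right of, and slightly above, the center. */
  int sad_list[5] = { 500, 620, 580, 540, 560 };
  int ir, ic;
  get_cost_surf_min(sad_list, &ir, &ic, 3);  /* bits = 3: 1/8-pel units */
  printf("estimated offset: col %+d, row %+d (1/8 pel)\n", ic, ir);
  /* Prints col +2, row -1; the closed form agrees:
   * x0 = (620 - 540) / (2 * (620 + 540 - 2 * 500)) = 0.25 pel = 2/8. */
  return 0;
}

vp9_find_best_sub_pixel_tree_pruned_more calls the same fit with bits = 1, so the estimate comes back in half-pel units (one hstep) and only replaces the first FIRST_LEVEL_CHECKS round; quarter- and eighth-pel refinement then proceed as in the existing tree search. Note that the quadratic model only has an interior minimum when the center SAD is the smallest of the five, which is what keeps both denominators positive.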
diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h
index ed8400863..f224e65de 100644
--- a/vp9/encoder/vp9_speed_features.h
+++ b/vp9/encoder/vp9_speed_features.h
@@ -79,6 +79,8 @@ typedef enum {
 typedef enum {
   SUBPEL_TREE = 0,
   SUBPEL_TREE_PRUNED = 1,
+  SUBPEL_TREE_PRUNED_MORE = 2,
+  SUBPEL_SURFACE_FIT = 3,
   // Other methods to come
 } SUBPEL_SEARCH_METHODS;
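The new enum values take effect only through the dispatch chain added to vp9_set_speed_features() above. Below is a minimal self-contained sketch of that mapping, with name strings standing in for the real fractional_mv_step_fp pointers installed in cpi->find_fractional_mv_step (illustration only, not encoder code):

#include <stdio.h>

/* SUBPEL_SEARCH_METHODS copied from the vp9_speed_features.h hunk above. */
typedef enum {
  SUBPEL_TREE = 0,
  SUBPEL_TREE_PRUNED = 1,
  SUBPEL_TREE_PRUNED_MORE = 2,
  SUBPEL_SURFACE_FIT = 3,
  // Other methods to come
} SUBPEL_SEARCH_METHODS;

/* Mirrors the if/else chain in vp9_set_speed_features(); each method
 * selects one fractional-MV search routine. */
static const char *subpel_search_fn_name(SUBPEL_SEARCH_METHODS method) {
  if (method == SUBPEL_TREE) {
    return "vp9_find_best_sub_pixel_tree";
  } else if (method == SUBPEL_TREE_PRUNED) {
    return "vp9_find_best_sub_pixel_tree_pruned";
  } else if (method == SUBPEL_TREE_PRUNED_MORE) {
    return "vp9_find_best_sub_pixel_tree_pruned_more";
  } else {  /* SUBPEL_SURFACE_FIT */
    return "vp9_find_best_sub_pixel_surface_fit";
  }
}

int main(void) {
  printf("%s\n", subpel_search_fn_name(SUBPEL_TREE_PRUNED_MORE));
  printf("%s\n", subpel_search_fn_name(SUBPEL_SURFACE_FIT));
  return 0;
}

Nothing in the hunks shown here assigns SUBPEL_TREE_PRUNED_MORE or SUBPEL_SURFACE_FIT to any speed level, so exercising the new paths requires a separate change that sets sf->mv.subpel_search_method before vp9_set_speed_features() runs.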