summaryrefslogtreecommitdiff
path: root/libavcodec
diff options
context:
space:
mode:
authorMichael Niedermayer <michaelni@gmx.at>2012-09-04 17:04:51 +0200
committerMichael Niedermayer <michaelni@gmx.at>2012-09-04 17:04:51 +0200
commit9dcc4c30f9d8ef70d3c07b4a77fdc507e8766107 (patch)
treeae6bb70dcf614bfbf032b5fe4ac4f712777cd0c3 /libavcodec
parent9de7622927b1b0ec6f8045b17b3116f046f44b87 (diff)
parentb36f87ff90d87687f574d51385f47bb98d14600a (diff)
downloadffmpeg-9dcc4c30f9d8ef70d3c07b4a77fdc507e8766107.tar.gz
Merge remote-tracking branch 'qatar/master'
* qatar/master: configure: add support for bdver1 and bdver2 CPU types. avio: make avio_close NULL the freed buffer pixdesc: cosmetics proresenc: Don't free a buffer not owned by the codec proresenc: Write the full value in one put_bits call adpcmenc: Calculate the IMA_QT predictor without overflow x86: Add convenience macros to check for CPU extensions and flags x86: h264dsp: drop some unnecessary ifdefs around prototype declarations mss12: merge decode_pixel() and decode_top_left_pixel() mss12: reduce SliceContext size from 1067 to 164 KB mss12: move SliceContexts out of the common context into the codec contexts Conflicts: libavformat/aviobuf.c Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec')
-rw-r--r--libavcodec/mss1.c11
-rw-r--r--libavcodec/mss12.c177
-rw-r--r--libavcodec/mss12.h18
-rw-r--r--libavcodec/mss2.c16
-rw-r--r--libavcodec/x86/h264dsp_init.c6
5 files changed, 97 insertions, 131 deletions
diff --git a/libavcodec/mss1.c b/libavcodec/mss1.c
index d622e2ca61..c90485b512 100644
--- a/libavcodec/mss1.c
+++ b/libavcodec/mss1.c
@@ -30,7 +30,7 @@
typedef struct MSS1Context {
MSS12Context ctx;
AVFrame pic;
- SliceContext sc[2];
+ SliceContext sc;
} MSS1Context;
static void arith_normalise(ArithCoder *c)
@@ -89,7 +89,7 @@ static int arith_get_number(ArithCoder *c, int mod_val)
return val;
}
-static int arith_get_prob(ArithCoder *c, int *probs)
+static int arith_get_prob(ArithCoder *c, int16_t *probs)
{
int range = c->high - c->low + 1;
int val = ((c->value - c->low + 1) * probs[0] - 1) / range;
@@ -162,7 +162,8 @@ static int mss1_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
c->pal_stride = -ctx->pic.linesize[0];
c->keyframe = !arith_get_bit(&acoder);
if (c->keyframe) {
- ff_mss12_codec_reset(c);
+ c->corrupted = 0;
+ ff_mss12_slicecontext_reset(&ctx->sc);
pal_changed = decode_pal(c, &acoder);
ctx->pic.key_frame = 1;
ctx->pic.pict_type = AV_PICTURE_TYPE_I;
@@ -172,7 +173,7 @@ static int mss1_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
ctx->pic.key_frame = 0;
ctx->pic.pict_type = AV_PICTURE_TYPE_P;
}
- c->corrupted = ff_mss12_decode_rect(&c->sc[0], &acoder, 0, 0,
+ c->corrupted = ff_mss12_decode_rect(&ctx->sc, &acoder, 0, 0,
avctx->width, avctx->height);
if (c->corrupted)
return AVERROR_INVALIDDATA;
@@ -194,7 +195,7 @@ static av_cold int mss1_decode_init(AVCodecContext *avctx)
c->ctx.avctx = avctx;
avctx->coded_frame = &c->pic;
- ret = ff_mss12_decode_init(&c->ctx, 0);
+ ret = ff_mss12_decode_init(&c->ctx, 0, &c->sc, NULL);
avctx->pix_fmt = PIX_FMT_PAL8;
diff --git a/libavcodec/mss12.c b/libavcodec/mss12.c
index 9283470884..b8df225822 100644
--- a/libavcodec/mss12.c
+++ b/libavcodec/mss12.c
@@ -61,13 +61,9 @@ static void model_reset(Model *m)
m->weights[i] = 1;
m->cum_prob[i] = m->num_syms - i;
}
- m->weights[0] = -1;
- m->idx2sym[0] = -1;
- m->sym2idx[m->num_syms] = -1;
- for (i = 0; i < m->num_syms; i++) {
- m->sym2idx[i] = i + 1;
+ m->weights[0] = 0;
+ for (i = 0; i < m->num_syms; i++)
m->idx2sym[i + 1] = i;
- }
}
static av_cold void model_init(Model *m, int num_syms, int thr_weight)
@@ -75,7 +71,6 @@ static av_cold void model_init(Model *m, int num_syms, int thr_weight)
m->num_syms = num_syms;
m->thr_weight = thr_weight;
m->threshold = num_syms * thr_weight;
- model_reset(m);
}
static void model_rescale_weights(Model *m)
@@ -109,8 +104,6 @@ void ff_mss12_model_update(Model *m, int val)
m->idx2sym[val] = sym2;
m->idx2sym[i] = sym1;
- m->sym2idx[sym1] = i;
- m->sym2idx[sym2] = val;
val = i;
}
@@ -123,7 +116,7 @@ void ff_mss12_model_update(Model *m, int val)
static void pixctx_reset(PixContext *ctx)
{
- int i, j, k;
+ int i, j;
if (!ctx->special_initial_cache)
for (i = 0; i < ctx->cache_size; i++)
@@ -137,16 +130,15 @@ static void pixctx_reset(PixContext *ctx)
model_reset(&ctx->cache_model);
model_reset(&ctx->full_model);
- for (i = 0; i < 4; i++)
- for (j = 0; j < sec_order_sizes[i]; j++)
- for (k = 0; k < 4; k++)
- model_reset(&ctx->sec_models[i][j][k]);
+ for (i = 0; i < 15; i++)
+ for (j = 0; j < 4; j++)
+ model_reset(&ctx->sec_models[i][j]);
}
static av_cold void pixctx_init(PixContext *ctx, int cache_size,
int full_model_syms, int special_initial_cache)
{
- int i, j, k;
+ int i, j, k, idx;
ctx->cache_size = cache_size + 4;
ctx->num_syms = cache_size;
@@ -155,57 +147,36 @@ static av_cold void pixctx_init(PixContext *ctx, int cache_size,
model_init(&ctx->cache_model, ctx->num_syms + 1, THRESH_LOW);
model_init(&ctx->full_model, full_model_syms, THRESH_HIGH);
- for (i = 0; i < 4; i++)
- for (j = 0; j < sec_order_sizes[i]; j++)
+ for (i = 0, idx = 0; i < 4; i++)
+ for (j = 0; j < sec_order_sizes[i]; j++, idx++)
for (k = 0; k < 4; k++)
- model_init(&ctx->sec_models[i][j][k], 2 + i,
+ model_init(&ctx->sec_models[idx][k], 2 + i,
i ? THRESH_LOW : THRESH_ADAPTIVE);
}
-static int decode_top_left_pixel(ArithCoder *acoder, PixContext *pctx)
-{
- int i, val, pix;
-
- val = acoder->get_model_sym(acoder, &pctx->cache_model);
- if (val < pctx->num_syms) {
- pix = pctx->cache[val];
- } else {
- pix = acoder->get_model_sym(acoder, &pctx->full_model);
- for (i = 0; i < pctx->cache_size - 1; i++)
- if (pctx->cache[i] == pix)
- break;
- val = i;
- }
- if (val) {
- for (i = val; i > 0; i--)
- pctx->cache[i] = pctx->cache[i - 1];
- pctx->cache[0] = pix;
- }
-
- return pix;
-}
-
-static int decode_pixel(ArithCoder *acoder, PixContext *pctx,
- uint8_t *ngb, int num_ngb)
+static av_always_inline int decode_pixel(ArithCoder *acoder, PixContext *pctx,
+ uint8_t *ngb, int num_ngb, int any_ngb)
{
int i, val, pix;
val = acoder->get_model_sym(acoder, &pctx->cache_model);
if (val < pctx->num_syms) {
- int idx, j;
-
- idx = 0;
- for (i = 0; i < pctx->cache_size; i++) {
- for (j = 0; j < num_ngb; j++)
- if (pctx->cache[i] == ngb[j])
- break;
- if (j == num_ngb) {
- if (idx == val)
- break;
- idx++;
+ if (any_ngb) {
+ int idx, j;
+
+ idx = 0;
+ for (i = 0; i < pctx->cache_size; i++) {
+ for (j = 0; j < num_ngb; j++)
+ if (pctx->cache[i] == ngb[j])
+ break;
+ if (j == num_ngb) {
+ if (idx == val)
+ break;
+ idx++;
+ }
}
+ val = FFMIN(i, pctx->cache_size - 1);
}
- val = FFMIN(i, pctx->cache_size - 1);
pix = pctx->cache[val];
} else {
pix = acoder->get_model_sym(acoder, &pctx->full_model);
@@ -268,50 +239,52 @@ static int decode_pixel_in_context(ArithCoder *acoder, PixContext *pctx,
switch (nlen) {
case 1:
- case 4:
layer = 0;
break;
case 2:
if (neighbours[TOP] == neighbours[TOP_LEFT]) {
if (neighbours[TOP_RIGHT] == neighbours[TOP_LEFT])
- layer = 3;
+ layer = 1;
else if (neighbours[LEFT] == neighbours[TOP_LEFT])
layer = 2;
else
- layer = 4;
+ layer = 3;
} else if (neighbours[TOP_RIGHT] == neighbours[TOP_LEFT]) {
if (neighbours[LEFT] == neighbours[TOP_LEFT])
- layer = 1;
+ layer = 4;
else
layer = 5;
} else if (neighbours[LEFT] == neighbours[TOP_LEFT]) {
layer = 6;
} else {
- layer = 0;
+ layer = 7;
}
break;
case 3:
if (neighbours[TOP] == neighbours[TOP_LEFT])
- layer = 0;
+ layer = 8;
else if (neighbours[TOP_RIGHT] == neighbours[TOP_LEFT])
- layer = 1;
+ layer = 9;
else if (neighbours[LEFT] == neighbours[TOP_LEFT])
- layer = 2;
+ layer = 10;
else if (neighbours[TOP_RIGHT] == neighbours[TOP])
- layer = 3;
+ layer = 11;
else if (neighbours[TOP] == neighbours[LEFT])
- layer = 4;
+ layer = 12;
else
- layer = 5;
+ layer = 13;
+ break;
+ case 4:
+ layer = 14;
break;
}
pix = acoder->get_model_sym(acoder,
- &pctx->sec_models[nlen - 1][layer][sub]);
+ &pctx->sec_models[layer][sub]);
if (pix < nlen)
return ref_pix[pix];
else
- return decode_pixel(acoder, pctx, ref_pix, nlen);
+ return decode_pixel(acoder, pctx, ref_pix, nlen, 1);
}
static int decode_region(ArithCoder *acoder, uint8_t *dst, uint8_t *rgb_pic,
@@ -326,7 +299,7 @@ static int decode_region(ArithCoder *acoder, uint8_t *dst, uint8_t *rgb_pic,
for (j = 0; j < height; j++) {
for (i = 0; i < width; i++) {
if (!i && !j)
- p = decode_top_left_pixel(acoder, pctx);
+ p = decode_pixel(acoder, pctx, NULL, 0, 0);
else
p = decode_pixel_in_context(acoder, pctx, dst + i, stride,
i, j, width - i - 1);
@@ -418,7 +391,7 @@ static int decode_region_masked(MSS12Context const *c, ArithCoder *acoder,
return -1;
} else if (mask[i] != 0x80) {
if (!i && !j)
- p = decode_top_left_pixel(acoder, pctx);
+ p = decode_pixel(acoder, pctx, NULL, 0, 0);
else
p = decode_pixel_in_context(acoder, pctx, dst + i, stride,
i, j, width - i - 1);
@@ -435,39 +408,30 @@ static int decode_region_masked(MSS12Context const *c, ArithCoder *acoder,
return 0;
}
-static av_cold void codec_init(MSS12Context *c, int version)
+static av_cold void slicecontext_init(SliceContext *sc,
+ int version, int full_model_syms)
{
- int i;
- for (i = 0; i < (c->slice_split ? 2 : 1); i++) {
- c->sc[i].c = c;
- model_init(&c->sc[i].intra_region, 2, THRESH_ADAPTIVE);
- model_init(&c->sc[i].inter_region, 2, THRESH_ADAPTIVE);
- model_init(&c->sc[i].split_mode, 3, THRESH_HIGH);
- model_init(&c->sc[i].edge_mode, 2, THRESH_HIGH);
- model_init(&c->sc[i].pivot, 3, THRESH_LOW);
-
- pixctx_init(&c->sc[i].intra_pix_ctx, 8, c->full_model_syms, 0);
-
- pixctx_init(&c->sc[i].inter_pix_ctx, version ? 3 : 2,
- c->full_model_syms, version ? 1 : 0);
- }
- c->corrupted = 1;
+ model_init(&sc->intra_region, 2, THRESH_ADAPTIVE);
+ model_init(&sc->inter_region, 2, THRESH_ADAPTIVE);
+ model_init(&sc->split_mode, 3, THRESH_HIGH);
+ model_init(&sc->edge_mode, 2, THRESH_HIGH);
+ model_init(&sc->pivot, 3, THRESH_LOW);
+
+ pixctx_init(&sc->intra_pix_ctx, 8, full_model_syms, 0);
+
+ pixctx_init(&sc->inter_pix_ctx, version ? 3 : 2,
+ full_model_syms, version ? 1 : 0);
}
-void ff_mss12_codec_reset(MSS12Context *c)
+void ff_mss12_slicecontext_reset(SliceContext *sc)
{
- int i;
- for (i = 0; i < (c->slice_split ? 2 : 1); i++) {
- model_reset(&c->sc[i].intra_region);
- model_reset(&c->sc[i].inter_region);
- model_reset(&c->sc[i].split_mode);
- model_reset(&c->sc[i].edge_mode);
- model_reset(&c->sc[i].pivot);
- pixctx_reset(&c->sc[i].intra_pix_ctx);
- pixctx_reset(&c->sc[i].inter_pix_ctx);
- }
-
- c->corrupted = 0;
+ model_reset(&sc->intra_region);
+ model_reset(&sc->inter_region);
+ model_reset(&sc->split_mode);
+ model_reset(&sc->edge_mode);
+ model_reset(&sc->pivot);
+ pixctx_reset(&sc->intra_pix_ctx);
+ pixctx_reset(&sc->inter_pix_ctx);
}
static int decode_pivot(SliceContext *sc, ArithCoder *acoder, int base)
@@ -505,7 +469,7 @@ static int decode_region_intra(SliceContext *sc, ArithCoder *acoder,
uint8_t *dst = c->pal_pic + x + y * stride;
uint8_t *rgb_dst = c->rgb_pic + x * 3 + y * rgb_stride;
- pix = decode_top_left_pixel(acoder, &sc->intra_pix_ctx);
+ pix = decode_pixel(acoder, &sc->intra_pix_ctx, NULL, 0, 0);
rgb_pix = c->pal[pix];
for (i = 0; i < height; i++, dst += stride, rgb_dst += rgb_stride) {
memset(dst, pix, width);
@@ -531,7 +495,7 @@ static int decode_region_inter(SliceContext *sc, ArithCoder *acoder,
mode = acoder->get_model_sym(acoder, &sc->inter_region);
if (!mode) {
- mode = decode_top_left_pixel(acoder, &sc->inter_pix_ctx);
+ mode = decode_pixel(acoder, &sc->inter_pix_ctx, NULL, 0, 0);
if (c->avctx->err_recognition & AV_EF_EXPLODE &&
( c->rgb_pic && mode != 0x01 && mode != 0x02 && mode != 0x04 ||
@@ -595,7 +559,8 @@ int ff_mss12_decode_rect(SliceContext *sc, ArithCoder *acoder,
return 0;
}
-av_cold int ff_mss12_decode_init(MSS12Context *c, int version)
+av_cold int ff_mss12_decode_init(MSS12Context *c, int version,
+ SliceContext* sc1, SliceContext *sc2)
{
AVCodecContext *avctx = c->avctx;
int i;
@@ -690,7 +655,13 @@ av_cold int ff_mss12_decode_init(MSS12Context *c, int version)
return AVERROR(ENOMEM);
}
- codec_init(c, version);
+ sc1->c = c;
+ slicecontext_init(sc1, version, c->full_model_syms);
+ if (c->slice_split) {
+ sc2->c = c;
+ slicecontext_init(sc2, version, c->full_model_syms);
+ }
+ c->corrupted = 1;
return 0;
}
diff --git a/libavcodec/mss12.h b/libavcodec/mss12.h
index 9068651e06..ba80030ee3 100644
--- a/libavcodec/mss12.h
+++ b/libavcodec/mss12.h
@@ -38,10 +38,9 @@
#define THRESH_HIGH 50
typedef struct Model {
- int cum_prob[MODEL_MAX_SYMS + 1];
- int weights[MODEL_MAX_SYMS + 1];
- int idx2sym[MODEL_MAX_SYMS + 1];
- int sym2idx[MODEL_MAX_SYMS + 1];
+ int16_t cum_prob[MODEL_MAX_SYMS + 1];
+ int16_t weights[MODEL_MAX_SYMS + 1];
+ uint8_t idx2sym[MODEL_MAX_SYMS + 1];
int num_syms;
int thr_weight, threshold;
} Model;
@@ -60,7 +59,7 @@ typedef struct PixContext {
int cache_size, num_syms;
uint8_t cache[12];
Model cache_model, full_model;
- Model sec_models[4][8][4];
+ Model sec_models[15][4];
int special_initial_cache;
} PixContext;
@@ -86,21 +85,18 @@ typedef struct MSS12Context {
int rgb_stride;
int free_colours;
int keyframe;
- Model intra_region, inter_region;
- Model pivot, edge_mode, split_mode;
- PixContext intra_pix_ctx, inter_pix_ctx;
int mvX, mvY;
int corrupted;
int slice_split;
int full_model_syms;
- SliceContext sc[2];
} MSS12Context;
int ff_mss12_decode_rect(SliceContext *ctx, ArithCoder *acoder,
int x, int y, int width, int height);
void ff_mss12_model_update(Model *m, int val);
-void ff_mss12_codec_reset(MSS12Context *ctx);
-av_cold int ff_mss12_decode_init(MSS12Context *ctx, int version);
+void ff_mss12_slicecontext_reset(SliceContext *sc);
+av_cold int ff_mss12_decode_init(MSS12Context *c, int version,
+ SliceContext* sc1, SliceContext *sc2);
av_cold int ff_mss12_decode_end(MSS12Context *ctx);
#define ARITH_GET_BIT(VERSION) \
diff --git a/libavcodec/mss2.c b/libavcodec/mss2.c
index 886da05540..fc9ebff4d7 100644
--- a/libavcodec/mss2.c
+++ b/libavcodec/mss2.c
@@ -106,7 +106,7 @@ static int arith2_get_number(ArithCoder *c, int n)
return val;
}
-static int arith2_get_prob(ArithCoder *c, int *probs)
+static int arith2_get_prob(ArithCoder *c, int16_t *probs)
{
int range = c->high - c->low + 1, n = *probs;
int scale = av_log2(range) - av_log2(n);
@@ -671,14 +671,18 @@ static int mss2_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
buf += get_bits_count(&gb) >> 3;
buf_size -= get_bits_count(&gb) >> 3;
} else {
- if (keyframe)
- ff_mss12_codec_reset(c);
+ if (keyframe) {
+ c->corrupted = 0;
+ ff_mss12_slicecontext_reset(&ctx->sc[0]);
+ if (c->slice_split)
+ ff_mss12_slicecontext_reset(&ctx->sc[1]);
+ }
else if (c->corrupted)
return AVERROR_INVALIDDATA;
bytestream2_init(&gB, buf, buf_size + ARITH2_PADDING);
arith2_init(&acoder, &gB);
c->keyframe = keyframe;
- if (c->corrupted = ff_mss12_decode_rect(&c->sc[0], &acoder, 0, 0,
+ if (c->corrupted = ff_mss12_decode_rect(&ctx->sc[0], &acoder, 0, 0,
avctx->width,
ctx->split_position))
return AVERROR_INVALIDDATA;
@@ -690,7 +694,7 @@ static int mss2_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
return AVERROR_INVALIDDATA;
bytestream2_init(&gB, buf, buf_size + ARITH2_PADDING);
arith2_init(&acoder, &gB);
- if (c->corrupted = ff_mss12_decode_rect(&c->sc[1], &acoder, 0,
+ if (c->corrupted = ff_mss12_decode_rect(&ctx->sc[1], &acoder, 0,
ctx->split_position,
avctx->width,
avctx->height - ctx->split_position))
@@ -830,7 +834,7 @@ static av_cold int mss2_decode_init(AVCodecContext *avctx)
int ret;
c->avctx = avctx;
avctx->coded_frame = &ctx->pic;
- if (ret = ff_mss12_decode_init(c, 1))
+ if (ret = ff_mss12_decode_init(c, 1, &ctx->sc[0], &ctx->sc[1]))
return ret;
c->pal_stride = c->mask_stride;
c->pal_pic = av_malloc(c->pal_stride * avctx->height);
diff --git a/libavcodec/x86/h264dsp_init.c b/libavcodec/x86/h264dsp_init.c
index 472f252ba9..6c83d8ce58 100644
--- a/libavcodec/x86/h264dsp_init.c
+++ b/libavcodec/x86/h264dsp_init.c
@@ -39,11 +39,9 @@ IDCT_ADD_FUNC(8_dc, 10, sse2)
IDCT_ADD_FUNC(8, 8, mmx)
IDCT_ADD_FUNC(8, 8, sse2)
IDCT_ADD_FUNC(8, 10, sse2)
-#if HAVE_AVX_EXTERNAL
IDCT_ADD_FUNC(, 10, avx)
IDCT_ADD_FUNC(8_dc, 10, avx)
IDCT_ADD_FUNC(8, 10, avx)
-#endif
#define IDCT_ADD_REP_FUNC(NUM, REP, DEPTH, OPT) \
@@ -64,10 +62,8 @@ IDCT_ADD_REP_FUNC(, 16intra, 8, mmx)
IDCT_ADD_REP_FUNC(, 16intra, 8, mmx2)
IDCT_ADD_REP_FUNC(, 16intra, 8, sse2)
IDCT_ADD_REP_FUNC(, 16intra, 10, sse2)
-#if HAVE_AVX_EXTERNAL
IDCT_ADD_REP_FUNC(, 16, 10, avx)
IDCT_ADD_REP_FUNC(, 16intra, 10, avx)
-#endif
#define IDCT_ADD_REP_FUNC2(NUM, REP, DEPTH, OPT) \
@@ -79,9 +75,7 @@ IDCT_ADD_REP_FUNC2(, 8, 8, mmx)
IDCT_ADD_REP_FUNC2(, 8, 8, mmx2)
IDCT_ADD_REP_FUNC2(, 8, 8, sse2)
IDCT_ADD_REP_FUNC2(, 8, 10, sse2)
-#if HAVE_AVX_EXTERNAL
IDCT_ADD_REP_FUNC2(, 8, 10, avx)
-#endif
void ff_h264_luma_dc_dequant_idct_mmx(DCTELEM *output, DCTELEM *input, int qmul);
void ff_h264_luma_dc_dequant_idct_sse2(DCTELEM *output, DCTELEM *input, int qmul);