summary | refs | log | tree | commit | diff
path: root/libavcodec/diracdec.c
diff options
context:
space:
mode:
author: Lynne <dev@lynne.ee> 2020-03-01 11:23:53 +0000
committer: Lynne <dev@lynne.ee> 2020-03-12 20:26:48 +0000
commit: 675bb1f4f9de76f2c0eb1c8b1be6781a2cd52d29 (patch)
tree: 482aee9776d2e790c5b18bc11cd51cee300b1bd7 /libavcodec/diracdec.c
parent: d778be6e4a0565e9a96adec57339e4c8a2464664 (diff)
download: ffmpeg-675bb1f4f9de76f2c0eb1c8b1be6781a2cd52d29.tar.gz
diracdec: rewrite golomb reader
This version is able to output multiple coefficients at a time and is able to altogether remove actual golomb code parsing. It's also able to partially recover the last coefficient in case the packet is incomplete. Total decoder performance gain for 8bit 420 1080p lossless: 40%. Total decoder performance gain for 10bit 420 1080p lossless: 40%. clang was able to vectorize the loop much better than my handwritten assembly, but gcc was very naive and didn't. The lookup table is a rewritten version of vc2hqdecode.
Diffstat (limited to 'libavcodec/diracdec.c')
-rw-r--r-- libavcodec/diracdec.c 12
1 files changed, 4 insertions, 8 deletions
diff --git a/libavcodec/diracdec.c b/libavcodec/diracdec.c
index 22ec913bf7..ed42bc366a 100644
--- a/libavcodec/diracdec.c
+++ b/libavcodec/diracdec.c
@@ -136,7 +136,6 @@ typedef struct DiracContext {
MpegvideoEncDSPContext mpvencdsp;
VideoDSPContext vdsp;
DiracDSPContext diracdsp;
- DiracGolombLUT *reader_ctx;
DiracVersionInfo version;
GetBitContext gb;
AVDiracSeqHeader seq;
@@ -395,7 +394,6 @@ static av_cold int dirac_decode_init(AVCodecContext *avctx)
s->threads_num_buf = -1;
s->thread_buf_size = -1;
- ff_dirac_golomb_reader_init(&s->reader_ctx);
ff_diracdsp_init(&s->diracdsp);
ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
ff_videodsp_init(&s->vdsp, 8);
@@ -428,8 +426,6 @@ static av_cold int dirac_decode_end(AVCodecContext *avctx)
DiracContext *s = avctx->priv_data;
int i;
- ff_dirac_golomb_reader_end(&s->reader_ctx);
-
dirac_decode_flush(avctx);
for (i = 0; i < MAX_FRAMES; i++)
av_frame_free(&s->all_frames[i].avframe);
@@ -881,11 +877,11 @@ static int decode_hq_slice(DiracContext *s, DiracSlice *slice, uint8_t *tmp_buf)
coef_num = subband_coeffs(s, slice->slice_x, slice->slice_y, i, coeffs_num);
if (s->pshift)
- coef_par = ff_dirac_golomb_read_32bit(s->reader_ctx, addr,
- length, tmp_buf, coef_num);
+ coef_par = ff_dirac_golomb_read_32bit(addr, length,
+ tmp_buf, coef_num);
else
- coef_par = ff_dirac_golomb_read_16bit(s->reader_ctx, addr,
- length, tmp_buf, coef_num);
+ coef_par = ff_dirac_golomb_read_16bit(addr, length,
+ tmp_buf, coef_num);
if (coef_num > coef_par) {
const int start_b = coef_par * (1 << (s->pshift + 1));