avcodec/pixlet: postprocess luma using precalculated lut

Realtime decoding speed raises from 1.08 to 1.84 for 1504x846, 25391 kb/s, 24 fps video.
author: Paul B Mahol <onemda@gmail.com> 2020-09-06 13:44:52 +0200
committer: Paul B Mahol <onemda@gmail.com> 2020-09-06 13:58:09 +0200
commit: bc8662984261f422b49fdfff4cb273adde06917a (patch)
tree: 5a1f82030eebfcf38ce0a06d26b7b6599ab6501b /libavcodec/pixlet.c
parent: d182d8f10cf69c59ef9c21df4b06e5478df063ef (diff)
download: ffmpeg-bc8662984261f422b49fdfff4cb273adde06917a.tar.gz
1 files changed, 26 insertions, 8 deletions
diff --git a/libavcodec/pixlet.c b/libavcodec/pixlet.c
index 78f571cd5f..42acd683fa 100644
--- a/libavcodec/pixlet.c
+++ b/libavcodec/pixlet.c
@@ -58,6 +58,7 @@ typedef struct PixletContext {
     int16_t *filter[2];
     int16_t *prediction;
     int64_t scaling[4][2][NB_LEVELS];
+    uint16_t lut[65536];
     SubBand band[4][NB_LEVELS * 3 + 1];
 } PixletContext;
 
@@ -462,11 +463,27 @@ static void reconstruction(AVCodecContext *avctx, int16_t *dest,
     }
 }
 
-static void postprocess_luma(AVFrame *frame, int w, int h, int depth)
+static void build_luma_lut(AVCodecContext *avctx, int depth)
 {
+    PixletContext *ctx = avctx->priv_data;
+    int max = (1 << depth) - 1;
+
+    if (ctx->depth == depth)
+        return;
+    ctx->depth = depth;
+
+    for (int i = 0; i < FF_ARRAY_ELEMS(ctx->lut); i++)
+        ctx->lut[i] = ((int64_t)i * i * 65535LL) / max / max;
+}
+
+static void postprocess_luma(AVCodecContext *avctx, AVFrame *frame,
+                             int w, int h, int depth)
+{
+    PixletContext *ctx = avctx->priv_data;
     uint16_t *dsty = (uint16_t *)frame->data[0];
     int16_t *srcy  = (int16_t *)frame->data[0];
     ptrdiff_t stridey = frame->linesize[0] / 2;
+    uint16_t *lut = ctx->lut;
     int i, j;
 
     for (j = 0; j < h; j++) {
@@ -476,8 +493,7 @@ static void postprocess_luma(AVFrame *frame, int w, int h, int depth)
             else if (srcy[i] > ((1 << depth) - 1))
                 dsty[i] = 65535;
             else
-                dsty[i] = ((int64_t) srcy[i] * srcy[i] * 65535) /
-                          ((1 << depth) - 1) / ((1 << depth) - 1);
+                dsty[i] = lut[srcy[i]];
         }
         dsty += stridey;
         srcy += stridey;
@@ -591,7 +607,7 @@ static int pixlet_decode_frame(AVCodecContext *avctx, void *data,
     int i, w, h, width, height, ret, version;
     AVFrame *p = data;
     ThreadFrame frame = { .f = data };
-    uint32_t pktsize;
+    uint32_t pktsize, depth;
 
     bytestream2_init(&ctx->gb, avpkt->data, avpkt->size);
 
@@ -623,12 +639,14 @@ static int pixlet_decode_frame(AVCodecContext *avctx, void *data,
     ctx->levels = bytestream2_get_be32(&ctx->gb);
     if (ctx->levels != NB_LEVELS)
         return AVERROR_INVALIDDATA;
-    ctx->depth = bytestream2_get_be32(&ctx->gb);
-    if (ctx->depth < 8 || ctx->depth > 15) {
-        avpriv_request_sample(avctx, "Depth %d", ctx->depth);
+    depth = bytestream2_get_be32(&ctx->gb);
+    if (depth < 8 || depth > 15) {
+        avpriv_request_sample(avctx, "Depth %d", depth);
         return AVERROR_INVALIDDATA;
     }
 
+    build_luma_lut(avctx, depth);
+
     ret = ff_set_dimensions(avctx, w, h);
     if (ret < 0)
         return ret;
@@ -667,7 +685,7 @@ static int pixlet_decode_frame(AVCodecContext *avctx, void *data,
             break;
     }
 
-    postprocess_luma(frame.f, ctx->w, ctx->h, ctx->depth);
+    postprocess_luma(avctx, frame.f, ctx->w, ctx->h, ctx->depth);
     postprocess_chroma(frame.f, ctx->w >> 1, ctx->h >> 1, ctx->depth);
 
     *got_frame = 1;
author	Paul B Mahol <onemda@gmail.com>	2020-09-06 13:44:52 +0200
committer	Paul B Mahol <onemda@gmail.com>	2020-09-06 13:58:09 +0200
commit	bc8662984261f422b49fdfff4cb273adde06917a (patch)
tree	5a1f82030eebfcf38ce0a06d26b7b6599ab6501b /libavcodec/pixlet.c
parent	d182d8f10cf69c59ef9c21df4b06e5478df063ef (diff)
download	ffmpeg-bc8662984261f422b49fdfff4cb273adde06917a.tar.gz