From 22ec08081956084687e0824ffd0ff4d768584143 Mon Sep 17 00:00:00 2001 From: Steve Longerbeam Date: Tue, 18 Sep 2018 11:34:07 +0200 Subject: gpu: ipu-cpmem: add WARN_ON_ONCE() for unaligned dma buffers Add a WARN_ON_ONCE() if either the Y/packed buffer, or the U/V offsets, are not aligned on 8-byte boundaries. This will catch alignment bugs in DRM, V4L2. Signed-off-by: Steve Longerbeam Tested-by: Philipp Zabel Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/ipu-cpmem.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/gpu/ipu-v3') diff --git a/drivers/gpu/ipu-v3/ipu-cpmem.c b/drivers/gpu/ipu-v3/ipu-cpmem.c index a9d2501500a1..7e65954f13c2 100644 --- a/drivers/gpu/ipu-v3/ipu-cpmem.c +++ b/drivers/gpu/ipu-v3/ipu-cpmem.c @@ -259,6 +259,8 @@ EXPORT_SYMBOL_GPL(ipu_cpmem_set_high_priority); void ipu_cpmem_set_buffer(struct ipuv3_channel *ch, int bufnum, dma_addr_t buf) { + WARN_ON_ONCE(buf & 0x7); + if (bufnum) ipu_ch_param_write_field(ch, IPU_FIELD_EBA1, buf >> 3); else @@ -268,6 +270,8 @@ EXPORT_SYMBOL_GPL(ipu_cpmem_set_buffer); void ipu_cpmem_set_uv_offset(struct ipuv3_channel *ch, u32 u_off, u32 v_off) { + WARN_ON_ONCE((u_off & 0x7) || (v_off & 0x7)); + ipu_ch_param_write_field(ch, IPU_FIELD_UBO, u_off / 8); ipu_ch_param_write_field(ch, IPU_FIELD_VBO, v_off / 8); } @@ -435,6 +439,8 @@ void ipu_cpmem_set_yuv_planar_full(struct ipuv3_channel *ch, unsigned int uv_stride, unsigned int u_offset, unsigned int v_offset) { + WARN_ON_ONCE((u_offset & 0x7) || (v_offset & 0x7)); + ipu_ch_param_write_field(ch, IPU_FIELD_SLUV, uv_stride - 1); ipu_ch_param_write_field(ch, IPU_FIELD_UBO, u_offset / 8); ipu_ch_param_write_field(ch, IPU_FIELD_VBO, v_offset / 8); -- cgit v1.2.1 From dec408fd23ae81338a55556cd570dacd6bf41046 Mon Sep 17 00:00:00 2001 From: Steve Longerbeam Date: Sat, 6 Oct 2018 14:45:48 -0700 Subject: gpu: ipu-v3: Add chroma plane offset overrides to ipu_cpmem_set_image() Allow the caller of ipu_cpmem_set_image() to override the latters calculation of the chroma plane offsets, by adding override U/V plane offsets to 'struct ipu_image'. Signed-off-by: Steve Longerbeam Tested-by: Philipp Zabel Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/ipu-cpmem.c | 46 ++++++++++++++++++++-------------- drivers/gpu/ipu-v3/ipu-image-convert.c | 10 ++++---- 2 files changed, 32 insertions(+), 24 deletions(-) (limited to 'drivers/gpu/ipu-v3') diff --git a/drivers/gpu/ipu-v3/ipu-cpmem.c b/drivers/gpu/ipu-v3/ipu-cpmem.c index 7e65954f13c2..163fadb8a33a 100644 --- a/drivers/gpu/ipu-v3/ipu-cpmem.c +++ b/drivers/gpu/ipu-v3/ipu-cpmem.c @@ -745,48 +745,56 @@ int ipu_cpmem_set_image(struct ipuv3_channel *ch, struct ipu_image *image) switch (pix->pixelformat) { case V4L2_PIX_FMT_YUV420: offset = Y_OFFSET(pix, image->rect.left, image->rect.top); - u_offset = U_OFFSET(pix, image->rect.left, - image->rect.top) - offset; - v_offset = V_OFFSET(pix, image->rect.left, - image->rect.top) - offset; + u_offset = image->u_offset ? + image->u_offset : U_OFFSET(pix, image->rect.left, + image->rect.top) - offset; + v_offset = image->v_offset ? + image->v_offset : V_OFFSET(pix, image->rect.left, + image->rect.top) - offset; ipu_cpmem_set_yuv_planar_full(ch, pix->bytesperline / 2, u_offset, v_offset); break; case V4L2_PIX_FMT_YVU420: offset = Y_OFFSET(pix, image->rect.left, image->rect.top); - u_offset = U_OFFSET(pix, image->rect.left, - image->rect.top) - offset; - v_offset = V_OFFSET(pix, image->rect.left, - image->rect.top) - offset; + u_offset = image->u_offset ? + image->u_offset : V_OFFSET(pix, image->rect.left, + image->rect.top) - offset; + v_offset = image->v_offset ? + image->v_offset : U_OFFSET(pix, image->rect.left, + image->rect.top) - offset; ipu_cpmem_set_yuv_planar_full(ch, pix->bytesperline / 2, - v_offset, u_offset); + u_offset, v_offset); break; case V4L2_PIX_FMT_YUV422P: offset = Y_OFFSET(pix, image->rect.left, image->rect.top); - u_offset = U2_OFFSET(pix, image->rect.left, - image->rect.top) - offset; - v_offset = V2_OFFSET(pix, image->rect.left, - image->rect.top) - offset; + u_offset = image->u_offset ? + image->u_offset : U2_OFFSET(pix, image->rect.left, + image->rect.top) - offset; + v_offset = image->v_offset ? + image->v_offset : V2_OFFSET(pix, image->rect.left, + image->rect.top) - offset; ipu_cpmem_set_yuv_planar_full(ch, pix->bytesperline / 2, u_offset, v_offset); break; case V4L2_PIX_FMT_NV12: offset = Y_OFFSET(pix, image->rect.left, image->rect.top); - u_offset = UV_OFFSET(pix, image->rect.left, - image->rect.top) - offset; - v_offset = 0; + u_offset = image->u_offset ? + image->u_offset : UV_OFFSET(pix, image->rect.left, + image->rect.top) - offset; + v_offset = image->v_offset ? image->v_offset : 0; ipu_cpmem_set_yuv_planar_full(ch, pix->bytesperline, u_offset, v_offset); break; case V4L2_PIX_FMT_NV16: offset = Y_OFFSET(pix, image->rect.left, image->rect.top); - u_offset = UV2_OFFSET(pix, image->rect.left, - image->rect.top) - offset; - v_offset = 0; + u_offset = image->u_offset ? + image->u_offset : UV2_OFFSET(pix, image->rect.left, + image->rect.top) - offset; + v_offset = image->v_offset ? image->v_offset : 0; ipu_cpmem_set_yuv_planar_full(ch, pix->bytesperline, u_offset, v_offset); diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c b/drivers/gpu/ipu-v3/ipu-image-convert.c index f4081962784c..41fb62b88c54 100644 --- a/drivers/gpu/ipu-v3/ipu-image-convert.c +++ b/drivers/gpu/ipu-v3/ipu-image-convert.c @@ -655,12 +655,12 @@ static void init_idmac_channel(struct ipu_image_convert_ctx *ctx, tile_image.pix.pixelformat = image->fmt->fourcc; tile_image.phys0 = addr0; tile_image.phys1 = addr1; - ipu_cpmem_set_image(channel, &tile_image); + if (image->fmt->planar && !rot_swap_width_height) { + tile_image.u_offset = image->tile[tile_idx[0]].u_off; + tile_image.v_offset = image->tile[tile_idx[0]].v_off; + } - if (image->fmt->planar && !rot_swap_width_height) - ipu_cpmem_set_uv_offset(channel, - image->tile[tile_idx[0]].u_off, - image->tile[tile_idx[0]].v_off); + ipu_cpmem_set_image(channel, &tile_image); if (rot_mode) ipu_cpmem_set_rotation(channel, rot_mode); -- cgit v1.2.1 From d0cbc93a0110a82ff9e01d3c21d5a2e2027b706f Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Tue, 18 Sep 2018 11:34:08 +0200 Subject: gpu: ipu-v3: ipu-ic: allow to manually set resize coefficients For tiled scaling, we want to compute the scaling coefficients externally in such a way that the interpolation overshoots tile boundaries and samples up to the first pixel of the next tile. Prepare to override the resizing coefficients from the image conversion code. Signed-off-by: Philipp Zabel Acked-by: Steve Longerbeam Tested-by: Steve Longerbeam --- drivers/gpu/ipu-v3/ipu-ic.c | 52 ++++++++++++++++++++++++++++----------------- 1 file changed, 33 insertions(+), 19 deletions(-) (limited to 'drivers/gpu/ipu-v3') diff --git a/drivers/gpu/ipu-v3/ipu-ic.c b/drivers/gpu/ipu-v3/ipu-ic.c index 67cc820253a9..594c3cbc8291 100644 --- a/drivers/gpu/ipu-v3/ipu-ic.c +++ b/drivers/gpu/ipu-v3/ipu-ic.c @@ -442,36 +442,40 @@ unlock: } EXPORT_SYMBOL_GPL(ipu_ic_task_graphics_init); -int ipu_ic_task_init(struct ipu_ic *ic, - int in_width, int in_height, - int out_width, int out_height, - enum ipu_color_space in_cs, - enum ipu_color_space out_cs) +int ipu_ic_task_init_rsc(struct ipu_ic *ic, + int in_width, int in_height, + int out_width, int out_height, + enum ipu_color_space in_cs, + enum ipu_color_space out_cs, + u32 rsc) { struct ipu_ic_priv *priv = ic->priv; - u32 reg, downsize_coeff, resize_coeff; + u32 downsize_coeff, resize_coeff; unsigned long flags; int ret = 0; - /* Setup vertical resizing */ - ret = calc_resize_coeffs(ic, in_height, out_height, - &resize_coeff, &downsize_coeff); - if (ret) - return ret; + if (!rsc) { + /* Setup vertical resizing */ - reg = (downsize_coeff << 30) | (resize_coeff << 16); + ret = calc_resize_coeffs(ic, in_height, out_height, + &resize_coeff, &downsize_coeff); + if (ret) + return ret; + + rsc = (downsize_coeff << 30) | (resize_coeff << 16); - /* Setup horizontal resizing */ - ret = calc_resize_coeffs(ic, in_width, out_width, - &resize_coeff, &downsize_coeff); - if (ret) - return ret; + /* Setup horizontal resizing */ + ret = calc_resize_coeffs(ic, in_width, out_width, + &resize_coeff, &downsize_coeff); + if (ret) + return ret; - reg |= (downsize_coeff << 14) | resize_coeff; + rsc |= (downsize_coeff << 14) | resize_coeff; + } spin_lock_irqsave(&priv->lock, flags); - ipu_ic_write(ic, reg, ic->reg->rsc); + ipu_ic_write(ic, rsc, ic->reg->rsc); /* Setup color space conversion */ ic->in_cs = in_cs; @@ -487,6 +491,16 @@ unlock: spin_unlock_irqrestore(&priv->lock, flags); return ret; } + +int ipu_ic_task_init(struct ipu_ic *ic, + int in_width, int in_height, + int out_width, int out_height, + enum ipu_color_space in_cs, + enum ipu_color_space out_cs) +{ + return ipu_ic_task_init_rsc(ic, in_width, in_height, out_width, + out_height, in_cs, out_cs, 0); +} EXPORT_SYMBOL_GPL(ipu_ic_task_init); int ipu_ic_task_idma_init(struct ipu_ic *ic, struct ipuv3_channel *channel, -- cgit v1.2.1 From 819bec35c8c9706185498c9222bd244e0781ad35 Mon Sep 17 00:00:00 2001 From: Steve Longerbeam Date: Wed, 19 Sep 2018 16:07:18 -0700 Subject: gpu: ipu-v3: image-convert: Prevent race between run and unprepare Prevent possible race by parallel threads between ipu_image_convert_run() and ipu_image_convert_unprepare(). This involves setting ctx->aborting to true unconditionally so that no new job runs can be queued during unprepare, and holding the ctx->aborting flag until the context is freed. Note that the "normal" ipu_image_convert_abort() case (e.g. not during context unprepare) should clear the ctx->aborting flag after aborting any active run and clearing the context's pending queue. This is because it should be possible to continue to use the conversion context and queue more runs after an abort. Signed-off-by: Steve Longerbeam Tested-by: Philipp Zabel Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/ipu-image-convert.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/ipu-v3') diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c b/drivers/gpu/ipu-v3/ipu-image-convert.c index 41fb62b88c54..6c15bf8efaa2 100644 --- a/drivers/gpu/ipu-v3/ipu-image-convert.c +++ b/drivers/gpu/ipu-v3/ipu-image-convert.c @@ -1524,7 +1524,7 @@ unlock: EXPORT_SYMBOL_GPL(ipu_image_convert_queue); /* Abort any active or pending conversions for this context */ -void ipu_image_convert_abort(struct ipu_image_convert_ctx *ctx) +static void __ipu_image_convert_abort(struct ipu_image_convert_ctx *ctx) { struct ipu_image_convert_chan *chan = ctx->chan; struct ipu_image_convert_priv *priv = chan->priv; @@ -1551,7 +1551,7 @@ void ipu_image_convert_abort(struct ipu_image_convert_ctx *ctx) need_abort = (run_count || active_run); - ctx->aborting = need_abort; + ctx->aborting = true; spin_unlock_irqrestore(&chan->irqlock, flags); @@ -1572,7 +1572,11 @@ void ipu_image_convert_abort(struct ipu_image_convert_ctx *ctx) dev_warn(priv->ipu->dev, "%s: timeout\n", __func__); force_abort(ctx); } +} +void ipu_image_convert_abort(struct ipu_image_convert_ctx *ctx) +{ + __ipu_image_convert_abort(ctx); ctx->aborting = false; } EXPORT_SYMBOL_GPL(ipu_image_convert_abort); @@ -1586,7 +1590,7 @@ void ipu_image_convert_unprepare(struct ipu_image_convert_ctx *ctx) bool put_res; /* make sure no runs are hanging around */ - ipu_image_convert_abort(ctx); + __ipu_image_convert_abort(ctx); dev_dbg(priv->ipu->dev, "%s: task %u: removing ctx %p\n", __func__, chan->ic_task, ctx); -- cgit v1.2.1 From 920340ae1f6949c3c75f96253bb249efa8b80167 Mon Sep 17 00:00:00 2001 From: Steve Longerbeam Date: Wed, 19 Sep 2018 16:13:03 -0700 Subject: gpu: ipu-v3: image-convert: Only wait for abort completion if active run Only wait for the ctx->aborted completion if there is an active run in progress, otherwise the wait will just timeout after 10 seconds. If there is no active run in progress, the done queue just needs to be emptied. Signed-off-by: Steve Longerbeam Tested-by: Philipp Zabel Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/ipu-image-convert.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/ipu-v3') diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c b/drivers/gpu/ipu-v3/ipu-image-convert.c index 6c15bf8efaa2..e3e032252604 100644 --- a/drivers/gpu/ipu-v3/ipu-image-convert.c +++ b/drivers/gpu/ipu-v3/ipu-image-convert.c @@ -1562,9 +1562,14 @@ static void __ipu_image_convert_abort(struct ipu_image_convert_ctx *ctx) return; } + if (!active_run) { + empty_done_q(chan); + return; + } + dev_dbg(priv->ipu->dev, - "%s: task %u: wait for completion: %d runs, active run %p\n", - __func__, chan->ic_task, run_count, active_run); + "%s: task %u: wait for completion: %d runs\n", + __func__, chan->ic_task, run_count); ret = wait_for_completion_timeout(&ctx->aborted, msecs_to_jiffies(10000)); -- cgit v1.2.1 From aa60b261c636220fc1dd6b2783c1e28aeab34364 Mon Sep 17 00:00:00 2001 From: Steve Longerbeam Date: Wed, 19 Sep 2018 16:17:15 -0700 Subject: gpu: ipu-v3: image-convert: Allow reentrancy into abort Allow reentrancy into ipu_image_convert_abort(), by moving re-init of ctx->aborted completion under the spin lock, and only if there is an active run, and complete all waiters do_bh(). Note: ipu_image_convert_unprepare() is still _not_ reentrant, and can't be made reentrant. Signed-off-by: Steve Longerbeam Tested-by: Philipp Zabel Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/ipu-image-convert.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/ipu-v3') diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c b/drivers/gpu/ipu-v3/ipu-image-convert.c index e3e032252604..abd8afb22b48 100644 --- a/drivers/gpu/ipu-v3/ipu-image-convert.c +++ b/drivers/gpu/ipu-v3/ipu-image-convert.c @@ -896,7 +896,7 @@ static irqreturn_t do_bh(int irq, void *dev_id) dev_dbg(priv->ipu->dev, "%s: task %u: signaling abort for ctx %p\n", __func__, chan->ic_task, ctx); - complete(&ctx->aborted); + complete_all(&ctx->aborted); } } @@ -1533,8 +1533,6 @@ static void __ipu_image_convert_abort(struct ipu_image_convert_ctx *ctx) int run_count, ret; bool need_abort; - reinit_completion(&ctx->aborted); - spin_lock_irqsave(&chan->irqlock, flags); /* move all remaining pending runs in this context to done_q */ @@ -1549,6 +1547,9 @@ static void __ipu_image_convert_abort(struct ipu_image_convert_ctx *ctx) active_run = (chan->current_run && chan->current_run->ctx == ctx) ? chan->current_run : NULL; + if (active_run) + reinit_completion(&ctx->aborted); + need_abort = (run_count || active_run); ctx->aborting = true; -- cgit v1.2.1 From b288adad61054f77b08e14ca2f833f1617119547 Mon Sep 17 00:00:00 2001 From: Steve Longerbeam Date: Wed, 19 Sep 2018 16:20:43 -0700 Subject: gpu: ipu-v3: image-convert: Remove need_abort flag The need_abort flag is not really needed anymore in __ipu_image_convert_abort(), remove it. No functional changes. Signed-off-by: Steve Longerbeam Tested-by: Philipp Zabel Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/ipu-image-convert.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'drivers/gpu/ipu-v3') diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c b/drivers/gpu/ipu-v3/ipu-image-convert.c index abd8afb22b48..b8a400182a00 100644 --- a/drivers/gpu/ipu-v3/ipu-image-convert.c +++ b/drivers/gpu/ipu-v3/ipu-image-convert.c @@ -1531,7 +1531,6 @@ static void __ipu_image_convert_abort(struct ipu_image_convert_ctx *ctx) struct ipu_image_convert_run *run, *active_run, *tmp; unsigned long flags; int run_count, ret; - bool need_abort; spin_lock_irqsave(&chan->irqlock, flags); @@ -1550,13 +1549,11 @@ static void __ipu_image_convert_abort(struct ipu_image_convert_ctx *ctx) if (active_run) reinit_completion(&ctx->aborted); - need_abort = (run_count || active_run); - ctx->aborting = true; spin_unlock_irqrestore(&chan->irqlock, flags); - if (!need_abort) { + if (!run_count && !active_run) { dev_dbg(priv->ipu->dev, "%s: task %u: no abort needed for ctx %p\n", __func__, chan->ic_task, ctx); -- cgit v1.2.1 From c4e456583a46182fc11492206aed8995af66163b Mon Sep 17 00:00:00 2001 From: Steve Longerbeam Date: Fri, 21 Sep 2018 11:46:39 -0700 Subject: gpu: ipu-v3: image-convert: Catch unaligned tile offsets Catch calculated tile offsets that are not 8-byte aligned as required by the IDMAC engine and return error in calc_tile_offsets(). Signed-off-by: Steve Longerbeam Tested-by: Philipp Zabel Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/ipu-image-convert.c | 61 +++++++++++++++++++++------------- 1 file changed, 37 insertions(+), 24 deletions(-) (limited to 'drivers/gpu/ipu-v3') diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c b/drivers/gpu/ipu-v3/ipu-image-convert.c index b8a400182a00..5fccba176e39 100644 --- a/drivers/gpu/ipu-v3/ipu-image-convert.c +++ b/drivers/gpu/ipu-v3/ipu-image-convert.c @@ -459,8 +459,8 @@ static void calc_out_tile_map(struct ipu_image_convert_ctx *ctx) } } -static void calc_tile_offsets_planar(struct ipu_image_convert_ctx *ctx, - struct ipu_image_convert_image *image) +static int calc_tile_offsets_planar(struct ipu_image_convert_ctx *ctx, + struct ipu_image_convert_image *image) { struct ipu_image_convert_chan *chan = ctx->chan; struct ipu_image_convert_priv *priv = chan->priv; @@ -509,24 +509,30 @@ static void calc_tile_offsets_planar(struct ipu_image_convert_ctx *ctx, image->tile[tile].u_off = u_off; image->tile[tile++].v_off = v_off; - dev_dbg(priv->ipu->dev, - "task %u: ctx %p: %s@[%d,%d]: y_off %08x, u_off %08x, v_off %08x\n", - chan->ic_task, ctx, - image->type == IMAGE_CONVERT_IN ? - "Input" : "Output", row, col, - y_off, u_off, v_off); + if ((y_off & 0x7) || (u_off & 0x7) || (v_off & 0x7)) { + dev_err(priv->ipu->dev, + "task %u: ctx %p: %s@[%d,%d]: " + "y_off %08x, u_off %08x, v_off %08x\n", + chan->ic_task, ctx, + image->type == IMAGE_CONVERT_IN ? + "Input" : "Output", row, col, + y_off, u_off, v_off); + return -EINVAL; + } } } + + return 0; } -static void calc_tile_offsets_packed(struct ipu_image_convert_ctx *ctx, - struct ipu_image_convert_image *image) +static int calc_tile_offsets_packed(struct ipu_image_convert_ctx *ctx, + struct ipu_image_convert_image *image) { struct ipu_image_convert_chan *chan = ctx->chan; struct ipu_image_convert_priv *priv = chan->priv; const struct ipu_image_pixfmt *fmt = image->fmt; unsigned int row, col, tile = 0; - u32 w, h, bpp, stride; + u32 w, h, bpp, stride, offset; u32 row_off, col_off; /* setup some convenience vars */ @@ -541,27 +547,35 @@ static void calc_tile_offsets_packed(struct ipu_image_convert_ctx *ctx, for (col = 0; col < image->num_cols; col++) { col_off = (col * w * bpp) >> 3; - image->tile[tile].offset = row_off + col_off; + offset = row_off + col_off; + + image->tile[tile].offset = offset; image->tile[tile].u_off = 0; image->tile[tile++].v_off = 0; - dev_dbg(priv->ipu->dev, - "task %u: ctx %p: %s@[%d,%d]: phys %08x\n", - chan->ic_task, ctx, - image->type == IMAGE_CONVERT_IN ? - "Input" : "Output", row, col, - row_off + col_off); + if (offset & 0x7) { + dev_err(priv->ipu->dev, + "task %u: ctx %p: %s@[%d,%d]: " + "phys %08x\n", + chan->ic_task, ctx, + image->type == IMAGE_CONVERT_IN ? + "Input" : "Output", row, col, + row_off + col_off); + return -EINVAL; + } } } + + return 0; } -static void calc_tile_offsets(struct ipu_image_convert_ctx *ctx, +static int calc_tile_offsets(struct ipu_image_convert_ctx *ctx, struct ipu_image_convert_image *image) { if (image->fmt->planar) - calc_tile_offsets_planar(ctx, image); - else - calc_tile_offsets_packed(ctx, image); + return calc_tile_offsets_planar(ctx, image); + + return calc_tile_offsets_packed(ctx, image); } /* @@ -1199,9 +1213,8 @@ static int fill_image(struct ipu_image_convert_ctx *ctx, ic_image->stride = ic_image->base.pix.bytesperline; calc_tile_dimensions(ctx, ic_image); - calc_tile_offsets(ctx, ic_image); - return 0; + return calc_tile_offsets(ctx, ic_image); } /* borrowed from drivers/media/v4l2-core/v4l2-common.c */ -- cgit v1.2.1 From dd65d2a93b0c215a8e6fa58102952dacc6ceac37 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Tue, 18 Sep 2018 11:34:09 +0200 Subject: gpu: ipu-v3: image-convert: prepare for per-tile configuration Let convert_start start from a given tile index, allocate intermediate tile with maximum tile size. Signed-off-by: Philipp Zabel Acked-by: Steve Longerbeam Tested-by: Steve Longerbeam --- drivers/gpu/ipu-v3/ipu-image-convert.c | 60 ++++++++++++++++++++-------------- 1 file changed, 35 insertions(+), 25 deletions(-) (limited to 'drivers/gpu/ipu-v3') diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c b/drivers/gpu/ipu-v3/ipu-image-convert.c index 5fccba176e39..c2f82d681c48 100644 --- a/drivers/gpu/ipu-v3/ipu-image-convert.c +++ b/drivers/gpu/ipu-v3/ipu-image-convert.c @@ -625,7 +625,8 @@ static void init_idmac_channel(struct ipu_image_convert_ctx *ctx, struct ipuv3_channel *channel, struct ipu_image_convert_image *image, enum ipu_rotate_mode rot_mode, - bool rot_swap_width_height) + bool rot_swap_width_height, + unsigned int tile) { struct ipu_image_convert_chan *chan = ctx->chan; unsigned int burst_size; @@ -635,23 +636,23 @@ static void init_idmac_channel(struct ipu_image_convert_ctx *ctx, unsigned int tile_idx[2]; if (image->type == IMAGE_CONVERT_OUT) { - tile_idx[0] = ctx->out_tile_map[0]; + tile_idx[0] = ctx->out_tile_map[tile]; tile_idx[1] = ctx->out_tile_map[1]; } else { - tile_idx[0] = 0; + tile_idx[0] = tile; tile_idx[1] = 1; } if (rot_swap_width_height) { - width = image->tile[0].height; - height = image->tile[0].width; - stride = image->tile[0].rot_stride; + width = image->tile[tile_idx[0]].height; + height = image->tile[tile_idx[0]].width; + stride = image->tile[tile_idx[0]].rot_stride; addr0 = ctx->rot_intermediate[0].phys; if (ctx->double_buffering) addr1 = ctx->rot_intermediate[1].phys; } else { - width = image->tile[0].width; - height = image->tile[0].height; + width = image->tile[tile_idx[0]].width; + height = image->tile[tile_idx[0]].height; stride = image->stride; addr0 = image->base.phys0 + image->tile[tile_idx[0]].offset; @@ -701,7 +702,7 @@ static void init_idmac_channel(struct ipu_image_convert_ctx *ctx, ipu_idmac_set_double_buffer(channel, ctx->double_buffering); } -static int convert_start(struct ipu_image_convert_run *run) +static int convert_start(struct ipu_image_convert_run *run, unsigned int tile) { struct ipu_image_convert_ctx *ctx = run->ctx; struct ipu_image_convert_chan *chan = ctx->chan; @@ -709,28 +710,29 @@ static int convert_start(struct ipu_image_convert_run *run) struct ipu_image_convert_image *s_image = &ctx->in; struct ipu_image_convert_image *d_image = &ctx->out; enum ipu_color_space src_cs, dest_cs; + unsigned int dst_tile = ctx->out_tile_map[tile]; unsigned int dest_width, dest_height; int ret; - dev_dbg(priv->ipu->dev, "%s: task %u: starting ctx %p run %p\n", - __func__, chan->ic_task, ctx, run); + dev_dbg(priv->ipu->dev, "%s: task %u: starting ctx %p run %p tile %u -> %u\n", + __func__, chan->ic_task, ctx, run, tile, dst_tile); src_cs = ipu_pixelformat_to_colorspace(s_image->fmt->fourcc); dest_cs = ipu_pixelformat_to_colorspace(d_image->fmt->fourcc); if (ipu_rot_mode_is_irt(ctx->rot_mode)) { /* swap width/height for resizer */ - dest_width = d_image->tile[0].height; - dest_height = d_image->tile[0].width; + dest_width = d_image->tile[dst_tile].height; + dest_height = d_image->tile[dst_tile].width; } else { - dest_width = d_image->tile[0].width; - dest_height = d_image->tile[0].height; + dest_width = d_image->tile[dst_tile].width; + dest_height = d_image->tile[dst_tile].height; } /* setup the IC resizer and CSC */ ret = ipu_ic_task_init(chan->ic, - s_image->tile[0].width, - s_image->tile[0].height, + s_image->tile[tile].width, + s_image->tile[tile].height, dest_width, dest_height, src_cs, dest_cs); @@ -741,27 +743,27 @@ static int convert_start(struct ipu_image_convert_run *run) /* init the source MEM-->IC PP IDMAC channel */ init_idmac_channel(ctx, chan->in_chan, s_image, - IPU_ROTATE_NONE, false); + IPU_ROTATE_NONE, false, tile); if (ipu_rot_mode_is_irt(ctx->rot_mode)) { /* init the IC PP-->MEM IDMAC channel */ init_idmac_channel(ctx, chan->out_chan, d_image, - IPU_ROTATE_NONE, true); + IPU_ROTATE_NONE, true, tile); /* init the MEM-->IC PP ROT IDMAC channel */ init_idmac_channel(ctx, chan->rotation_in_chan, d_image, - ctx->rot_mode, true); + ctx->rot_mode, true, tile); /* init the destination IC PP ROT-->MEM IDMAC channel */ init_idmac_channel(ctx, chan->rotation_out_chan, d_image, - IPU_ROTATE_NONE, false); + IPU_ROTATE_NONE, false, tile); /* now link IC PP-->MEM to MEM-->IC PP ROT */ ipu_idmac_link(chan->out_chan, chan->rotation_in_chan); } else { /* init the destination IC PP-->MEM IDMAC channel */ init_idmac_channel(ctx, chan->out_chan, d_image, - ctx->rot_mode, false); + ctx->rot_mode, false, tile); } /* enable the IC */ @@ -819,7 +821,7 @@ static int do_run(struct ipu_image_convert_run *run) list_del(&run->list); chan->current_run = run; - return convert_start(run); + return convert_start(run, 0); } /* hold irqlock when calling */ @@ -1449,14 +1451,22 @@ ipu_image_convert_prepare(struct ipu_soc *ipu, enum ipu_ic_task ic_task, !d_image->fmt->planar); if (ipu_rot_mode_is_irt(ctx->rot_mode)) { + unsigned long intermediate_size = d_image->tile[0].size; + unsigned int i; + + for (i = 1; i < ctx->num_tiles; i++) { + if (d_image->tile[i].size > intermediate_size) + intermediate_size = d_image->tile[i].size; + } + ret = alloc_dma_buf(priv, &ctx->rot_intermediate[0], - d_image->tile[0].size); + intermediate_size); if (ret) goto out_free; if (ctx->double_buffering) { ret = alloc_dma_buf(priv, &ctx->rot_intermediate[1], - d_image->tile[0].size); + intermediate_size); if (ret) goto out_free_dmabuf0; } -- cgit v1.2.1 From 70b9b6b3bcb216926d8dcec27f1579abffd047c0 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Tue, 18 Sep 2018 11:34:10 +0200 Subject: gpu: ipu-v3: image-convert: calculate per-tile resize coefficients Slightly modifying resize coefficients per-tile allows to completely hide the seams between tiles and to sample the correct input pixels at the bottom and right edges of the image. Tiling requires a bilinear interpolator reset at each tile start, which causes the image to be slightly shifted if the starting pixel should not have been sampled from an integer pixel position in the source image according to the full image resizing ratio. To work around this hardware limitation, calculate per-tile resizing coefficients that make sure that the correct input pixels are sampled at the tile end. Signed-off-by: Philipp Zabel Acked-by: Steve Longerbeam Tested-by: Steve Longerbeam --- drivers/gpu/ipu-v3/ipu-image-convert.c | 236 ++++++++++++++++++++++++++++++++- 1 file changed, 234 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/ipu-v3') diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c b/drivers/gpu/ipu-v3/ipu-image-convert.c index c2f82d681c48..31e7186bcc00 100644 --- a/drivers/gpu/ipu-v3/ipu-image-convert.c +++ b/drivers/gpu/ipu-v3/ipu-image-convert.c @@ -135,6 +135,12 @@ struct ipu_image_convert_ctx { struct ipu_image_convert_image in; struct ipu_image_convert_image out; enum ipu_rotate_mode rot_mode; + u32 downsize_coeff_h; + u32 downsize_coeff_v; + u32 image_resize_coeff_h; + u32 image_resize_coeff_v; + u32 resize_coeffs_h[MAX_STRIPES_W]; + u32 resize_coeffs_v[MAX_STRIPES_H]; /* intermediate buffer for rotation */ struct ipu_image_convert_dma_buf rot_intermediate[2]; @@ -361,6 +367,69 @@ static inline int num_stripes(int dim) return 4; } +/* + * Calculate downsizing coefficients, which are the same for all tiles, + * and bilinear resizing coefficients, which are used to find the best + * seam positions. + */ +static int calc_image_resize_coefficients(struct ipu_image_convert_ctx *ctx, + struct ipu_image *in, + struct ipu_image *out) +{ + u32 downsized_width = in->rect.width; + u32 downsized_height = in->rect.height; + u32 downsize_coeff_v = 0; + u32 downsize_coeff_h = 0; + u32 resized_width = out->rect.width; + u32 resized_height = out->rect.height; + u32 resize_coeff_h; + u32 resize_coeff_v; + + if (ipu_rot_mode_is_irt(ctx->rot_mode)) { + resized_width = out->rect.height; + resized_height = out->rect.width; + } + + /* Do not let invalid input lead to an endless loop below */ + if (WARN_ON(resized_width == 0 || resized_height == 0)) + return -EINVAL; + + while (downsized_width >= resized_width * 2) { + downsized_width >>= 1; + downsize_coeff_h++; + } + + while (downsized_height >= resized_height * 2) { + downsized_height >>= 1; + downsize_coeff_v++; + } + + /* + * Calculate the bilinear resizing coefficients that could be used if + * we were converting with a single tile. The bottom right output pixel + * should sample as close as possible to the bottom right input pixel + * out of the decimator, but not overshoot it: + */ + resize_coeff_h = 8192 * (downsized_width - 1) / (resized_width - 1); + resize_coeff_v = 8192 * (downsized_height - 1) / (resized_height - 1); + + dev_dbg(ctx->chan->priv->ipu->dev, + "%s: hscale: >>%u, *8192/%u vscale: >>%u, *8192/%u, %ux%u tiles\n", + __func__, downsize_coeff_h, resize_coeff_h, downsize_coeff_v, + resize_coeff_v, ctx->in.num_cols, ctx->in.num_rows); + + if (downsize_coeff_h > 2 || downsize_coeff_v > 2 || + resize_coeff_h > 0x3fff || resize_coeff_v > 0x3fff) + return -EINVAL; + + ctx->downsize_coeff_h = downsize_coeff_h; + ctx->downsize_coeff_v = downsize_coeff_v; + ctx->image_resize_coeff_h = resize_coeff_h; + ctx->image_resize_coeff_v = resize_coeff_v; + + return 0; +} + static void calc_tile_dimensions(struct ipu_image_convert_ctx *ctx, struct ipu_image_convert_image *image) { @@ -578,6 +647,149 @@ static int calc_tile_offsets(struct ipu_image_convert_ctx *ctx, return calc_tile_offsets_packed(ctx, image); } +/* + * Calculate the resizing ratio for the IC main processing section given input + * size, fixed downsizing coefficient, and output size. + * Either round to closest for the next tile's first pixel to minimize seams + * and distortion (for all but right column / bottom row), or round down to + * avoid sampling beyond the edges of the input image for this tile's last + * pixel. + * Returns the resizing coefficient, resizing ratio is 8192.0 / resize_coeff. + */ +static u32 calc_resize_coeff(u32 input_size, u32 downsize_coeff, + u32 output_size, bool allow_overshoot) +{ + u32 downsized = input_size >> downsize_coeff; + + if (allow_overshoot) + return DIV_ROUND_CLOSEST(8192 * downsized, output_size); + else + return 8192 * (downsized - 1) / (output_size - 1); +} + +/* + * Slightly modify resize coefficients per tile to hide the bilinear + * interpolator reset at tile borders, shifting the right / bottom edge + * by up to a half input pixel. This removes noticeable seams between + * tiles at higher upscaling factors. + */ +static void calc_tile_resize_coefficients(struct ipu_image_convert_ctx *ctx) +{ + struct ipu_image_convert_chan *chan = ctx->chan; + struct ipu_image_convert_priv *priv = chan->priv; + struct ipu_image_tile *in_tile, *out_tile; + unsigned int col, row, tile_idx; + unsigned int last_output; + + for (col = 0; col < ctx->in.num_cols; col++) { + bool closest = (col < ctx->in.num_cols - 1) && + !(ctx->rot_mode & IPU_ROT_BIT_HFLIP); + u32 resized_width; + u32 resize_coeff_h; + + tile_idx = col; + in_tile = &ctx->in.tile[tile_idx]; + out_tile = &ctx->out.tile[ctx->out_tile_map[tile_idx]]; + + if (ipu_rot_mode_is_irt(ctx->rot_mode)) + resized_width = out_tile->height; + else + resized_width = out_tile->width; + + resize_coeff_h = calc_resize_coeff(in_tile->width, + ctx->downsize_coeff_h, + resized_width, closest); + + dev_dbg(priv->ipu->dev, "%s: column %u hscale: *8192/%u\n", + __func__, col, resize_coeff_h); + + + for (row = 0; row < ctx->in.num_rows; row++) { + tile_idx = row * ctx->in.num_cols + col; + in_tile = &ctx->in.tile[tile_idx]; + out_tile = &ctx->out.tile[ctx->out_tile_map[tile_idx]]; + + /* + * With the horizontal scaling factor known, round up + * resized width (output width or height) to burst size. + */ + if (ipu_rot_mode_is_irt(ctx->rot_mode)) + out_tile->height = round_up(resized_width, 8); + else + out_tile->width = round_up(resized_width, 8); + + /* + * Calculate input width from the last accessed input + * pixel given resized width and scaling coefficients. + * Round up to burst size. + */ + last_output = round_up(resized_width, 8) - 1; + if (closest) + last_output++; + in_tile->width = round_up( + (DIV_ROUND_UP(last_output * resize_coeff_h, + 8192) + 1) + << ctx->downsize_coeff_h, 8); + } + + ctx->resize_coeffs_h[col] = resize_coeff_h; + } + + for (row = 0; row < ctx->in.num_rows; row++) { + bool closest = (row < ctx->in.num_rows - 1) && + !(ctx->rot_mode & IPU_ROT_BIT_VFLIP); + u32 resized_height; + u32 resize_coeff_v; + + tile_idx = row * ctx->in.num_cols; + in_tile = &ctx->in.tile[tile_idx]; + out_tile = &ctx->out.tile[ctx->out_tile_map[tile_idx]]; + + if (ipu_rot_mode_is_irt(ctx->rot_mode)) + resized_height = out_tile->width; + else + resized_height = out_tile->height; + + resize_coeff_v = calc_resize_coeff(in_tile->height, + ctx->downsize_coeff_v, + resized_height, closest); + + dev_dbg(priv->ipu->dev, "%s: row %u vscale: *8192/%u\n", + __func__, row, resize_coeff_v); + + for (col = 0; col < ctx->in.num_cols; col++) { + tile_idx = row * ctx->in.num_cols + col; + in_tile = &ctx->in.tile[tile_idx]; + out_tile = &ctx->out.tile[ctx->out_tile_map[tile_idx]]; + + /* + * With the vertical scaling factor known, round up + * resized height (output width or height) to IDMAC + * limitations. + */ + if (ipu_rot_mode_is_irt(ctx->rot_mode)) + out_tile->width = round_up(resized_height, 2); + else + out_tile->height = round_up(resized_height, 2); + + /* + * Calculate input width from the last accessed input + * pixel given resized height and scaling coefficients. + * Align to IDMAC restrictions. + */ + last_output = round_up(resized_height, 2) - 1; + if (closest) + last_output++; + in_tile->height = round_up( + (DIV_ROUND_UP(last_output * resize_coeff_v, + 8192) + 1) + << ctx->downsize_coeff_v, 2); + } + + ctx->resize_coeffs_v[row] = resize_coeff_v; + } +} + /* * return the number of runs in given queue (pending_q or done_q) * for this context. hold irqlock when calling. @@ -712,6 +924,8 @@ static int convert_start(struct ipu_image_convert_run *run, unsigned int tile) enum ipu_color_space src_cs, dest_cs; unsigned int dst_tile = ctx->out_tile_map[tile]; unsigned int dest_width, dest_height; + unsigned int col, row; + u32 rsc; int ret; dev_dbg(priv->ipu->dev, "%s: task %u: starting ctx %p run %p tile %u -> %u\n", @@ -729,13 +943,26 @@ static int convert_start(struct ipu_image_convert_run *run, unsigned int tile) dest_height = d_image->tile[dst_tile].height; } + row = tile / s_image->num_cols; + col = tile % s_image->num_cols; + + rsc = (ctx->downsize_coeff_v << 30) | + (ctx->resize_coeffs_v[row] << 16) | + (ctx->downsize_coeff_h << 14) | + (ctx->resize_coeffs_h[col]); + + dev_dbg(priv->ipu->dev, "%s: %ux%u -> %ux%u (rsc = 0x%x)\n", + __func__, s_image->tile[tile].width, + s_image->tile[tile].height, dest_width, dest_height, rsc); + /* setup the IC resizer and CSC */ - ret = ipu_ic_task_init(chan->ic, + ret = ipu_ic_task_init_rsc(chan->ic, s_image->tile[tile].width, s_image->tile[tile].height, dest_width, dest_height, - src_cs, dest_cs); + src_cs, dest_cs, + rsc); if (ret) { dev_err(priv->ipu->dev, "ipu_ic_task_init failed, %d\n", ret); return ret; @@ -1420,6 +1647,10 @@ ipu_image_convert_prepare(struct ipu_soc *ipu, enum ipu_ic_task ic_task, ctx->num_tiles = d_image->num_cols * d_image->num_rows; ctx->rot_mode = rot_mode; + ret = calc_image_resize_coefficients(ctx, in, out); + if (ret) + goto out_free; + ret = fill_image(ctx, s_image, in, IMAGE_CONVERT_IN); if (ret) goto out_free; @@ -1428,6 +1659,7 @@ ipu_image_convert_prepare(struct ipu_soc *ipu, enum ipu_ic_task ic_task, goto out_free; calc_out_tile_map(ctx); + calc_tile_resize_coefficients(ctx); dump_format(ctx, s_image); dump_format(ctx, d_image); -- cgit v1.2.1 From 0537db801bb01b3bf8281d254f740fd8856ad15c Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Tue, 18 Sep 2018 11:34:11 +0200 Subject: gpu: ipu-v3: image-convert: reconfigure IC per tile For differently sized tiles or if the resizing coefficients change, we have to stop, reconfigure, and restart the IC between tiles. Signed-off-by: Philipp Zabel Acked-by: Steve Longerbeam Tested-by: Steve Longerbeam --- drivers/gpu/ipu-v3/ipu-image-convert.c | 65 +++++++++++++++++++++++----------- 1 file changed, 44 insertions(+), 21 deletions(-) (limited to 'drivers/gpu/ipu-v3') diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c b/drivers/gpu/ipu-v3/ipu-image-convert.c index 31e7186bcc00..cb47981741b4 100644 --- a/drivers/gpu/ipu-v3/ipu-image-convert.c +++ b/drivers/gpu/ipu-v3/ipu-image-convert.c @@ -1151,6 +1151,24 @@ static irqreturn_t do_bh(int irq, void *dev_id) return IRQ_HANDLED; } +static bool ic_settings_changed(struct ipu_image_convert_ctx *ctx) +{ + unsigned int cur_tile = ctx->next_tile - 1; + unsigned int next_tile = ctx->next_tile; + + if (ctx->resize_coeffs_h[cur_tile % ctx->in.num_cols] != + ctx->resize_coeffs_h[next_tile % ctx->in.num_cols] || + ctx->resize_coeffs_v[cur_tile / ctx->in.num_cols] != + ctx->resize_coeffs_v[next_tile / ctx->in.num_cols] || + ctx->in.tile[cur_tile].width != ctx->in.tile[next_tile].width || + ctx->in.tile[cur_tile].height != ctx->in.tile[next_tile].height || + ctx->out.tile[cur_tile].width != ctx->out.tile[next_tile].width || + ctx->out.tile[cur_tile].height != ctx->out.tile[next_tile].height) + return true; + + return false; +} + /* hold irqlock when calling */ static irqreturn_t do_irq(struct ipu_image_convert_run *run) { @@ -1194,27 +1212,32 @@ static irqreturn_t do_irq(struct ipu_image_convert_run *run) * not done, place the next tile buffers. */ if (!ctx->double_buffering) { - - src_tile = &s_image->tile[ctx->next_tile]; - dst_idx = ctx->out_tile_map[ctx->next_tile]; - dst_tile = &d_image->tile[dst_idx]; - - ipu_cpmem_set_buffer(chan->in_chan, 0, - s_image->base.phys0 + src_tile->offset); - ipu_cpmem_set_buffer(outch, 0, - d_image->base.phys0 + dst_tile->offset); - if (s_image->fmt->planar) - ipu_cpmem_set_uv_offset(chan->in_chan, - src_tile->u_off, - src_tile->v_off); - if (d_image->fmt->planar) - ipu_cpmem_set_uv_offset(outch, - dst_tile->u_off, - dst_tile->v_off); - - ipu_idmac_select_buffer(chan->in_chan, 0); - ipu_idmac_select_buffer(outch, 0); - + if (ic_settings_changed(ctx)) { + convert_stop(run); + convert_start(run, ctx->next_tile); + } else { + src_tile = &s_image->tile[ctx->next_tile]; + dst_idx = ctx->out_tile_map[ctx->next_tile]; + dst_tile = &d_image->tile[dst_idx]; + + ipu_cpmem_set_buffer(chan->in_chan, 0, + s_image->base.phys0 + + src_tile->offset); + ipu_cpmem_set_buffer(outch, 0, + d_image->base.phys0 + + dst_tile->offset); + if (s_image->fmt->planar) + ipu_cpmem_set_uv_offset(chan->in_chan, + src_tile->u_off, + src_tile->v_off); + if (d_image->fmt->planar) + ipu_cpmem_set_uv_offset(outch, + dst_tile->u_off, + dst_tile->v_off); + + ipu_idmac_select_buffer(chan->in_chan, 0); + ipu_idmac_select_buffer(outch, 0); + } } else if (ctx->next_tile < ctx->num_tiles - 1) { src_tile = &s_image->tile[ctx->next_tile + 1]; -- cgit v1.2.1 From 571dd82c508da882a4988fe28908bce652525f72 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Tue, 18 Sep 2018 11:34:12 +0200 Subject: gpu: ipu-v3: image-convert: store tile top/left position Store tile top/left position in pixels in the tile structure. This will allow overlapping tiles with different sizes later. Signed-off-by: Philipp Zabel Acked-by: Steve Longerbeam Tested-by: Steve Longerbeam --- drivers/gpu/ipu-v3/ipu-image-convert.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) (limited to 'drivers/gpu/ipu-v3') diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c b/drivers/gpu/ipu-v3/ipu-image-convert.c index cb47981741b4..d14ee7b303a1 100644 --- a/drivers/gpu/ipu-v3/ipu-image-convert.c +++ b/drivers/gpu/ipu-v3/ipu-image-convert.c @@ -84,6 +84,8 @@ struct ipu_image_convert_dma_chan { struct ipu_image_tile { u32 width; u32 height; + u32 left; + u32 top; /* size and strides are in bytes */ u32 size; u32 stride; @@ -433,13 +435,17 @@ static int calc_image_resize_coefficients(struct ipu_image_convert_ctx *ctx, static void calc_tile_dimensions(struct ipu_image_convert_ctx *ctx, struct ipu_image_convert_image *image) { - int i; + unsigned int i; for (i = 0; i < ctx->num_tiles; i++) { struct ipu_image_tile *tile = &image->tile[i]; + const unsigned int row = i / image->num_cols; + const unsigned int col = i % image->num_cols; tile->height = image->base.pix.height / image->num_rows; tile->width = image->base.pix.width / image->num_cols; + tile->left = col * tile->width; + tile->top = row * tile->height; tile->size = ((tile->height * image->fmt->bpp) >> 3) * tile->width; @@ -535,7 +541,7 @@ static int calc_tile_offsets_planar(struct ipu_image_convert_ctx *ctx, struct ipu_image_convert_priv *priv = chan->priv; const struct ipu_image_pixfmt *fmt = image->fmt; unsigned int row, col, tile = 0; - u32 H, w, h, y_stride, uv_stride; + u32 H, top, y_stride, uv_stride; u32 uv_row_off, uv_col_off, uv_off, u_off, v_off, tmp; u32 y_row_off, y_col_off, y_off; u32 y_size, uv_size; @@ -552,13 +558,12 @@ static int calc_tile_offsets_planar(struct ipu_image_convert_ctx *ctx, uv_size = y_size / (fmt->uv_width_dec * fmt->uv_height_dec); for (row = 0; row < image->num_rows; row++) { - w = image->tile[tile].width; - h = image->tile[tile].height; - y_row_off = row * h * y_stride; - uv_row_off = (row * h * uv_stride) / fmt->uv_height_dec; + top = image->tile[tile].top; + y_row_off = top * y_stride; + uv_row_off = (top * uv_stride) / fmt->uv_height_dec; for (col = 0; col < image->num_cols; col++) { - y_col_off = col * w; + y_col_off = image->tile[tile].left; uv_col_off = y_col_off / fmt->uv_width_dec; if (fmt->uv_packed) uv_col_off *= 2; @@ -601,7 +606,7 @@ static int calc_tile_offsets_packed(struct ipu_image_convert_ctx *ctx, struct ipu_image_convert_priv *priv = chan->priv; const struct ipu_image_pixfmt *fmt = image->fmt; unsigned int row, col, tile = 0; - u32 w, h, bpp, stride, offset; + u32 bpp, stride, offset; u32 row_off, col_off; /* setup some convenience vars */ @@ -609,12 +614,10 @@ static int calc_tile_offsets_packed(struct ipu_image_convert_ctx *ctx, bpp = fmt->bpp; for (row = 0; row < image->num_rows; row++) { - w = image->tile[tile].width; - h = image->tile[tile].height; - row_off = row * h * stride; + row_off = image->tile[tile].top * stride; for (col = 0; col < image->num_cols; col++) { - col_off = (col * w * bpp) >> 3; + col_off = (image->tile[tile].left * bpp) >> 3; offset = row_off + col_off; -- cgit v1.2.1 From 26ddd032a82bf66868271350f20453e865a90d5f Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Tue, 18 Sep 2018 11:34:13 +0200 Subject: gpu: ipu-v3: image-convert: calculate tile dimensions and offsets outside fill_image This will allow to calculate seam positions after initializing the ipu_image base structure but before calculating tile dimensions. Signed-off-by: Philipp Zabel Acked-by: Steve Longerbeam Tested-by: Steve Longerbeam --- drivers/gpu/ipu-v3/ipu-image-convert.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/ipu-v3') diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c b/drivers/gpu/ipu-v3/ipu-image-convert.c index d14ee7b303a1..542c091cfef1 100644 --- a/drivers/gpu/ipu-v3/ipu-image-convert.c +++ b/drivers/gpu/ipu-v3/ipu-image-convert.c @@ -1467,9 +1467,7 @@ static int fill_image(struct ipu_image_convert_ctx *ctx, else ic_image->stride = ic_image->base.pix.bytesperline; - calc_tile_dimensions(ctx, ic_image); - - return calc_tile_offsets(ctx, ic_image); + return 0; } /* borrowed from drivers/media/v4l2-core/v4l2-common.c */ @@ -1673,14 +1671,24 @@ ipu_image_convert_prepare(struct ipu_soc *ipu, enum ipu_ic_task ic_task, ctx->num_tiles = d_image->num_cols * d_image->num_rows; ctx->rot_mode = rot_mode; + ret = fill_image(ctx, s_image, in, IMAGE_CONVERT_IN); + if (ret) + goto out_free; + ret = fill_image(ctx, d_image, out, IMAGE_CONVERT_OUT); + if (ret) + goto out_free; + ret = calc_image_resize_coefficients(ctx, in, out); if (ret) goto out_free; - ret = fill_image(ctx, s_image, in, IMAGE_CONVERT_IN); + calc_tile_dimensions(ctx, s_image); + ret = calc_tile_offsets(ctx, s_image); if (ret) goto out_free; - ret = fill_image(ctx, d_image, out, IMAGE_CONVERT_OUT); + + calc_tile_dimensions(ctx, d_image); + ret = calc_tile_offsets(ctx, d_image); if (ret) goto out_free; -- cgit v1.2.1 From 76e77bf543c57be14c3daac40f6fb7578ac90992 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Tue, 18 Sep 2018 11:34:14 +0200 Subject: gpu: ipu-v3: image-convert: move tile alignment helpers Move tile_width_align and tile_height_align up so they can be used by the tile edge position calculation code. Signed-off-by: Philipp Zabel Acked-by: Steve Longerbeam Tested-by: Steve Longerbeam --- drivers/gpu/ipu-v3/ipu-image-convert.c | 54 +++++++++++++++++----------------- 1 file changed, 27 insertions(+), 27 deletions(-) (limited to 'drivers/gpu/ipu-v3') diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c b/drivers/gpu/ipu-v3/ipu-image-convert.c index 542c091cfef1..a407ca3b367b 100644 --- a/drivers/gpu/ipu-v3/ipu-image-convert.c +++ b/drivers/gpu/ipu-v3/ipu-image-convert.c @@ -432,6 +432,33 @@ static int calc_image_resize_coefficients(struct ipu_image_convert_ctx *ctx, return 0; } +/* + * We have to adjust the tile width such that the tile physaddrs and + * U and V plane offsets are multiples of 8 bytes as required by + * the IPU DMA Controller. For the planar formats, this corresponds + * to a pixel alignment of 16 (but use a more formal equation since + * the variables are available). For all the packed formats, 8 is + * good enough. + */ +static inline u32 tile_width_align(const struct ipu_image_pixfmt *fmt) +{ + return fmt->planar ? 8 * fmt->uv_width_dec : 8; +} + +/* + * For tile height alignment, we have to ensure that the output tile + * heights are multiples of 8 lines if the IRT is required by the + * given rotation mode (the IRT performs rotations on 8x8 blocks + * at a time). If the IRT is not used, or for input image tiles, + * 2 lines are good enough. + */ +static inline u32 tile_height_align(enum ipu_image_convert_type type, + enum ipu_rotate_mode rot_mode) +{ + return (type == IMAGE_CONVERT_OUT && + ipu_rot_mode_is_irt(rot_mode)) ? 8 : 2; +} + static void calc_tile_dimensions(struct ipu_image_convert_ctx *ctx, struct ipu_image_convert_image *image) { @@ -1487,33 +1514,6 @@ static unsigned int clamp_align(unsigned int x, unsigned int min, return x; } -/* - * We have to adjust the tile width such that the tile physaddrs and - * U and V plane offsets are multiples of 8 bytes as required by - * the IPU DMA Controller. For the planar formats, this corresponds - * to a pixel alignment of 16 (but use a more formal equation since - * the variables are available). For all the packed formats, 8 is - * good enough. - */ -static inline u32 tile_width_align(const struct ipu_image_pixfmt *fmt) -{ - return fmt->planar ? 8 * fmt->uv_width_dec : 8; -} - -/* - * For tile height alignment, we have to ensure that the output tile - * heights are multiples of 8 lines if the IRT is required by the - * given rotation mode (the IRT performs rotations on 8x8 blocks - * at a time). If the IRT is not used, or for input image tiles, - * 2 lines are good enough. - */ -static inline u32 tile_height_align(enum ipu_image_convert_type type, - enum ipu_rotate_mode rot_mode) -{ - return (type == IMAGE_CONVERT_OUT && - ipu_rot_mode_is_irt(rot_mode)) ? 8 : 2; -} - /* Adjusts input/output images to IPU restrictions */ void ipu_image_convert_adjust(struct ipu_image *in, struct ipu_image *out, enum ipu_rotate_mode rot_mode) -- cgit v1.2.1 From 64fbae5e3e2e1b60ae420810216220f59fcde78d Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Tue, 18 Sep 2018 11:34:15 +0200 Subject: gpu: ipu-v3: image-convert: select optimal seam positions Select seam positions that minimize distortions during seam hiding while satifying input and output IDMAC, rotator, and image format constraints. This code looks for aligned output seam positions that minimize the difference between the fractional corresponding ideal input positions and the input positions rounded to alignment requirements. Since now tiles can be sized differently, alignment restrictions of the complete image can be relaxed in the next step. Signed-off-by: Philipp Zabel Acked-by: Steve Longerbeam Tested-by: Steve Longerbeam --- drivers/gpu/ipu-v3/ipu-image-convert.c | 343 ++++++++++++++++++++++++++++++++- 1 file changed, 337 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/ipu-v3') diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c b/drivers/gpu/ipu-v3/ipu-image-convert.c index a407ca3b367b..a674241dd0b8 100644 --- a/drivers/gpu/ipu-v3/ipu-image-convert.c +++ b/drivers/gpu/ipu-v3/ipu-image-convert.c @@ -432,6 +432,126 @@ static int calc_image_resize_coefficients(struct ipu_image_convert_ctx *ctx, return 0; } +#define round_closest(x, y) round_down((x) + (y)/2, (y)) + +/* + * Find the best aligned seam position in the inverval [out_start, out_end]. + * Rotation and image offsets are out of scope. + * + * @out_start: start of inverval, must be within 1024 pixels / lines + * of out_end + * @out_end: end of interval, smaller than or equal to out_edge + * @in_edge: input right / bottom edge + * @out_edge: output right / bottom edge + * @in_align: input alignment, either horizontal 8-byte line start address + * alignment, or pixel alignment due to image format + * @out_align: output alignment, either horizontal 8-byte line start address + * alignment, or pixel alignment due to image format or rotator + * block size + * @in_burst: horizontal input burst size in case of horizontal flip + * @out_burst: horizontal output burst size or rotator block size + * @downsize_coeff: downsizing section coefficient + * @resize_coeff: main processing section resizing coefficient + * @_in_seam: aligned input seam position return value + * @_out_seam: aligned output seam position return value + */ +static void find_best_seam(struct ipu_image_convert_ctx *ctx, + unsigned int out_start, + unsigned int out_end, + unsigned int in_edge, + unsigned int out_edge, + unsigned int in_align, + unsigned int out_align, + unsigned int in_burst, + unsigned int out_burst, + unsigned int downsize_coeff, + unsigned int resize_coeff, + u32 *_in_seam, + u32 *_out_seam) +{ + struct device *dev = ctx->chan->priv->ipu->dev; + unsigned int out_pos; + /* Input / output seam position candidates */ + unsigned int out_seam = 0; + unsigned int in_seam = 0; + unsigned int min_diff = UINT_MAX; + + /* + * Output tiles must start at a multiple of 8 bytes horizontally and + * possibly at an even line horizontally depending on the pixel format. + * Only consider output aligned positions for the seam. + */ + out_start = round_up(out_start, out_align); + for (out_pos = out_start; out_pos < out_end; out_pos += out_align) { + unsigned int in_pos; + unsigned int in_pos_aligned; + unsigned int abs_diff; + + /* + * Tiles in the right row / bottom column may not be allowed to + * overshoot horizontally / vertically. out_burst may be the + * actual DMA burst size, or the rotator block size. + */ + if ((out_burst > 1) && (out_edge - out_pos) % out_burst) + continue; + + /* + * Input sample position, corresponding to out_pos, 19.13 fixed + * point. + */ + in_pos = (out_pos * resize_coeff) << downsize_coeff; + /* + * The closest input sample position that we could actually + * start the input tile at, 19.13 fixed point. + */ + in_pos_aligned = round_closest(in_pos, 8192U * in_align); + + if ((in_burst > 1) && + (in_edge - in_pos_aligned / 8192U) % in_burst) + continue; + + if (in_pos < in_pos_aligned) + abs_diff = in_pos_aligned - in_pos; + else + abs_diff = in_pos - in_pos_aligned; + + if (abs_diff < min_diff) { + in_seam = in_pos_aligned; + out_seam = out_pos; + min_diff = abs_diff; + } + } + + *_out_seam = out_seam; + /* Convert 19.13 fixed point to integer seam position */ + *_in_seam = DIV_ROUND_CLOSEST(in_seam, 8192U); + + dev_dbg(dev, "%s: out_seam %u(%u) in [%u, %u], in_seam %u(%u) diff %u.%03u\n", + __func__, out_seam, out_align, out_start, out_end, + *_in_seam, in_align, min_diff / 8192, + DIV_ROUND_CLOSEST(min_diff % 8192 * 1000, 8192)); +} + +/* + * Tile left edges are required to be aligned to multiples of 8 bytes + * by the IDMAC. + */ +static inline u32 tile_left_align(const struct ipu_image_pixfmt *fmt) +{ + if (fmt->planar) + return fmt->uv_packed ? 8 : 8 * fmt->uv_width_dec; + else + return fmt->bpp == 32 ? 2 : fmt->bpp == 16 ? 4 : 8; +} + +/* + * Tile top edge alignment is only limited by chroma subsampling. + */ +static inline u32 tile_top_align(const struct ipu_image_pixfmt *fmt) +{ + return fmt->uv_height_dec > 1 ? 2 : 1; +} + /* * We have to adjust the tile width such that the tile physaddrs and * U and V plane offsets are multiples of 8 bytes as required by @@ -459,20 +579,228 @@ static inline u32 tile_height_align(enum ipu_image_convert_type type, ipu_rot_mode_is_irt(rot_mode)) ? 8 : 2; } +/* + * Fill in left position and width and for all tiles in an input column, and + * for all corresponding output tiles. If the 90° rotator is used, the output + * tiles are in a row, and output tile top position and height are set. + */ +static void fill_tile_column(struct ipu_image_convert_ctx *ctx, + unsigned int col, + struct ipu_image_convert_image *in, + unsigned int in_left, unsigned int in_width, + struct ipu_image_convert_image *out, + unsigned int out_left, unsigned int out_width) +{ + unsigned int row, tile_idx; + struct ipu_image_tile *in_tile, *out_tile; + + for (row = 0; row < in->num_rows; row++) { + tile_idx = in->num_cols * row + col; + in_tile = &in->tile[tile_idx]; + out_tile = &out->tile[ctx->out_tile_map[tile_idx]]; + + in_tile->left = in_left; + in_tile->width = in_width; + + if (ipu_rot_mode_is_irt(ctx->rot_mode)) { + out_tile->top = out_left; + out_tile->height = out_width; + } else { + out_tile->left = out_left; + out_tile->width = out_width; + } + } +} + +/* + * Fill in top position and height and for all tiles in an input row, and + * for all corresponding output tiles. If the 90° rotator is used, the output + * tiles are in a column, and output tile left position and width are set. + */ +static void fill_tile_row(struct ipu_image_convert_ctx *ctx, unsigned int row, + struct ipu_image_convert_image *in, + unsigned int in_top, unsigned int in_height, + struct ipu_image_convert_image *out, + unsigned int out_top, unsigned int out_height) +{ + unsigned int col, tile_idx; + struct ipu_image_tile *in_tile, *out_tile; + + for (col = 0; col < in->num_cols; col++) { + tile_idx = in->num_cols * row + col; + in_tile = &in->tile[tile_idx]; + out_tile = &out->tile[ctx->out_tile_map[tile_idx]]; + + in_tile->top = in_top; + in_tile->height = in_height; + + if (ipu_rot_mode_is_irt(ctx->rot_mode)) { + out_tile->left = out_top; + out_tile->width = out_height; + } else { + out_tile->top = out_top; + out_tile->height = out_height; + } + } +} + +/* + * Find the best horizontal and vertical seam positions to split into tiles. + * Minimize the fractional part of the input sampling position for the + * top / left pixels of each tile. + */ +static void find_seams(struct ipu_image_convert_ctx *ctx, + struct ipu_image_convert_image *in, + struct ipu_image_convert_image *out) +{ + struct device *dev = ctx->chan->priv->ipu->dev; + unsigned int resized_width = out->base.rect.width; + unsigned int resized_height = out->base.rect.height; + unsigned int col; + unsigned int row; + unsigned int in_left_align = tile_left_align(in->fmt); + unsigned int in_top_align = tile_top_align(in->fmt); + unsigned int out_left_align = tile_left_align(out->fmt); + unsigned int out_top_align = tile_top_align(out->fmt); + unsigned int out_width_align = tile_width_align(out->fmt); + unsigned int out_height_align = tile_height_align(out->type, + ctx->rot_mode); + unsigned int in_right = in->base.rect.width; + unsigned int in_bottom = in->base.rect.height; + unsigned int out_right = out->base.rect.width; + unsigned int out_bottom = out->base.rect.height; + unsigned int flipped_out_left; + unsigned int flipped_out_top; + + if (ipu_rot_mode_is_irt(ctx->rot_mode)) { + /* Switch width/height and align top left to IRT block size */ + resized_width = out->base.rect.height; + resized_height = out->base.rect.width; + out_left_align = out_height_align; + out_top_align = out_width_align; + out_width_align = out_left_align; + out_height_align = out_top_align; + out_right = out->base.rect.height; + out_bottom = out->base.rect.width; + } + + for (col = in->num_cols - 1; col > 0; col--) { + bool allow_in_overshoot = ipu_rot_mode_is_irt(ctx->rot_mode) || + !(ctx->rot_mode & IPU_ROT_BIT_HFLIP); + bool allow_out_overshoot = (col < in->num_cols - 1) && + !(ctx->rot_mode & IPU_ROT_BIT_HFLIP); + unsigned int out_start; + unsigned int out_end; + unsigned int in_left; + unsigned int out_left; + + /* + * Align input width to burst length if the scaling step flips + * horizontally. + */ + + /* Start within 1024 pixels of the right edge */ + out_start = max_t(int, 0, out_right - 1024); + /* End before having to add more columns to the left */ + out_end = min_t(unsigned int, out_right, col * 1024); + + find_best_seam(ctx, out_start, out_end, + in_right, out_right, + in_left_align, out_left_align, + allow_in_overshoot ? 1 : 8 /* burst length */, + allow_out_overshoot ? 1 : out_width_align, + ctx->downsize_coeff_h, ctx->image_resize_coeff_h, + &in_left, &out_left); + + if (ctx->rot_mode & IPU_ROT_BIT_HFLIP) + flipped_out_left = resized_width - out_right; + else + flipped_out_left = out_left; + + fill_tile_column(ctx, col, in, in_left, in_right - in_left, + out, flipped_out_left, out_right - out_left); + + dev_dbg(dev, "%s: col %u: %u, %u -> %u, %u\n", __func__, col, + in_left, in_right - in_left, + flipped_out_left, out_right - out_left); + + in_right = in_left; + out_right = out_left; + } + + flipped_out_left = (ctx->rot_mode & IPU_ROT_BIT_HFLIP) ? + resized_width - out_right : 0; + + fill_tile_column(ctx, 0, in, 0, in_right, + out, flipped_out_left, out_right); + + dev_dbg(dev, "%s: col 0: 0, %u -> %u, %u\n", __func__, + in_right, flipped_out_left, out_right); + + for (row = in->num_rows - 1; row > 0; row--) { + bool allow_overshoot = row < in->num_rows - 1; + unsigned int out_start; + unsigned int out_end; + unsigned int in_top; + unsigned int out_top; + + /* Start within 1024 lines of the bottom edge */ + out_start = max_t(int, 0, out_bottom - 1024); + /* End before having to add more rows above */ + out_end = min_t(unsigned int, out_bottom, row * 1024); + + find_best_seam(ctx, out_start, out_end, + in_bottom, out_bottom, + in_top_align, out_top_align, + 1, allow_overshoot ? 1 : out_height_align, + ctx->downsize_coeff_v, ctx->image_resize_coeff_v, + &in_top, &out_top); + + if ((ctx->rot_mode & IPU_ROT_BIT_VFLIP) ^ + ipu_rot_mode_is_irt(ctx->rot_mode)) + flipped_out_top = resized_height - out_bottom; + else + flipped_out_top = out_top; + + fill_tile_row(ctx, row, in, in_top, in_bottom - in_top, + out, flipped_out_top, out_bottom - out_top); + + dev_dbg(dev, "%s: row %u: %u, %u -> %u, %u\n", __func__, row, + in_top, in_bottom - in_top, + flipped_out_top, out_bottom - out_top); + + in_bottom = in_top; + out_bottom = out_top; + } + + if ((ctx->rot_mode & IPU_ROT_BIT_VFLIP) ^ + ipu_rot_mode_is_irt(ctx->rot_mode)) + flipped_out_top = resized_height - out_bottom; + else + flipped_out_top = 0; + + fill_tile_row(ctx, 0, in, 0, in_bottom, + out, flipped_out_top, out_bottom); + + dev_dbg(dev, "%s: row 0: 0, %u -> %u, %u\n", __func__, + in_bottom, flipped_out_top, out_bottom); +} + static void calc_tile_dimensions(struct ipu_image_convert_ctx *ctx, struct ipu_image_convert_image *image) { unsigned int i; for (i = 0; i < ctx->num_tiles; i++) { - struct ipu_image_tile *tile = &image->tile[i]; + struct ipu_image_tile *tile; const unsigned int row = i / image->num_cols; const unsigned int col = i % image->num_cols; - tile->height = image->base.pix.height / image->num_rows; - tile->width = image->base.pix.width / image->num_cols; - tile->left = col * tile->width; - tile->top = row * tile->height; + if (image->type == IMAGE_CONVERT_OUT) + tile = &image->tile[ctx->out_tile_map[i]]; + else + tile = &image->tile[i]; + tile->size = ((tile->height * image->fmt->bpp) >> 3) * tile->width; @@ -1682,6 +2010,10 @@ ipu_image_convert_prepare(struct ipu_soc *ipu, enum ipu_ic_task ic_task, if (ret) goto out_free; + calc_out_tile_map(ctx); + + find_seams(ctx, s_image, d_image); + calc_tile_dimensions(ctx, s_image); ret = calc_tile_offsets(ctx, s_image); if (ret) @@ -1692,7 +2024,6 @@ ipu_image_convert_prepare(struct ipu_soc *ipu, enum ipu_ic_task ic_task, if (ret) goto out_free; - calc_out_tile_map(ctx); calc_tile_resize_coefficients(ctx); dump_format(ctx, s_image); -- cgit v1.2.1 From a3f42419e2a72b174a7d993debea85df7a56bd4b Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Tue, 18 Sep 2018 11:34:16 +0200 Subject: gpu: ipu-v3: image-convert: fix debug output for varying tile sizes Since tile dimensions now vary between tiles, add debug output for each tile's position and dimensions. Signed-off-by: Philipp Zabel Acked-by: Steve Longerbeam Tested-by: Steve Longerbeam --- drivers/gpu/ipu-v3/ipu-image-convert.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/ipu-v3') diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c b/drivers/gpu/ipu-v3/ipu-image-convert.c index a674241dd0b8..0451d699f515 100644 --- a/drivers/gpu/ipu-v3/ipu-image-convert.c +++ b/drivers/gpu/ipu-v3/ipu-image-convert.c @@ -308,12 +308,11 @@ static void dump_format(struct ipu_image_convert_ctx *ctx, struct ipu_image_convert_priv *priv = chan->priv; dev_dbg(priv->ipu->dev, - "task %u: ctx %p: %s format: %dx%d (%dx%d tiles of size %dx%d), %c%c%c%c\n", + "task %u: ctx %p: %s format: %dx%d (%dx%d tiles), %c%c%c%c\n", chan->ic_task, ctx, ic_image->type == IMAGE_CONVERT_OUT ? "Output" : "Input", ic_image->base.pix.width, ic_image->base.pix.height, ic_image->num_cols, ic_image->num_rows, - ic_image->tile[0].width, ic_image->tile[0].height, ic_image->fmt->fourcc & 0xff, (ic_image->fmt->fourcc >> 8) & 0xff, (ic_image->fmt->fourcc >> 16) & 0xff, @@ -789,6 +788,8 @@ static void find_seams(struct ipu_image_convert_ctx *ctx, static void calc_tile_dimensions(struct ipu_image_convert_ctx *ctx, struct ipu_image_convert_image *image) { + struct ipu_image_convert_chan *chan = ctx->chan; + struct ipu_image_convert_priv *priv = chan->priv; unsigned int i; for (i = 0; i < ctx->num_tiles; i++) { @@ -813,6 +814,13 @@ static void calc_tile_dimensions(struct ipu_image_convert_ctx *ctx, tile->rot_stride = (image->fmt->bpp * tile->height) >> 3; } + + dev_dbg(priv->ipu->dev, + "task %u: ctx %p: %s@[%u,%u]: %ux%u@%u,%u\n", + chan->ic_task, ctx, + image->type == IMAGE_CONVERT_IN ? "Input" : "Output", + row, col, + tile->width, tile->height, tile->left, tile->top); } } -- cgit v1.2.1 From ff652fcf84f7a0977bbad4eab976f7813665fbc8 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Tue, 18 Sep 2018 11:34:17 +0200 Subject: gpu: ipu-v3: image-convert: relax alignment restrictions For the planar but U/V-packed formats NV12 and NV16, 8 pixel width alignment is good enough to fulfill the 8 byte stride requirement. If we allow the input 8-pixel DMA bursts to overshoot the end of the line, the only input alignment restrictions are dictated by the pixel format and 8-byte aligned line start address. Since different tile sizes are allowed, the output tile with / height alignment doesn't need to be multiplied by number of columns / rows. Signed-off-by: Philipp Zabel [slongerbeam@gmail.com: Bring in the fixes to format width and height alignment restrictions from imx-media-mem2mem.c.] Signed-off-by: Steve Longerbeam Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/ipu-image-convert.c | 81 +++++++++++++++++----------------- 1 file changed, 41 insertions(+), 40 deletions(-) (limited to 'drivers/gpu/ipu-v3') diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c b/drivers/gpu/ipu-v3/ipu-image-convert.c index 0451d699f515..0829723a7599 100644 --- a/drivers/gpu/ipu-v3/ipu-image-convert.c +++ b/drivers/gpu/ipu-v3/ipu-image-convert.c @@ -551,31 +551,46 @@ static inline u32 tile_top_align(const struct ipu_image_pixfmt *fmt) return fmt->uv_height_dec > 1 ? 2 : 1; } -/* - * We have to adjust the tile width such that the tile physaddrs and - * U and V plane offsets are multiples of 8 bytes as required by - * the IPU DMA Controller. For the planar formats, this corresponds - * to a pixel alignment of 16 (but use a more formal equation since - * the variables are available). For all the packed formats, 8 is - * good enough. - */ -static inline u32 tile_width_align(const struct ipu_image_pixfmt *fmt) +static inline u32 tile_width_align(enum ipu_image_convert_type type, + const struct ipu_image_pixfmt *fmt, + enum ipu_rotate_mode rot_mode) { - return fmt->planar ? 8 * fmt->uv_width_dec : 8; + if (type == IMAGE_CONVERT_IN) { + /* + * The IC burst reads 8 pixels at a time. Reading beyond the + * end of the line is usually acceptable. Those pixels are + * ignored, unless the IC has to write the scaled line in + * reverse. + */ + return (!ipu_rot_mode_is_irt(rot_mode) && + (rot_mode & IPU_ROT_BIT_HFLIP)) ? 8 : 2; + } + + /* + * Align to 16x16 pixel blocks for planar 4:2:0 chroma subsampled + * formats to guarantee 8-byte aligned line start addresses in the + * chroma planes when IRT is used. Align to 8x8 pixel IRT block size + * for all other formats. + */ + return (ipu_rot_mode_is_irt(rot_mode) && + fmt->planar && !fmt->uv_packed) ? + 8 * fmt->uv_width_dec : 8; } -/* - * For tile height alignment, we have to ensure that the output tile - * heights are multiples of 8 lines if the IRT is required by the - * given rotation mode (the IRT performs rotations on 8x8 blocks - * at a time). If the IRT is not used, or for input image tiles, - * 2 lines are good enough. - */ static inline u32 tile_height_align(enum ipu_image_convert_type type, + const struct ipu_image_pixfmt *fmt, enum ipu_rotate_mode rot_mode) { - return (type == IMAGE_CONVERT_OUT && - ipu_rot_mode_is_irt(rot_mode)) ? 8 : 2; + if (type == IMAGE_CONVERT_IN || !ipu_rot_mode_is_irt(rot_mode)) + return 2; + + /* + * Align to 16x16 pixel blocks for planar 4:2:0 chroma subsampled + * formats to guarantee 8-byte aligned line start addresses in the + * chroma planes when IRT is used. Align to 8x8 pixel IRT block size + * for all other formats. + */ + return (fmt->planar && !fmt->uv_packed) ? 8 * fmt->uv_width_dec : 8; } /* @@ -661,8 +676,9 @@ static void find_seams(struct ipu_image_convert_ctx *ctx, unsigned int in_top_align = tile_top_align(in->fmt); unsigned int out_left_align = tile_left_align(out->fmt); unsigned int out_top_align = tile_top_align(out->fmt); - unsigned int out_width_align = tile_width_align(out->fmt); - unsigned int out_height_align = tile_height_align(out->type, + unsigned int out_width_align = tile_width_align(out->type, out->fmt, + ctx->rot_mode); + unsigned int out_height_align = tile_height_align(out->type, out->fmt, ctx->rot_mode); unsigned int in_right = in->base.rect.width; unsigned int in_bottom = in->base.rect.height; @@ -1855,8 +1871,6 @@ void ipu_image_convert_adjust(struct ipu_image *in, struct ipu_image *out, enum ipu_rotate_mode rot_mode) { const struct ipu_image_pixfmt *infmt, *outfmt; - unsigned int num_in_rows, num_in_cols; - unsigned int num_out_rows, num_out_cols; u32 w_align, h_align; infmt = get_format(in->pix.pixelformat); @@ -1888,28 +1902,15 @@ void ipu_image_convert_adjust(struct ipu_image *in, struct ipu_image *out, in->pix.height / 4); } - /* get tiling rows/cols from output format */ - num_out_rows = num_stripes(out->pix.height); - num_out_cols = num_stripes(out->pix.width); - if (ipu_rot_mode_is_irt(rot_mode)) { - num_in_rows = num_out_cols; - num_in_cols = num_out_rows; - } else { - num_in_rows = num_out_rows; - num_in_cols = num_out_cols; - } - /* align input width/height */ - w_align = ilog2(tile_width_align(infmt) * num_in_cols); - h_align = ilog2(tile_height_align(IMAGE_CONVERT_IN, rot_mode) * - num_in_rows); + w_align = ilog2(tile_width_align(IMAGE_CONVERT_IN, infmt, rot_mode)); + h_align = ilog2(tile_height_align(IMAGE_CONVERT_IN, infmt, rot_mode)); in->pix.width = clamp_align(in->pix.width, MIN_W, MAX_W, w_align); in->pix.height = clamp_align(in->pix.height, MIN_H, MAX_H, h_align); /* align output width/height */ - w_align = ilog2(tile_width_align(outfmt) * num_out_cols); - h_align = ilog2(tile_height_align(IMAGE_CONVERT_OUT, rot_mode) * - num_out_rows); + w_align = ilog2(tile_width_align(IMAGE_CONVERT_OUT, outfmt, rot_mode)); + h_align = ilog2(tile_height_align(IMAGE_CONVERT_OUT, outfmt, rot_mode)); out->pix.width = clamp_align(out->pix.width, MIN_W, MAX_W, w_align); out->pix.height = clamp_align(out->pix.height, MIN_H, MAX_H, h_align); -- cgit v1.2.1 From d966e23d61a2c3769ed0c0a3c6e20b300a313317 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Tue, 18 Sep 2018 11:34:18 +0200 Subject: gpu: ipu-v3: image-convert: fix bytesperline adjustment For planar formats, bytesperline does not depend on BPP. It must always be larger than width and aligned to tile width alignment restrictions. The input bytesperline to ipu_image_convert_adjust() may be uninitialized, so don't rely on input bytesperline as the minimum value for clamp_align(). Use 2 << w_align as the minimum instead. Signed-off-by: Philipp Zabel [slongerbeam@gmail.com: clamp input bytesperline] Signed-off-by: Steve Longerbeam Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/ipu-image-convert.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/ipu-v3') diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c b/drivers/gpu/ipu-v3/ipu-image-convert.c index 0829723a7599..b735065fe288 100644 --- a/drivers/gpu/ipu-v3/ipu-image-convert.c +++ b/drivers/gpu/ipu-v3/ipu-image-convert.c @@ -1915,10 +1915,18 @@ void ipu_image_convert_adjust(struct ipu_image *in, struct ipu_image *out, out->pix.height = clamp_align(out->pix.height, MIN_H, MAX_H, h_align); /* set input/output strides and image sizes */ - in->pix.bytesperline = (in->pix.width * infmt->bpp) >> 3; - in->pix.sizeimage = in->pix.height * in->pix.bytesperline; - out->pix.bytesperline = (out->pix.width * outfmt->bpp) >> 3; - out->pix.sizeimage = out->pix.height * out->pix.bytesperline; + in->pix.bytesperline = infmt->planar ? + clamp_align(in->pix.width, 2 << w_align, MAX_W, w_align) : + clamp_align((in->pix.width * infmt->bpp) >> 3, + 2 << w_align, MAX_W, w_align); + in->pix.sizeimage = infmt->planar ? + (in->pix.height * in->pix.bytesperline * infmt->bpp) >> 3 : + in->pix.height * in->pix.bytesperline; + out->pix.bytesperline = outfmt->planar ? out->pix.width : + (out->pix.width * outfmt->bpp) >> 3; + out->pix.sizeimage = outfmt->planar ? + (out->pix.height * out->pix.bytesperline * outfmt->bpp) >> 3 : + out->pix.height * out->pix.bytesperline; } EXPORT_SYMBOL_GPL(ipu_image_convert_adjust); -- cgit v1.2.1 From e46279f097d44581d9474dd51963657b9d9feb6f Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Tue, 18 Sep 2018 11:34:19 +0200 Subject: gpu: ipu-v3: image-convert: add some ASCII art to the exposition Visualize the scaling and rotation pipeline with some ASCII art diagrams. Remove the FIXME comment about missing seam prevention. Signed-off-by: Philipp Zabel Acked-by: Steve Longerbeam --- drivers/gpu/ipu-v3/ipu-image-convert.c | 39 +++++++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 10 deletions(-) (limited to 'drivers/gpu/ipu-v3') diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c b/drivers/gpu/ipu-v3/ipu-image-convert.c index b735065fe288..91fe8f1672b4 100644 --- a/drivers/gpu/ipu-v3/ipu-image-convert.c +++ b/drivers/gpu/ipu-v3/ipu-image-convert.c @@ -37,17 +37,36 @@ * when double_buffering boolean is set). * * Note that the input frame must be split up into the same number - * of tiles as the output frame. + * of tiles as the output frame: * - * FIXME: at this point there is no attempt to deal with visible seams - * at the tile boundaries when upscaling. The seams are caused by a reset - * of the bilinear upscale interpolation when starting a new tile. The - * seams are barely visible for small upscale factors, but become - * increasingly visible as the upscale factor gets larger, since more - * interpolated pixels get thrown out at the tile boundaries. A possilble - * fix might be to overlap tiles of different sizes, but this must be done - * while also maintaining the IDMAC dma buffer address alignment and 8x8 IRT - * alignment restrictions of each tile. + * +---------+-----+ + * +-----+---+ | A | B | + * | A | B | | | | + * +-----+---+ --> +---------+-----+ + * | C | D | | C | D | + * +-----+---+ | | | + * +---------+-----+ + * + * Clockwise 90° rotations are handled by first rescaling into a + * reusable temporary tile buffer and then rotating with the 8x8 + * block rotator, writing to the correct destination: + * + * +-----+-----+ + * | | | + * +-----+---+ +---------+ | C | A | + * | A | B | | A,B, | | | | | + * +-----+---+ --> | C,D | | --> | | | + * | C | D | +---------+ +-----+-----+ + * +-----+---+ | D | B | + * | | | + * +-----+-----+ + * + * If the 8x8 block rotator is used, horizontal or vertical flipping + * is done during the rotation step, otherwise flipping is done + * during the scaling step. + * With rotation or flipping, tile order changes between input and + * output image. Tiles are numbered row major from top left to bottom + * right for both input and output image. */ #define MAX_STRIPES_W 4 -- cgit v1.2.1 From f1ef14f30f058f063dfdab32d1e4b9536294e0ed Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Tue, 18 Sep 2018 11:34:20 +0200 Subject: gpu: ipu-v3: image-convert: disable double buffering if necessary Double-buffering only works if tile sizes are the same and the resizing coefficient does not change between tiles, even for non-planar formats. Signed-off-by: Philipp Zabel Acked-by: Steve Longerbeam Tested-by: Steve Longerbeam --- drivers/gpu/ipu-v3/ipu-image-convert.c | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/ipu-v3') diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c b/drivers/gpu/ipu-v3/ipu-image-convert.c index 91fe8f1672b4..3e73494d5930 100644 --- a/drivers/gpu/ipu-v3/ipu-image-convert.c +++ b/drivers/gpu/ipu-v3/ipu-image-convert.c @@ -1990,6 +1990,7 @@ ipu_image_convert_prepare(struct ipu_soc *ipu, enum ipu_ic_task ic_task, struct ipu_image_convert_chan *chan; struct ipu_image_convert_ctx *ctx; unsigned long flags; + unsigned int i; bool get_res; int ret; @@ -2077,15 +2078,37 @@ ipu_image_convert_prepare(struct ipu_soc *ipu, enum ipu_ic_task ic_task, * for every tile, and therefore would have to be updated for * each buffer which is not possible. So double-buffering is * impossible when either the source or destination images are - * a planar format (YUV420, YUV422P, etc.). + * a planar format (YUV420, YUV422P, etc.). Further, differently + * sized tiles or different resizing coefficients per tile + * prevent double-buffering as well. */ ctx->double_buffering = (ctx->num_tiles > 1 && !s_image->fmt->planar && !d_image->fmt->planar); + for (i = 1; i < ctx->num_tiles; i++) { + if (ctx->in.tile[i].width != ctx->in.tile[0].width || + ctx->in.tile[i].height != ctx->in.tile[0].height || + ctx->out.tile[i].width != ctx->out.tile[0].width || + ctx->out.tile[i].height != ctx->out.tile[0].height) { + ctx->double_buffering = false; + break; + } + } + for (i = 1; i < ctx->in.num_cols; i++) { + if (ctx->resize_coeffs_h[i] != ctx->resize_coeffs_h[0]) { + ctx->double_buffering = false; + break; + } + } + for (i = 1; i < ctx->in.num_rows; i++) { + if (ctx->resize_coeffs_v[i] != ctx->resize_coeffs_v[0]) { + ctx->double_buffering = false; + break; + } + } if (ipu_rot_mode_is_irt(ctx->rot_mode)) { unsigned long intermediate_size = d_image->tile[0].size; - unsigned int i; for (i = 1; i < ctx->num_tiles; i++) { if (d_image->tile[i].size > intermediate_size) -- cgit v1.2.1 From 815b02e3c05dcfae759a96903025beb5dab85f97 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Tue, 18 Sep 2018 11:34:21 +0200 Subject: gpu: ipu-v3: image-convert: allow three rows or columns If width or height are in the [2049, 3072] range, allow to use just three tiles in this dimension, instead of four. Signed-off-by: Philipp Zabel Acked-by: Steve Longerbeam Tested-by: Steve Longerbeam --- drivers/gpu/ipu-v3/ipu-image-convert.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'drivers/gpu/ipu-v3') diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c b/drivers/gpu/ipu-v3/ipu-image-convert.c index 3e73494d5930..13103ab86050 100644 --- a/drivers/gpu/ipu-v3/ipu-image-convert.c +++ b/drivers/gpu/ipu-v3/ipu-image-convert.c @@ -379,12 +379,7 @@ static int alloc_dma_buf(struct ipu_image_convert_priv *priv, static inline int num_stripes(int dim) { - if (dim <= 1024) - return 1; - else if (dim <= 2048) - return 2; - else - return 4; + return (dim - 1) / 1024 + 1; } /* -- cgit v1.2.1