From e200d86a68a634d039985ef87ce5ae0aad52feb7 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 30 Jun 2015 11:15:44 -0700 Subject: drm/vc4: Add support for async pageflips. This should be the fastest screen update path we have, but it doesn't seem to have really improved performance with vblank_mode=0 fullscreen. Signed-off-by: Eric Anholt --- drivers/gpu/drm/vc4/vc4_crtc.c | 99 ++++++++++++++++++++++++++++++++++++++++- drivers/gpu/drm/vc4/vc4_drv.h | 1 + drivers/gpu/drm/vc4/vc4_gem.c | 6 --- drivers/gpu/drm/vc4/vc4_kms.c | 1 + drivers/gpu/drm/vc4/vc4_plane.c | 40 +++++++++++++++++ 5 files changed, 140 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c index f9960ac34fd7..446f8b3aa436 100644 --- a/drivers/gpu/drm/vc4/vc4_crtc.c +++ b/drivers/gpu/drm/vc4/vc4_crtc.c @@ -10,8 +10,10 @@ * Controls the timings of the hardware's pixel valve. */ +#include "drm_atomic.h" #include "drm_atomic_helper.h" #include "drm_crtc_helper.h" +#include "drm_fb_cma_helper.h" #include "linux/component.h" #include "vc4_drv.h" #include "vc4_regs.h" @@ -329,10 +331,105 @@ static irqreturn_t vc4_crtc_irq_handler(int irq, void *data) return ret; } +struct vc4_async_flip_state { + struct drm_crtc *crtc; + struct drm_framebuffer *fb; + struct drm_pending_vblank_event *event; + + struct vc4_seqno_cb cb; +}; + +/* Called when the V3D execution for the BO being flipped to is done, so that + * we can actually update the plane's address to point to it. + */ +static void +vc4_async_page_flip_complete(struct vc4_seqno_cb *cb) +{ + struct vc4_async_flip_state *flip_state = + container_of(cb, struct vc4_async_flip_state, cb); + struct drm_crtc *crtc = flip_state->crtc; + struct drm_device *dev = crtc->dev; + struct vc4_dev *vc4 = to_vc4_dev(dev); + struct drm_plane *plane = crtc->primary; + + vc4_plane_async_set_fb(plane, flip_state->fb); + if (flip_state->event) { + unsigned long flags; + spin_lock_irqsave(&dev->event_lock, flags); + drm_crtc_send_vblank_event(crtc, flip_state->event); + spin_unlock_irqrestore(&dev->event_lock, flags); + } + + drm_framebuffer_unreference(flip_state->fb); + kfree(flip_state); + + up(&vc4->async_modeset); +} + +/* Implements async (non-vblank-synced) page flips. + * + * The page flip ioctl needs to return immediately, so we grab the + * modeset semaphore on the pipe, and queue the address update for + * when V3D is done with the BO being flipped to. + */ +static int vc4_async_page_flip(struct drm_crtc *crtc, + struct drm_framebuffer *fb, + struct drm_pending_vblank_event *event, + uint32_t flags) +{ + struct drm_device *dev = crtc->dev; + struct vc4_dev *vc4 = to_vc4_dev(dev); + struct drm_plane *plane = crtc->primary; + int ret = 0; + struct vc4_async_flip_state *flip_state; + struct drm_gem_cma_object *cma_bo = drm_fb_cma_get_gem_obj(fb, 0); + struct vc4_bo *bo = to_vc4_bo(&cma_bo->base); + + flip_state = kzalloc(sizeof(*flip_state), GFP_KERNEL); + if (!flip_state) + return -ENOMEM; + + drm_framebuffer_reference(fb); + flip_state->fb = fb; + flip_state->crtc = crtc; + flip_state->event = event; + + /* Make sure all other async modesetes have landed. */ + ret = down_interruptible(&vc4->async_modeset); + if (ret) { + kfree(flip_state); + return ret; + } + + /* Immediately update the plane's legacy fb pointer, so that later + * modeset prep sees the state that will be present when the semaphore + * is released. + */ + drm_atomic_set_fb_for_plane(plane->state, fb); + plane->fb = fb; + + vc4_queue_seqno_cb(dev, &flip_state->cb, bo->seqno, + vc4_async_page_flip_complete); + + /* Driver takes ownership of state on successful async commit. */ + return 0; +} + +static int vc4_page_flip(struct drm_crtc *crtc, + struct drm_framebuffer *fb, + struct drm_pending_vblank_event *event, + uint32_t flags) +{ + if (flags & DRM_MODE_PAGE_FLIP_ASYNC) + return vc4_async_page_flip(crtc, fb, event, flags); + else + return drm_atomic_helper_page_flip(crtc, fb, event, flags); +} + static const struct drm_crtc_funcs vc4_crtc_funcs = { .set_config = drm_atomic_helper_set_config, .destroy = vc4_crtc_destroy, - .page_flip = drm_atomic_helper_page_flip, + .page_flip = vc4_page_flip, .set_property = NULL, .cursor_set = NULL, /* handled by drm_mode_cursor_universal */ .cursor_move = NULL, /* handled by drm_mode_cursor_universal */ diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h index 13b475785ca0..c2db6bd57352 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.h +++ b/drivers/gpu/drm/vc4/vc4_drv.h @@ -431,6 +431,7 @@ struct drm_plane *vc4_plane_init(struct drm_device *dev, enum drm_plane_type type); u32 vc4_plane_write_dlist(struct drm_plane *plane, u32 __iomem *dlist); u32 vc4_plane_dlist_size(struct drm_plane_state *state); +void vc4_plane_async_set_fb(struct drm_plane *plane, struct drm_framebuffer *fb); /* vc4_v3d.c */ void vc4_v3d_register(void); diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index 577cc6d3840f..ca3955c5ea79 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -636,12 +636,6 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, mutex_unlock(&dev->struct_mutex); - /* To keep any client from getting too far ahead (particularly - * a problem when BO caching is involved), we wait on the - * previous rendering before returning to userspace. - */ - vc4_wait_for_seqno(dev, args->seqno - 1, ~0ull, true); - return 0; fail: diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c index acfd1b23ab4a..44836d51145d 100644 --- a/drivers/gpu/drm/vc4/vc4_kms.c +++ b/drivers/gpu/drm/vc4/vc4_kms.c @@ -207,6 +207,7 @@ vc4_kms_load(struct drm_device *dev) dev->mode_config.max_height = 2048; dev->mode_config.funcs = &vc4_mode_funcs; dev->mode_config.preferred_depth = 24; + dev->mode_config.async_page_flip = true; ret = vc4_init_modeset_objects(dev); if (ret) diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index 0c8afb17314a..0d703154b29d 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -21,6 +21,14 @@ struct vc4_plane_state { u32 *dlist; u32 dlist_size; /* Number of dwords in allocated for the display list */ u32 dlist_count; /* Number of used dwords in the display list. */ + + /* Offset in the dlist to pointer word 0. */ + u32 pw0_offset; + + /* Offset where the plane's dlist was last stored in the + hardware at vc4_crtc_atomic_flush() time. + */ + u32 *hw_dlist; }; static inline struct vc4_plane_state * @@ -191,6 +199,8 @@ vc4_plane_mode_set(struct drm_plane *plane, struct drm_plane_state *state) /* Position Word 3: Context. Written by the HVS. */ vc4_dlist_write(vc4_state, 0xc0c0c0c0); + vc4_state->pw0_offset = vc4_state->dlist_count; + /* Pointer Word 0: RGB / Y Pointer */ vc4_dlist_write(vc4_state, bo->paddr + offset); @@ -244,6 +254,8 @@ u32 vc4_plane_write_dlist(struct drm_plane *plane, u32 __iomem *dlist) struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state); int i; + vc4_state->hw_dlist = dlist; + /* Can't memcpy_toio() because it needs to be 32-bit writes. */ for (i = 0; i < vc4_state->dlist_count; i++) writel(vc4_state->dlist[i], &dlist[i]); @@ -258,6 +270,34 @@ u32 vc4_plane_dlist_size(struct drm_plane_state *state) return vc4_state->dlist_count; } +/* Updates the plane to immediately (well, once the FIFO needs + * refilling) scan out from at a new framebuffer. + */ +void vc4_plane_async_set_fb(struct drm_plane *plane, struct drm_framebuffer *fb) +{ + struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state); + struct drm_gem_cma_object *bo = drm_fb_cma_get_gem_obj(fb, 0); + uint32_t addr; + + /* We're skipping the address adjustment for negative origin, + * because this is only called on the primary plane. + */ + WARN_ON_ONCE(plane->state->crtc_x < 0 || plane->state->crtc_y < 0); + addr = bo->paddr + fb->offsets[0]; + + /* Write the new address into the hardware immediately. The + * scanout will start from this address as soon as the FIFO + * needs to refill with pixels. + */ + writel(addr, &vc4_state->hw_dlist[vc4_state->pw0_offset]); + + /* Also update the CPU-side dlist copy, so that any later + * atomic updates that don't do a new modeset on our plane + * also use our updated address. + */ + vc4_state->dlist[vc4_state->pw0_offset] = addr; +} + static const struct drm_plane_helper_funcs vc4_plane_helper_funcs = { .prepare_fb = NULL, .cleanup_fb = NULL, -- cgit v1.2.1