diff options
Diffstat (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c')
-rw-r--r-- | drivers/gpu/drm/i915/intel_ringbuffer.c | 152 |
1 files changed, 133 insertions, 19 deletions
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 1aef516cc6fa..ecbc5c5dbbbc 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -261,6 +261,83 @@ gen6_render_ring_flush(struct intel_ring_buffer *ring, return 0; } +static int +gen7_render_ring_cs_stall_wa(struct intel_ring_buffer *ring) +{ + int ret; + + ret = intel_ring_begin(ring, 4); + if (ret) + return ret; + + intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4)); + intel_ring_emit(ring, PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_STALL_AT_SCOREBOARD); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); + intel_ring_advance(ring); + + return 0; +} + +static int +gen7_render_ring_flush(struct intel_ring_buffer *ring, + u32 invalidate_domains, u32 flush_domains) +{ + u32 flags = 0; + struct pipe_control *pc = ring->private; + u32 scratch_addr = pc->gtt_offset + 128; + int ret; + + /* + * Ensure that any following seqno writes only happen when the render + * cache is indeed flushed. + * + * Workaround: 4th PIPE_CONTROL command (except the ones with only + * read-cache invalidate bits set) must have the CS_STALL bit set. We + * don't try to be clever and just set it unconditionally. + */ + flags |= PIPE_CONTROL_CS_STALL; + + /* Just flush everything. Experiments have shown that reducing the + * number of bits based on the write domains has little performance + * impact. + */ + if (flush_domains) { + flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; + flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; + } + if (invalidate_domains) { + flags |= PIPE_CONTROL_TLB_INVALIDATE; + flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; + /* + * TLB invalidate requires a post-sync write. + */ + flags |= PIPE_CONTROL_QW_WRITE; + + /* Workaround: we must issue a pipe_control with CS-stall bit + * set before a pipe_control command that has the state cache + * invalidate bit set. */ + gen7_render_ring_cs_stall_wa(ring); + } + + ret = intel_ring_begin(ring, 4); + if (ret) + return ret; + + intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4)); + intel_ring_emit(ring, flags); + intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); + intel_ring_emit(ring, 0); + intel_ring_advance(ring); + + return 0; +} + static void ring_write_tail(struct intel_ring_buffer *ring, u32 value) { @@ -381,12 +458,12 @@ init_pipe_control(struct intel_ring_buffer *ring) i915_gem_object_set_cache_level(obj, I915_CACHE_LLC); - ret = i915_gem_object_pin(obj, 4096, true); + ret = i915_gem_object_pin(obj, 4096, true, false); if (ret) goto err_unref; pc->gtt_offset = obj->gtt_offset; - pc->cpu_page = kmap(obj->pages[0]); + pc->cpu_page = kmap(sg_page(obj->pages->sgl)); if (pc->cpu_page == NULL) goto err_unpin; @@ -413,7 +490,8 @@ cleanup_pipe_control(struct intel_ring_buffer *ring) return; obj = pc->obj; - kunmap(obj->pages[0]); + + kunmap(sg_page(obj->pages->sgl)); i915_gem_object_unpin(obj); drm_gem_object_unreference(&obj->base); @@ -461,7 +539,7 @@ static int init_render_ring(struct intel_ring_buffer *ring) if (INTEL_INFO(dev)->gen >= 6) I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING)); - if (IS_IVYBRIDGE(dev)) + if (HAS_L3_GPU_CACHE(dev)) I915_WRITE_IMR(ring, ~GEN6_RENDER_L3_PARITY_ERROR); return ret; @@ -627,26 +705,24 @@ pc_render_add_request(struct intel_ring_buffer *ring, } static u32 -gen6_ring_get_seqno(struct intel_ring_buffer *ring) +gen6_ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency) { - struct drm_device *dev = ring->dev; - /* Workaround to force correct ordering between irq and seqno writes on * ivb (and maybe also on snb) by reading from a CS register (like * ACTHD) before reading the status page. */ - if (IS_GEN6(dev) || IS_GEN7(dev)) + if (!lazy_coherency) intel_ring_get_active_head(ring); return intel_read_status_page(ring, I915_GEM_HWS_INDEX); } static u32 -ring_get_seqno(struct intel_ring_buffer *ring) +ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency) { return intel_read_status_page(ring, I915_GEM_HWS_INDEX); } static u32 -pc_render_get_seqno(struct intel_ring_buffer *ring) +pc_render_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency) { struct pipe_control *pc = ring->private; return pc->cpu_page[0]; @@ -851,7 +927,7 @@ gen6_ring_get_irq(struct intel_ring_buffer *ring) spin_lock_irqsave(&dev_priv->irq_lock, flags); if (ring->irq_refcount++ == 0) { - if (IS_IVYBRIDGE(dev) && ring->id == RCS) + if (HAS_L3_GPU_CACHE(dev) && ring->id == RCS) I915_WRITE_IMR(ring, ~(ring->irq_enable_mask | GEN6_RENDER_L3_PARITY_ERROR)); else @@ -874,7 +950,7 @@ gen6_ring_put_irq(struct intel_ring_buffer *ring) spin_lock_irqsave(&dev_priv->irq_lock, flags); if (--ring->irq_refcount == 0) { - if (IS_IVYBRIDGE(dev) && ring->id == RCS) + if (HAS_L3_GPU_CACHE(dev) && ring->id == RCS) I915_WRITE_IMR(ring, ~GEN6_RENDER_L3_PARITY_ERROR); else I915_WRITE_IMR(ring, ~0); @@ -950,7 +1026,7 @@ static void cleanup_status_page(struct intel_ring_buffer *ring) if (obj == NULL) return; - kunmap(obj->pages[0]); + kunmap(sg_page(obj->pages->sgl)); i915_gem_object_unpin(obj); drm_gem_object_unreference(&obj->base); ring->status_page.obj = NULL; @@ -971,13 +1047,13 @@ static int init_status_page(struct intel_ring_buffer *ring) i915_gem_object_set_cache_level(obj, I915_CACHE_LLC); - ret = i915_gem_object_pin(obj, 4096, true); + ret = i915_gem_object_pin(obj, 4096, true, false); if (ret != 0) { goto err_unref; } ring->status_page.gfx_addr = obj->gtt_offset; - ring->status_page.page_addr = kmap(obj->pages[0]); + ring->status_page.page_addr = kmap(sg_page(obj->pages->sgl)); if (ring->status_page.page_addr == NULL) { ret = -ENOMEM; goto err_unpin; @@ -1009,7 +1085,6 @@ static int intel_init_ring_buffer(struct drm_device *dev, ring->dev = dev; INIT_LIST_HEAD(&ring->active_list); INIT_LIST_HEAD(&ring->request_list); - INIT_LIST_HEAD(&ring->gpu_write_list); ring->size = 32 * PAGE_SIZE; init_waitqueue_head(&ring->irq_queue); @@ -1029,7 +1104,7 @@ static int intel_init_ring_buffer(struct drm_device *dev, ring->obj = obj; - ret = i915_gem_object_pin(obj, PAGE_SIZE, true); + ret = i915_gem_object_pin(obj, PAGE_SIZE, true, false); if (ret) goto err_unref; @@ -1378,7 +1453,9 @@ int intel_init_render_ring_buffer(struct drm_device *dev) if (INTEL_INFO(dev)->gen >= 6) { ring->add_request = gen6_add_request; - ring->flush = gen6_render_ring_flush; + ring->flush = gen7_render_ring_flush; + if (INTEL_INFO(dev)->gen == 6) + ring->flush = gen6_render_ring_flush; ring->irq_get = gen6_ring_get_irq; ring->irq_put = gen6_ring_put_irq; ring->irq_enable_mask = GT_USER_INTERRUPT; @@ -1480,7 +1557,6 @@ int intel_render_ring_init_dri(struct drm_device *dev, u64 start, u32 size) ring->dev = dev; INIT_LIST_HEAD(&ring->active_list); INIT_LIST_HEAD(&ring->request_list); - INIT_LIST_HEAD(&ring->gpu_write_list); ring->size = size; ring->effective_size = ring->size; @@ -1573,3 +1649,41 @@ int intel_init_blt_ring_buffer(struct drm_device *dev) return intel_init_ring_buffer(dev, ring); } + +int +intel_ring_flush_all_caches(struct intel_ring_buffer *ring) +{ + int ret; + + if (!ring->gpu_caches_dirty) + return 0; + + ret = ring->flush(ring, 0, I915_GEM_GPU_DOMAINS); + if (ret) + return ret; + + trace_i915_gem_ring_flush(ring, 0, I915_GEM_GPU_DOMAINS); + + ring->gpu_caches_dirty = false; + return 0; +} + +int +intel_ring_invalidate_all_caches(struct intel_ring_buffer *ring) +{ + uint32_t flush_domains; + int ret; + + flush_domains = 0; + if (ring->gpu_caches_dirty) + flush_domains = I915_GEM_GPU_DOMAINS; + + ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, flush_domains); + if (ret) + return ret; + + trace_i915_gem_ring_flush(ring, I915_GEM_GPU_DOMAINS, flush_domains); + + ring->gpu_caches_dirty = false; + return 0; +} |