Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem.c')
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem.c | 861
1 file changed, 298 insertions(+), 563 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 7a918d1c12ba..4d631a946481 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -46,11 +46,6 @@ static void i915_gem_object_retire__write(struct drm_i915_gem_object *obj); static void i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring); -static void i915_gem_write_fence(struct drm_device *dev, int reg, - struct drm_i915_gem_object *obj); -static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj, - struct drm_i915_fence_reg *fence, - bool enable); static bool cpu_cache_is_coherent(struct drm_device *dev, enum i915_cache_level level) @@ -66,18 +61,6 @@ static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) return obj->pin_display; } -static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj) -{ - if (obj->tiling_mode) - i915_gem_release_mmap(obj); - - /* As we do not have an associated fence register, we will force - * a tiling change if we ever need to acquire one. - */ - obj->fence_dirty = false; - obj->fence_reg = I915_FENCE_REG_NONE; -} - /* some bookkeeping */ static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv, size_t size) @@ -149,14 +132,18 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, { struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_gem_get_aperture *args = data; - struct drm_i915_gem_object *obj; + struct i915_gtt *ggtt = &dev_priv->gtt; + struct i915_vma *vma; size_t pinned; pinned = 0; mutex_lock(&dev->struct_mutex); - list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) - if (i915_gem_obj_is_pinned(obj)) - pinned += i915_gem_obj_ggtt_size(obj); + list_for_each_entry(vma, &ggtt->base.active_list, mm_list) + if (vma->pin_count) + pinned += vma->node.size; + list_for_each_entry(vma, &ggtt->base.inactive_list, mm_list) + if (vma->pin_count) + pinned += vma->node.size; mutex_unlock(&dev->struct_mutex); args->aper_size = dev_priv->gtt.base.total; @@ -347,7 +334,7 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj, if (ret) return ret; - intel_fb_obj_invalidate(obj, NULL, ORIGIN_CPU); + intel_fb_obj_invalidate(obj, ORIGIN_CPU); if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) { unsigned long unwritten; @@ -368,7 +355,7 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj, i915_gem_chipset_flush(dev); out: - intel_fb_obj_flush(obj, false); + intel_fb_obj_flush(obj, false, ORIGIN_CPU); return ret; } @@ -801,7 +788,7 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev, offset = i915_gem_obj_ggtt_offset(obj) + args->offset; - intel_fb_obj_invalidate(obj, NULL, ORIGIN_GTT); + intel_fb_obj_invalidate(obj, ORIGIN_GTT); while (remain > 0) { /* Operation in this page @@ -832,7 +819,7 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev, } out_flush: - intel_fb_obj_flush(obj, false); + intel_fb_obj_flush(obj, false, ORIGIN_GTT); out_unpin: i915_gem_object_ggtt_unpin(obj); out: @@ -945,7 +932,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev, if (ret) return ret; - intel_fb_obj_invalidate(obj, NULL, ORIGIN_CPU); + intel_fb_obj_invalidate(obj, ORIGIN_CPU); i915_gem_object_pin_pages(obj); @@ -1025,7 +1012,7 @@ out: if (needs_clflush_after) i915_gem_chipset_flush(dev); - intel_fb_obj_flush(obj, false); + intel_fb_obj_flush(obj, false, ORIGIN_CPU); return ret; } @@ -1146,23 +1133,6 @@ i915_gem_check_wedge(struct i915_gpu_error *error, return 0; } -/* - * Compare arbitrary request against outstanding lazy request. 
Emit on match. - */ -int -i915_gem_check_olr(struct drm_i915_gem_request *req) -{ - int ret; - - WARN_ON(!mutex_is_locked(&req->ring->dev->struct_mutex)); - - ret = 0; - if (req == req->ring->outstanding_lazy_request) - ret = i915_add_request(req->ring); - - return ret; -} - static void fake_irq(unsigned long data) { wake_up_process((struct task_struct *)data); @@ -1334,6 +1304,33 @@ out: return ret; } +int i915_gem_request_add_to_client(struct drm_i915_gem_request *req, + struct drm_file *file) +{ + struct drm_i915_private *dev_private; + struct drm_i915_file_private *file_priv; + + WARN_ON(!req || !file || req->file_priv); + + if (!req || !file) + return -EINVAL; + + if (req->file_priv) + return -EINVAL; + + dev_private = req->ring->dev->dev_private; + file_priv = file->driver_priv; + + spin_lock(&file_priv->mm.lock); + req->file_priv = file_priv; + list_add_tail(&req->client_list, &file_priv->mm.request_list); + spin_unlock(&file_priv->mm.lock); + + req->pid = get_pid(task_pid(current)); + + return 0; +} + static inline void i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) { @@ -1346,6 +1343,9 @@ i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) list_del(&request->client_list); request->file_priv = NULL; spin_unlock(&file_priv->mm.lock); + + put_pid(request->pid); + request->pid = NULL; } static void i915_gem_request_retire(struct drm_i915_gem_request *request) @@ -1365,8 +1365,6 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) list_del_init(&request->list); i915_gem_request_remove_from_client(request); - put_pid(request->pid); - i915_gem_request_unreference(request); } @@ -1415,10 +1413,6 @@ i915_wait_request(struct drm_i915_gem_request *req) if (ret) return ret; - ret = i915_gem_check_olr(req); - if (ret) - return ret; - ret = __i915_wait_request(req, atomic_read(&dev_priv->gpu_error.reset_counter), interruptible, NULL, NULL); @@ -1518,10 +1512,6 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, if (req == NULL) return 0; - ret = i915_gem_check_olr(req); - if (ret) - goto err; - requests[n++] = i915_gem_request_reference(req); } else { for (i = 0; i < I915_NUM_RINGS; i++) { @@ -1531,10 +1521,6 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, if (req == NULL) continue; - ret = i915_gem_check_olr(req); - if (ret) - goto err; - requests[n++] = i915_gem_request_reference(req); } } @@ -1545,7 +1531,6 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, NULL, rps); mutex_lock(&dev->struct_mutex); -err: for (i = 0; i < n; i++) { if (ret == 0) i915_gem_object_retire_request(obj, requests[i]); @@ -1613,6 +1598,11 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, else ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0); + if (write_domain != 0) + intel_fb_obj_invalidate(obj, + write_domain == I915_GEM_DOMAIN_GTT ? + ORIGIN_GTT : ORIGIN_CPU); + unref: drm_gem_object_unreference(&obj->base); unlock: @@ -2349,9 +2339,12 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj) } void i915_vma_move_to_active(struct i915_vma *vma, - struct intel_engine_cs *ring) + struct drm_i915_gem_request *req) { struct drm_i915_gem_object *obj = vma->obj; + struct intel_engine_cs *ring; + + ring = i915_gem_request_get_ring(req); /* Add a reference if we're newly entering the active list. 
*/ if (obj->active == 0) @@ -2359,8 +2352,7 @@ void i915_vma_move_to_active(struct i915_vma *vma, obj->active |= intel_ring_flag(ring); list_move_tail(&obj->ring_list[ring->id], &ring->active_list); - i915_gem_request_assign(&obj->last_read_req[ring->id], - intel_ring_get_request(ring)); + i915_gem_request_assign(&obj->last_read_req[ring->id], req); list_move_tail(&vma->mm_list, &vma->vm->active_list); } @@ -2372,7 +2364,7 @@ i915_gem_object_retire__write(struct drm_i915_gem_object *obj) RQ_BUG_ON(!(obj->active & intel_ring_flag(obj->last_write_req->ring))); i915_gem_request_assign(&obj->last_write_req, NULL); - intel_fb_obj_flush(obj, true); + intel_fb_obj_flush(obj, true, ORIGIN_CS); } static void @@ -2393,6 +2385,13 @@ i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring) if (obj->active) return; + /* Bump our place on the bound list to keep it roughly in LRU order + * so that we don't steal from recently used but inactive objects + * (unless we are forced to ofc!) + */ + list_move_tail(&obj->global_list, + &to_i915(obj->base.dev)->mm.bound_list); + list_for_each_entry(vma, &obj->vma_list, vma_link) { if (!list_empty(&vma->mm_list)) list_move_tail(&vma->mm_list, &vma->vm->inactive_list); @@ -2472,24 +2471,34 @@ i915_gem_get_seqno(struct drm_device *dev, u32 *seqno) return 0; } -int __i915_add_request(struct intel_engine_cs *ring, - struct drm_file *file, - struct drm_i915_gem_object *obj) +/* + * NB: This function is not allowed to fail. Doing so would mean the the + * request is not being tracked for completion but the work itself is + * going to happen on the hardware. This would be a Bad Thing(tm). + */ +void __i915_add_request(struct drm_i915_gem_request *request, + struct drm_i915_gem_object *obj, + bool flush_caches) { - struct drm_i915_private *dev_priv = ring->dev->dev_private; - struct drm_i915_gem_request *request; + struct intel_engine_cs *ring; + struct drm_i915_private *dev_priv; struct intel_ringbuffer *ringbuf; u32 request_start; int ret; - request = ring->outstanding_lazy_request; if (WARN_ON(request == NULL)) - return -ENOMEM; + return; - if (i915.enable_execlists) { - ringbuf = request->ctx->engine[ring->id].ringbuf; - } else - ringbuf = ring->buffer; + ring = request->ring; + dev_priv = ring->dev->dev_private; + ringbuf = request->ringbuf; + + /* + * To ensure that this call will not fail, space for its emissions + * should already have been reserved in the ring buffer. Let the ring + * know that it is time to use that space up. + */ + intel_ring_reserved_space_use(ringbuf); request_start = intel_ring_get_tail(ringbuf); /* @@ -2499,14 +2508,13 @@ int __i915_add_request(struct intel_engine_cs *ring, * is that the flush _must_ happen before the next request, no matter * what. */ - if (i915.enable_execlists) { - ret = logical_ring_flush_all_caches(ringbuf, request->ctx); - if (ret) - return ret; - } else { - ret = intel_ring_flush_all_caches(ring); - if (ret) - return ret; + if (flush_caches) { + if (i915.enable_execlists) + ret = logical_ring_flush_all_caches(request); + else + ret = intel_ring_flush_all_caches(request); + /* Not allowed to fail! 
*/ + WARN(ret, "*_ring_flush_all_caches failed: %d!\n", ret); } /* Record the position of the start of the request so that @@ -2516,17 +2524,15 @@ int __i915_add_request(struct intel_engine_cs *ring, */ request->postfix = intel_ring_get_tail(ringbuf); - if (i915.enable_execlists) { - ret = ring->emit_request(ringbuf, request); - if (ret) - return ret; - } else { - ret = ring->add_request(ring); - if (ret) - return ret; + if (i915.enable_execlists) + ret = ring->emit_request(request); + else { + ret = ring->add_request(request); request->tail = intel_ring_get_tail(ringbuf); } + /* Not allowed to fail! */ + WARN(ret, "emit|add_request failed: %d!\n", ret); request->head = request_start; @@ -2538,34 +2544,11 @@ int __i915_add_request(struct intel_engine_cs *ring, */ request->batch_obj = obj; - if (!i915.enable_execlists) { - /* Hold a reference to the current context so that we can inspect - * it later in case a hangcheck error event fires. - */ - request->ctx = ring->last_context; - if (request->ctx) - i915_gem_context_reference(request->ctx); - } - request->emitted_jiffies = jiffies; ring->last_submitted_seqno = request->seqno; list_add_tail(&request->list, &ring->request_list); - request->file_priv = NULL; - - if (file) { - struct drm_i915_file_private *file_priv = file->driver_priv; - - spin_lock(&file_priv->mm.lock); - request->file_priv = file_priv; - list_add_tail(&request->client_list, - &file_priv->mm.request_list); - spin_unlock(&file_priv->mm.lock); - - request->pid = get_pid(task_pid(current)); - } trace_i915_gem_request_add(request); - ring->outstanding_lazy_request = NULL; i915_queue_hangcheck(ring->dev); @@ -2574,7 +2557,8 @@ int __i915_add_request(struct intel_engine_cs *ring, round_jiffies_up_relative(HZ)); intel_mark_busy(dev_priv->dev); - return 0; + /* Sanity check that the reserved size was large enough. 
*/ + intel_ring_reserved_space_end(ringbuf); } static bool i915_context_is_banned(struct drm_i915_private *dev_priv, @@ -2628,12 +2612,13 @@ void i915_gem_request_free(struct kref *req_ref) typeof(*req), ref); struct intel_context *ctx = req->ctx; + if (req->file_priv) + i915_gem_request_remove_from_client(req); + if (ctx) { if (i915.enable_execlists) { - struct intel_engine_cs *ring = req->ring; - - if (ctx != ring->default_context) - intel_lr_context_unpin(ring, ctx); + if (ctx != req->ring->default_context) + intel_lr_context_unpin(req); } i915_gem_context_unreference(ctx); @@ -2643,36 +2628,63 @@ void i915_gem_request_free(struct kref *req_ref) } int i915_gem_request_alloc(struct intel_engine_cs *ring, - struct intel_context *ctx) + struct intel_context *ctx, + struct drm_i915_gem_request **req_out) { struct drm_i915_private *dev_priv = to_i915(ring->dev); struct drm_i915_gem_request *req; int ret; - if (ring->outstanding_lazy_request) - return 0; + if (!req_out) + return -EINVAL; + + *req_out = NULL; req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL); if (req == NULL) return -ENOMEM; - kref_init(&req->ref); - req->i915 = dev_priv; - ret = i915_gem_get_seqno(ring->dev, &req->seqno); if (ret) goto err; + kref_init(&req->ref); + req->i915 = dev_priv; req->ring = ring; + req->ctx = ctx; + i915_gem_context_reference(req->ctx); if (i915.enable_execlists) - ret = intel_logical_ring_alloc_request_extras(req, ctx); + ret = intel_logical_ring_alloc_request_extras(req); else ret = intel_ring_alloc_request_extras(req); - if (ret) + if (ret) { + i915_gem_context_unreference(req->ctx); goto err; + } + + /* + * Reserve space in the ring buffer for all the commands required to + * eventually emit this request. This is to guarantee that the + * i915_add_request() call can't fail. Note that the reserve may need + * to be redone if the request is not actually submitted straight + * away, e.g. because a GPU scheduler has deferred it. + */ + if (i915.enable_execlists) + ret = intel_logical_ring_reserve_space(req); + else + ret = intel_ring_reserve_space(req); + if (ret) { + /* + * At this point, the request is fully allocated even if not + * fully prepared. Thus it can be cleaned up using the proper + * free code. 
+ */ + i915_gem_request_cancel(req); + return ret; + } - ring->outstanding_lazy_request = req; + *req_out = req; return 0; err: @@ -2680,6 +2692,13 @@ err: return ret; } +void i915_gem_request_cancel(struct drm_i915_gem_request *req) +{ + intel_ring_reserved_space_cancel(req->ringbuf); + + i915_gem_request_unreference(req); +} + struct drm_i915_gem_request * i915_gem_find_active_request(struct intel_engine_cs *ring) { @@ -2741,7 +2760,7 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv, list_del(&submit_req->execlist_link); if (submit_req->ctx != ring->default_context) - intel_lr_context_unpin(ring, submit_req->ctx); + intel_lr_context_unpin(submit_req); i915_gem_request_unreference(submit_req); } @@ -2762,30 +2781,6 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv, i915_gem_request_retire(request); } - - /* This may not have been flushed before the reset, so clean it now */ - i915_gem_request_assign(&ring->outstanding_lazy_request, NULL); -} - -void i915_gem_restore_fences(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - int i; - - for (i = 0; i < dev_priv->num_fence_regs; i++) { - struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; - - /* - * Commit delayed tiling changes if we have an object still - * attached to the fence, otherwise just clear the fence. - */ - if (reg->obj) { - i915_gem_object_update_fence(reg->obj, reg, - reg->obj->tiling_mode); - } else { - i915_gem_write_fence(dev, i, NULL); - } - } } void i915_gem_reset(struct drm_device *dev) @@ -2947,7 +2942,7 @@ i915_gem_idle_work_handler(struct work_struct *work) static int i915_gem_object_flush_active(struct drm_i915_gem_object *obj) { - int ret, i; + int i; if (!obj->active) return 0; @@ -2962,10 +2957,6 @@ i915_gem_object_flush_active(struct drm_i915_gem_object *obj) if (list_empty(&req->list)) goto retire; - ret = i915_gem_check_olr(req); - if (ret) - return ret; - if (i915_gem_request_completed(req, true)) { __i915_gem_request_retire__upto(req); retire: @@ -3068,25 +3059,22 @@ out: static int __i915_gem_object_sync(struct drm_i915_gem_object *obj, struct intel_engine_cs *to, - struct drm_i915_gem_request *req) + struct drm_i915_gem_request *from_req, + struct drm_i915_gem_request **to_req) { struct intel_engine_cs *from; int ret; - from = i915_gem_request_get_ring(req); + from = i915_gem_request_get_ring(from_req); if (to == from) return 0; - if (i915_gem_request_completed(req, true)) + if (i915_gem_request_completed(from_req, true)) return 0; - ret = i915_gem_check_olr(req); - if (ret) - return ret; - if (!i915_semaphore_is_enabled(obj->base.dev)) { struct drm_i915_private *i915 = to_i915(obj->base.dev); - ret = __i915_wait_request(req, + ret = __i915_wait_request(from_req, atomic_read(&i915->gpu_error.reset_counter), i915->mm.interruptible, NULL, @@ -3094,16 +3082,24 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj, if (ret) return ret; - i915_gem_object_retire_request(obj, req); + i915_gem_object_retire_request(obj, from_req); } else { int idx = intel_ring_sync_index(from, to); - u32 seqno = i915_gem_request_get_seqno(req); + u32 seqno = i915_gem_request_get_seqno(from_req); + + WARN_ON(!to_req); if (seqno <= from->semaphore.sync_seqno[idx]) return 0; - trace_i915_gem_ring_sync_to(from, to, req); - ret = to->semaphore.sync_to(to, from, seqno); + if (*to_req == NULL) { + ret = i915_gem_request_alloc(to, to->default_context, to_req); + if (ret) + return ret; + } + + trace_i915_gem_ring_sync_to(*to_req, from, 
from_req); + ret = to->semaphore.sync_to(*to_req, from, seqno); if (ret) return ret; @@ -3123,11 +3119,14 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj, * * @obj: object which may be in use on another ring. * @to: ring we wish to use the object on. May be NULL. + * @to_req: request we wish to use the object for. See below. + * This will be allocated and returned if a request is + * required but not passed in. * * This code is meant to abstract object synchronization with the GPU. * Calling with NULL implies synchronizing the object with the CPU * rather than a particular GPU ring. Conceptually we serialise writes - * between engines inside the GPU. We only allow on engine to write + * between engines inside the GPU. We only allow one engine to write * into a buffer at any time, but multiple readers. To ensure each has * a coherent view of memory, we must: * @@ -3138,11 +3137,22 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj, * - If we are a write request (pending_write_domain is set), the new * request must wait for outstanding read requests to complete. * + * For CPU synchronisation (NULL to) no request is required. For syncing with + * rings to_req must be non-NULL. However, a request does not have to be + * pre-allocated. If *to_req is NULL and sync commands will be emitted then a + * request will be allocated automatically and returned through *to_req. Note + * that it is not guaranteed that commands will be emitted (because the system + * might already be idle). Hence there is no need to create a request that + * might never have any work submitted. Note further that if a request is + * returned in *to_req, it is the responsibility of the caller to submit + * that request (after potentially adding more work to it). + * * Returns 0 if successful, else propagates up the lower layer error. */ int i915_gem_object_sync(struct drm_i915_gem_object *obj, - struct intel_engine_cs *to) + struct intel_engine_cs *to, + struct drm_i915_gem_request **to_req) { const bool readonly = obj->base.pending_write_domain == 0; struct drm_i915_gem_request *req[I915_NUM_RINGS]; @@ -3164,7 +3174,7 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj, req[n++] = obj->last_read_req[i]; } for (i = 0; i < n; i++) { - ret = __i915_gem_object_sync(obj, to, req[i]); + ret = __i915_gem_object_sync(obj, to, req[i], to_req); if (ret) return ret; } @@ -3275,354 +3285,27 @@ int i915_gpu_idle(struct drm_device *dev) /* Flush everything onto the inactive list. */ for_each_ring(ring, dev_priv, i) { if (!i915.enable_execlists) { - ret = i915_switch_context(ring, ring->default_context); + struct drm_i915_gem_request *req; + + ret = i915_gem_request_alloc(ring, ring->default_context, &req); if (ret) return ret; - } - ret = intel_ring_idle(ring); - if (ret) - return ret; - } - - WARN_ON(i915_verify_lists(dev)); - return 0; -} - -static void i965_write_fence_reg(struct drm_device *dev, int reg, - struct drm_i915_gem_object *obj) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - int fence_reg; - int fence_pitch_shift; - - if (INTEL_INFO(dev)->gen >= 6) { - fence_reg = FENCE_REG_SANDYBRIDGE_0; - fence_pitch_shift = SANDYBRIDGE_FENCE_PITCH_SHIFT; - } else { - fence_reg = FENCE_REG_965_0; - fence_pitch_shift = I965_FENCE_PITCH_SHIFT; - } - - fence_reg += reg * 8; + ret = i915_switch_context(req); + if (ret) { + i915_gem_request_cancel(req); + return ret; + } - /* To w/a incoherency with non-atomic 64-bit register updates, - * we split the 64-bit update into two 32-bit writes. 
In order - * for a partial fence not to be evaluated between writes, we - * precede the update with write to turn off the fence register, - * and only enable the fence as the last step. - * - * For extra levels of paranoia, we make sure each step lands - * before applying the next step. - */ - I915_WRITE(fence_reg, 0); - POSTING_READ(fence_reg); - - if (obj) { - u32 size = i915_gem_obj_ggtt_size(obj); - uint64_t val; - - /* Adjust fence size to match tiled area */ - if (obj->tiling_mode != I915_TILING_NONE) { - uint32_t row_size = obj->stride * - (obj->tiling_mode == I915_TILING_Y ? 32 : 8); - size = (size / row_size) * row_size; + i915_add_request_no_flush(req); } - val = (uint64_t)((i915_gem_obj_ggtt_offset(obj) + size - 4096) & - 0xfffff000) << 32; - val |= i915_gem_obj_ggtt_offset(obj) & 0xfffff000; - val |= (uint64_t)((obj->stride / 128) - 1) << fence_pitch_shift; - if (obj->tiling_mode == I915_TILING_Y) - val |= 1 << I965_FENCE_TILING_Y_SHIFT; - val |= I965_FENCE_REG_VALID; - - I915_WRITE(fence_reg + 4, val >> 32); - POSTING_READ(fence_reg + 4); - - I915_WRITE(fence_reg + 0, val); - POSTING_READ(fence_reg); - } else { - I915_WRITE(fence_reg + 4, 0); - POSTING_READ(fence_reg + 4); - } -} - -static void i915_write_fence_reg(struct drm_device *dev, int reg, - struct drm_i915_gem_object *obj) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - u32 val; - - if (obj) { - u32 size = i915_gem_obj_ggtt_size(obj); - int pitch_val; - int tile_width; - - WARN((i915_gem_obj_ggtt_offset(obj) & ~I915_FENCE_START_MASK) || - (size & -size) != size || - (i915_gem_obj_ggtt_offset(obj) & (size - 1)), - "object 0x%08lx [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n", - i915_gem_obj_ggtt_offset(obj), obj->map_and_fenceable, size); - - if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev)) - tile_width = 128; - else - tile_width = 512; - - /* Note: pitch better be a power of two tile widths */ - pitch_val = obj->stride / tile_width; - pitch_val = ffs(pitch_val) - 1; - - val = i915_gem_obj_ggtt_offset(obj); - if (obj->tiling_mode == I915_TILING_Y) - val |= 1 << I830_FENCE_TILING_Y_SHIFT; - val |= I915_FENCE_SIZE_BITS(size); - val |= pitch_val << I830_FENCE_PITCH_SHIFT; - val |= I830_FENCE_REG_VALID; - } else - val = 0; - - if (reg < 8) - reg = FENCE_REG_830_0 + reg * 4; - else - reg = FENCE_REG_945_8 + (reg - 8) * 4; - - I915_WRITE(reg, val); - POSTING_READ(reg); -} - -static void i830_write_fence_reg(struct drm_device *dev, int reg, - struct drm_i915_gem_object *obj) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - uint32_t val; - - if (obj) { - u32 size = i915_gem_obj_ggtt_size(obj); - uint32_t pitch_val; - - WARN((i915_gem_obj_ggtt_offset(obj) & ~I830_FENCE_START_MASK) || - (size & -size) != size || - (i915_gem_obj_ggtt_offset(obj) & (size - 1)), - "object 0x%08lx not 512K or pot-size 0x%08x aligned\n", - i915_gem_obj_ggtt_offset(obj), size); - - pitch_val = obj->stride / 128; - pitch_val = ffs(pitch_val) - 1; - - val = i915_gem_obj_ggtt_offset(obj); - if (obj->tiling_mode == I915_TILING_Y) - val |= 1 << I830_FENCE_TILING_Y_SHIFT; - val |= I830_FENCE_SIZE_BITS(size); - val |= pitch_val << I830_FENCE_PITCH_SHIFT; - val |= I830_FENCE_REG_VALID; - } else - val = 0; - - I915_WRITE(FENCE_REG_830_0 + reg * 4, val); - POSTING_READ(FENCE_REG_830_0 + reg * 4); -} - -inline static bool i915_gem_object_needs_mb(struct drm_i915_gem_object *obj) -{ - return obj && obj->base.read_domains & I915_GEM_DOMAIN_GTT; -} - -static void i915_gem_write_fence(struct drm_device *dev, int 
reg, - struct drm_i915_gem_object *obj) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - - /* Ensure that all CPU reads are completed before installing a fence - * and all writes before removing the fence. - */ - if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj)) - mb(); - - WARN(obj && (!obj->stride || !obj->tiling_mode), - "bogus fence setup with stride: 0x%x, tiling mode: %i\n", - obj->stride, obj->tiling_mode); - - if (IS_GEN2(dev)) - i830_write_fence_reg(dev, reg, obj); - else if (IS_GEN3(dev)) - i915_write_fence_reg(dev, reg, obj); - else if (INTEL_INFO(dev)->gen >= 4) - i965_write_fence_reg(dev, reg, obj); - - /* And similarly be paranoid that no direct access to this region - * is reordered to before the fence is installed. - */ - if (i915_gem_object_needs_mb(obj)) - mb(); -} - -static inline int fence_number(struct drm_i915_private *dev_priv, - struct drm_i915_fence_reg *fence) -{ - return fence - dev_priv->fence_regs; -} - -static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj, - struct drm_i915_fence_reg *fence, - bool enable) -{ - struct drm_i915_private *dev_priv = obj->base.dev->dev_private; - int reg = fence_number(dev_priv, fence); - - i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL); - - if (enable) { - obj->fence_reg = reg; - fence->obj = obj; - list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list); - } else { - obj->fence_reg = I915_FENCE_REG_NONE; - fence->obj = NULL; - list_del_init(&fence->lru_list); - } - obj->fence_dirty = false; -} - -static int -i915_gem_object_wait_fence(struct drm_i915_gem_object *obj) -{ - if (obj->last_fenced_req) { - int ret = i915_wait_request(obj->last_fenced_req); - if (ret) - return ret; - - i915_gem_request_assign(&obj->last_fenced_req, NULL); - } - - return 0; -} - -int -i915_gem_object_put_fence(struct drm_i915_gem_object *obj) -{ - struct drm_i915_private *dev_priv = obj->base.dev->dev_private; - struct drm_i915_fence_reg *fence; - int ret; - - ret = i915_gem_object_wait_fence(obj); - if (ret) - return ret; - - if (obj->fence_reg == I915_FENCE_REG_NONE) - return 0; - - fence = &dev_priv->fence_regs[obj->fence_reg]; - - if (WARN_ON(fence->pin_count)) - return -EBUSY; - - i915_gem_object_fence_lost(obj); - i915_gem_object_update_fence(obj, fence, false); - - return 0; -} - -static struct drm_i915_fence_reg * -i915_find_fence_reg(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - struct drm_i915_fence_reg *reg, *avail; - int i; - - /* First try to find a free reg */ - avail = NULL; - for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) { - reg = &dev_priv->fence_regs[i]; - if (!reg->obj) - return reg; - - if (!reg->pin_count) - avail = reg; - } - - if (avail == NULL) - goto deadlock; - - /* None available, try to steal one or wait for a user to finish */ - list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) { - if (reg->pin_count) - continue; - - return reg; - } - -deadlock: - /* Wait for completion of pending flips which consume fences */ - if (intel_has_pending_fb_unpin(dev)) - return ERR_PTR(-EAGAIN); - - return ERR_PTR(-EDEADLK); -} - -/** - * i915_gem_object_get_fence - set up fencing for an object - * @obj: object to map through a fence reg - * - * When mapping objects through the GTT, userspace wants to be able to write - * to them without having to worry about swizzling if the object is tiled. - * This function walks the fence regs looking for a free one for @obj, - * stealing one if it can't find any. 
- * - * It then sets up the reg based on the object's properties: address, pitch - * and tiling format. - * - * For an untiled surface, this removes any existing fence. - */ -int -i915_gem_object_get_fence(struct drm_i915_gem_object *obj) -{ - struct drm_device *dev = obj->base.dev; - struct drm_i915_private *dev_priv = dev->dev_private; - bool enable = obj->tiling_mode != I915_TILING_NONE; - struct drm_i915_fence_reg *reg; - int ret; - - /* Have we updated the tiling parameters upon the object and so - * will need to serialise the write to the associated fence register? - */ - if (obj->fence_dirty) { - ret = i915_gem_object_wait_fence(obj); + ret = intel_ring_idle(ring); if (ret) return ret; } - /* Just update our place in the LRU if our fence is getting reused. */ - if (obj->fence_reg != I915_FENCE_REG_NONE) { - reg = &dev_priv->fence_regs[obj->fence_reg]; - if (!obj->fence_dirty) { - list_move_tail(®->lru_list, - &dev_priv->mm.fence_list); - return 0; - } - } else if (enable) { - if (WARN_ON(!obj->map_and_fenceable)) - return -EINVAL; - - reg = i915_find_fence_reg(dev); - if (IS_ERR(reg)) - return PTR_ERR(reg); - - if (reg->obj) { - struct drm_i915_gem_object *old = reg->obj; - - ret = i915_gem_object_wait_fence(old); - if (ret) - return ret; - - i915_gem_object_fence_lost(old); - } - } else - return 0; - - i915_gem_object_update_fence(obj, reg, enable); - + WARN_ON(i915_verify_lists(dev)); return 0; } @@ -3673,9 +3356,9 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, struct drm_device *dev = obj->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; u32 size, fence_size, fence_alignment, unfenced_alignment; - unsigned long start = + u64 start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; - unsigned long end = + u64 end = flags & PIN_MAPPABLE ? dev_priv->gtt.mappable_end : vm->total; struct i915_vma *vma; int ret; @@ -3731,7 +3414,7 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, * attempt to find space. */ if (size > end) { - DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%u > %s aperture=%lu\n", + DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%u > %s aperture=%llu\n", ggtt_view ? ggtt_view->type : 0, size, flags & PIN_MAPPABLE ? 
"mappable" : "total", @@ -3853,7 +3536,7 @@ i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) old_write_domain = obj->base.write_domain; obj->base.write_domain = 0; - intel_fb_obj_flush(obj, false); + intel_fb_obj_flush(obj, false, ORIGIN_GTT); trace_i915_gem_object_change_domain(obj, obj->base.read_domains, @@ -3875,7 +3558,7 @@ i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) old_write_domain = obj->base.write_domain; obj->base.write_domain = 0; - intel_fb_obj_flush(obj, false); + intel_fb_obj_flush(obj, false, ORIGIN_CPU); trace_i915_gem_object_change_domain(obj, obj->base.read_domains, @@ -3937,9 +3620,6 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) obj->dirty = 1; } - if (write) - intel_fb_obj_invalidate(obj, NULL, ORIGIN_GTT); - trace_i915_gem_object_change_domain(obj, old_read_domains, old_write_domain); @@ -4094,12 +3774,13 @@ int i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, u32 alignment, struct intel_engine_cs *pipelined, + struct drm_i915_gem_request **pipelined_request, const struct i915_ggtt_view *view) { u32 old_read_domains, old_write_domain; int ret; - ret = i915_gem_object_sync(obj, pipelined); + ret = i915_gem_object_sync(obj, pipelined, pipelined_request); if (ret) return ret; @@ -4210,9 +3891,6 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) obj->base.write_domain = I915_GEM_DOMAIN_CPU; } - if (write) - intel_fb_obj_invalidate(obj, NULL, ORIGIN_CPU); - trace_i915_gem_object_change_domain(obj, old_read_domains, old_write_domain); @@ -4253,6 +3931,13 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) if (time_after_eq(request->emitted_jiffies, recent_enough)) break; + /* + * Note that the request might not have been submitted yet. + * In which case emitted_jiffies will be zero. 
+ */ + if (!request->emitted_jiffies) + continue; + target = request; } reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); @@ -4423,32 +4108,6 @@ i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj, --vma->pin_count; } -bool -i915_gem_object_pin_fence(struct drm_i915_gem_object *obj) -{ - if (obj->fence_reg != I915_FENCE_REG_NONE) { - struct drm_i915_private *dev_priv = obj->base.dev->dev_private; - struct i915_vma *ggtt_vma = i915_gem_obj_to_ggtt(obj); - - WARN_ON(!ggtt_vma || - dev_priv->fence_regs[obj->fence_reg].pin_count > - ggtt_vma->pin_count); - dev_priv->fence_regs[obj->fence_reg].pin_count++; - return true; - } else - return false; -} - -void -i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj) -{ - if (obj->fence_reg != I915_FENCE_REG_NONE) { - struct drm_i915_private *dev_priv = obj->base.dev->dev_private; - WARN_ON(dev_priv->fence_regs[obj->fence_reg].pin_count <= 0); - dev_priv->fence_regs[obj->fence_reg].pin_count--; - } -} - int i915_gem_busy_ioctl(struct drm_device *dev, void *data, struct drm_file *file) @@ -4810,8 +4469,9 @@ err: return ret; } -int i915_gem_l3_remap(struct intel_engine_cs *ring, int slice) +int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice) { + struct intel_engine_cs *ring = req->ring; struct drm_device *dev = ring->dev; struct drm_i915_private *dev_priv = dev->dev_private; u32 reg_base = GEN7_L3LOG_BASE + (slice * 0x200); @@ -4821,7 +4481,7 @@ int i915_gem_l3_remap(struct intel_engine_cs *ring, int slice) if (!HAS_L3_DPF(dev) || !remap_info) return 0; - ret = intel_ring_begin(ring, GEN7_L3LOG_SIZE / 4 * 3); + ret = intel_ring_begin(req, GEN7_L3LOG_SIZE / 4 * 3); if (ret) return ret; @@ -4967,7 +4627,7 @@ i915_gem_init_hw(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; struct intel_engine_cs *ring; - int ret, i; + int ret, i, j; if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt()) return -EIO; @@ -5004,27 +4664,55 @@ i915_gem_init_hw(struct drm_device *dev) */ init_unused_rings(dev); + BUG_ON(!dev_priv->ring[RCS].default_context); + + ret = i915_ppgtt_init_hw(dev); + if (ret) { + DRM_ERROR("PPGTT enable HW failed %d\n", ret); + goto out; + } + + /* Need to do basic initialisation of all rings first: */ for_each_ring(ring, dev_priv, i) { ret = ring->init_hw(ring); if (ret) goto out; } - for (i = 0; i < NUM_L3_SLICES(dev); i++) - i915_gem_l3_remap(&dev_priv->ring[RCS], i); + /* Now it is safe to go back round and do everything else: */ + for_each_ring(ring, dev_priv, i) { + struct drm_i915_gem_request *req; - ret = i915_ppgtt_init_hw(dev); - if (ret && ret != -EIO) { - DRM_ERROR("PPGTT enable failed %d\n", ret); - i915_gem_cleanup_ringbuffer(dev); - } + WARN_ON(!ring->default_context); + + ret = i915_gem_request_alloc(ring, ring->default_context, &req); + if (ret) { + i915_gem_cleanup_ringbuffer(dev); + goto out; + } - ret = i915_gem_context_enable(dev_priv); - if (ret && ret != -EIO) { - DRM_ERROR("Context enable failed %d\n", ret); - i915_gem_cleanup_ringbuffer(dev); + if (ring->id == RCS) { + for (j = 0; j < NUM_L3_SLICES(dev); j++) + i915_gem_l3_remap(req, j); + } - goto out; + ret = i915_ppgtt_init_ring(req); + if (ret && ret != -EIO) { + DRM_ERROR("PPGTT enable ring #%d failed %d\n", i, ret); + i915_gem_request_cancel(req); + i915_gem_cleanup_ringbuffer(dev); + goto out; + } + + ret = i915_gem_context_enable(req); + if (ret && ret != -EIO) { + DRM_ERROR("Context enable ring #%d failed %d\n", i, ret); + i915_gem_request_cancel(req); + i915_gem_cleanup_ringbuffer(dev); + 
goto out; + } + + i915_add_request_no_flush(req); } out: @@ -5111,6 +4799,14 @@ i915_gem_cleanup_ringbuffer(struct drm_device *dev) for_each_ring(ring, dev_priv, i) dev_priv->gt.cleanup_ring(ring); + + if (i915.enable_execlists) + /* + * Neither the BIOS, ourselves or any other kernel + * expects the system to be in execlists mode on startup, + * so we need to reset the GPU back to legacy mode. + */ + intel_gpu_reset(dev); } static void @@ -5388,3 +5084,42 @@ bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj) return false; } +/* Allocate a new GEM object and fill it with the supplied data */ +struct drm_i915_gem_object * +i915_gem_object_create_from_data(struct drm_device *dev, + const void *data, size_t size) +{ + struct drm_i915_gem_object *obj; + struct sg_table *sg; + size_t bytes; + int ret; + + obj = i915_gem_alloc_object(dev, round_up(size, PAGE_SIZE)); + if (IS_ERR_OR_NULL(obj)) + return obj; + + ret = i915_gem_object_set_to_cpu_domain(obj, true); + if (ret) + goto fail; + + ret = i915_gem_object_get_pages(obj); + if (ret) + goto fail; + + i915_gem_object_pin_pages(obj); + sg = obj->pages; + bytes = sg_copy_from_buffer(sg->sgl, sg->nents, (void *)data, size); + i915_gem_object_unpin_pages(obj); + + if (WARN_ON(bytes != size)) { + DRM_ERROR("Incomplete copy, wrote %zu of %zu", bytes, size); + ret = -EFAULT; + goto fail; + } + + return obj; + +fail: + drm_gem_object_unreference(&obj->base); + return ERR_PTR(ret); +} |
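The central change in this diff is that the ring's implicit outstanding_lazy_request (and i915_gem_check_olr()) are gone: callers now allocate a request explicitly with i915_gem_request_alloc(), which takes a context reference and reserves enough ring space up front that __i915_add_request() is no longer allowed to fail. A minimal caller sketch of that flow, modelled on the i915_gpu_idle() and i915_gem_init_hw() hunks above; the function name and the choice of i915_switch_context() as the emitted work are illustrative, not part of the patch:

/* Illustrative sketch only: mirrors the allocate / emit / submit-or-cancel
 * pattern adopted in this diff.
 */
static int example_emit_on_ring(struct intel_engine_cs *ring)
{
	struct drm_i915_gem_request *req;
	int ret;

	/* Allocates the request, references the context and reserves ring
	 * space for the final emission, which is why the add path below
	 * must not fail.
	 */
	ret = i915_gem_request_alloc(ring, ring->default_context, &req);
	if (ret)
		return ret;

	/* Emit work against the request, e.g. a context switch. */
	ret = i915_switch_context(req);
	if (ret) {
		/* Unwind the ring-space reservation and drop the request. */
		i915_gem_request_cancel(req);
		return ret;
	}

	/* Hand the request off; completion is now tracked and the
	 * submission path is not allowed to fail.
	 */
	i915_add_request_no_flush(req);
	return 0;
}

The reworked i915_gem_object_sync() follows the same idea: a request for the target ring is created lazily and handed back through *to_req only if semaphore commands actually had to be emitted. A hedged sketch of the calling convention described in the updated kerneldoc; the wrapper function itself is an assumption:

/* Illustrative sketch only: follows the to_req contract documented in the
 * i915_gem_object_sync() comment above.
 */
static int example_sync_object_to_ring(struct drm_i915_gem_object *obj,
				       struct intel_engine_cs *to)
{
	struct drm_i915_gem_request *to_req = NULL;
	int ret;

	ret = i915_gem_object_sync(obj, to, &to_req);
	if (ret)
		return ret;

	/* If a request was returned, the caller owns it and must submit it,
	 * after optionally adding more work to it.
	 */
	if (to_req)
		i915_add_request_no_flush(to_req);

	return 0;
}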