Diffstat (limited to 'drivers/accel/habanalabs/common/command_submission.c')
-rw-r--r--  drivers/accel/habanalabs/common/command_submission.c  184
1 file changed, 131 insertions, 53 deletions
diff --git a/drivers/accel/habanalabs/common/command_submission.c b/drivers/accel/habanalabs/common/command_submission.c
index af9d2e22c6e7..104c3ce60655 100644
--- a/drivers/accel/habanalabs/common/command_submission.c
+++ b/drivers/accel/habanalabs/common/command_submission.c
@@ -280,14 +280,8 @@ bool cs_needs_timeout(struct hl_cs *cs)
static bool is_cb_patched(struct hl_device *hdev, struct hl_cs_job *job)
{
- /*
- * Patched CB is created for external queues jobs, and for H/W queues
- * jobs if the user CB was allocated by driver and MMU is disabled.
- */
- return (job->queue_type == QUEUE_TYPE_EXT ||
- (job->queue_type == QUEUE_TYPE_HW &&
- job->is_kernel_allocated_cb &&
- !hdev->mmu_enable));
+ /* Patched CB is created only for external queue jobs */
+ return (job->queue_type == QUEUE_TYPE_EXT);
}
/*
@@ -363,14 +357,13 @@ static void hl_complete_job(struct hl_device *hdev, struct hl_cs_job *job)
}
}
- /* For H/W queue jobs, if a user CB was allocated by driver and MMU is
- * enabled, the user CB isn't released in cs_parser() and thus should be
+ /* For H/W queue jobs, if a user CB was allocated by driver,
+ * the user CB isn't released in cs_parser() and thus should be
* released here. This is also true for INT queues jobs which were
* allocated by driver.
*/
- if ((job->is_kernel_allocated_cb &&
- ((job->queue_type == QUEUE_TYPE_HW && hdev->mmu_enable) ||
- job->queue_type == QUEUE_TYPE_INT))) {
+ if (job->is_kernel_allocated_cb &&
+ (job->queue_type == QUEUE_TYPE_HW || job->queue_type == QUEUE_TYPE_INT)) {
atomic_dec(&job->user_cb->cs_cnt);
hl_cb_put(job->user_cb);
}
@@ -804,12 +797,14 @@ out:
static void cs_timedout(struct work_struct *work)
{
+ struct hl_cs *cs = container_of(work, struct hl_cs, work_tdr.work);
+ bool skip_reset_on_timeout, device_reset = false;
struct hl_device *hdev;
u64 event_mask = 0x0;
+ uint timeout_sec;
int rc;
- struct hl_cs *cs = container_of(work, struct hl_cs,
- work_tdr.work);
- bool skip_reset_on_timeout = cs->skip_reset_on_timeout, device_reset = false;
+
+ skip_reset_on_timeout = cs->skip_reset_on_timeout;
rc = cs_get_unless_zero(cs);
if (!rc)
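Note: the added declaration recovers the CS object from the delayed work item via container_of(). A minimal sketch of that kernel pattern, using a hypothetical struct my_cs in place of the driver's struct hl_cs:

#include <linux/kernel.h>     /* container_of() */
#include <linux/workqueue.h>

/* Hypothetical stand-in for struct hl_cs: a delayed_work embedded in a
 * larger object, like work_tdr in the patch above. */
struct my_cs {
	u64 sequence;
	struct delayed_work work_tdr;
};

static void my_timeout_handler(struct work_struct *work)
{
	/* delayed_work embeds a work_struct member named "work", so the
	 * member path is work_tdr.work, exactly as in cs_timedout(). */
	struct my_cs *cs = container_of(work, struct my_cs, work_tdr.work);

	pr_info("cs %llu timed out\n", cs->sequence);
}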
@@ -840,29 +835,31 @@ static void cs_timedout(struct work_struct *work)
event_mask |= HL_NOTIFIER_EVENT_CS_TIMEOUT;
}
+ timeout_sec = jiffies_to_msecs(hdev->timeout_jiffies) / 1000;
+
switch (cs->type) {
case CS_TYPE_SIGNAL:
dev_err(hdev->dev,
- "Signal command submission %llu has not finished in time!\n",
- cs->sequence);
+ "Signal command submission %llu has not finished in %u seconds!\n",
+ cs->sequence, timeout_sec);
break;
case CS_TYPE_WAIT:
dev_err(hdev->dev,
- "Wait command submission %llu has not finished in time!\n",
- cs->sequence);
+ "Wait command submission %llu has not finished in %u seconds!\n",
+ cs->sequence, timeout_sec);
break;
case CS_TYPE_COLLECTIVE_WAIT:
dev_err(hdev->dev,
- "Collective Wait command submission %llu has not finished in time!\n",
- cs->sequence);
+ "Collective Wait command submission %llu has not finished in %u seconds!\n",
+ cs->sequence, timeout_sec);
break;
default:
dev_err(hdev->dev,
- "Command submission %llu has not finished in time!\n",
- cs->sequence);
+ "Command submission %llu has not finished in %u seconds!\n",
+ cs->sequence, timeout_sec);
break;
}
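Note: timeout_sec above is derived with the standard jiffies helper. A minimal sketch of the conversion; the helper name is illustrative, not part of the driver:

#include <linux/jiffies.h>

/* Convert a jiffies-based timeout into whole seconds for log messages,
 * mirroring the timeout_sec computation in cs_timedout(). */
static unsigned int timeout_jiffies_to_sec(unsigned long timeout_jiffies)
{
	return jiffies_to_msecs(timeout_jiffies) / 1000;
}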
@@ -1139,11 +1136,10 @@ static void force_complete_cs(struct hl_device *hdev)
spin_unlock(&hdev->cs_mirror_lock);
}
-void hl_abort_waitings_for_completion(struct hl_device *hdev)
+void hl_abort_waiting_for_cs_completions(struct hl_device *hdev)
{
force_complete_cs(hdev);
force_complete_multi_cs(hdev);
- hl_release_pending_user_interrupts(hdev);
}
static void job_wq_completion(struct work_struct *work)
@@ -1948,8 +1944,7 @@ static int cs_ioctl_signal_wait_create_jobs(struct hl_device *hdev,
else
cb_size = hdev->asic_funcs->get_signal_cb_size(hdev);
- cb = hl_cb_kernel_create(hdev, cb_size,
- q_type == QUEUE_TYPE_HW && hdev->mmu_enable);
+ cb = hl_cb_kernel_create(hdev, cb_size, q_type == QUEUE_TYPE_HW);
if (!cb) {
atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
atomic64_inc(&cntr->out_of_mem_drop_cnt);
@@ -2152,7 +2147,7 @@ static int cs_ioctl_unreserve_signals(struct hl_fpriv *hpriv, u32 handle_id)
hdev->asic_funcs->hw_queues_unlock(hdev);
rc = -EINVAL;
- goto out;
+ goto out_unlock;
}
/*
@@ -2167,15 +2162,21 @@ static int cs_ioctl_unreserve_signals(struct hl_fpriv *hpriv, u32 handle_id)
/* Release the id and free allocated memory of the handle */
idr_remove(&mgr->handles, handle_id);
+
+ /* unlock before calling ctx_put, where we might sleep */
+ spin_unlock(&mgr->lock);
hl_ctx_put(encaps_sig_hdl->ctx);
kfree(encaps_sig_hdl);
+ goto out;
} else {
rc = -EINVAL;
dev_err(hdev->dev, "failed to unreserve signals, cannot find handler\n");
}
-out:
+
+out_unlock:
spin_unlock(&mgr->lock);
+out:
return rc;
}
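Note: the split out/out_unlock labels exist so the spinlock is released before hl_ctx_put(), which may sleep. A minimal sketch of the underlying pattern, using a hypothetical sig_mgr and a plain kfree() in place of the driver's encapsulated-signals handling:

#include <linux/idr.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct sig_mgr {                      /* hypothetical idr + lock pair */
	spinlock_t lock;
	struct idr handles;
};

static int unreserve_handle(struct sig_mgr *mgr, u32 handle_id)
{
	void *hdl;

	spin_lock(&mgr->lock);
	hdl = idr_remove(&mgr->handles, handle_id);  /* returns the removed entry */
	/* Drop the lock before anything that may sleep (a refcount put that
	 * takes a mutex, for instance); kfree() below is only an example. */
	spin_unlock(&mgr->lock);

	if (!hdl)
		return -EINVAL;

	kfree(hdl);
	return 0;
}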
@@ -3196,34 +3197,57 @@ static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
return 0;
}
+static inline void set_record_cq_info(struct hl_user_pending_interrupt *record,
+ struct hl_cb *cq_cb, u32 cq_offset, u32 target_value)
+{
+ record->ts_reg_info.cq_cb = cq_cb;
+ record->cq_kernel_addr = (u64 *) cq_cb->kernel_address + cq_offset;
+ record->cq_target_value = target_value;
+}
+
+static int validate_and_get_ts_record(struct device *dev,
+ struct hl_ts_buff *ts_buff, u64 ts_offset,
+ struct hl_user_pending_interrupt **req_event_record)
+{
+ struct hl_user_pending_interrupt *ts_cb_last;
+
+ *req_event_record = (struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address +
+ ts_offset;
+ ts_cb_last = (struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address +
+ (ts_buff->kernel_buff_size / sizeof(struct hl_user_pending_interrupt));
+
+ /* Validate ts_offset not exceeding last max */
+ if (*req_event_record >= ts_cb_last) {
+ dev_err(dev, "Ts offset(%llx) exceeds max CB offset(0x%llx)\n",
+ ts_offset, (u64)(uintptr_t)ts_cb_last);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int ts_buff_get_kernel_ts_record(struct hl_mmap_mem_buf *buf,
struct hl_cb *cq_cb,
u64 ts_offset, u64 cq_offset, u64 target_value,
spinlock_t *wait_list_lock,
struct hl_user_pending_interrupt **pend)
{
+ struct hl_user_pending_interrupt *requested_offset_record;
struct hl_ts_buff *ts_buff = buf->private;
- struct hl_user_pending_interrupt *requested_offset_record =
- (struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address +
- ts_offset;
- struct hl_user_pending_interrupt *cb_last =
- (struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address +
- (ts_buff->kernel_buff_size / sizeof(struct hl_user_pending_interrupt));
- unsigned long iter_counter = 0;
+ unsigned long iter_counter = 0, flags;
u64 current_cq_counter;
ktime_t timestamp;
+ int rc;
- /* Validate ts_offset not exceeding last max */
- if (requested_offset_record >= cb_last) {
- dev_err(buf->mmg->dev, "Ts offset exceeds max CB offset(0x%llx)\n",
- (u64)(uintptr_t)cb_last);
- return -EINVAL;
- }
+ rc = validate_and_get_ts_record(buf->mmg->dev, ts_buff, ts_offset,
+ &requested_offset_record);
+ if (rc)
+ return rc;
timestamp = ktime_get();
start_over:
- spin_lock(wait_list_lock);
+ spin_lock_irqsave(wait_list_lock, flags);
/* Unregister only if we didn't reach the target value
* since in this case there will be no handling in irq context
@@ -3234,7 +3258,9 @@ start_over:
current_cq_counter = *requested_offset_record->cq_kernel_addr;
if (current_cq_counter < requested_offset_record->cq_target_value) {
list_del(&requested_offset_record->wait_list_node);
- spin_unlock(wait_list_lock);
+ spin_unlock_irqrestore(wait_list_lock, flags);
+
+ set_record_cq_info(requested_offset_record, cq_cb, cq_offset, target_value);
hl_mmap_mem_buf_put(requested_offset_record->ts_reg_info.buf);
hl_cb_put(requested_offset_record->ts_reg_info.cq_cb);
@@ -3245,8 +3271,8 @@ start_over:
dev_dbg(buf->mmg->dev,
"ts node in middle of irq handling\n");
- /* irq thread handling in the middle give it time to finish */
- spin_unlock(wait_list_lock);
+ /* irq handling is in the middle; give it time to finish */
+ spin_unlock_irqrestore(wait_list_lock, flags);
usleep_range(100, 1000);
if (++iter_counter == MAX_TS_ITER_NUM) {
dev_err(buf->mmg->dev,
@@ -3260,14 +3286,11 @@ start_over:
} else {
/* Fill up the new registration node info */
requested_offset_record->ts_reg_info.buf = buf;
- requested_offset_record->ts_reg_info.cq_cb = cq_cb;
requested_offset_record->ts_reg_info.timestamp_kernel_addr =
(u64 *) ts_buff->user_buff_address + ts_offset;
- requested_offset_record->cq_kernel_addr =
- (u64 *) cq_cb->kernel_address + cq_offset;
- requested_offset_record->cq_target_value = target_value;
+ set_record_cq_info(requested_offset_record, cq_cb, cq_offset, target_value);
- spin_unlock(wait_list_lock);
+ spin_unlock_irqrestore(wait_list_lock, flags);
}
*pend = requested_offset_record;
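Note: the wait-list lock is switched to the irqsave/irqrestore variants because the same list is also manipulated from interrupt context. A minimal sketch of the pattern, with illustrative names:

#include <linux/list.h>
#include <linux/spinlock.h>

/* Remove a node from a list that is shared with an interrupt handler.
 * The irqsave variant disables local interrupts while the lock is held,
 * so the handler cannot deadlock against us on the same CPU. */
static void remove_shared_node(spinlock_t *lock, struct list_head *node)
{
	unsigned long flags;

	spin_lock_irqsave(lock, flags);
	list_del(node);
	spin_unlock_irqrestore(lock, flags);
}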
@@ -3277,6 +3300,58 @@ start_over:
return 0;
}
+static int unregister_timestamp_node_ioctl(struct hl_device *hdev, struct hl_mem_mgr *mmg,
+ u64 ts_handle, u64 ts_offset, struct hl_user_interrupt *interrupt)
+{
+ struct hl_user_pending_interrupt *req_event_record, *pend, *temp_pend;
+ struct hl_mmap_mem_buf *buff;
+ struct hl_ts_buff *ts_buff;
+ bool ts_rec_found = false;
+ int rc;
+
+ buff = hl_mmap_mem_buf_get(mmg, ts_handle);
+ if (!buff) {
+ dev_err(hdev->dev, "invalid TS buff handle!\n");
+ return -EINVAL;
+ }
+
+ ts_buff = buff->private;
+
+ rc = validate_and_get_ts_record(hdev->dev, ts_buff, ts_offset, &req_event_record);
+ if (rc)
+ goto out;
+
+ /*
+ * Note: we don't rely on the ts in_use field here; instead we scan the list,
+ * because we cannot rely on the user to keep register/unregister calls in
+ * order, and races with the irq handler are possible at any time, so it is
+ * safer to lock the list and scan it to find the node.
+ * If the node is found on the list, we mark it as not in use and delete it
+ * from the list; if it is not there, the node was already handled in the irq
+ * before we got into this ioctl.
+ */
+ spin_lock(&interrupt->wait_list_lock);
+ list_for_each_entry_safe(pend, temp_pend, &interrupt->wait_list_head, wait_list_node) {
+ if (pend == req_event_record) {
+ pend->ts_reg_info.in_use = 0;
+ list_del(&pend->wait_list_node);
+ ts_rec_found = true;
+ break;
+ }
+ }
+ spin_unlock(&interrupt->wait_list_lock);
+
+ /* Put refcounts that were taken when we registered the event */
+ if (ts_rec_found) {
+ hl_mmap_mem_buf_put(pend->ts_reg_info.buf);
+ hl_cb_put(pend->ts_reg_info.cq_cb);
+ }
+out:
+ hl_mmap_mem_buf_put(buff);
+
+ return rc;
+}
+
static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
struct hl_mem_mgr *cb_mmg, struct hl_mem_mgr *mmg,
u64 timeout_us, u64 cq_counters_handle, u64 cq_counters_offset,
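Note: the new unregister path scans the wait list under the lock instead of trusting the in_use flag alone. A minimal sketch of that list_for_each_entry_safe() scan, with a hypothetical pending_node in place of hl_user_pending_interrupt:

#include <linux/list.h>
#include <linux/spinlock.h>

struct pending_node {                 /* hypothetical wait-list entry */
	struct list_head wait_list_node;
	bool in_use;
};

/* Remove @target from @head if it is still queued; returns true if it was
 * found. The _safe iterator tolerates list_del() on the current entry. */
static bool remove_if_pending(spinlock_t *lock, struct list_head *head,
			      struct pending_node *target)
{
	struct pending_node *pend, *tmp;
	bool found = false;

	spin_lock(lock);
	list_for_each_entry_safe(pend, tmp, head, wait_list_node) {
		if (pend == target) {
			pend->in_use = false;
			list_del(&pend->wait_list_node);
			found = true;
			break;
		}
	}
	spin_unlock(lock);

	return found;
}

As in the patch, the caller drops its buffer and CB references only when the node was actually found on the list.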
@@ -3610,7 +3685,10 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
return -EINVAL;
}
- if (args->in.flags & HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ)
+ if (args->in.flags & HL_WAIT_CS_FLAGS_UNREGISTER_INTERRUPT)
+ rc = unregister_timestamp_node_ioctl(hdev, &hpriv->mem_mgr,
+ args->in.timestamp_handle, args->in.timestamp_offset, interrupt);
+ else if (args->in.flags & HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ)
rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &hpriv->mem_mgr, &hpriv->mem_mgr,
args->in.interrupt_timeout_us, args->in.cq_counters_handle,
args->in.cq_counters_offset,