summaryrefslogtreecommitdiff
path: root/drivers/scsi/qedi
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/scsi/qedi')
-rw-r--r--drivers/scsi/qedi/qedi.h6
-rw-r--r--drivers/scsi/qedi/qedi_fw.c30
-rw-r--r--drivers/scsi/qedi/qedi_iscsi.c7
-rw-r--r--drivers/scsi/qedi/qedi_main.c131
4 files changed, 162 insertions, 12 deletions
diff --git a/drivers/scsi/qedi/qedi.h b/drivers/scsi/qedi/qedi.h
index 9498279ae80d..c342defc3f52 100644
--- a/drivers/scsi/qedi/qedi.h
+++ b/drivers/scsi/qedi/qedi.h
@@ -274,6 +274,10 @@ struct qedi_ctx {
spinlock_t ll2_lock; /* Light L2 lock */
spinlock_t hba_lock; /* per port lock */
struct task_struct *ll2_recv_thread;
+ unsigned long qedi_err_flags;
+#define QEDI_ERR_ATTN_CLR_EN 0
+#define QEDI_ERR_IS_RECOVERABLE 2
+#define QEDI_ERR_OVERRIDE_EN 31
unsigned long flags;
#define UIO_DEV_OPENED 1
#define QEDI_IOTHREAD_WAKE 2
@@ -305,6 +309,7 @@ struct qedi_ctx {
u32 max_sqes;
u8 num_queues;
u32 max_active_conns;
+ s32 msix_count;
struct iscsi_cid_queue cid_que;
struct qedi_endpoint **ep_tbl;
@@ -334,6 +339,7 @@ struct qedi_ctx {
struct workqueue_struct *dpc_wq;
struct delayed_work recovery_work;
+ struct delayed_work board_disable_work;
spinlock_t task_idx_lock; /* To protect gbl context */
s32 last_tidx_alloc;
diff --git a/drivers/scsi/qedi/qedi_fw.c b/drivers/scsi/qedi/qedi_fw.c
index 6ed74583b1b9..440ddd2309f1 100644
--- a/drivers/scsi/qedi/qedi_fw.c
+++ b/drivers/scsi/qedi/qedi_fw.c
@@ -59,6 +59,7 @@ static void qedi_process_logout_resp(struct qedi_ctx *qedi,
"Freeing tid=0x%x for cid=0x%x\n",
cmd->task_id, qedi_conn->iscsi_conn_id);
+ spin_lock(&qedi_conn->list_lock);
if (likely(cmd->io_cmd_in_list)) {
cmd->io_cmd_in_list = false;
list_del_init(&cmd->io_cmd);
@@ -69,6 +70,7 @@ static void qedi_process_logout_resp(struct qedi_ctx *qedi,
cmd->task_id, qedi_conn->iscsi_conn_id,
&cmd->io_cmd);
}
+ spin_unlock(&qedi_conn->list_lock);
cmd->state = RESPONSE_RECEIVED;
qedi_clear_task_idx(qedi, cmd->task_id);
@@ -122,6 +124,7 @@ static void qedi_process_text_resp(struct qedi_ctx *qedi,
"Freeing tid=0x%x for cid=0x%x\n",
cmd->task_id, qedi_conn->iscsi_conn_id);
+ spin_lock(&qedi_conn->list_lock);
if (likely(cmd->io_cmd_in_list)) {
cmd->io_cmd_in_list = false;
list_del_init(&cmd->io_cmd);
@@ -132,6 +135,7 @@ static void qedi_process_text_resp(struct qedi_ctx *qedi,
cmd->task_id, qedi_conn->iscsi_conn_id,
&cmd->io_cmd);
}
+ spin_unlock(&qedi_conn->list_lock);
cmd->state = RESPONSE_RECEIVED;
qedi_clear_task_idx(qedi, cmd->task_id);
@@ -222,11 +226,13 @@ static void qedi_process_tmf_resp(struct qedi_ctx *qedi,
tmf_hdr = (struct iscsi_tm *)qedi_cmd->task->hdr;
+ spin_lock(&qedi_conn->list_lock);
if (likely(qedi_cmd->io_cmd_in_list)) {
qedi_cmd->io_cmd_in_list = false;
list_del_init(&qedi_cmd->io_cmd);
qedi_conn->active_cmd_count--;
}
+ spin_unlock(&qedi_conn->list_lock);
if (((tmf_hdr->flags & ISCSI_FLAG_TM_FUNC_MASK) ==
ISCSI_TM_FUNC_LOGICAL_UNIT_RESET) ||
@@ -288,11 +294,13 @@ static void qedi_process_login_resp(struct qedi_ctx *qedi,
ISCSI_LOGIN_RESPONSE_HDR_DATA_SEG_LEN_MASK;
qedi_conn->gen_pdu.resp_wr_ptr = qedi_conn->gen_pdu.resp_buf + pld_len;
+ spin_lock(&qedi_conn->list_lock);
if (likely(cmd->io_cmd_in_list)) {
cmd->io_cmd_in_list = false;
list_del_init(&cmd->io_cmd);
qedi_conn->active_cmd_count--;
}
+ spin_unlock(&qedi_conn->list_lock);
memset(task_ctx, '\0', sizeof(*task_ctx));
@@ -816,8 +824,11 @@ static void qedi_process_cmd_cleanup_resp(struct qedi_ctx *qedi,
qedi_clear_task_idx(qedi_conn->qedi, rtid);
spin_lock(&qedi_conn->list_lock);
- list_del_init(&dbg_cmd->io_cmd);
- qedi_conn->active_cmd_count--;
+ if (likely(dbg_cmd->io_cmd_in_list)) {
+ dbg_cmd->io_cmd_in_list = false;
+ list_del_init(&dbg_cmd->io_cmd);
+ qedi_conn->active_cmd_count--;
+ }
spin_unlock(&qedi_conn->list_lock);
qedi_cmd->state = CLEANUP_RECV;
wake_up_interruptible(&qedi_conn->wait_queue);
@@ -1235,6 +1246,7 @@ int qedi_cleanup_all_io(struct qedi_ctx *qedi, struct qedi_conn *qedi_conn,
qedi_conn->cmd_cleanup_req++;
qedi_iscsi_cleanup_task(ctask, true);
+ cmd->io_cmd_in_list = false;
list_del_init(&cmd->io_cmd);
qedi_conn->active_cmd_count--;
QEDI_WARN(&qedi->dbg_ctx,
@@ -1255,7 +1267,8 @@ int qedi_cleanup_all_io(struct qedi_ctx *qedi, struct qedi_conn *qedi_conn,
rval = wait_event_interruptible_timeout(qedi_conn->wait_queue,
((qedi_conn->cmd_cleanup_req ==
qedi_conn->cmd_cleanup_cmpl) ||
- qedi_conn->ep),
+ test_bit(QEDI_IN_RECOVERY,
+ &qedi->flags)),
5 * HZ);
if (rval) {
QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_SCSI_TM,
@@ -1280,7 +1293,9 @@ int qedi_cleanup_all_io(struct qedi_ctx *qedi, struct qedi_conn *qedi_conn,
/* Enable IOs for all other sessions except current.*/
if (!wait_event_interruptible_timeout(qedi_conn->wait_queue,
(qedi_conn->cmd_cleanup_req ==
- qedi_conn->cmd_cleanup_cmpl),
+ qedi_conn->cmd_cleanup_cmpl) ||
+ test_bit(QEDI_IN_RECOVERY,
+ &qedi->flags),
5 * HZ)) {
iscsi_host_for_each_session(qedi->shost,
qedi_mark_device_available);
@@ -1446,8 +1461,11 @@ ldel_exit:
spin_unlock_bh(&qedi_conn->tmf_work_lock);
spin_lock(&qedi_conn->list_lock);
- list_del_init(&cmd->io_cmd);
- qedi_conn->active_cmd_count--;
+ if (likely(cmd->io_cmd_in_list)) {
+ cmd->io_cmd_in_list = false;
+ list_del_init(&cmd->io_cmd);
+ qedi_conn->active_cmd_count--;
+ }
spin_unlock(&qedi_conn->list_lock);
clear_bit(QEDI_CONN_FW_CLEANUP, &qedi_conn->flags);
diff --git a/drivers/scsi/qedi/qedi_iscsi.c b/drivers/scsi/qedi/qedi_iscsi.c
index c14ac7882afa..08c05403cd72 100644
--- a/drivers/scsi/qedi/qedi_iscsi.c
+++ b/drivers/scsi/qedi/qedi_iscsi.c
@@ -975,11 +975,13 @@ static void qedi_cleanup_active_cmd_list(struct qedi_conn *qedi_conn)
{
struct qedi_cmd *cmd, *cmd_tmp;
+ spin_lock(&qedi_conn->list_lock);
list_for_each_entry_safe(cmd, cmd_tmp, &qedi_conn->active_cmd_list,
io_cmd) {
list_del_init(&cmd->io_cmd);
qedi_conn->active_cmd_count--;
}
+ spin_unlock(&qedi_conn->list_lock);
}
static void qedi_ep_disconnect(struct iscsi_endpoint *ep)
@@ -1069,6 +1071,11 @@ static void qedi_ep_disconnect(struct iscsi_endpoint *ep)
wait_delay += qedi->pf_params.iscsi_pf_params.two_msl_timer;
qedi_ep->state = EP_STATE_DISCONN_START;
+
+ if (test_bit(QEDI_IN_SHUTDOWN, &qedi->flags) ||
+ test_bit(QEDI_IN_RECOVERY, &qedi->flags))
+ goto ep_release_conn;
+
ret = qedi_ops->destroy_conn(qedi->cdev, qedi_ep->handle, abrt_conn);
if (ret) {
QEDI_WARN(&qedi->dbg_ctx,
diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c
index 6f038ae5efca..61fab01d2d52 100644
--- a/drivers/scsi/qedi/qedi_main.c
+++ b/drivers/scsi/qedi/qedi_main.c
@@ -50,6 +50,10 @@ module_param(qedi_ll2_buf_size, uint, 0644);
MODULE_PARM_DESC(qedi_ll2_buf_size,
"parameter to set ping packet size, default - 0x400, Jumbo packets - 0x2400.");
+static uint qedi_flags_override;
+module_param(qedi_flags_override, uint, 0644);
+MODULE_PARM_DESC(qedi_flags_override, "Disable/Enable MFW error flags bits action.");
+
const struct qed_iscsi_ops *qedi_ops;
static struct scsi_transport_template *qedi_scsi_transport;
static struct pci_driver qedi_pci_driver;
@@ -63,6 +67,8 @@ static void qedi_reset_uio_rings(struct qedi_uio_dev *udev);
static void qedi_ll2_free_skbs(struct qedi_ctx *qedi);
static struct nvm_iscsi_block *qedi_get_nvram_block(struct qedi_ctx *qedi);
static void qedi_recovery_handler(struct work_struct *work);
+static void qedi_schedule_hw_err_handler(void *dev,
+ enum qed_hw_err_type err_type);
static int qedi_iscsi_event_cb(void *context, u8 fw_event_code, void *fw_handle)
{
@@ -789,8 +795,7 @@ static void qedi_ll2_free_skbs(struct qedi_ctx *qedi)
spin_lock_bh(&qedi->ll2_lock);
list_for_each_entry_safe(work, work_tmp, &qedi->ll2_skb_list, list) {
list_del(&work->list);
- if (work->skb)
- kfree_skb(work->skb);
+ kfree_skb(work->skb);
kfree(work);
}
spin_unlock_bh(&qedi->ll2_lock);
@@ -1113,6 +1118,42 @@ exit_get_data:
return;
}
+void qedi_schedule_hw_err_handler(void *dev,
+ enum qed_hw_err_type err_type)
+{
+ struct qedi_ctx *qedi = (struct qedi_ctx *)dev;
+ unsigned long override_flags = qedi_flags_override;
+
+ if (override_flags && test_bit(QEDI_ERR_OVERRIDE_EN, &override_flags))
+ qedi->qedi_err_flags = qedi_flags_override;
+
+ QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_INFO,
+ "HW error handler scheduled, err=%d err_flags=0x%x\n",
+ err_type, qedi->qedi_err_flags);
+
+ switch (err_type) {
+ case QED_HW_ERR_FAN_FAIL:
+ schedule_delayed_work(&qedi->board_disable_work, 0);
+ break;
+ case QED_HW_ERR_MFW_RESP_FAIL:
+ case QED_HW_ERR_HW_ATTN:
+ case QED_HW_ERR_DMAE_FAIL:
+ case QED_HW_ERR_RAMROD_FAIL:
+ case QED_HW_ERR_FW_ASSERT:
+ /* Prevent HW attentions from being reasserted */
+ if (test_bit(QEDI_ERR_ATTN_CLR_EN, &qedi->qedi_err_flags))
+ qedi_ops->common->attn_clr_enable(qedi->cdev, true);
+
+ if (err_type == QED_HW_ERR_RAMROD_FAIL &&
+ test_bit(QEDI_ERR_IS_RECOVERABLE, &qedi->qedi_err_flags))
+ qedi_ops->common->recovery_process(qedi->cdev);
+
+ break;
+ default:
+ break;
+ }
+}
+
static void qedi_schedule_recovery_handler(void *dev)
{
struct qedi_ctx *qedi = dev;
@@ -1127,6 +1168,15 @@ static void qedi_schedule_recovery_handler(void *dev)
schedule_delayed_work(&qedi->recovery_work, 0);
}
+static void qedi_set_conn_recovery(struct iscsi_cls_session *cls_session)
+{
+ struct iscsi_session *session = cls_session->dd_data;
+ struct iscsi_conn *conn = session->leadconn;
+ struct qedi_conn *qedi_conn = conn->dd_data;
+
+ qedi_start_conn_recovery(qedi_conn->qedi, qedi_conn);
+}
+
static void qedi_link_update(void *dev, struct qed_link_output *link)
{
struct qedi_ctx *qedi = (struct qedi_ctx *)dev;
@@ -1138,6 +1188,7 @@ static void qedi_link_update(void *dev, struct qed_link_output *link)
QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_INFO,
"Link Down event.\n");
atomic_set(&qedi->link_state, QEDI_LINK_DOWN);
+ iscsi_host_for_each_session(qedi->shost, qedi_set_conn_recovery);
}
}
@@ -1145,6 +1196,7 @@ static struct qed_iscsi_cb_ops qedi_cb_ops = {
{
.link_update = qedi_link_update,
.schedule_recovery_handler = qedi_schedule_recovery_handler,
+ .schedule_hw_err_handler = qedi_schedule_hw_err_handler,
.get_protocol_tlv_data = qedi_get_protocol_tlv_data,
.get_generic_tlv_data = qedi_get_generic_tlv_data,
}
@@ -1357,7 +1409,7 @@ static int qedi_request_msix_irq(struct qedi_ctx *qedi)
u16 idx;
cpu = cpumask_first(cpu_online_mask);
- for (i = 0; i < qedi->int_info.msix_cnt; i++) {
+ for (i = 0; i < qedi->msix_count; i++) {
idx = i * qedi->dev_info.common.num_hwfns +
qedi_ops->common->get_affin_hwfn_idx(qedi->cdev);
@@ -1387,7 +1439,12 @@ static int qedi_setup_int(struct qedi_ctx *qedi)
{
int rc = 0;
- rc = qedi_ops->common->set_fp_int(qedi->cdev, num_online_cpus());
+ rc = qedi_ops->common->set_fp_int(qedi->cdev, qedi->num_queues);
+ if (rc < 0)
+ goto exit_setup_int;
+
+ qedi->msix_count = rc;
+
rc = qedi_ops->common->get_fp_int(qedi->cdev, &qedi->int_info);
if (rc)
goto exit_setup_int;
@@ -2336,10 +2393,30 @@ kset_free:
return -ENOMEM;
}
+static pci_ers_result_t qedi_io_error_detected(struct pci_dev *pdev,
+ pci_channel_state_t state)
+{
+ struct qedi_ctx *qedi = pci_get_drvdata(pdev);
+
+ QEDI_ERR(&qedi->dbg_ctx, "%s: PCI error detected [%d]\n",
+ __func__, state);
+
+ if (test_and_set_bit(QEDI_IN_RECOVERY, &qedi->flags)) {
+ QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_INFO,
+ "Recovery already in progress.\n");
+ return PCI_ERS_RESULT_NONE;
+ }
+
+ qedi_ops->common->recovery_process(qedi->cdev);
+
+ return PCI_ERS_RESULT_CAN_RECOVER;
+}
+
static void __qedi_remove(struct pci_dev *pdev, int mode)
{
struct qedi_ctx *qedi = pci_get_drvdata(pdev);
int rval;
+ u16 retry = 10;
if (mode == QEDI_MODE_SHUTDOWN)
iscsi_host_for_each_session(qedi->shost,
@@ -2368,7 +2445,13 @@ static void __qedi_remove(struct pci_dev *pdev, int mode)
qedi_sync_free_irqs(qedi);
if (!test_bit(QEDI_IN_OFFLINE, &qedi->flags)) {
- qedi_ops->stop(qedi->cdev);
+ while (retry--) {
+ rval = qedi_ops->stop(qedi->cdev);
+ if (rval < 0)
+ msleep(1000);
+ else
+ break;
+ }
qedi_ops->ll2->stop(qedi->cdev);
}
@@ -2405,6 +2488,21 @@ static void __qedi_remove(struct pci_dev *pdev, int mode)
}
}
+static void qedi_board_disable_work(struct work_struct *work)
+{
+ struct qedi_ctx *qedi =
+ container_of(work, struct qedi_ctx,
+ board_disable_work.work);
+
+ QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_INFO,
+ "Fan failure, Unloading firmware context.\n");
+
+ if (test_and_set_bit(QEDI_IN_SHUTDOWN, &qedi->flags))
+ return;
+
+ __qedi_remove(qedi->pdev, QEDI_MODE_SHUTDOWN);
+}
+
static void qedi_shutdown(struct pci_dev *pdev)
{
struct qedi_ctx *qedi = pci_get_drvdata(pdev);
@@ -2427,6 +2525,7 @@ static int __qedi_probe(struct pci_dev *pdev, int mode)
struct qed_probe_params qed_params;
void *task_start, *task_end;
int rc;
+ u16 retry = 10;
if (mode != QEDI_MODE_RECOVERY) {
qedi = qedi_host_alloc(pdev);
@@ -2438,6 +2537,10 @@ static int __qedi_probe(struct pci_dev *pdev, int mode)
qedi = pci_get_drvdata(pdev);
}
+retry_probe:
+ if (mode == QEDI_MODE_RECOVERY)
+ msleep(2000);
+
memset(&qed_params, 0, sizeof(qed_params));
qed_params.protocol = QED_PROTOCOL_ISCSI;
qed_params.dp_module = qedi_qed_debug;
@@ -2445,11 +2548,20 @@ static int __qedi_probe(struct pci_dev *pdev, int mode)
qed_params.is_vf = is_vf;
qedi->cdev = qedi_ops->common->probe(pdev, &qed_params);
if (!qedi->cdev) {
+ if (mode == QEDI_MODE_RECOVERY && retry) {
+ QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_INFO,
+ "Retry %d initialize hardware\n", retry);
+ retry--;
+ goto retry_probe;
+ }
+
rc = -ENODEV;
QEDI_ERR(&qedi->dbg_ctx, "Cannot initialize hardware\n");
goto free_host;
}
+ set_bit(QEDI_ERR_ATTN_CLR_EN, &qedi->qedi_err_flags);
+ set_bit(QEDI_ERR_IS_RECOVERABLE, &qedi->qedi_err_flags);
atomic_set(&qedi->link_state, QEDI_LINK_DOWN);
rc = qedi_ops->fill_dev_info(qedi->cdev, &qedi->dev_info);
@@ -2533,7 +2645,7 @@ static int __qedi_probe(struct pci_dev *pdev, int mode)
QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_DISC, "MAC address is %pM.\n",
qedi->mac);
- sprintf(host_buf, "host_%d", qedi->shost->host_no);
+ snprintf(host_buf, sizeof(host_buf), "host_%d", qedi->shost->host_no);
qedi_ops->common->set_name(qedi->cdev, host_buf);
qedi_ops->register_ops(qedi->cdev, &qedi_cb_ops, qedi);
@@ -2658,6 +2770,8 @@ static int __qedi_probe(struct pci_dev *pdev, int mode)
}
INIT_DELAYED_WORK(&qedi->recovery_work, qedi_recovery_handler);
+ INIT_DELAYED_WORK(&qedi->board_disable_work,
+ qedi_board_disable_work);
/* F/w needs 1st task context memory entry for performance */
set_bit(QEDI_RESERVE_TASK_ID, qedi->task_idx_map);
@@ -2744,12 +2858,17 @@ MODULE_DEVICE_TABLE(pci, qedi_pci_tbl);
static enum cpuhp_state qedi_cpuhp_state;
+static struct pci_error_handlers qedi_err_handler = {
+ .error_detected = qedi_io_error_detected,
+};
+
static struct pci_driver qedi_pci_driver = {
.name = QEDI_MODULE_NAME,
.id_table = qedi_pci_tbl,
.probe = qedi_probe,
.remove = qedi_remove,
.shutdown = qedi_shutdown,
+ .err_handler = &qedi_err_handler,
};
static int __init qedi_init(void)