diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-10-14 15:15:35 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-10-14 15:15:35 -0700 |
commit | 55e0500eb5c0440a3d43074edbd8db3e95851b66 (patch) | |
tree | 874b9da7a764df298441242ce79b9fd89c2910df /drivers/scsi/qedi | |
parent | 4815519ed0af833884ce9c288183bf1ae3cb9caa (diff) | |
parent | 69f4ec1edb136d2d2511d1ef96f94ef0aeecefdf (diff) | |
download | linux-next-55e0500eb5c0440a3d43074edbd8db3e95851b66.tar.gz |
Merge tag 'scsi-misc' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi
Pull SCSI updates from James Bottomley:
"The usual driver updates (ufs, qla2xxx, tcmu, ibmvfc, lpfc, smartpqi,
hisi_sas, qedi, qedf, mpt3sas) and minor bug fixes.
There are only three core changes: adding sense codes, cleaning up
noretry and adding an option for limitless retries"
* tag 'scsi-misc' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi: (226 commits)
scsi: hisi_sas: Recover PHY state according to the status before reset
scsi: hisi_sas: Filter out new PHY up events during suspend
scsi: hisi_sas: Add device link between SCSI devices and hisi_hba
scsi: hisi_sas: Add check for methods _PS0 and _PR0
scsi: hisi_sas: Add controller runtime PM support for v3 hw
scsi: hisi_sas: Switch to new framework to support suspend and resume
scsi: hisi_sas: Use hisi_hba->cq_nvecs for calling calling synchronize_irq()
scsi: qedf: Remove redundant assignment to variable 'rc'
scsi: lpfc: Remove unneeded variable 'status' in lpfc_fcp_cpu_map_store()
scsi: snic: Convert to use DEFINE_SEQ_ATTRIBUTE macro
scsi: qla4xxx: Delete unneeded variable 'status' in qla4xxx_process_ddb_changed
scsi: sun_esp: Use module_platform_driver to simplify the code
scsi: sun3x_esp: Use module_platform_driver to simplify the code
scsi: sni_53c710: Use module_platform_driver to simplify the code
scsi: qlogicpti: Use module_platform_driver to simplify the code
scsi: mac_esp: Use module_platform_driver to simplify the code
scsi: jazz_esp: Use module_platform_driver to simplify the code
scsi: mvumi: Fix error return in mvumi_io_attach()
scsi: lpfc: Drop nodelist reference on error in lpfc_gen_req()
scsi: be2iscsi: Fix a theoretical leak in beiscsi_create_eqs()
...
Diffstat (limited to 'drivers/scsi/qedi')
-rw-r--r-- | drivers/scsi/qedi/qedi.h | 6 | ||||
-rw-r--r-- | drivers/scsi/qedi/qedi_fw.c | 30 | ||||
-rw-r--r-- | drivers/scsi/qedi/qedi_iscsi.c | 7 | ||||
-rw-r--r-- | drivers/scsi/qedi/qedi_main.c | 131 |
4 files changed, 162 insertions, 12 deletions
diff --git a/drivers/scsi/qedi/qedi.h b/drivers/scsi/qedi/qedi.h index 9498279ae80d..c342defc3f52 100644 --- a/drivers/scsi/qedi/qedi.h +++ b/drivers/scsi/qedi/qedi.h @@ -274,6 +274,10 @@ struct qedi_ctx { spinlock_t ll2_lock; /* Light L2 lock */ spinlock_t hba_lock; /* per port lock */ struct task_struct *ll2_recv_thread; + unsigned long qedi_err_flags; +#define QEDI_ERR_ATTN_CLR_EN 0 +#define QEDI_ERR_IS_RECOVERABLE 2 +#define QEDI_ERR_OVERRIDE_EN 31 unsigned long flags; #define UIO_DEV_OPENED 1 #define QEDI_IOTHREAD_WAKE 2 @@ -305,6 +309,7 @@ struct qedi_ctx { u32 max_sqes; u8 num_queues; u32 max_active_conns; + s32 msix_count; struct iscsi_cid_queue cid_que; struct qedi_endpoint **ep_tbl; @@ -334,6 +339,7 @@ struct qedi_ctx { struct workqueue_struct *dpc_wq; struct delayed_work recovery_work; + struct delayed_work board_disable_work; spinlock_t task_idx_lock; /* To protect gbl context */ s32 last_tidx_alloc; diff --git a/drivers/scsi/qedi/qedi_fw.c b/drivers/scsi/qedi/qedi_fw.c index 6ed74583b1b9..440ddd2309f1 100644 --- a/drivers/scsi/qedi/qedi_fw.c +++ b/drivers/scsi/qedi/qedi_fw.c @@ -59,6 +59,7 @@ static void qedi_process_logout_resp(struct qedi_ctx *qedi, "Freeing tid=0x%x for cid=0x%x\n", cmd->task_id, qedi_conn->iscsi_conn_id); + spin_lock(&qedi_conn->list_lock); if (likely(cmd->io_cmd_in_list)) { cmd->io_cmd_in_list = false; list_del_init(&cmd->io_cmd); @@ -69,6 +70,7 @@ static void qedi_process_logout_resp(struct qedi_ctx *qedi, cmd->task_id, qedi_conn->iscsi_conn_id, &cmd->io_cmd); } + spin_unlock(&qedi_conn->list_lock); cmd->state = RESPONSE_RECEIVED; qedi_clear_task_idx(qedi, cmd->task_id); @@ -122,6 +124,7 @@ static void qedi_process_text_resp(struct qedi_ctx *qedi, "Freeing tid=0x%x for cid=0x%x\n", cmd->task_id, qedi_conn->iscsi_conn_id); + spin_lock(&qedi_conn->list_lock); if (likely(cmd->io_cmd_in_list)) { cmd->io_cmd_in_list = false; list_del_init(&cmd->io_cmd); @@ -132,6 +135,7 @@ static void qedi_process_text_resp(struct qedi_ctx *qedi, cmd->task_id, qedi_conn->iscsi_conn_id, &cmd->io_cmd); } + spin_unlock(&qedi_conn->list_lock); cmd->state = RESPONSE_RECEIVED; qedi_clear_task_idx(qedi, cmd->task_id); @@ -222,11 +226,13 @@ static void qedi_process_tmf_resp(struct qedi_ctx *qedi, tmf_hdr = (struct iscsi_tm *)qedi_cmd->task->hdr; + spin_lock(&qedi_conn->list_lock); if (likely(qedi_cmd->io_cmd_in_list)) { qedi_cmd->io_cmd_in_list = false; list_del_init(&qedi_cmd->io_cmd); qedi_conn->active_cmd_count--; } + spin_unlock(&qedi_conn->list_lock); if (((tmf_hdr->flags & ISCSI_FLAG_TM_FUNC_MASK) == ISCSI_TM_FUNC_LOGICAL_UNIT_RESET) || @@ -288,11 +294,13 @@ static void qedi_process_login_resp(struct qedi_ctx *qedi, ISCSI_LOGIN_RESPONSE_HDR_DATA_SEG_LEN_MASK; qedi_conn->gen_pdu.resp_wr_ptr = qedi_conn->gen_pdu.resp_buf + pld_len; + spin_lock(&qedi_conn->list_lock); if (likely(cmd->io_cmd_in_list)) { cmd->io_cmd_in_list = false; list_del_init(&cmd->io_cmd); qedi_conn->active_cmd_count--; } + spin_unlock(&qedi_conn->list_lock); memset(task_ctx, '\0', sizeof(*task_ctx)); @@ -816,8 +824,11 @@ static void qedi_process_cmd_cleanup_resp(struct qedi_ctx *qedi, qedi_clear_task_idx(qedi_conn->qedi, rtid); spin_lock(&qedi_conn->list_lock); - list_del_init(&dbg_cmd->io_cmd); - qedi_conn->active_cmd_count--; + if (likely(dbg_cmd->io_cmd_in_list)) { + dbg_cmd->io_cmd_in_list = false; + list_del_init(&dbg_cmd->io_cmd); + qedi_conn->active_cmd_count--; + } spin_unlock(&qedi_conn->list_lock); qedi_cmd->state = CLEANUP_RECV; wake_up_interruptible(&qedi_conn->wait_queue); @@ -1235,6 +1246,7 @@ int qedi_cleanup_all_io(struct qedi_ctx *qedi, struct qedi_conn *qedi_conn, qedi_conn->cmd_cleanup_req++; qedi_iscsi_cleanup_task(ctask, true); + cmd->io_cmd_in_list = false; list_del_init(&cmd->io_cmd); qedi_conn->active_cmd_count--; QEDI_WARN(&qedi->dbg_ctx, @@ -1255,7 +1267,8 @@ int qedi_cleanup_all_io(struct qedi_ctx *qedi, struct qedi_conn *qedi_conn, rval = wait_event_interruptible_timeout(qedi_conn->wait_queue, ((qedi_conn->cmd_cleanup_req == qedi_conn->cmd_cleanup_cmpl) || - qedi_conn->ep), + test_bit(QEDI_IN_RECOVERY, + &qedi->flags)), 5 * HZ); if (rval) { QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_SCSI_TM, @@ -1280,7 +1293,9 @@ int qedi_cleanup_all_io(struct qedi_ctx *qedi, struct qedi_conn *qedi_conn, /* Enable IOs for all other sessions except current.*/ if (!wait_event_interruptible_timeout(qedi_conn->wait_queue, (qedi_conn->cmd_cleanup_req == - qedi_conn->cmd_cleanup_cmpl), + qedi_conn->cmd_cleanup_cmpl) || + test_bit(QEDI_IN_RECOVERY, + &qedi->flags), 5 * HZ)) { iscsi_host_for_each_session(qedi->shost, qedi_mark_device_available); @@ -1446,8 +1461,11 @@ ldel_exit: spin_unlock_bh(&qedi_conn->tmf_work_lock); spin_lock(&qedi_conn->list_lock); - list_del_init(&cmd->io_cmd); - qedi_conn->active_cmd_count--; + if (likely(cmd->io_cmd_in_list)) { + cmd->io_cmd_in_list = false; + list_del_init(&cmd->io_cmd); + qedi_conn->active_cmd_count--; + } spin_unlock(&qedi_conn->list_lock); clear_bit(QEDI_CONN_FW_CLEANUP, &qedi_conn->flags); diff --git a/drivers/scsi/qedi/qedi_iscsi.c b/drivers/scsi/qedi/qedi_iscsi.c index c14ac7882afa..08c05403cd72 100644 --- a/drivers/scsi/qedi/qedi_iscsi.c +++ b/drivers/scsi/qedi/qedi_iscsi.c @@ -975,11 +975,13 @@ static void qedi_cleanup_active_cmd_list(struct qedi_conn *qedi_conn) { struct qedi_cmd *cmd, *cmd_tmp; + spin_lock(&qedi_conn->list_lock); list_for_each_entry_safe(cmd, cmd_tmp, &qedi_conn->active_cmd_list, io_cmd) { list_del_init(&cmd->io_cmd); qedi_conn->active_cmd_count--; } + spin_unlock(&qedi_conn->list_lock); } static void qedi_ep_disconnect(struct iscsi_endpoint *ep) @@ -1069,6 +1071,11 @@ static void qedi_ep_disconnect(struct iscsi_endpoint *ep) wait_delay += qedi->pf_params.iscsi_pf_params.two_msl_timer; qedi_ep->state = EP_STATE_DISCONN_START; + + if (test_bit(QEDI_IN_SHUTDOWN, &qedi->flags) || + test_bit(QEDI_IN_RECOVERY, &qedi->flags)) + goto ep_release_conn; + ret = qedi_ops->destroy_conn(qedi->cdev, qedi_ep->handle, abrt_conn); if (ret) { QEDI_WARN(&qedi->dbg_ctx, diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c index 6f038ae5efca..61fab01d2d52 100644 --- a/drivers/scsi/qedi/qedi_main.c +++ b/drivers/scsi/qedi/qedi_main.c @@ -50,6 +50,10 @@ module_param(qedi_ll2_buf_size, uint, 0644); MODULE_PARM_DESC(qedi_ll2_buf_size, "parameter to set ping packet size, default - 0x400, Jumbo packets - 0x2400."); +static uint qedi_flags_override; +module_param(qedi_flags_override, uint, 0644); +MODULE_PARM_DESC(qedi_flags_override, "Disable/Enable MFW error flags bits action."); + const struct qed_iscsi_ops *qedi_ops; static struct scsi_transport_template *qedi_scsi_transport; static struct pci_driver qedi_pci_driver; @@ -63,6 +67,8 @@ static void qedi_reset_uio_rings(struct qedi_uio_dev *udev); static void qedi_ll2_free_skbs(struct qedi_ctx *qedi); static struct nvm_iscsi_block *qedi_get_nvram_block(struct qedi_ctx *qedi); static void qedi_recovery_handler(struct work_struct *work); +static void qedi_schedule_hw_err_handler(void *dev, + enum qed_hw_err_type err_type); static int qedi_iscsi_event_cb(void *context, u8 fw_event_code, void *fw_handle) { @@ -789,8 +795,7 @@ static void qedi_ll2_free_skbs(struct qedi_ctx *qedi) spin_lock_bh(&qedi->ll2_lock); list_for_each_entry_safe(work, work_tmp, &qedi->ll2_skb_list, list) { list_del(&work->list); - if (work->skb) - kfree_skb(work->skb); + kfree_skb(work->skb); kfree(work); } spin_unlock_bh(&qedi->ll2_lock); @@ -1113,6 +1118,42 @@ exit_get_data: return; } +void qedi_schedule_hw_err_handler(void *dev, + enum qed_hw_err_type err_type) +{ + struct qedi_ctx *qedi = (struct qedi_ctx *)dev; + unsigned long override_flags = qedi_flags_override; + + if (override_flags && test_bit(QEDI_ERR_OVERRIDE_EN, &override_flags)) + qedi->qedi_err_flags = qedi_flags_override; + + QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_INFO, + "HW error handler scheduled, err=%d err_flags=0x%x\n", + err_type, qedi->qedi_err_flags); + + switch (err_type) { + case QED_HW_ERR_FAN_FAIL: + schedule_delayed_work(&qedi->board_disable_work, 0); + break; + case QED_HW_ERR_MFW_RESP_FAIL: + case QED_HW_ERR_HW_ATTN: + case QED_HW_ERR_DMAE_FAIL: + case QED_HW_ERR_RAMROD_FAIL: + case QED_HW_ERR_FW_ASSERT: + /* Prevent HW attentions from being reasserted */ + if (test_bit(QEDI_ERR_ATTN_CLR_EN, &qedi->qedi_err_flags)) + qedi_ops->common->attn_clr_enable(qedi->cdev, true); + + if (err_type == QED_HW_ERR_RAMROD_FAIL && + test_bit(QEDI_ERR_IS_RECOVERABLE, &qedi->qedi_err_flags)) + qedi_ops->common->recovery_process(qedi->cdev); + + break; + default: + break; + } +} + static void qedi_schedule_recovery_handler(void *dev) { struct qedi_ctx *qedi = dev; @@ -1127,6 +1168,15 @@ static void qedi_schedule_recovery_handler(void *dev) schedule_delayed_work(&qedi->recovery_work, 0); } +static void qedi_set_conn_recovery(struct iscsi_cls_session *cls_session) +{ + struct iscsi_session *session = cls_session->dd_data; + struct iscsi_conn *conn = session->leadconn; + struct qedi_conn *qedi_conn = conn->dd_data; + + qedi_start_conn_recovery(qedi_conn->qedi, qedi_conn); +} + static void qedi_link_update(void *dev, struct qed_link_output *link) { struct qedi_ctx *qedi = (struct qedi_ctx *)dev; @@ -1138,6 +1188,7 @@ static void qedi_link_update(void *dev, struct qed_link_output *link) QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_INFO, "Link Down event.\n"); atomic_set(&qedi->link_state, QEDI_LINK_DOWN); + iscsi_host_for_each_session(qedi->shost, qedi_set_conn_recovery); } } @@ -1145,6 +1196,7 @@ static struct qed_iscsi_cb_ops qedi_cb_ops = { { .link_update = qedi_link_update, .schedule_recovery_handler = qedi_schedule_recovery_handler, + .schedule_hw_err_handler = qedi_schedule_hw_err_handler, .get_protocol_tlv_data = qedi_get_protocol_tlv_data, .get_generic_tlv_data = qedi_get_generic_tlv_data, } @@ -1357,7 +1409,7 @@ static int qedi_request_msix_irq(struct qedi_ctx *qedi) u16 idx; cpu = cpumask_first(cpu_online_mask); - for (i = 0; i < qedi->int_info.msix_cnt; i++) { + for (i = 0; i < qedi->msix_count; i++) { idx = i * qedi->dev_info.common.num_hwfns + qedi_ops->common->get_affin_hwfn_idx(qedi->cdev); @@ -1387,7 +1439,12 @@ static int qedi_setup_int(struct qedi_ctx *qedi) { int rc = 0; - rc = qedi_ops->common->set_fp_int(qedi->cdev, num_online_cpus()); + rc = qedi_ops->common->set_fp_int(qedi->cdev, qedi->num_queues); + if (rc < 0) + goto exit_setup_int; + + qedi->msix_count = rc; + rc = qedi_ops->common->get_fp_int(qedi->cdev, &qedi->int_info); if (rc) goto exit_setup_int; @@ -2336,10 +2393,30 @@ kset_free: return -ENOMEM; } +static pci_ers_result_t qedi_io_error_detected(struct pci_dev *pdev, + pci_channel_state_t state) +{ + struct qedi_ctx *qedi = pci_get_drvdata(pdev); + + QEDI_ERR(&qedi->dbg_ctx, "%s: PCI error detected [%d]\n", + __func__, state); + + if (test_and_set_bit(QEDI_IN_RECOVERY, &qedi->flags)) { + QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_INFO, + "Recovery already in progress.\n"); + return PCI_ERS_RESULT_NONE; + } + + qedi_ops->common->recovery_process(qedi->cdev); + + return PCI_ERS_RESULT_CAN_RECOVER; +} + static void __qedi_remove(struct pci_dev *pdev, int mode) { struct qedi_ctx *qedi = pci_get_drvdata(pdev); int rval; + u16 retry = 10; if (mode == QEDI_MODE_SHUTDOWN) iscsi_host_for_each_session(qedi->shost, @@ -2368,7 +2445,13 @@ static void __qedi_remove(struct pci_dev *pdev, int mode) qedi_sync_free_irqs(qedi); if (!test_bit(QEDI_IN_OFFLINE, &qedi->flags)) { - qedi_ops->stop(qedi->cdev); + while (retry--) { + rval = qedi_ops->stop(qedi->cdev); + if (rval < 0) + msleep(1000); + else + break; + } qedi_ops->ll2->stop(qedi->cdev); } @@ -2405,6 +2488,21 @@ static void __qedi_remove(struct pci_dev *pdev, int mode) } } +static void qedi_board_disable_work(struct work_struct *work) +{ + struct qedi_ctx *qedi = + container_of(work, struct qedi_ctx, + board_disable_work.work); + + QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_INFO, + "Fan failure, Unloading firmware context.\n"); + + if (test_and_set_bit(QEDI_IN_SHUTDOWN, &qedi->flags)) + return; + + __qedi_remove(qedi->pdev, QEDI_MODE_SHUTDOWN); +} + static void qedi_shutdown(struct pci_dev *pdev) { struct qedi_ctx *qedi = pci_get_drvdata(pdev); @@ -2427,6 +2525,7 @@ static int __qedi_probe(struct pci_dev *pdev, int mode) struct qed_probe_params qed_params; void *task_start, *task_end; int rc; + u16 retry = 10; if (mode != QEDI_MODE_RECOVERY) { qedi = qedi_host_alloc(pdev); @@ -2438,6 +2537,10 @@ static int __qedi_probe(struct pci_dev *pdev, int mode) qedi = pci_get_drvdata(pdev); } +retry_probe: + if (mode == QEDI_MODE_RECOVERY) + msleep(2000); + memset(&qed_params, 0, sizeof(qed_params)); qed_params.protocol = QED_PROTOCOL_ISCSI; qed_params.dp_module = qedi_qed_debug; @@ -2445,11 +2548,20 @@ static int __qedi_probe(struct pci_dev *pdev, int mode) qed_params.is_vf = is_vf; qedi->cdev = qedi_ops->common->probe(pdev, &qed_params); if (!qedi->cdev) { + if (mode == QEDI_MODE_RECOVERY && retry) { + QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_INFO, + "Retry %d initialize hardware\n", retry); + retry--; + goto retry_probe; + } + rc = -ENODEV; QEDI_ERR(&qedi->dbg_ctx, "Cannot initialize hardware\n"); goto free_host; } + set_bit(QEDI_ERR_ATTN_CLR_EN, &qedi->qedi_err_flags); + set_bit(QEDI_ERR_IS_RECOVERABLE, &qedi->qedi_err_flags); atomic_set(&qedi->link_state, QEDI_LINK_DOWN); rc = qedi_ops->fill_dev_info(qedi->cdev, &qedi->dev_info); @@ -2533,7 +2645,7 @@ static int __qedi_probe(struct pci_dev *pdev, int mode) QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_DISC, "MAC address is %pM.\n", qedi->mac); - sprintf(host_buf, "host_%d", qedi->shost->host_no); + snprintf(host_buf, sizeof(host_buf), "host_%d", qedi->shost->host_no); qedi_ops->common->set_name(qedi->cdev, host_buf); qedi_ops->register_ops(qedi->cdev, &qedi_cb_ops, qedi); @@ -2658,6 +2770,8 @@ static int __qedi_probe(struct pci_dev *pdev, int mode) } INIT_DELAYED_WORK(&qedi->recovery_work, qedi_recovery_handler); + INIT_DELAYED_WORK(&qedi->board_disable_work, + qedi_board_disable_work); /* F/w needs 1st task context memory entry for performance */ set_bit(QEDI_RESERVE_TASK_ID, qedi->task_idx_map); @@ -2744,12 +2858,17 @@ MODULE_DEVICE_TABLE(pci, qedi_pci_tbl); static enum cpuhp_state qedi_cpuhp_state; +static struct pci_error_handlers qedi_err_handler = { + .error_detected = qedi_io_error_detected, +}; + static struct pci_driver qedi_pci_driver = { .name = QEDI_MODULE_NAME, .id_table = qedi_pci_tbl, .probe = qedi_probe, .remove = qedi_remove, .shutdown = qedi_shutdown, + .err_handler = &qedi_err_handler, }; static int __init qedi_init(void) |